membot 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/membot.md +25 -10
- package/.cursor/rules/membot.mdc +25 -10
- package/README.md +35 -4
- package/package.json +8 -5
- package/scripts/apply-patches.sh +0 -11
- package/src/cli.ts +2 -2
- package/src/commands/login-page.mustache +50 -0
- package/src/commands/login.ts +83 -0
- package/src/config/schemas.ts +17 -5
- package/src/constants.ts +13 -1
- package/src/context.ts +1 -24
- package/src/db/files.ts +21 -25
- package/src/db/migrations/003-downloader-columns.ts +58 -0
- package/src/db/migrations.ts +2 -1
- package/src/ingest/converter/index.ts +9 -0
- package/src/ingest/converter/xlsx.ts +111 -0
- package/src/ingest/downloaders/browser.ts +180 -0
- package/src/ingest/downloaders/generic-web.ts +81 -0
- package/src/ingest/downloaders/github.ts +178 -0
- package/src/ingest/downloaders/google-docs.ts +56 -0
- package/src/ingest/downloaders/google-shared.ts +86 -0
- package/src/ingest/downloaders/google-sheets.ts +58 -0
- package/src/ingest/downloaders/google-slides.ts +53 -0
- package/src/ingest/downloaders/index.ts +182 -0
- package/src/ingest/downloaders/linear.ts +291 -0
- package/src/ingest/fetcher.ts +104 -129
- package/src/ingest/ingest.ts +43 -70
- package/src/mcp/instructions.ts +4 -2
- package/src/operations/add.ts +6 -4
- package/src/operations/info.ts +4 -6
- package/src/operations/move.ts +2 -3
- package/src/operations/refresh.ts +2 -4
- package/src/operations/remove.ts +23 -2
- package/src/operations/tree.ts +1 -1
- package/src/operations/types.ts +1 -1
- package/src/refresh/runner.ts +59 -114
- package/src/types/text-modules.d.ts +5 -0
- package/patches/@evantahler%2Fmcpx@0.21.4.patch +0 -51
- package/src/commands/mcpx.ts +0 -112
- package/src/ingest/agent-fetcher.ts +0 -639
package/src/ingest/downloaders/github.ts (new file)
@@ -0,0 +1,178 @@
+import { HelpfulError } from "../../errors.ts";
+import { sha256Hex } from "../local-reader.ts";
+import type { DownloadedRemote, Downloader, DownloaderCtx } from "./index.ts";
+
+const ISSUE_OR_PR = /^\/([^/]+)\/([^/]+)\/(issues|pull)\/(\d+)(?:$|\/|#|\?)/;
+
+const API_BASE = "https://api.github.com";
+
+/**
+ * GitHub issues and PRs via the REST API. The user sets a personal
+ * access token once via `membot config set downloaders.github.api_key
+ * <PAT>` (or via the `GITHUB_TOKEN` env var, which `gh auth token`
+ * happens to populate), and we fetch the issue/PR + every comment as
+ * structured JSON, then render to markdown.
+ *
+ * Why API instead of rendering github.com HTML: the rendered page
+ * works for public, network-cooperative cases but stalls when GitHub
+ * shows interstitials (rate-limit, abuse, login challenges) and
+ * captures hundreds of KB of GitHub chrome that the embedder doesn't
+ * care about. The API gives us the exact body and comment thread in
+ * a few KB.
+ *
+ * Public repos: the `api_key` is optional — we'll send unauthenticated
+ * requests if it's blank, which works for public content but gets
+ * rate-limited at 60 req/hr. Private repos require the token.
+ */
+export const githubDownloader: Downloader = {
+  name: "github",
+  description: "GitHub issues + PRs (github.com/<owner>/<repo>/(issues|pull)/<n>) — uses the GitHub REST API.",
+  logins: [
+    {
+      kind: "api_key",
+      name: "GitHub",
+      url: "https://github.com/settings/tokens",
+      setupCommand: "membot config set downloaders.github.api_key <PAT>",
+      description: "create a fine-grained token with repo:read access (or use GITHUB_TOKEN env var)",
+    },
+  ],
+  requiresApiKey: false,
+  matches(url) {
+    return url.hostname === "github.com" && ISSUE_OR_PR.test(url.pathname);
+  },
+  async download(url, ctx): Promise<DownloadedRemote> {
+    const args = parseIssueUrl(url);
+    const owner = args.owner as string;
+    const repo = args.repo as string;
+    const number = args.number as number;
+
+    const token = (ctx.config.downloaders.github.api_key || process.env.GITHUB_TOKEN || "").trim();
+    ctx.onProgress?.("fetching issue");
+    const issue = await getJson<GithubIssue>(`/repos/${owner}/${repo}/issues/${number}`, token, url);
+    ctx.onProgress?.("fetching comments");
+    const comments = await getJson<GithubComment[]>(
+      `/repos/${owner}/${repo}/issues/${number}/comments?per_page=100`,
+      token,
+      url,
+    );
+
+    const isPullRequest = !!issue.pull_request;
+    const markdown = renderIssue(issue, comments, isPullRequest);
+    const bytes = new TextEncoder().encode(markdown);
+    return {
+      bytes,
+      sha256: sha256Hex(bytes),
+      mimeType: "text/markdown",
+      downloader: "github",
+      downloaderArgs: args,
+      sourceUrl: url.toString(),
+    };
+  },
+};
+
+interface GithubIssue {
+  number: number;
+  title: string;
+  body: string | null;
+  state: string;
+  html_url: string;
+  user: { login: string } | null;
+  assignees: Array<{ login: string }> | null;
+  labels: Array<{ name: string } | string> | null;
+  created_at: string;
+  updated_at: string;
+  closed_at: string | null;
+  pull_request?: unknown;
+}
+
+interface GithubComment {
+  body: string | null;
+  user: { login: string } | null;
+  created_at: string;
+}
+
+async function getJson<T>(path: string, token: string, url: URL): Promise<T> {
+  const headers: Record<string, string> = {
+    Accept: "application/vnd.github+json",
+    "X-GitHub-Api-Version": "2022-11-28",
+    "User-Agent": "membot",
+  };
+  if (token !== "") headers.Authorization = `Bearer ${token}`;
+
+  const response = await fetch(`${API_BASE}${path}`, { headers });
+  if (response.status === 401 || response.status === 403) {
+    throw new HelpfulError({
+      kind: "auth_error",
+      message: `GitHub API returned ${response.status} for ${url.toString()}.`,
+      hint:
+        token === ""
+          ? "Set a personal access token: create one at https://github.com/settings/tokens, then `membot config set downloaders.github.api_key <PAT>` (or set $GITHUB_TOKEN)."
+          : "The configured API key is missing repo:read access for this repo, or has expired. Re-create the token and run `membot config set downloaders.github.api_key <PAT>`.",
+    });
+  }
+  if (response.status === 404) {
+    throw new HelpfulError({
+      kind: "not_found",
+      message: `GitHub returned 404 for ${url.toString()}.`,
+      hint: "Verify the URL exists. Private repos require an API key with the right scope.",
+    });
+  }
+  if (!response.ok) {
+    throw new HelpfulError({
+      kind: "network_error",
+      message: `GitHub API returned ${response.status} ${response.statusText} for ${url.toString()}.`,
+      hint: "Retry; if the failure persists, run with --verbose for the full response.",
+    });
+  }
+  return (await response.json()) as T;
+}
+
+function parseIssueUrl(url: URL): Record<string, unknown> {
+  const match = url.pathname.match(ISSUE_OR_PR);
+  if (!match) {
+    throw new HelpfulError({
+      kind: "input_error",
+      message: `not a GitHub issue/PR URL: ${url.toString()}`,
+      hint: "Pass a URL like https://github.com/<owner>/<repo>/issues/<n> or .../pull/<n>.",
+    });
+  }
+  return { owner: match[1], repo: match[2], kind: match[3], number: Number(match[4]) };
+}
+
+function renderIssue(issue: GithubIssue, comments: GithubComment[], isPr: boolean): string {
+  const lines: string[] = [];
+  const kind = isPr ? "PR" : "Issue";
+  lines.push(`# ${kind} #${issue.number}: ${issue.title}`);
+  lines.push("");
+  lines.push(`- URL: ${issue.html_url}`);
+  lines.push(`- State: ${issue.state}${issue.closed_at ? ` (closed ${issue.closed_at})` : ""}`);
+  if (issue.user) lines.push(`- Author: @${issue.user.login}`);
+  if (issue.assignees && issue.assignees.length > 0) {
+    lines.push(`- Assignees: ${issue.assignees.map((a) => `@${a.login}`).join(", ")}`);
+  }
+  if (issue.labels && issue.labels.length > 0) {
+    const labels = issue.labels.map((l) => (typeof l === "string" ? l : l.name)).filter(Boolean);
+    if (labels.length > 0) lines.push(`- Labels: ${labels.join(", ")}`);
+  }
+  lines.push(`- Created: ${issue.created_at}`);
+  lines.push(`- Updated: ${issue.updated_at}`);
+  lines.push("");
+  if (issue.body && issue.body.trim() !== "") {
+    lines.push("## Description");
+    lines.push("");
+    lines.push(issue.body.trim());
+    lines.push("");
+  }
+  if (comments.length > 0) {
+    lines.push(`## Comments (${comments.length})`);
+    lines.push("");
+    for (const c of comments) {
+      const author = c.user ? `@${c.user.login}` : "(unknown)";
+      lines.push(`### ${author} — ${c.created_at}`);
+      lines.push("");
+      lines.push((c.body ?? "").trim());
+      lines.push("");
+    }
+  }
+  return lines.join("\n").trim();
+}
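To make the matching behavior concrete, here is a standalone sketch of the `ISSUE_OR_PR` regex above (the regex is copied verbatim; the `acme/widgets` URLs are invented for illustration):

```ts
// Sketch: the same regex as above, applied to a few hypothetical URLs.
const ISSUE_OR_PR = /^\/([^/]+)\/([^/]+)\/(issues|pull)\/(\d+)(?:$|\/|#|\?)/;

for (const href of [
  "https://github.com/acme/widgets/issues/42", // match (issue)
  "https://github.com/acme/widgets/pull/7",    // match (PR)
  "https://github.com/acme/widgets/blob/main/README.md", // no match: falls to generic-web
]) {
  const m = new URL(href).pathname.match(ISSUE_OR_PR);
  console.log(href, m ? { owner: m[1], repo: m[2], kind: m[3], number: Number(m[4]) } : null);
}
```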
package/src/ingest/downloaders/google-docs.ts (new file)
@@ -0,0 +1,56 @@
+import { HelpfulError } from "../../errors.ts";
+import { sha256Hex } from "../local-reader.ts";
+import { fetchWithBrowserCookies } from "./google-shared.ts";
+import type { DownloadedRemote, Downloader } from "./index.ts";
+
+const DOC_PATH = /^\/document\/d\/([a-zA-Z0-9_-]+)/;
+const DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
+
+/**
+ * Download a Google Doc as a `.docx` blob via the canonical export
+ * endpoint. Authentication uses cookies pulled from the persistent
+ * chromium profile (populated by `membot login`); the fetch itself
+ * is a plain Node `fetch`, not Playwright's APIRequestContext, to
+ * dodge a Playwright bug that crashes parsing Set-Cookie headers
+ * from Google's same-origin redirects.
+ */
+export const googleDocsDownloader: Downloader = {
+  name: "google-docs",
+  description: "Google Docs (docs.google.com/document/d/<id>) — exports as .docx via the user's logged-in session.",
+  logins: [
+    {
+      kind: "browser",
+      name: "Google",
+      url: "https://accounts.google.com/signin",
+      description: "covers Docs, Sheets, and Slides",
+    },
+  ],
+  matches(url) {
+    return url.hostname === "docs.google.com" && DOC_PATH.test(url.pathname);
+  },
+  async download(url, ctx): Promise<DownloadedRemote> {
+    const docId = extractDocId(url);
+    const exportUrl = `https://docs.google.com/document/d/${docId}/export?format=docx`;
+    const body = await fetchWithBrowserCookies(exportUrl, ctx, "Google Docs", url);
+    return {
+      bytes: new Uint8Array(body),
+      sha256: sha256Hex(body),
+      mimeType: DOCX_MIME,
+      downloader: "google-docs",
+      downloaderArgs: { document_id: docId },
+      sourceUrl: url.toString(),
+    };
+  },
+};
+
+function extractDocId(url: URL): string {
+  const match = url.pathname.match(DOC_PATH);
+  if (!match || !match[1]) {
+    throw new HelpfulError({
+      kind: "input_error",
+      message: `not a Google Docs URL: ${url.toString()}`,
+      hint: "Pass a URL like https://docs.google.com/document/d/<DOC_ID>/edit.",
+    });
+  }
+  return match[1];
+}
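The ID extraction plus export-URL composition is pure string work; a minimal sketch using the same `DOC_PATH` pattern (the document ID here is invented):

```ts
const DOC_PATH = /^\/document\/d\/([a-zA-Z0-9_-]+)/; // same pattern as above

const url = new URL("https://docs.google.com/document/d/1AbC_dEf-23/edit#heading=h.x");
const docId = url.pathname.match(DOC_PATH)?.[1]; // "1AbC_dEf-23"
console.log(`https://docs.google.com/document/d/${docId}/export?format=docx`);
```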
package/src/ingest/downloaders/google-shared.ts (new file)
@@ -0,0 +1,86 @@
+import { HelpfulError } from "../../errors.ts";
+import type { DownloaderCtx } from "./index.ts";
+
+const USER_AGENT =
+  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36";
+
+/**
+ * Fetch a Google export URL using cookies from the persistent
+ * chromium profile. Uses Node's built-in `fetch` (not Playwright's
+ * APIRequestContext) because Playwright crashes when parsing
+ * Set-Cookie headers on same-origin Google redirects (its
+ * `_parseSetCookieHeader` calls `new URL(responseUrl)` with a
+ * relative path and throws `ERR_INVALID_URL`).
+ *
+ * Same redirect-handling rules the Playwright path used to apply:
+ * follow same-origin internal redirects (Google may bounce the
+ * download via `/exportInternal` or similar) but bail with a clean
+ * `auth_error` if Google sends us to `accounts.google.com/ServiceLogin`
+ * because the user isn't signed in.
+ */
+export async function fetchWithBrowserCookies(
+  exportUrl: string,
+  ctx: DownloaderCtx,
+  serviceName: string,
+  sourceUrl: URL,
+): Promise<Buffer> {
+  ctx.onProgress?.(`downloading from ${serviceName.toLowerCase()}`);
+  const cookieHeader = await ctx.pool.cookieHeader(exportUrl);
+
+  let currentUrl = exportUrl;
+  for (let hop = 0; hop < 5; hop++) {
+    const response = await fetch(currentUrl, {
+      headers: {
+        Cookie: cookieHeader,
+        "User-Agent": USER_AGENT,
+        Accept: "*/*",
+      },
+      redirect: "manual",
+    });
+
+    if (response.status >= 200 && response.status < 300) {
+      return Buffer.from(await response.arrayBuffer());
+    }
+
+    if (response.status >= 300 && response.status < 400) {
+      const location = response.headers.get("location");
+      if (!location) {
+        throw new HelpfulError({
+          kind: "network_error",
+          message: `${serviceName} returned ${response.status} for ${sourceUrl.toString()} with no Location header.`,
+          hint: "Open the URL in your browser to verify it exists and is shared with you.",
+        });
+      }
+      const next = new URL(location, currentUrl);
+      if (next.hostname === "accounts.google.com" || /\/ServiceLogin/i.test(next.pathname)) {
+        throw new HelpfulError({
+          kind: "auth_error",
+          message: `${serviceName} redirected ${sourceUrl.toString()} to a Google login page.`,
+          hint: "Run `membot login` and sign into Google in the browser that opens, then re-run.",
+        });
+      }
+      currentUrl = next.toString();
+      continue;
+    }
+
+    if (response.status === 401 || response.status === 403) {
+      throw new HelpfulError({
+        kind: "auth_error",
+        message: `${serviceName} returned ${response.status} for ${sourceUrl.toString()}.`,
+        hint: "Run `membot login` and sign into Google in the browser that opens, then re-run.",
+      });
+    }
+
+    throw new HelpfulError({
+      kind: "network_error",
+      message: `${serviceName} returned ${response.status} ${response.statusText} for ${sourceUrl.toString()}.`,
+      hint: "Open the URL in your browser to verify it's accessible to your account.",
+    });
+  }
+
+  throw new HelpfulError({
+    kind: "network_error",
+    message: `${serviceName} bounced through too many redirects for ${sourceUrl.toString()}.`,
+    hint: "Re-run the command; if the failure persists, open the URL in your browser to investigate.",
+  });
+}
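The load-bearing detail in `fetchWithBrowserCookies` is `redirect: "manual"` combined with `new URL(location, currentUrl)`: it lets the loop resolve relative `Location` headers and inspect where each hop points before following it. A stripped-down sketch of just that loop (no cookies, no `HelpfulError` taxonomy):

```ts
// Minimal sketch: hand-rolled redirect following with a hop budget.
// Relative Location headers resolve against the current URL, and a
// bounce to accounts.google.com surfaces as a login failure.
async function fetchFollowingRedirects(url: string): Promise<Response> {
  let current = url;
  for (let hop = 0; hop < 5; hop++) {
    const res = await fetch(current, { redirect: "manual" });
    if (res.status < 300 || res.status >= 400) return res; // not a redirect
    const location = res.headers.get("location");
    if (!location) throw new Error(`redirect without Location from ${current}`);
    const next = new URL(location, current); // handles relative redirects
    if (next.hostname === "accounts.google.com") throw new Error("login required");
    current = next.toString();
  }
  throw new Error("too many redirects");
}
```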
package/src/ingest/downloaders/google-sheets.ts (new file)
@@ -0,0 +1,58 @@
+import { HelpfulError } from "../../errors.ts";
+import { sha256Hex } from "../local-reader.ts";
+import { fetchWithBrowserCookies } from "./google-shared.ts";
+import type { DownloadedRemote, Downloader } from "./index.ts";
+
+const SHEET_PATH = /^\/spreadsheets\/d\/([a-zA-Z0-9_-]+)/;
+
+const XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
+
+/**
+ * Download a Google Sheet as `.xlsx` (the workbook's native format)
+ * — the export includes **every tab** in a single file. The bytes
+ * flow through `convertXlsx`, which renders each tab as a markdown
+ * `## <tab name>` section with a real GitHub-flavored pipe table.
+ * Cleaner than the PDF route (preserves cell structure, no layout
+ * truncation) and `format=html` is no longer supported by Google.
+ */
+export const googleSheetsDownloader: Downloader = {
+  name: "google-sheets",
+  description:
+    "Google Sheets (docs.google.com/spreadsheets/d/<id>) — exports every tab as .xlsx, rendered to markdown tables locally.",
+  logins: [
+    {
+      kind: "browser",
+      name: "Google",
+      url: "https://accounts.google.com/signin",
+      description: "covers Docs, Sheets, and Slides",
+    },
+  ],
+  matches(url) {
+    return url.hostname === "docs.google.com" && SHEET_PATH.test(url.pathname);
+  },
+  async download(url, ctx): Promise<DownloadedRemote> {
+    const sheetId = extractSheetId(url);
+    const exportUrl = `https://docs.google.com/spreadsheets/d/${sheetId}/export?format=xlsx`;
+    const body = await fetchWithBrowserCookies(exportUrl, ctx, "Google Sheets", url);
+    return {
+      bytes: new Uint8Array(body),
+      sha256: sha256Hex(body),
+      mimeType: XLSX_MIME,
+      downloader: "google-sheets",
+      downloaderArgs: { sheet_id: sheetId },
+      sourceUrl: url.toString(),
+    };
+  },
+};
+
+function extractSheetId(url: URL): string {
+  const match = url.pathname.match(SHEET_PATH);
+  if (!match || !match[1]) {
+    throw new HelpfulError({
+      kind: "input_error",
+      message: `not a Google Sheets URL: ${url.toString()}`,
+      hint: "Pass a URL like https://docs.google.com/spreadsheets/d/<SHEET_ID>/edit.",
+    });
+  }
+  return match[1];
+}
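`convertXlsx` itself lives in `package/src/ingest/converter/xlsx.ts` (a separate hunk in this release, not shown here). To make the described output concrete, here is a hypothetical mini-renderer for a single tab; it is not the real converter, just the `## <tab name>` heading plus GFM pipe-table shape the comment above describes:

```ts
// Hypothetical illustration only: render one tab's cells in the
// `## <tab name>` + pipe-table shape described above.
function renderTab(name: string, rows: string[][]): string {
  const [header = [], ...body] = rows;
  const row = (cells: string[]) => `| ${cells.join(" | ")} |`;
  return [`## ${name}`, "", row(header), row(header.map(() => "---")), ...body.map(row)].join("\n");
}

console.log(renderTab("Q3", [["Region", "Revenue"], ["EMEA", "1200"], ["APAC", "950"]]));
// ## Q3
//
// | Region | Revenue |
// | --- | --- |
// | EMEA | 1200 |
// | APAC | 950 |
```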
package/src/ingest/downloaders/google-slides.ts (new file)
@@ -0,0 +1,53 @@
+import { HelpfulError } from "../../errors.ts";
+import { sha256Hex } from "../local-reader.ts";
+import { fetchWithBrowserCookies } from "./google-shared.ts";
+import type { DownloadedRemote, Downloader } from "./index.ts";
+
+const SLIDE_PATH = /^\/presentation\/d\/([a-zA-Z0-9_-]+)/;
+
+/**
+ * Download a Google Slides deck as a PDF via the canonical export
+ * endpoint. PDF preserves layout and text-on-slides faithfully; the
+ * existing `convertPdf` pipeline (unpdf) extracts the speaker text +
+ * bullets without losing slide ordering.
+ */
+export const googleSlidesDownloader: Downloader = {
+  name: "google-slides",
+  description: "Google Slides (docs.google.com/presentation/d/<id>) — exports as PDF for layout-faithful conversion.",
+  logins: [
+    {
+      kind: "browser",
+      name: "Google",
+      url: "https://accounts.google.com/signin",
+      description: "covers Docs, Sheets, and Slides",
+    },
+  ],
+  matches(url) {
+    return url.hostname === "docs.google.com" && SLIDE_PATH.test(url.pathname);
+  },
+  async download(url, ctx): Promise<DownloadedRemote> {
+    const slidesId = extractSlidesId(url);
+    const exportUrl = `https://docs.google.com/presentation/d/${slidesId}/export/pdf`;
+    const body = await fetchWithBrowserCookies(exportUrl, ctx, "Google Slides", url);
+    return {
+      bytes: new Uint8Array(body),
+      sha256: sha256Hex(body),
+      mimeType: "application/pdf",
+      downloader: "google-slides",
+      downloaderArgs: { slides_id: slidesId },
+      sourceUrl: url.toString(),
+    };
+  },
+};
+
+function extractSlidesId(url: URL): string {
+  const match = url.pathname.match(SLIDE_PATH);
+  if (!match || !match[1]) {
+    throw new HelpfulError({
+      kind: "input_error",
+      message: `not a Google Slides URL: ${url.toString()}`,
+      hint: "Pass a URL like https://docs.google.com/presentation/d/<SLIDES_ID>/edit.",
+    });
+  }
+  return match[1];
+}
package/src/ingest/downloaders/index.ts (new file)
@@ -0,0 +1,182 @@
+import type { MembotConfig } from "../../config/schemas.ts";
+import type { logger as Logger } from "../../output/logger.ts";
+import type { BrowserPool } from "./browser.ts";
+import { genericWebDownloader } from "./generic-web.ts";
+import { githubDownloader } from "./github.ts";
+import { googleDocsDownloader } from "./google-docs.ts";
+import { googleSheetsDownloader } from "./google-sheets.ts";
+import { googleSlidesDownloader } from "./google-slides.ts";
+import { linearDownloader } from "./linear.ts";
+
+/**
+ * The shape every URL fetch produces — drop-in replacement for the
+ * old `FetchedRemote` shape. `downloader` + `downloaderArgs` get
+ * persisted on the row so refresh replays the same downloader against
+ * the same URL deterministically (no LLM, no agent loop).
+ */
+export interface DownloadedRemote {
+  bytes: Uint8Array;
+  sha256: string;
+  mimeType: string;
+  downloader: string;
+  downloaderArgs: Record<string, unknown>;
+  sourceUrl: string;
+}
+
+export interface DownloaderCtx {
+  pool: BrowserPool;
+  logger: typeof Logger;
+  config: MembotConfig;
+  /**
+   * Optional sublabel hook for the host's progress spinner. Long-running
+   * downloaders (multi-query GraphQL, paginated REST fetches, headless
+   * browser navigation) can call this with short status strings —
+   * "fetching", "rendering", "parsing 3/4 pages" — and the CLI will
+   * surface them under the per-entry progress bar. No-op when the host
+   * doesn't supply one (e.g. MCP server, JSON-mode CLI).
+   */
+  onProgress?: (sublabel: string) => void;
+}
+
+/**
+ * One tactic for fetching a URL. Specific downloaders (Google,
+ * GitHub, Linear) match URLs by host/pattern and hit the canonical
+ * export endpoint; the generic-web downloader is the registry's
+ * always-matching catch-all (HEADs the URL, prints to PDF if HTML,
+ * else streams the raw bytes through). Adding a 6th service is one
+ * file — implement `Downloader`, register it here.
+ *
+ * If a downloader requires a logged-in browser session, it declares
+ * one or more `LoginEntry` objects; the `membot login` page collects
+ * those across every downloader, dedupes by URL, and renders one
+ * button per service.
+ */
+export interface Downloader {
+  name: string;
+  description: string;
+  matches(url: URL): boolean;
+  download(url: URL, ctx: DownloaderCtx): Promise<DownloadedRemote>;
+  logins?: LoginEntry[];
+  /**
+   * Force the BrowserPool into headed mode for this downloader's
+   * fetches. Used for SPAs that detect headless Chromium and refuse
+   * to hydrate; we don't currently use it (services that needed it
+   * have moved to the API-key flow), but the hook remains for
+   * future cookie-based downloaders.
+   */
+  requireHeaded?: boolean;
+  /**
+   * The downloader authenticates via a config-stored API key, not
+   * browser cookies. The fetcher uses this to skip the auto-login
+   * browser prompt on `auth_error` (opening a browser doesn't help
+   * when the missing credential is in the config file).
+   */
+  requiresApiKey?: boolean;
+}
+
+/**
+ * A service the user might need to set up before fetches against it
+ * succeed. Two flavors:
+ * - `kind: "browser"` — the user clicks a link in the `membot login`
+ *   browser, signs in, and closes the window. Cookies + IndexedDB
+ *   land in the persistent profile and downloaders use them
+ *   automatically.
+ * - `kind: "api_key"` — the user visits the service's API-key page,
+ *   copies the key, and runs the displayed `setupCommand`. The key
+ *   lives in `~/.membot/config.json` and downloaders read it from
+ *   `ctx.config`.
+ *
+ * Multiple downloaders can declare the same `LoginEntry` (e.g. all
+ * three Google downloaders share Google sign-in); the login page
+ * dedupes by `(kind, url)`.
+ */
+export type LoginEntry = BrowserLoginEntry | ApiKeyLoginEntry;
+
+export interface BrowserLoginEntry {
+  kind: "browser";
+  /** Display name (e.g. "Google"). */
+  name: string;
+  /** Login URL the button opens. */
+  url: string;
+  /** Optional one-liner shown next to the button. */
+  description?: string;
+}
+
+export interface ApiKeyLoginEntry {
+  kind: "api_key";
+  /** Display name (e.g. "Linear"). */
+  name: string;
+  /** Settings page where the user creates the key. */
+  url: string;
+  /** Shell command the user copies — e.g. `membot config set linear.api_key <KEY>`. */
+  setupCommand: string;
+  /** Optional one-liner shown next to the link. */
+  description?: string;
+}
+
+const REGISTRY: Downloader[] = [
+  googleDocsDownloader,
+  googleSheetsDownloader,
+  googleSlidesDownloader,
+  githubDownloader,
+  linearDownloader,
+  genericWebDownloader,
+];
+
+/**
+ * Find the first downloader that matches `url`. Returns `null` only
+ * if `url` doesn't parse — in normal use the generic-web downloader
+ * matches everything else, so callers can treat `findDownloader` as
+ * total over valid URLs.
+ */
+export function findDownloader(url: string | URL): Downloader | null {
+  let parsed: URL;
+  try {
+    parsed = typeof url === "string" ? new URL(url) : url;
+  } catch {
+    return null;
+  }
+  for (const d of REGISTRY) {
+    if (d.matches(parsed)) return d;
+  }
+  return null;
+}
+
+/** Lookup by name (used by refresh to replay a persisted downloader). */
+export function findDownloaderByName(name: string): Downloader | null {
+  return REGISTRY.find((d) => d.name === name) ?? null;
+}
+
+/** Read-only view of every registered downloader. */
+export function listDownloaders(): readonly Downloader[] {
+  return REGISTRY;
+}
+
+/**
+ * Collect every `LoginEntry` declared by a downloader, deduped by URL
+ * within each kind. Used by `membot login` to render one button per
+ * service (browser-login) and one set of instructions per service
+ * (api-key) even when multiple downloaders share the same setup
+ * (e.g. Google Docs / Sheets / Slides all share Google sign-in).
+ */
+export function collectLoginEntries(): { browser: BrowserLoginEntry[]; apiKey: ApiKeyLoginEntry[] } {
+  const browser = new Map<string, BrowserLoginEntry>();
+  const apiKey = new Map<string, ApiKeyLoginEntry>();
+  for (const d of REGISTRY) {
+    if (!d.logins) continue;
+    for (const entry of d.logins) {
+      if (entry.kind === "browser") {
+        if (!browser.has(entry.url)) browser.set(entry.url, entry);
+      } else {
+        if (!apiKey.has(entry.url)) apiKey.set(entry.url, entry);
+      }
+    }
+  }
+  return { browser: [...browser.values()], apiKey: [...apiKey.values()] };
+}
+
+/**
+ * Compute a stable sha256 hex digest of the bytes. Re-exposed here
+ * because every downloader uses it.
+ */
+export { sha256Hex } from "../local-reader.ts";
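Dispatch over the registry is first-match-wins, so order matters: the Google and GitHub downloaders sit ahead of `genericWebDownloader`, which matches everything. A usage sketch of the exports above (URLs invented; the generic downloader's `name` and the exact api-key entries are assumptions, since those files' hunks aren't reproduced here):

```ts
import { collectLoginEntries, findDownloader, findDownloaderByName } from "./index.ts";

// First match wins; generic-web is registered last, so it catches the rest.
console.log(findDownloader("https://docs.google.com/document/d/abc/edit")?.name); // "google-docs"
console.log(findDownloader("https://example.com/whitepaper.pdf")?.name);          // presumably "generic-web"
console.log(findDownloader("not a url"));                                         // null

// Refresh replays the persisted downloader by name instead of re-matching.
console.log(findDownloaderByName("github")?.name); // "github"

// `membot login` renders one button/instruction block per deduped service.
const { browser, apiKey } = collectLoginEntries();
console.log(browser.map((b) => b.name)); // e.g. ["Google"] (Docs/Sheets/Slides share one entry)
console.log(apiKey.map((a) => a.name));  // e.g. ["GitHub", ...]
```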