membot 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/.claude/skills/membot.md +25 -10
  2. package/.cursor/rules/membot.mdc +25 -10
  3. package/README.md +35 -4
  4. package/package.json +8 -5
  5. package/scripts/apply-patches.sh +0 -11
  6. package/src/cli.ts +2 -2
  7. package/src/commands/login-page.mustache +50 -0
  8. package/src/commands/login.ts +83 -0
  9. package/src/config/schemas.ts +17 -5
  10. package/src/constants.ts +13 -1
  11. package/src/context.ts +1 -24
  12. package/src/db/files.ts +21 -25
  13. package/src/db/migrations/003-downloader-columns.ts +58 -0
  14. package/src/db/migrations.ts +2 -1
  15. package/src/ingest/converter/index.ts +9 -0
  16. package/src/ingest/converter/xlsx.ts +111 -0
  17. package/src/ingest/downloaders/browser.ts +180 -0
  18. package/src/ingest/downloaders/generic-web.ts +81 -0
  19. package/src/ingest/downloaders/github.ts +178 -0
  20. package/src/ingest/downloaders/google-docs.ts +56 -0
  21. package/src/ingest/downloaders/google-shared.ts +86 -0
  22. package/src/ingest/downloaders/google-sheets.ts +58 -0
  23. package/src/ingest/downloaders/google-slides.ts +53 -0
  24. package/src/ingest/downloaders/index.ts +182 -0
  25. package/src/ingest/downloaders/linear.ts +291 -0
  26. package/src/ingest/fetcher.ts +104 -129
  27. package/src/ingest/ingest.ts +43 -70
  28. package/src/mcp/instructions.ts +4 -2
  29. package/src/operations/add.ts +6 -4
  30. package/src/operations/info.ts +4 -6
  31. package/src/operations/move.ts +2 -3
  32. package/src/operations/refresh.ts +2 -4
  33. package/src/operations/remove.ts +23 -2
  34. package/src/operations/tree.ts +1 -1
  35. package/src/operations/types.ts +1 -1
  36. package/src/refresh/runner.ts +59 -114
  37. package/src/types/text-modules.d.ts +5 -0
  38. package/patches/@evantahler%2Fmcpx@0.21.4.patch +0 -51
  39. package/src/commands/mcpx.ts +0 -112
  40. package/src/ingest/agent-fetcher.ts +0 -639
@@ -0,0 +1,178 @@
1
+ import { HelpfulError } from "../../errors.ts";
2
+ import { sha256Hex } from "../local-reader.ts";
3
+ import type { DownloadedRemote, Downloader, DownloaderCtx } from "./index.ts";
4
+
5
+ const ISSUE_OR_PR = /^\/([^/]+)\/([^/]+)\/(issues|pull)\/(\d+)(?:$|\/|#|\?)/;
6
+
7
+ const API_BASE = "https://api.github.com";
8
+
9
/**
 * GitHub issues and PRs via the REST API. The user sets a personal
 * access token once via `membot config set downloaders.github.api_key
 * <PAT>` (or via the `GITHUB_TOKEN` env var, which `gh auth token`
 * happens to populate), and we fetch the issue/PR + every comment as
 * structured JSON, then render to markdown.
 *
 * Why API instead of rendering github.com HTML: the rendered page
 * works for public, network-cooperative cases but stalls when GitHub
 * shows interstitials (rate-limit, abuse, login challenges) and
 * captures hundreds of KB of GitHub chrome that the embedder doesn't
 * care about. The API gives us the exact body and comment thread in
 * a few KB.
 *
 * Public repos: the `api_key` is optional — we'll send unauthenticated
 * requests if it's blank, which works for public content but gets
 * rate-limited at 60 req/hr. Private repos require the token.
 *
 * Comments are fetched page by page (100 per request) until a short
 * page is returned, so long threads are captured in full rather than
 * being cut off after the first 100 comments.
 */
export const githubDownloader: Downloader = {
  name: "github",
  description: "GitHub issues + PRs (github.com/<owner>/<repo>/(issues|pull)/<n>) — uses the GitHub REST API.",
  logins: [
    {
      kind: "api_key",
      name: "GitHub",
      url: "https://github.com/settings/tokens",
      setupCommand: "membot config set downloaders.github.api_key <PAT>",
      description: "create a fine-grained token with repo:read access (or use GITHUB_TOKEN env var)",
    },
  ],
  requiresApiKey: false,
  matches(url) {
    return url.hostname === "github.com" && ISSUE_OR_PR.test(url.pathname);
  },
  async download(url, ctx): Promise<DownloadedRemote> {
    const args = parseIssueUrl(url);
    const owner = args.owner as string;
    const repo = args.repo as string;
    const number = args.number as number;

    // Config value wins over the environment; blank means "unauthenticated".
    const token = (ctx.config.downloaders.github.api_key || process.env.GITHUB_TOKEN || "").trim();
    ctx.onProgress?.("fetching issue");
    const issue = await getJson<GithubIssue>(`/repos/${owner}/${repo}/issues/${number}`, token, url);

    // Walk every page of the comment list. A page shorter than the
    // requested size (including an empty one) marks the end.
    const commentsPerPage = 100;
    const comments: GithubComment[] = [];
    for (let page = 1; ; page++) {
      ctx.onProgress?.(page === 1 ? "fetching comments" : `fetching comments (page ${page})`);
      const batch = await getJson<GithubComment[]>(
        `/repos/${owner}/${repo}/issues/${number}/comments?per_page=${commentsPerPage}&page=${page}`,
        token,
        url,
      );
      // Defensive: a non-array payload would otherwise loop forever.
      if (!Array.isArray(batch)) break;
      comments.push(...batch);
      if (batch.length < commentsPerPage) break;
    }

    // The issues endpoint serves PRs too; the `pull_request` key tells them apart.
    const isPullRequest = !!issue.pull_request;
    const markdown = renderIssue(issue, comments, isPullRequest);
    const bytes = new TextEncoder().encode(markdown);
    return {
      bytes,
      sha256: sha256Hex(bytes),
      mimeType: "text/markdown",
      downloader: "github",
      downloaderArgs: args,
      sourceUrl: url.toString(),
    };
  },
};
72
+
73
/**
 * The subset of the GitHub REST "issue" payload that this module reads.
 * Fields the API may send as `null` are typed as nullable here.
 */
interface GithubIssue {
  number: number;
  title: string;
  body: string | null;
  state: string;
  html_url: string;
  user: { login: string } | null;
  assignees: { login: string }[] | null;
  /** Labels may arrive as bare strings or as objects carrying a `name`. */
  labels: ({ name: string } | string)[] | null;
  created_at: string;
  updated_at: string;
  closed_at: string | null;
  /** Only checked for presence, to tell a PR apart from a plain issue. */
  pull_request?: unknown;
}
87
+
88
/** The subset of a GitHub issue-comment payload that gets rendered. */
interface GithubComment {
  body: string | null;
  /** `null` is rendered as "(unknown)". */
  user: { login: string } | null;
  created_at: string;
}
93
+
94
/**
 * GET `path` from the GitHub REST API and return the parsed JSON body.
 *
 * @param path  API path starting with `/`; appended to `API_BASE`.
 * @param token Bearer token. An empty string sends the request unauthenticated.
 * @param url   The user-facing URL being downloaded; used only in error messages.
 * @returns The response body, cast (not validated) to `T`.
 * @throws HelpfulError `auth_error` on 401/403, `not_found` on 404, and
 *   `network_error` on any other non-2xx status.
 */
async function getJson<T>(path: string, token: string, url: URL): Promise<T> {
  const headers: Record<string, string> = {
    Accept: "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28",
    "User-Agent": "membot",
  };
  if (token !== "") headers.Authorization = `Bearer ${token}`;

  const response = await fetch(`${API_BASE}${path}`, { headers });
  if (response.status === 401 || response.status === 403) {
    // GitHub also answers 403 when a rate limit is hit. In that case the
    // token is fine, so telling the user to re-create it would mislead.
    const rateLimited =
      response.status === 403 &&
      (response.headers.get("x-ratelimit-remaining") === "0" || response.headers.has("retry-after"));
    let hint: string;
    if (token === "") {
      hint =
        "Set a personal access token: create one at https://github.com/settings/tokens, then `membot config set downloaders.github.api_key <PAT>` (or set $GITHUB_TOKEN).";
    } else if (rateLimited) {
      hint = "The configured API key has hit GitHub's API rate limit. Wait for the limit to reset, then re-run.";
    } else {
      hint =
        "The configured API key is missing repo:read access for this repo, or has expired. Re-create the token and run `membot config set downloaders.github.api_key <PAT>`.";
    }
    throw new HelpfulError({
      kind: "auth_error",
      message: `GitHub API returned ${response.status} for ${url.toString()}.`,
      hint,
    });
  }
  if (response.status === 404) {
    throw new HelpfulError({
      kind: "not_found",
      message: `GitHub returned 404 for ${url.toString()}.`,
      hint: "Verify the URL exists. Private repos require an API key with the right scope.",
    });
  }
  if (!response.ok) {
    throw new HelpfulError({
      kind: "network_error",
      message: `GitHub API returned ${response.status} ${response.statusText} for ${url.toString()}.`,
      hint: "Retry; if the failure persists, run with --verbose for the full response.",
    });
  }
  return (await response.json()) as T;
}
129
+
130
/**
 * Split a GitHub issue/PR URL into its parts.
 *
 * @param url URL whose pathname is matched against `ISSUE_OR_PR`.
 * @returns `{ owner, repo, kind, number }`, where `kind` is the path
 *   segment (`"issues"` or `"pull"`) and `number` is numeric.
 * @throws HelpfulError `input_error` when the pathname does not match.
 */
function parseIssueUrl(url: URL): Record<string, unknown> {
  const parts = ISSUE_OR_PR.exec(url.pathname);
  if (parts === null) {
    throw new HelpfulError({
      kind: "input_error",
      message: `not a GitHub issue/PR URL: ${url.toString()}`,
      hint: "Pass a URL like https://github.com/<owner>/<repo>/issues/<n> or .../pull/<n>.",
    });
  }
  const [, owner, repo, kind, issueNumber] = parts;
  return { owner, repo, kind, number: Number(issueNumber) };
}
141
+
142
/**
 * Render an issue or PR and its comment thread as a markdown document.
 *
 * Layout: an H1 title, a bullet list of metadata, an optional
 * "Description" section (omitted when the body is empty or whitespace),
 * and an optional "Comments" section with one H3 per comment.
 *
 * @param issue    The issue/PR payload.
 * @param comments Comments, rendered in the order given.
 * @param isPr     Selects the "PR" vs "Issue" label in the title.
 * @returns The markdown text, trimmed of leading/trailing whitespace.
 */
function renderIssue(issue: GithubIssue, comments: GithubComment[], isPr: boolean): string {
  const closedSuffix = issue.closed_at ? ` (closed ${issue.closed_at})` : "";
  const out: string[] = [
    `# ${isPr ? "PR" : "Issue"} #${issue.number}: ${issue.title}`,
    "",
    `- URL: ${issue.html_url}`,
    `- State: ${issue.state}${closedSuffix}`,
  ];

  if (issue.user) out.push(`- Author: @${issue.user.login}`);

  const assignees = issue.assignees ?? [];
  if (assignees.length > 0) {
    out.push(`- Assignees: ${assignees.map((person) => `@${person.login}`).join(", ")}`);
  }

  // Labels arrive either as plain strings or as `{ name }` objects; drop empty names.
  const labelNames = (issue.labels ?? [])
    .map((label) => (typeof label === "string" ? label : label.name))
    .filter(Boolean);
  if (labelNames.length > 0) out.push(`- Labels: ${labelNames.join(", ")}`);

  out.push(`- Created: ${issue.created_at}`, `- Updated: ${issue.updated_at}`, "");

  const description = issue.body?.trim() ?? "";
  if (description !== "") {
    out.push("## Description", "", description, "");
  }

  if (comments.length > 0) {
    out.push(`## Comments (${comments.length})`, "");
    for (const comment of comments) {
      const who = comment.user ? `@${comment.user.login}` : "(unknown)";
      out.push(`### ${who} — ${comment.created_at}`, "", (comment.body ?? "").trim(), "");
    }
  }

  return out.join("\n").trim();
}
@@ -0,0 +1,56 @@
1
+ import { HelpfulError } from "../../errors.ts";
2
+ import { sha256Hex } from "../local-reader.ts";
3
+ import { fetchWithBrowserCookies } from "./google-shared.ts";
4
+ import type { DownloadedRemote, Downloader } from "./index.ts";
5
+
6
+ const DOC_PATH = /^\/document\/d\/([a-zA-Z0-9_-]+)/;
7
+ const DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
8
+
9
/**
 * Downloader for Google Docs. The document is pulled as a `.docx`
 * from the `/export?format=docx` endpoint, with the request made by
 * `fetchWithBrowserCookies` from `google-shared.ts`. It declares a
 * single browser login entry ("Google") shared with the Sheets and
 * Slides downloaders.
 */
export const googleDocsDownloader: Downloader = {
  name: "google-docs",
  description: "Google Docs (docs.google.com/document/d/<id>) — exports as .docx via the user's logged-in session.",
  logins: [
    {
      kind: "browser",
      name: "Google",
      url: "https://accounts.google.com/signin",
      description: "covers Docs, Sheets, and Slides",
    },
  ],
  matches(url) {
    if (url.hostname !== "docs.google.com") return false;
    return DOC_PATH.test(url.pathname);
  },
  async download(url, ctx): Promise<DownloadedRemote> {
    const documentId = extractDocId(url);
    const payload = await fetchWithBrowserCookies(
      `https://docs.google.com/document/d/${documentId}/export?format=docx`,
      ctx,
      "Google Docs",
      url,
    );
    const bytes = new Uint8Array(payload);
    const sha256 = sha256Hex(payload);
    return {
      bytes,
      sha256,
      mimeType: DOCX_MIME,
      downloader: "google-docs",
      downloaderArgs: { document_id: documentId },
      sourceUrl: url.toString(),
    };
  },
};
45
+
46
/**
 * Pull the document id out of a Google Docs URL path.
 *
 * @throws HelpfulError `input_error` when the path has no `/document/d/<id>` prefix.
 */
function extractDocId(url: URL): string {
  const documentId = DOC_PATH.exec(url.pathname)?.[1];
  if (!documentId) {
    throw new HelpfulError({
      kind: "input_error",
      message: `not a Google Docs URL: ${url.toString()}`,
      hint: "Pass a URL like https://docs.google.com/document/d/<DOC_ID>/edit.",
    });
  }
  return documentId;
}
@@ -0,0 +1,86 @@
1
+ import { HelpfulError } from "../../errors.ts";
2
+ import type { DownloaderCtx } from "./index.ts";
3
+
4
+ const USER_AGENT =
5
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36";
6
+
7
/**
 * Fetch a Google export URL using cookies from the persistent
 * chromium profile. Uses Node's built-in `fetch` (not Playwright's
 * APIRequestContext) because Playwright crashes when parsing
 * Set-Cookie headers on same-origin Google redirects (its
 * `_parseSetCookieHeader` calls `new URL(responseUrl)` with a
 * relative path and throws `ERR_INVALID_URL`).
 *
 * Redirects are followed manually, for at most 5 requests. Google may
 * bounce the download via `/exportInternal` or similar. A redirect to
 * `accounts.google.com` or to a `/ServiceLogin` path is reported as a
 * clean `auth_error`, because it means the user isn't signed in.
 *
 * Cookies are resolved per origin: when a redirect leaves the origin
 * of the previous request, the cookie header is looked up again for
 * the new URL, so cookies selected for one host are not replayed to
 * another.
 *
 * @param exportUrl   Absolute export endpoint to request first.
 * @param ctx         Supplies `pool.cookieHeader` and the optional `onProgress` hook.
 * @param serviceName Human-readable service name used in progress and error text.
 * @param sourceUrl   The URL the user asked for; used only in error messages.
 * @returns The response body of the first 2xx response.
 * @throws HelpfulError `auth_error` on a login redirect or 401/403;
 *   `network_error` on a redirect without `Location`, any other
 *   non-2xx status, or too many redirects.
 */
export async function fetchWithBrowserCookies(
  exportUrl: string,
  ctx: DownloaderCtx,
  serviceName: string,
  sourceUrl: URL,
): Promise<Buffer> {
  ctx.onProgress?.(`downloading from ${serviceName.toLowerCase()}`);

  let currentUrl = exportUrl;
  // Cache the cookie header per origin so same-origin hops don't repeat
  // the lookup, while a cross-origin hop gets its own cookies.
  let cookieOrigin: string | null = null;
  let cookieHeader = "";
  for (let hop = 0; hop < 5; hop++) {
    const origin = new URL(currentUrl).origin;
    if (origin !== cookieOrigin) {
      cookieHeader = await ctx.pool.cookieHeader(currentUrl);
      cookieOrigin = origin;
    }

    const response = await fetch(currentUrl, {
      headers: {
        Cookie: cookieHeader,
        "User-Agent": USER_AGENT,
        Accept: "*/*",
      },
      // Redirects are handled below so login bounces can be detected.
      redirect: "manual",
    });

    if (response.status >= 200 && response.status < 300) {
      return Buffer.from(await response.arrayBuffer());
    }

    if (response.status >= 300 && response.status < 400) {
      const location = response.headers.get("location");
      if (!location) {
        throw new HelpfulError({
          kind: "network_error",
          message: `${serviceName} returned ${response.status} for ${sourceUrl.toString()} with no Location header.`,
          hint: "Open the URL in your browser to verify it exists and is shared with you.",
        });
      }
      // `Location` may be relative; resolve it against the URL just requested.
      const next = new URL(location, currentUrl);
      if (next.hostname === "accounts.google.com" || /\/ServiceLogin/i.test(next.pathname)) {
        throw new HelpfulError({
          kind: "auth_error",
          message: `${serviceName} redirected ${sourceUrl.toString()} to a Google login page.`,
          hint: "Run `membot login` and sign into Google in the browser that opens, then re-run.",
        });
      }
      currentUrl = next.toString();
      continue;
    }

    if (response.status === 401 || response.status === 403) {
      throw new HelpfulError({
        kind: "auth_error",
        message: `${serviceName} returned ${response.status} for ${sourceUrl.toString()}.`,
        hint: "Run `membot login` and sign into Google in the browser that opens, then re-run.",
      });
    }

    throw new HelpfulError({
      kind: "network_error",
      message: `${serviceName} returned ${response.status} ${response.statusText} for ${sourceUrl.toString()}.`,
      hint: "Open the URL in your browser to verify it's accessible to your account.",
    });
  }

  throw new HelpfulError({
    kind: "network_error",
    message: `${serviceName} bounced through too many redirects for ${sourceUrl.toString()}.`,
    hint: "Re-run the command; if the failure persists, open the URL in your browser to investigate.",
  });
}
@@ -0,0 +1,58 @@
1
+ import { HelpfulError } from "../../errors.ts";
2
+ import { sha256Hex } from "../local-reader.ts";
3
+ import { fetchWithBrowserCookies } from "./google-shared.ts";
4
+ import type { DownloadedRemote, Downloader } from "./index.ts";
5
+
6
+ const SHEET_PATH = /^\/spreadsheets\/d\/([a-zA-Z0-9_-]+)/;
7
+
8
+ const XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
9
+
10
/**
 * Downloader for Google Sheets. The workbook is pulled as `.xlsx`
 * from the `/export?format=xlsx` endpoint and returned with the
 * spreadsheet MIME type; turning the bytes into markdown happens
 * elsewhere. Shares the "Google" browser login entry with the Docs
 * and Slides downloaders.
 */
export const googleSheetsDownloader: Downloader = {
  name: "google-sheets",
  description:
    "Google Sheets (docs.google.com/spreadsheets/d/<id>) — exports every tab as .xlsx, rendered to markdown tables locally.",
  logins: [
    {
      kind: "browser",
      name: "Google",
      url: "https://accounts.google.com/signin",
      description: "covers Docs, Sheets, and Slides",
    },
  ],
  matches(url) {
    if (url.hostname !== "docs.google.com") return false;
    return SHEET_PATH.test(url.pathname);
  },
  async download(url, ctx): Promise<DownloadedRemote> {
    const spreadsheetId = extractSheetId(url);
    const payload = await fetchWithBrowserCookies(
      `https://docs.google.com/spreadsheets/d/${spreadsheetId}/export?format=xlsx`,
      ctx,
      "Google Sheets",
      url,
    );
    const bytes = new Uint8Array(payload);
    const sha256 = sha256Hex(payload);
    return {
      bytes,
      sha256,
      mimeType: XLSX_MIME,
      downloader: "google-sheets",
      downloaderArgs: { sheet_id: spreadsheetId },
      sourceUrl: url.toString(),
    };
  },
};
47
+
48
/**
 * Pull the spreadsheet id out of a Google Sheets URL path.
 *
 * @throws HelpfulError `input_error` when the path has no `/spreadsheets/d/<id>` prefix.
 */
function extractSheetId(url: URL): string {
  const spreadsheetId = SHEET_PATH.exec(url.pathname)?.[1];
  if (!spreadsheetId) {
    throw new HelpfulError({
      kind: "input_error",
      message: `not a Google Sheets URL: ${url.toString()}`,
      hint: "Pass a URL like https://docs.google.com/spreadsheets/d/<SHEET_ID>/edit.",
    });
  }
  return spreadsheetId;
}
@@ -0,0 +1,53 @@
1
+ import { HelpfulError } from "../../errors.ts";
2
+ import { sha256Hex } from "../local-reader.ts";
3
+ import { fetchWithBrowserCookies } from "./google-shared.ts";
4
+ import type { DownloadedRemote, Downloader } from "./index.ts";
5
+
6
+ const SLIDE_PATH = /^\/presentation\/d\/([a-zA-Z0-9_-]+)/;
7
+
8
/**
 * Downloader for Google Slides. The deck is pulled as a PDF from the
 * `/export/pdf` endpoint and returned as `application/pdf`; text
 * extraction happens elsewhere. Shares the "Google" browser login
 * entry with the Docs and Sheets downloaders.
 */
export const googleSlidesDownloader: Downloader = {
  name: "google-slides",
  description: "Google Slides (docs.google.com/presentation/d/<id>) — exports as PDF for layout-faithful conversion.",
  logins: [
    {
      kind: "browser",
      name: "Google",
      url: "https://accounts.google.com/signin",
      description: "covers Docs, Sheets, and Slides",
    },
  ],
  matches(url) {
    if (url.hostname !== "docs.google.com") return false;
    return SLIDE_PATH.test(url.pathname);
  },
  async download(url, ctx): Promise<DownloadedRemote> {
    const presentationId = extractSlidesId(url);
    const payload = await fetchWithBrowserCookies(
      `https://docs.google.com/presentation/d/${presentationId}/export/pdf`,
      ctx,
      "Google Slides",
      url,
    );
    const bytes = new Uint8Array(payload);
    const sha256 = sha256Hex(payload);
    return {
      bytes,
      sha256,
      mimeType: "application/pdf",
      downloader: "google-slides",
      downloaderArgs: { slides_id: presentationId },
      sourceUrl: url.toString(),
    };
  },
};
42
+
43
/**
 * Pull the presentation id out of a Google Slides URL path.
 *
 * @throws HelpfulError `input_error` when the path has no `/presentation/d/<id>` prefix.
 */
function extractSlidesId(url: URL): string {
  const presentationId = SLIDE_PATH.exec(url.pathname)?.[1];
  if (!presentationId) {
    throw new HelpfulError({
      kind: "input_error",
      message: `not a Google Slides URL: ${url.toString()}`,
      hint: "Pass a URL like https://docs.google.com/presentation/d/<SLIDES_ID>/edit.",
    });
  }
  return presentationId;
}
@@ -0,0 +1,182 @@
1
+ import type { MembotConfig } from "../../config/schemas.ts";
2
+ import type { logger as Logger } from "../../output/logger.ts";
3
+ import type { BrowserPool } from "./browser.ts";
4
+ import { genericWebDownloader } from "./generic-web.ts";
5
+ import { githubDownloader } from "./github.ts";
6
+ import { googleDocsDownloader } from "./google-docs.ts";
7
+ import { googleSheetsDownloader } from "./google-sheets.ts";
8
+ import { googleSlidesDownloader } from "./google-slides.ts";
9
+ import { linearDownloader } from "./linear.ts";
10
+
11
/**
 * Result of fetching one URL — a drop-in replacement for the old
 * `FetchedRemote` shape. `downloader` and `downloaderArgs` are
 * persisted on the row so a refresh can replay the same downloader
 * against the same URL deterministically (no LLM, no agent loop).
 */
export interface DownloadedRemote {
  /** Raw payload as downloaded or rendered. */
  bytes: Uint8Array;
  /** Hex digest produced by `sha256Hex` over the payload. */
  sha256: string;
  /** MIME type of `bytes`. */
  mimeType: string;
  /** `name` of the downloader that produced this result. */
  downloader: string;
  /** Downloader-specific identifiers parsed from the URL. */
  downloaderArgs: Record<string, unknown>;
  /** The URL that was requested, as a string. */
  sourceUrl: string;
}
25
+
26
/** Shared services handed to every `Downloader.download` call. */
export interface DownloaderCtx {
  pool: BrowserPool;
  logger: typeof Logger;
  config: MembotConfig;
  /**
   * Optional hook that feeds a short status string to the host's
   * progress spinner. Long-running downloaders (multi-query GraphQL,
   * paginated REST fetches, headless browser navigation) call it with
   * sublabels such as "fetching", "rendering", or "parsing 3/4 pages",
   * and the CLI shows them under the per-entry progress bar. Hosts
   * that don't supply one (e.g. MCP server, JSON-mode CLI) get a no-op.
   */
  onProgress?: (sublabel: string) => void;
}
40
+
41
/**
 * A single strategy for fetching a URL. The service-specific
 * downloaders (Google, GitHub, Linear) recognise URLs by host/pattern
 * and call the canonical export endpoint. The generic-web downloader
 * is the registry's always-matching catch-all (HEADs the URL, prints
 * to PDF if HTML, else streams the raw bytes through). Supporting
 * another service takes one file: implement `Downloader` and register
 * it here.
 *
 * A downloader that needs the user to set something up first declares
 * one or more `LoginEntry` objects; the `membot login` page gathers
 * them from every downloader, dedupes by URL, and renders one button
 * per service.
 */
export interface Downloader {
  /** Registry key; also what `findDownloaderByName` looks up. */
  name: string;
  /** Human-readable summary of what the downloader handles. */
  description: string;
  /** Whether this downloader should handle `url`. */
  matches(url: URL): boolean;
  /** Fetch `url` and return the payload together with replay metadata. */
  download(url: URL, ctx: DownloaderCtx): Promise<DownloadedRemote>;
  /** Setup steps the user may need to complete for this downloader. */
  logins?: LoginEntry[];
  /**
   * Puts the BrowserPool into headed mode for this downloader's
   * fetches. Meant for SPAs that detect headless Chromium and refuse
   * to hydrate. Nothing uses it at the moment (the services that
   * needed it have moved to the API-key flow), but the hook is kept
   * for future cookie-based downloaders.
   */
  requireHeaded?: boolean;
  /**
   * Marks a downloader that authenticates with a config-stored API
   * key rather than browser cookies. The fetcher uses this to skip
   * the auto-login browser prompt on `auth_error`, since opening a
   * browser doesn't help when the missing credential lives in the
   * config file.
   */
  requiresApiKey?: boolean;
}
76
+
77
/**
 * Something the user may have to set up before fetches against a
 * service succeed. There are two variants, told apart by `kind`:
 *
 *   - `"browser"` — the user clicks a link in the `membot login`
 *     browser, signs in, and closes the window. Cookies + IndexedDB
 *     land in the persistent profile and downloaders pick them up
 *     automatically.
 *   - `"api_key"` — the user opens the service's API-key page, copies
 *     the key, and runs the displayed `setupCommand`. The key is
 *     stored in `~/.membot/config.json` and downloaders read it from
 *     `ctx.config`.
 *
 * Several downloaders may declare the same entry (e.g. all three
 * Google downloaders share Google sign-in); the login page dedupes by
 * `(kind, url)`.
 */
export type LoginEntry = BrowserLoginEntry | ApiKeyLoginEntry;
94
+
95
/** Login entry for a service the user signs into through the browser. */
export interface BrowserLoginEntry {
  kind: "browser";
  /** Name shown to the user (e.g. "Google"). */
  name: string;
  /** Sign-in URL opened by the button. */
  url: string;
  /** Optional short note displayed beside the button. */
  description?: string;
}
104
+
105
/** Login entry for a service configured with an API key. */
export interface ApiKeyLoginEntry {
  kind: "api_key";
  /** Name shown to the user (e.g. "Linear"). */
  name: string;
  /** Settings page on which the user creates the key. */
  url: string;
  /** Shell command for the user to copy — e.g. `membot config set downloaders.github.api_key <PAT>`. */
  setupCommand: string;
  /** Optional short note displayed beside the link. */
  description?: string;
}
116
+
117
// Order matters: `findDownloader` returns the first match, so the
// service-specific downloaders come first and the generic-web
// catch-all stays last.
const REGISTRY: Downloader[] = [
  googleDocsDownloader,
  googleSheetsDownloader,
  googleSlidesDownloader,
  githubDownloader,
  linearDownloader,
  genericWebDownloader,
];
125
+
126
/**
 * Return the first registered downloader whose `matches` accepts
 * `url`. A string that fails to parse as a URL yields `null`. In
 * normal use the generic-web downloader matches everything else, so
 * callers can treat this as total over valid URLs.
 *
 * @param url Absolute URL, as a string or an already-parsed `URL`.
 * @returns The matching downloader, or `null` if none matched or the string did not parse.
 */
export function findDownloader(url: string | URL): Downloader | null {
  let target: URL;
  if (typeof url === "string") {
    try {
      target = new URL(url);
    } catch {
      return null;
    }
  } else {
    target = url;
  }
  return REGISTRY.find((candidate) => candidate.matches(target)) ?? null;
}
144
+
145
/**
 * Look a downloader up by its `name`; refresh uses this to replay a
 * persisted downloader. Returns `null` when no downloader has that name.
 */
export function findDownloaderByName(name: string): Downloader | null {
  for (const candidate of REGISTRY) {
    if (candidate.name === name) return candidate;
  }
  return null;
}
149
+
150
/**
 * Expose the registered downloaders, in registration order, behind a
 * read-only array type. The returned value is the registry array
 * itself, not a copy.
 */
export function listDownloaders(): readonly Downloader[] {
  return REGISTRY;
}
154
+
155
/**
 * Gather the `LoginEntry` objects declared by every registered
 * downloader, split by kind and deduped by URL within each kind (the
 * first declaration of a URL wins). `membot login` uses the result
 * to render one button per browser-login service and one set of
 * instructions per api-key service, even when several downloaders
 * share a setup (e.g. Google Docs / Sheets / Slides all share Google
 * sign-in).
 *
 * @returns `browser` and `apiKey` entries, each in first-seen order.
 */
export function collectLoginEntries(): { browser: BrowserLoginEntry[]; apiKey: ApiKeyLoginEntry[] } {
  const browserByUrl = new Map<string, BrowserLoginEntry>();
  const apiKeyByUrl = new Map<string, ApiKeyLoginEntry>();

  const declared = REGISTRY.flatMap((downloader) => downloader.logins ?? []);
  for (const entry of declared) {
    if (entry.kind === "browser") {
      if (browserByUrl.has(entry.url)) continue;
      browserByUrl.set(entry.url, entry);
    } else {
      if (apiKeyByUrl.has(entry.url)) continue;
      apiKeyByUrl.set(entry.url, entry);
    }
  }

  return {
    browser: Array.from(browserByUrl.values()),
    apiKey: Array.from(apiKeyByUrl.values()),
  };
}
177
+
178
+ /**
179
+ * Compute a stable sha256 hex digest of the bytes. Re-exposed here
180
+ * because every downloader uses it.
181
+ */
182
+ export { sha256Hex } from "../local-reader.ts";