membot 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/membot.md +25 -10
- package/.cursor/rules/membot.mdc +25 -10
- package/README.md +35 -4
- package/package.json +8 -5
- package/scripts/apply-patches.sh +0 -11
- package/src/cli.ts +2 -2
- package/src/commands/login-page.mustache +50 -0
- package/src/commands/login.ts +83 -0
- package/src/config/schemas.ts +17 -5
- package/src/constants.ts +13 -1
- package/src/context.ts +1 -24
- package/src/db/files.ts +21 -25
- package/src/db/migrations/003-downloader-columns.ts +58 -0
- package/src/db/migrations.ts +2 -1
- package/src/ingest/converter/index.ts +9 -0
- package/src/ingest/converter/xlsx.ts +111 -0
- package/src/ingest/downloaders/browser.ts +180 -0
- package/src/ingest/downloaders/generic-web.ts +81 -0
- package/src/ingest/downloaders/github.ts +178 -0
- package/src/ingest/downloaders/google-docs.ts +56 -0
- package/src/ingest/downloaders/google-shared.ts +86 -0
- package/src/ingest/downloaders/google-sheets.ts +58 -0
- package/src/ingest/downloaders/google-slides.ts +53 -0
- package/src/ingest/downloaders/index.ts +182 -0
- package/src/ingest/downloaders/linear.ts +291 -0
- package/src/ingest/fetcher.ts +104 -129
- package/src/ingest/ingest.ts +43 -70
- package/src/mcp/instructions.ts +4 -2
- package/src/operations/add.ts +6 -4
- package/src/operations/info.ts +4 -6
- package/src/operations/move.ts +2 -3
- package/src/operations/refresh.ts +2 -4
- package/src/operations/remove.ts +23 -2
- package/src/operations/tree.ts +1 -1
- package/src/operations/types.ts +1 -1
- package/src/refresh/runner.ts +59 -114
- package/src/types/text-modules.d.ts +5 -0
- package/patches/@evantahler%2Fmcpx@0.21.4.patch +0 -51
- package/src/commands/mcpx.ts +0 -112
- package/src/ingest/agent-fetcher.ts +0 -639
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import { HelpfulError } from "../../errors.ts";
|
|
2
|
+
import { sha256Hex } from "../local-reader.ts";
|
|
3
|
+
import type { DownloadedRemote, Downloader } from "./index.ts";
|
|
4
|
+
|
|
5
|
+
// URL shapes membot recognizes on linear.app:
//   /<workspace>/issue/<TEAM-123>  — issue pages; the issue key (e.g. ENG-42)
//                                    is captured in group 2, workspace in group 1.
//   /<workspace>/project/<slug>    — project pages; slug captured in group 2.
const ISSUE_PATH = /^\/([^/]+)\/issue\/([A-Z]+-\d+)(?:$|\/|#|\?)/;
const PROJECT_PATH = /^\/([^/]+)\/project\/([^/?#]+)/;

// Linear's official public GraphQL API; authenticated below with a
// personal API key sent in the Authorization header.
const GRAPHQL_ENDPOINT = "https://api.linear.app/graphql";
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Linear's web app uses a sophisticated cookie + signed-request scheme
|
|
12
|
+
* (`client-api.linear.app/graphql` with `useraccount`/`linear-client-id`
|
|
13
|
+
* headers) that's not realistically replayable from outside a real
|
|
14
|
+
* Linear browser session. Instead we use Linear's official API at
|
|
15
|
+
* `api.linear.app/graphql` with a personal API key — set up once via
|
|
16
|
+
* `membot config set downloaders.linear.api_key <KEY>` after creating
|
|
17
|
+
* the key at https://linear.app/settings/api.
|
|
18
|
+
*
|
|
19
|
+
* The API gives us the structured issue/project payload (title, body,
|
|
20
|
+
* comments, status, …) directly; we render it to markdown
|
|
21
|
+
* deterministically rather than scraping the rendered DOM.
|
|
22
|
+
*/
|
|
23
|
+
export const linearDownloader: Downloader = {
|
|
24
|
+
name: "linear",
|
|
25
|
+
description: "Linear (linear.app/<workspace>/issue/<KEY> and /project/<slug>) — uses the Linear API.",
|
|
26
|
+
logins: [
|
|
27
|
+
{
|
|
28
|
+
kind: "api_key",
|
|
29
|
+
name: "Linear",
|
|
30
|
+
url: "https://linear.app/settings/api",
|
|
31
|
+
setupCommand: "membot config set downloaders.linear.api_key <KEY>",
|
|
32
|
+
description: "create a personal API key, then run the command on the right",
|
|
33
|
+
},
|
|
34
|
+
],
|
|
35
|
+
requiresApiKey: true,
|
|
36
|
+
matches(url) {
|
|
37
|
+
return url.hostname === "linear.app" && (ISSUE_PATH.test(url.pathname) || PROJECT_PATH.test(url.pathname));
|
|
38
|
+
},
|
|
39
|
+
async download(url, ctx): Promise<DownloadedRemote> {
|
|
40
|
+
const apiKey = ctx.config.downloaders.linear.api_key.trim();
|
|
41
|
+
if (apiKey === "") {
|
|
42
|
+
throw new HelpfulError({
|
|
43
|
+
kind: "auth_error",
|
|
44
|
+
message: `Linear API key not configured.`,
|
|
45
|
+
hint: "Create a personal API key at https://linear.app/settings/api, then run `membot config set downloaders.linear.api_key <KEY>`.",
|
|
46
|
+
});
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const issueMatch = url.pathname.match(ISSUE_PATH);
|
|
50
|
+
const projectMatch = url.pathname.match(PROJECT_PATH);
|
|
51
|
+
let markdown: string;
|
|
52
|
+
let downloaderArgs: Record<string, unknown>;
|
|
53
|
+
|
|
54
|
+
if (issueMatch) {
|
|
55
|
+
const identifier = issueMatch[2] as string;
|
|
56
|
+
ctx.onProgress?.(`querying issue ${identifier}`);
|
|
57
|
+
const issue = await fetchIssue(identifier, apiKey, url);
|
|
58
|
+
markdown = renderIssue(issue);
|
|
59
|
+
downloaderArgs = { kind: "issue", workspace: issueMatch[1], identifier };
|
|
60
|
+
} else if (projectMatch) {
|
|
61
|
+
const slug = projectMatch[2] as string;
|
|
62
|
+
const slugId = extractProjectSlugId(slug);
|
|
63
|
+
ctx.onProgress?.(`querying project ${slugId}`);
|
|
64
|
+
const project = await fetchProject(slugId, apiKey, url);
|
|
65
|
+
markdown = renderProject(project);
|
|
66
|
+
downloaderArgs = { kind: "project", workspace: projectMatch[1], slug, slug_id: slugId };
|
|
67
|
+
} else {
|
|
68
|
+
throw new HelpfulError({
|
|
69
|
+
kind: "input_error",
|
|
70
|
+
message: `not a Linear issue/project URL: ${url.toString()}`,
|
|
71
|
+
hint: "Pass a URL like https://linear.app/<workspace>/issue/<KEY> or .../project/<slug>.",
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
const bytes = new TextEncoder().encode(markdown);
|
|
76
|
+
return {
|
|
77
|
+
bytes,
|
|
78
|
+
sha256: sha256Hex(bytes),
|
|
79
|
+
mimeType: "text/markdown",
|
|
80
|
+
downloader: "linear",
|
|
81
|
+
downloaderArgs,
|
|
82
|
+
sourceUrl: url.toString(),
|
|
83
|
+
};
|
|
84
|
+
},
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
/** Subset of Linear's user object we render into attribution lines (see userLabel). */
interface LinearUser {
  name?: string | null;
  displayName?: string | null;
  email?: string | null;
}

/** One node from an issue's `comments` connection. */
interface LinearComment {
  body: string | null;
  createdAt: string | null;
  user: LinearUser | null;
}

/** Shape of the fields selected by ISSUE_QUERY for a single issue. */
interface LinearIssue {
  identifier: string;
  url: string;
  title: string;
  description: string | null;
  priorityLabel: string | null;
  state: { name: string } | null;
  assignee: LinearUser | null;
  creator: LinearUser | null;
  createdAt: string;
  updatedAt: string;
  comments: { nodes: LinearComment[] };
}

/** Shape of the fields selected by PROJECT_QUERY for a single project. */
interface LinearProject {
  id: string;
  url: string;
  name: string;
  slugId: string;
  description: string | null;
  content: string | null;
  state: string | null;
  startDate: string | null;
  targetDate: string | null;
  createdAt: string;
  updatedAt: string;
  lead: LinearUser | null;
  members: { nodes: LinearUser[] };
}
|
|
128
|
+
|
|
129
|
+
// Selection set for one issue; mirrors the LinearIssue interface.
// Comments are capped at the first 100 nodes.
const ISSUE_QUERY = `query Issue($id: String!) {
  issue(id: $id) {
    identifier url title description priorityLabel
    state { name }
    assignee { name displayName email }
    creator { name displayName email }
    createdAt updatedAt
    comments(first: 100) {
      nodes { body createdAt user { name displayName email } }
    }
  }
}`;

// Project lookup by slugId (the hex suffix of the URL slug); mirrors
// the LinearProject interface. Members are capped at 50.
const PROJECT_QUERY = `query ProjectBySlug($slugId: String!) {
  projects(filter: { slugId: { eq: $slugId } }, first: 1) {
    nodes {
      id url name slugId description content state startDate targetDate createdAt updatedAt
      lead { name displayName email }
      members(first: 50) { nodes { name displayName email } }
    }
  }
}`;
|
|
151
|
+
|
|
152
|
+
async function fetchIssue(identifier: string, apiKey: string, url: URL): Promise<LinearIssue> {
|
|
153
|
+
const result = await graphql<{ issue: LinearIssue | null }>(apiKey, ISSUE_QUERY, { id: identifier }, url);
|
|
154
|
+
if (!result.issue) {
|
|
155
|
+
throw new HelpfulError({
|
|
156
|
+
kind: "not_found",
|
|
157
|
+
message: `Linear has no issue ${identifier} visible to this API key.`,
|
|
158
|
+
hint: "Verify the URL exists and that the API key belongs to a member of the issue's workspace.",
|
|
159
|
+
});
|
|
160
|
+
}
|
|
161
|
+
return result.issue;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
async function fetchProject(slugId: string, apiKey: string, url: URL): Promise<LinearProject> {
|
|
165
|
+
const result = await graphql<{ projects: { nodes: LinearProject[] } }>(apiKey, PROJECT_QUERY, { slugId }, url);
|
|
166
|
+
const project = result.projects.nodes[0];
|
|
167
|
+
if (!project) {
|
|
168
|
+
throw new HelpfulError({
|
|
169
|
+
kind: "not_found",
|
|
170
|
+
message: `Linear has no project with slug ${slugId} visible to this API key.`,
|
|
171
|
+
hint: "Verify the URL exists and that the API key belongs to a member of the project's workspace.",
|
|
172
|
+
});
|
|
173
|
+
}
|
|
174
|
+
return project;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* The trailing token on a Linear project URL is `<name>-<slugId>`,
|
|
179
|
+
* where `slugId` is a 12-char hex suffix. Linear's API matches by
|
|
180
|
+
* `slugId` exactly, so we slice the suffix off here.
|
|
181
|
+
*/
|
|
182
|
+
function extractProjectSlugId(slug: string): string {
|
|
183
|
+
const match = slug.match(/-([0-9a-f]{8,})$/i);
|
|
184
|
+
return match ? (match[1] as string) : slug;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
async function graphql<T>(apiKey: string, query: string, variables: Record<string, unknown>, url: URL): Promise<T> {
|
|
188
|
+
const response = await fetch(GRAPHQL_ENDPOINT, {
|
|
189
|
+
method: "POST",
|
|
190
|
+
headers: {
|
|
191
|
+
"Content-Type": "application/json",
|
|
192
|
+
Authorization: apiKey,
|
|
193
|
+
},
|
|
194
|
+
body: JSON.stringify({ query, variables }),
|
|
195
|
+
});
|
|
196
|
+
if (!response.ok) {
|
|
197
|
+
throw new HelpfulError({
|
|
198
|
+
kind: response.status === 401 || response.status === 403 ? "auth_error" : "network_error",
|
|
199
|
+
message: `Linear GraphQL returned ${response.status} ${response.statusText} for ${url.toString()}.`,
|
|
200
|
+
hint:
|
|
201
|
+
response.status === 401 || response.status === 403
|
|
202
|
+
? "Re-create the API key at https://linear.app/settings/api and run `membot config set downloaders.linear.api_key <KEY>`."
|
|
203
|
+
: "Check that the URL is reachable and that the API key has access to the issue/project.",
|
|
204
|
+
});
|
|
205
|
+
}
|
|
206
|
+
const json = (await response.json()) as { data?: T; errors?: Array<{ message: string }> };
|
|
207
|
+
if (json.errors && json.errors.length > 0) {
|
|
208
|
+
const detail = json.errors.map((e) => e.message).join("; ");
|
|
209
|
+
throw new HelpfulError({
|
|
210
|
+
kind: "input_error",
|
|
211
|
+
message: `Linear GraphQL errors for ${url.toString()}: ${detail}`,
|
|
212
|
+
hint: "Verify the URL is correct and the API key has visibility into the workspace.",
|
|
213
|
+
});
|
|
214
|
+
}
|
|
215
|
+
if (!json.data) {
|
|
216
|
+
throw new HelpfulError({
|
|
217
|
+
kind: "internal_error",
|
|
218
|
+
message: `Linear GraphQL returned no data for ${url.toString()}.`,
|
|
219
|
+
hint: "Re-run with `--verbose` and report the response shape.",
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
return json.data;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
function renderIssue(issue: LinearIssue): string {
|
|
226
|
+
const lines: string[] = [];
|
|
227
|
+
lines.push(`# ${issue.identifier}: ${issue.title}`);
|
|
228
|
+
lines.push("");
|
|
229
|
+
lines.push(`- URL: ${issue.url}`);
|
|
230
|
+
if (issue.state) lines.push(`- Status: ${issue.state.name}`);
|
|
231
|
+
if (issue.priorityLabel) lines.push(`- Priority: ${issue.priorityLabel}`);
|
|
232
|
+
if (issue.assignee) lines.push(`- Assignee: ${userLabel(issue.assignee)}`);
|
|
233
|
+
if (issue.creator) lines.push(`- Author: ${userLabel(issue.creator)}`);
|
|
234
|
+
lines.push(`- Created: ${issue.createdAt}`);
|
|
235
|
+
lines.push(`- Updated: ${issue.updatedAt}`);
|
|
236
|
+
lines.push("");
|
|
237
|
+
if (issue.description) {
|
|
238
|
+
lines.push("## Description");
|
|
239
|
+
lines.push("");
|
|
240
|
+
lines.push(issue.description.trim());
|
|
241
|
+
lines.push("");
|
|
242
|
+
}
|
|
243
|
+
const comments = issue.comments.nodes;
|
|
244
|
+
if (comments.length > 0) {
|
|
245
|
+
lines.push(`## Comments (${comments.length})`);
|
|
246
|
+
lines.push("");
|
|
247
|
+
for (const c of comments) {
|
|
248
|
+
const who = c.user ? userLabel(c.user) : "(unknown)";
|
|
249
|
+
lines.push(`### ${who} — ${c.createdAt ?? ""}`);
|
|
250
|
+
lines.push("");
|
|
251
|
+
lines.push((c.body ?? "").trim());
|
|
252
|
+
lines.push("");
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
return lines.join("\n").trim();
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
function renderProject(project: LinearProject): string {
|
|
259
|
+
const lines: string[] = [];
|
|
260
|
+
lines.push(`# ${project.name}`);
|
|
261
|
+
lines.push("");
|
|
262
|
+
lines.push(`- URL: ${project.url}`);
|
|
263
|
+
if (project.state) lines.push(`- State: ${project.state}`);
|
|
264
|
+
if (project.startDate) lines.push(`- Start: ${project.startDate}`);
|
|
265
|
+
if (project.targetDate) lines.push(`- Target: ${project.targetDate}`);
|
|
266
|
+
if (project.lead) lines.push(`- Lead: ${userLabel(project.lead)}`);
|
|
267
|
+
const members = project.members.nodes;
|
|
268
|
+
if (members.length > 0) lines.push(`- Members: ${members.map(userLabel).join(", ")}`);
|
|
269
|
+
lines.push(`- Created: ${project.createdAt}`);
|
|
270
|
+
lines.push(`- Updated: ${project.updatedAt}`);
|
|
271
|
+
lines.push("");
|
|
272
|
+
if (project.description) {
|
|
273
|
+
lines.push("## Summary");
|
|
274
|
+
lines.push("");
|
|
275
|
+
lines.push(project.description.trim());
|
|
276
|
+
lines.push("");
|
|
277
|
+
}
|
|
278
|
+
if (project.content) {
|
|
279
|
+
lines.push("## Overview");
|
|
280
|
+
lines.push("");
|
|
281
|
+
lines.push(project.content.trim());
|
|
282
|
+
lines.push("");
|
|
283
|
+
}
|
|
284
|
+
return lines.join("\n").trim();
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
function userLabel(user: LinearUser): string {
|
|
288
|
+
const name = user.displayName ?? user.name ?? "(unknown)";
|
|
289
|
+
if (user.email) return `${name} <${user.email}>`;
|
|
290
|
+
return name;
|
|
291
|
+
}
|
package/src/ingest/fetcher.ts
CHANGED
|
@@ -1,158 +1,133 @@
|
|
|
1
|
-
import
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import type { MembotConfig } from "../config/schemas.ts";
|
|
3
|
+
import { FILES } from "../constants.ts";
|
|
4
|
+
import { HelpfulError } from "../errors.ts";
|
|
4
5
|
import { logger } from "../output/logger.ts";
|
|
5
|
-
import
|
|
6
|
-
import {
|
|
7
|
-
|
|
6
|
+
import { BrowserPool } from "./downloaders/browser.ts";
|
|
7
|
+
import {
|
|
8
|
+
type DownloadedRemote,
|
|
9
|
+
type Downloader,
|
|
10
|
+
type DownloaderCtx,
|
|
11
|
+
findDownloader,
|
|
12
|
+
findDownloaderByName,
|
|
13
|
+
listDownloaders,
|
|
14
|
+
} from "./downloaders/index.ts";
|
|
8
15
|
|
|
9
|
-
export
|
|
10
|
-
bytes: Uint8Array;
|
|
11
|
-
sha256: string;
|
|
12
|
-
mimeType: string;
|
|
13
|
-
fetcher: "http" | "mcpx";
|
|
14
|
-
fetcherServer: string | null;
|
|
15
|
-
fetcherTool: string | null;
|
|
16
|
-
fetcherArgs: Record<string, unknown> | null;
|
|
17
|
-
sourceUrl: string;
|
|
18
|
-
}
|
|
16
|
+
// Alias of the downloader registry's result shape; fetcher callers use
// this name instead of importing from the downloader module directly.
export type FetchedRemote = DownloadedRemote;

export interface FetchOptions {
  /**
   * Optional explicit downloader override. Free-form; matched
   * case-insensitively against `Downloader.name`. When given, skips the
   * URL-based matching and forces that downloader (useful for the
   * "use the generic-web fallback even though google-docs claimed
   * this URL" escape hatch).
   */
  downloaderName?: string;
  /**
   * Override the on-disk path of the persistent chromium profile.
   * Defaults to `<ctx.dataDir>/auth/browser-profile`.
   */
  userDataDir?: string;
  /** Pre-built BrowserPool to share across many fetches (set by ingest's outer loop). */
  pool?: BrowserPool;
  /**
   * Sublabel hook forwarded to the downloader's `DownloaderCtx`.
   * Drives the per-entry spinner text during multi-step fetches.
   */
  onProgress?: (sublabel: string) => void;
}
|
|
41
40
|
|
|
42
41
|
/**
 * Fetch a remote URL via the per-service downloader registry. Specific
 * downloaders (Google, GitHub, Linear) match first; the generic-web
 * downloader is the always-matching catch-all. Every fetch authenticates
 * via the cookies the user persisted with `membot login`. The returned
 * shape includes the chosen downloader name and its args so refresh can
 * replay it deterministically without involving the LLM.
 */
export async function fetchRemote(
  url: string,
  config: MembotConfig,
  options: FetchOptions = {},
  dataDir?: string,
): Promise<FetchedRemote> {
  const downloader = pickDownloader(url, options.downloaderName);
  const userDataDir = options.userDataDir ?? defaultProfileDir(dataDir);
  // We only dispose a pool we created; a caller-supplied pool is shared
  // across fetches and outlives this call.
  const ownsPool = !options.pool;
  // Run headed only when the downloader explicitly requires a visible window.
  const headless = !downloader.requireHeaded;
  const pool = options.pool ?? new BrowserPool({ userDataDir, headless });
  const dctx: DownloaderCtx = { pool, logger, config, onProgress: options.onProgress };

  try {
    // Fetches are strictly non-interactive: there's no auto-launch
    // of a browser when auth fails. Batch ingest (`membot add` of
    // many URLs) and the refresh daemon both run without a human
    // available to drive a window, so any auth_error must
    // propagate as-is. The HelpfulError's hint tells the user to
    // `membot login` (cookie-based services) or `membot config set
    // downloaders.<svc>.api_key` (API-key services); they fix it
    // once and re-run.
    return await downloader.download(new URL(url), dctx);
  } finally {
    if (ownsPool) await pool.dispose();
  }
}
|
|
93
76
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
77
|
+
/**
|
|
78
|
+
* Replay a fetch by downloader name (used by refresh). Looks up the
|
|
79
|
+
* persisted downloader by name and calls it against the original URL —
|
|
80
|
+
* deterministic, no agent loop. When the persisted downloader is no
|
|
81
|
+
* longer registered (e.g. from a prior membot version), falls back to
|
|
82
|
+
* URL-based dispatch so refresh degrades gracefully instead of erroring.
|
|
83
|
+
*/
|
|
84
|
+
export async function fetchRemoteByDownloader(
|
|
85
|
+
downloaderName: string | null,
|
|
86
|
+
url: string,
|
|
87
|
+
pool: BrowserPool,
|
|
88
|
+
config: MembotConfig,
|
|
89
|
+
): Promise<FetchedRemote> {
|
|
90
|
+
const named = downloaderName ? findDownloaderByName(downloaderName) : null;
|
|
91
|
+
const downloader = named ?? findDownloader(url);
|
|
92
|
+
if (!downloader) {
|
|
93
|
+
throw new HelpfulError({
|
|
94
|
+
kind: "input_error",
|
|
95
|
+
message: `no downloader matches ${url}`,
|
|
96
|
+
hint: "Re-add the URL with `membot add <url>` to pick a fresh downloader.",
|
|
97
|
+
});
|
|
105
98
|
}
|
|
106
|
-
|
|
107
|
-
return
|
|
99
|
+
const dctx: DownloaderCtx = { pool, logger, config };
|
|
100
|
+
return downloader.download(new URL(url), dctx);
|
|
108
101
|
}
|
|
109
102
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
);
|
|
103
|
+
function pickDownloader(url: string, override?: string): Downloader {
|
|
104
|
+
if (override) {
|
|
105
|
+
const named = findDownloaderByName(override.toLowerCase());
|
|
106
|
+
if (!named) {
|
|
107
|
+
const available = listDownloaders()
|
|
108
|
+
.map((d) => d.name)
|
|
109
|
+
.join(", ");
|
|
110
|
+
throw new HelpfulError({
|
|
111
|
+
kind: "input_error",
|
|
112
|
+
message: `unknown downloader '${override}'`,
|
|
113
|
+
hint: `Pick one of: ${available}.`,
|
|
114
|
+
});
|
|
115
|
+
}
|
|
116
|
+
return named;
|
|
125
117
|
}
|
|
126
|
-
|
|
118
|
+
const matched = findDownloader(url);
|
|
119
|
+
if (!matched) {
|
|
127
120
|
throw new HelpfulError({
|
|
128
|
-
kind: "
|
|
129
|
-
message: `
|
|
130
|
-
hint: "
|
|
121
|
+
kind: "input_error",
|
|
122
|
+
message: `not a fetchable URL: ${url}`,
|
|
123
|
+
hint: "Pass an http(s):// URL.",
|
|
131
124
|
});
|
|
132
125
|
}
|
|
133
|
-
|
|
134
|
-
const ct = resp.headers.get("content-type") ?? "";
|
|
135
|
-
const mime = ct.split(";")[0]?.trim() || "application/octet-stream";
|
|
136
|
-
return {
|
|
137
|
-
bytes,
|
|
138
|
-
sha256: sha256Hex(bytes),
|
|
139
|
-
mimeType: mime,
|
|
140
|
-
fetcher: "http",
|
|
141
|
-
fetcherServer: null,
|
|
142
|
-
fetcherTool: null,
|
|
143
|
-
fetcherArgs: null,
|
|
144
|
-
sourceUrl: url,
|
|
145
|
-
};
|
|
126
|
+
return matched;
|
|
146
127
|
}
|
|
147
128
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
* as a successful payload. Used by the refresh runner; the agent loop
|
|
153
|
-
* has its own preview-aware check.
|
|
154
|
-
*/
|
|
155
|
-
export function isMcpToolError(result: unknown): boolean {
|
|
156
|
-
if (!result || typeof result !== "object") return false;
|
|
157
|
-
return (result as { isError?: unknown }).isError === true;
|
|
129
|
+
function defaultProfileDir(dataDir?: string): string {
|
|
130
|
+
if (dataDir) return join(dataDir, FILES.BROWSER_PROFILE);
|
|
131
|
+
const home = process.env.MEMBOT_HOME ?? `${process.env.HOME ?? "."}/.membot`;
|
|
132
|
+
return join(home, FILES.BROWSER_PROFILE);
|
|
158
133
|
}
|