memdex 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -5
- package/bin/memdex.js +2 -21
- package/dist/cli.js +1975 -0
- package/package.json +12 -6
- package/scripts/memdex.py +0 -2517
package/dist/cli.js
ADDED
|
@@ -0,0 +1,1975 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Bundler runtime: short alias for Object.defineProperty.
var __defProp = Object.defineProperty;
|
|
3
|
+
/**
 * Bundler runtime helper: for every enumerable key of `all`, defines an
 * enumerable, configurable accessor property on `target`.
 *
 * The getter is the function stored in `all` at definition time; the setter
 * stores a new constant-returning function back into `all`.
 */
var __export = (target, all) => {
  for (var name in all) {
    var descriptor = {
      get: all[name],
      enumerable: true,
      configurable: true,
      set: (newValue) => all[name] = () => newValue
    };
    __defProp(target, name, descriptor);
  }
};
|
|
12
|
+
// Bundler runtime: wraps a module initializer so it runs at most once. The first call clears `fn`, runs it and caches the result in `res`; later calls return the cached `res`.
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
13
|
+
|
|
14
|
+
// src/common.ts
|
|
15
|
+
// Namespace object for src/common.ts. Every entry is a lazy getter, so the
// value is looked up when the property is read, not when the map is built.
var exports_common = {};
__export(exports_common, {
  // helpers (order preserved from the bundler output)
  yymmddhhmm: () => yymmddhhmm, writeJson: () => writeJson, which: () => which,
  uninitializedStatus: () => uninitializedStatus, slugify: () => slugify, sleep: () => sleep,
  shellSplit: () => shellSplit, shellQuote: () => shellQuote,
  sha256Text: () => sha256Text, sha256File: () => sha256File, sha256Bytes: () => sha256Bytes,
  setTestHooks: () => setTestHooks, secondsSince: () => secondsSince, scriptCmd: () => scriptCmd,
  runCommand: () => runCommand, resetTestHooks: () => resetTestHooks, repomixCmd: () => repomixCmd,
  repoLock: () => repoLock, removeFileQuiet: () => removeFileQuiet, readJson: () => readJson,
  posixPath: () => posixPath, positiveInt: () => positiveInt,
  pathMatchesSpec: () => pathMatchesSpec, pathIsIncluded: () => pathIsIncluded, pathIsIgnored: () => pathIsIgnored,
  parseSizeBytes: () => parseSizeBytes, parseIso: () => parseIso, packageRoot: () => packageRoot,
  output: () => output, nowUtc: () => nowUtc, notebooklmCmd: () => notebooklmCmd,
  neverUploadSpecs: () => neverUploadSpecs, missingConfigMessage: () => missingConfigMessage,
  loadState: () => loadState, loadConfig: () => loadConfig, iso: () => iso,
  includeSpecs: () => includeSpecs, groupSpecs: () => groupSpecs,
  fileSize: () => fileSize, fileExists: () => fileExists, die: () => die,
  defaultShortSourceTitlePrefix: () => defaultShortSourceTitlePrefix,
  defaultNotebookTitle: () => defaultNotebookTitle,
  defaultInclude: () => defaultInclude, defaultGroups: () => defaultGroups, defaultConfig: () => defaultConfig,
  configPath: () => configPath, commandLine: () => commandLine, bundleMode: () => bundleMode,
  // constants and classes
  STATE_JSON: () => STATE_JSON, SCRIPT_CMD_ENV: () => SCRIPT_CMD_ENV,
  PENDING_UPLOAD_JSON: () => PENDING_UPLOAD_JSON,
  NOTEBOOKLM_PACKAGE: () => NOTEBOOKLM_PACKAGE, NOTEBOOKLM_BIN_ENV: () => NOTEBOOKLM_BIN_ENV,
  MemdexError: () => MemdexError, LEGACY_SCRIPT_CMD_ENV: () => LEGACY_SCRIPT_CMD_ENV,
  DEFAULT_NOTEBOOK_TITLE_PREFIX: () => DEFAULT_NOTEBOOK_TITLE_PREFIX,
  CONFIG_JSON: () => CONFIG_JSON, CONFIG_DIR: () => CONFIG_DIR2
});
|
|
77
|
+
import { spawn } from "node:child_process";
|
|
78
|
+
import { createHash } from "node:crypto";
|
|
79
|
+
import { closeSync, existsSync, mkdirSync, openSync, readFileSync, statSync, unlinkSync, writeFileSync } from "node:fs";
|
|
80
|
+
import { basename, dirname, join, resolve, sep } from "node:path";
|
|
81
|
+
import { fileURLToPath } from "node:url";
|
|
82
|
+
import YAML from "yaml";
|
|
83
|
+
/**
 * Installs test overrides for command execution and tool lookup.
 * Any hook absent from `next` is stored as `undefined`.
 * @param {{run?: Function, notebooklmCmd?: Function, repomixCmd?: Function}} next
 */
function setTestHooks(next) {
  const { run, notebooklmCmd, repomixCmd } = next;
  Object.assign(hooks, { run, notebooklmCmd, repomixCmd });
}
|
|
88
|
+
/** Clears every test override previously installed on `hooks`. */
function resetTestHooks() {
  for (const key of ["run", "notebooklmCmd", "repomixCmd"]) {
    hooks[key] = undefined;
  }
}
|
|
93
|
+
/**
 * Returns the current instant.
 * @returns {Date}
 */
function nowUtc() {
  const now = new Date();
  return now;
}
|
|
96
|
+
/**
 * Formats a date as an ISO-8601 UTC string with the millisecond part removed.
 * @param {Date} [ts] Defaults to the current time.
 * @returns {string} e.g. "2024-01-02T03:04:05Z"
 */
function iso(ts = nowUtc()) {
  const withMillis = ts.toISOString();
  return withMillis.replace(/\.\d{3}Z$/, "Z");
}
|
|
99
|
+
/**
 * Parses a date string.
 * @param {string|null|undefined} value
 * @returns {Date|null} `null` for falsy input or an unparseable date.
 */
function parseIso(value) {
  if (!value) {
    return null;
  }
  const timestamp = new Date(value);
  if (Number.isNaN(timestamp.getTime())) {
    return null;
  }
  return timestamp;
}
|
|
105
|
+
/**
 * Builds a compact UTC timestamp: two-digit year, month, day, hour, minute.
 * @param {Date} [ts] Defaults to the current time.
 * @returns {string} e.g. "2403050709"
 */
function yymmddhhmm(ts = nowUtc()) {
  const twoDigits = (n) => String(n).padStart(2, "0");
  const year = String(ts.getUTCFullYear()).slice(-2);
  const fields = [
    ts.getUTCMonth() + 1, // getUTCMonth() is zero-based
    ts.getUTCDate(),
    ts.getUTCHours(),
    ts.getUTCMinutes()
  ];
  return year + fields.map((n) => twoDigits(n)).join("");
}
|
|
113
|
+
/**
 * Aborts by throwing a MemdexError.
 * @param {string} message
 * @param {number} [code=2] Stored on the error's `code` property.
 * @throws {MemdexError} always
 */
function die(message, code = 2) {
  const failure = new MemdexError(message, code);
  throw failure;
}
|
|
116
|
+
/**
 * Resolves after roughly `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
119
|
+
/**
 * Splits a command string into arguments. A token is a double-quoted run,
 * a single-quoted run (quotes removed), or a run of non-whitespace.
 * There is no escape-character handling.
 * @param {string} value
 * @returns {string[]}
 */
function shellSplit(value) {
  const tokenPattern = /"([^"]*)"|'([^']*)'|(\S+)/g;
  return Array.from(
    String(value).matchAll(tokenPattern),
    ([, doubleQuoted, singleQuoted, bare]) => doubleQuoted ?? singleQuoted ?? bare
  );
}
|
|
127
|
+
/**
 * Quotes a value for display in a POSIX shell command line.
 * Values made only of safe characters are returned unchanged; anything else
 * (including the empty string) is wrapped in single quotes.
 * @param {string} value
 * @returns {string}
 */
function shellQuote(value) {
  const isSafe = /^[A-Za-z0-9_./:=+-]+$/.test(value);
  if (isSafe) {
    return value;
  }
  // Each embedded single quote becomes: close quote, "'" in double quotes, reopen quote.
  const escaped = value.split("'").join(`'"'"'`);
  return `'${escaped}'`;
}
|
|
132
|
+
/**
 * Returns the argv prefix used when printing memdex commands.
 * The SCRIPT_CMD_ENV variable wins, then LEGACY_SCRIPT_CMD_ENV, else ["memdex"].
 * @returns {string[]}
 */
function scriptCmd() {
  const raw = process.env[SCRIPT_CMD_ENV] || process.env[LEGACY_SCRIPT_CMD_ENV] || "";
  const override = raw.trim();
  if (!override) {
    return ["memdex"];
  }
  return shellSplit(override);
}
|
|
136
|
+
/**
 * Renders a shell-quoted memdex invocation: `<script> <command> --repo <repo> <parts...>`.
 * @param {string} repo
 * @param {string} command
 * @param {...string} parts Extra arguments appended after `--repo <repo>`.
 * @returns {string}
 */
function commandLine(repo, command, ...parts) {
  const argv = scriptCmd().concat([command, "--repo", repo], parts);
  return argv.map(shellQuote).join(" ");
}
|
|
139
|
+
/**
 * Builds the multi-line help text shown when a repo has no memdex config.
 * @param {string} repo Repository path used in the suggested commands.
 * @param {string} configFile Path of the config file that was looked for.
 * @param {string} [command] When non-empty, a line naming the blocked command is
 *   inserted straight after the first line.
 * @returns {string}
 */
function missingConfigMessage(repo, configFile, command = "") {
  const suggestion = (...argv) => ` ${commandLine(repo, ...argv)}`;
  const header = [`project is not initialized for project retrieval: ${configFile}`];
  if (command) {
    header.push(`Command \`${command}\` needs \`.memdex/config.json\` before it can run.`);
  }
  const guidance = [
    "",
    "Initialize this repo first:",
    suggestion("init", "--create-notebook"),
    "",
    "Or reuse an existing NotebookLM notebook with the expected title:",
    suggestion("init", "--reuse-existing-notebook"),
    "",
    "Then ask or locate directly; both commands run freshness preflight:",
    suggestion("ask", "your question"),
    suggestion("locate", "thing to find"),
    "",
    "If this is the first broad upload and you already approve it:",
    suggestion("ask", "--yes", "your question")
  ];
  return [...header, ...guidance].join("\n");
}
|
|
161
|
+
/**
 * Builds the status object reported for a repo that has no memdex config.
 * @param {string} repo Repository path used in the suggested commands.
 * @param {string} configFile Path of the config file that was looked for.
 * @returns {object} Status record whose `next` field holds ready-to-run command lines.
 */
function uninitializedStatus(repo, configFile) {
  const suggest = (...argv) => commandLine(repo, ...argv);
  const next = {
    createNotebook: suggest("init", "--create-notebook"),
    reuseExistingNotebook: suggest("init", "--reuse-existing-notebook"),
    ask: suggest("ask", "your question"),
    locate: suggest("locate", "thing to find"),
    askWithFirstUploadApproval: suggest("ask", "--yes", "your question")
  };
  return {
    status: "not-initialized",
    initialized: false,
    config: configFile,
    message: "project is not initialized for project retrieval",
    next
  };
}
|
|
176
|
+
/**
 * Runs `fn` while holding an exclusive lock file at `<repo>/<config dir>/.lock`.
 *
 * The lock is taken by creating the file with the exclusive "wx" flag, retrying
 * every 200 ms while another holder exists. The file is always removed when
 * `fn` settles.
 *
 * @param {string} repo Repository root.
 * @param {() => any} fn Work to run under the lock; may be async.
 * @param {number} [timeoutSeconds=300] Maximum time to wait for the lock.
 * @returns {Promise<any>} Whatever `fn` returns.
 * @throws {MemdexError} When the lock cannot be acquired within the timeout.
 */
async function repoLock(repo, fn, timeoutSeconds = 300) {
  const lockPath = join(repo, CONFIG_DIR2, ".lock");
  mkdirSync(dirname(lockPath), { recursive: true });
  const started = Date.now();
  let fd;
  while (fd === undefined) {
    try {
      fd = openSync(lockPath, "wx");
    } catch (error) {
      // Only "already exists" means someone else holds the lock; anything else is fatal.
      if (error?.code !== "EEXIST")
        throw error;
      if ((Date.now() - started) / 1000 > timeoutSeconds)
        die(`timed out waiting for lock: ${lockPath}`);
      await sleep(200);
    }
  }
  try {
    // Written inside the guarded region so a failed write (e.g. disk full)
    // still closes the descriptor and removes the lock file.
    writeFileSync(fd, `pid=${process.pid}\ncreatedAt=${iso()}\n`);
    return await fn();
  } finally {
    try {
      closeSync(fd);
    } catch {
      // Best effort: a failed close must not stop the lock file being removed.
    }
    removeFileQuiet(lockPath);
  }
}
|
|
205
|
+
/**
 * Spawns a command and collects its output. Never rejects: failures are
 * reported through `returncode`.
 *
 * @param {string[]} argv Program followed by its arguments.
 * @param {string} cwd Working directory for the child.
 * @param {{timeout?: number, inputText?: string}} [opts] `timeout` is in seconds
 *   (the child gets SIGTERM when it elapses); `inputText` is written to stdin.
 * @returns {Promise<{argv: string[], returncode: number, stdout: string, stderr: string}>}
 *   `returncode` is 1 when the child has no exit code or could not be spawned.
 */
async function runCommand(argv, cwd, opts = {}) {
  if (hooks.run)
    return hooks.run(argv, cwd, opts);
  return new Promise((resolveRun) => {
    const child = spawn(argv[0], argv.slice(1), { cwd, stdio: ["pipe", "pipe", "pipe"] });
    let stdout = "";
    let stderr = "";
    let finished = false;
    const timer = opts.timeout ? setTimeout(() => {
      if (!finished)
        child.kill("SIGTERM");
    }, opts.timeout * 1000) : undefined;
    // Settle exactly once: "error" and "close" can both fire for one child.
    const settle = (returncode, extraStderr = "") => {
      if (finished)
        return;
      finished = true;
      if (timer)
        clearTimeout(timer);
      resolveRun({ argv, returncode, stdout, stderr: `${stderr}${extraStderr}` });
    };
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("close", (code) => settle(code ?? 1));
    child.on("error", (error) => settle(1, error.message));
    // A child that exits before reading its input makes the stdin stream emit
    // "error" (EPIPE). With no listener that would crash the whole process;
    // the failure already shows up through the exit code.
    child.stdin.on("error", () => {});
    if (opts.inputText !== undefined)
      child.stdin.end(opts.inputText);
    else
      child.stdin.end();
  });
}
|
|
243
|
+
/**
 * Finds an executable by name on PATH.
 *
 * PATH is split with the platform's separator (";" on Windows, ":" elsewhere);
 * a hard-coded ":" would break Windows entries such as "C:\tools". On Windows
 * the common executable extensions are tried as well.
 *
 * @param {string} name Program name without directory.
 * @returns {string|null} First existing candidate path, or `null` if none.
 */
function which(name) {
  const isWindows = process.platform === "win32";
  const paths = (process.env.PATH || "").split(isWindows ? ";" : ":");
  const exts = isWindows ? ["", ".exe", ".cmd", ".bat"] : [""];
  for (const dir of paths) {
    for (const ext of exts) {
      const candidate = join(dir, name + ext);
      if (existsSync(candidate))
        return candidate;
    }
  }
  return null;
}
|
|
255
|
+
/**
 * Resolves the argv prefix for the NotebookLM CLI.
 * Order: test hook, then the NOTEBOOKLM_BIN_ENV override, then `notebooklm` on PATH.
 * @returns {string[]}
 * @throws {MemdexError} When no tool can be found.
 */
function notebooklmCmd() {
  if (hooks.notebooklmCmd) {
    return hooks.notebooklmCmd();
  }
  const override = (process.env[NOTEBOOKLM_BIN_ENV] || "").trim();
  if (override) {
    return shellSplit(override);
  }
  const resolved = which("notebooklm");
  if (!resolved) {
    die([
      "required tool not found on PATH: notebooklm",
      `Install persistently: uv tool install ${NOTEBOOKLM_PACKAGE}`,
      `Or set ${NOTEBOOKLM_BIN_ENV}='uvx --from ${NOTEBOOKLM_PACKAGE} notebooklm'`
    ].join("\n"));
  }
  return [resolved];
}
|
|
268
|
+
/**
 * Resolves the argv prefix for repomix.
 * Order: test hook, then `repomix` on PATH, then `npx repomix` when npx exists.
 * @returns {string[]}
 * @throws {MemdexError} When neither repomix nor npx is on PATH.
 */
function repomixCmd() {
  if (hooks.repomixCmd) {
    return hooks.repomixCmd();
  }
  const direct = which("repomix");
  if (direct) {
    return [direct];
  }
  if (!which("npx")) {
    die("required tool not found on PATH: repomix or npx");
  }
  return ["npx", "repomix"];
}
|
|
278
|
+
/**
 * Hashes raw data with SHA-256.
 * @param {Buffer|string|Uint8Array} data
 * @returns {string} Hex digest prefixed with "sha256:".
 */
function sha256Bytes(data) {
  const hasher = createHash("sha256");
  hasher.update(data);
  return `sha256:${hasher.digest("hex")}`;
}
|
|
281
|
+
/**
 * Hashes a string's UTF-8 encoding with SHA-256.
 * @param {string} data
 * @returns {string} Same "sha256:<hex>" format as sha256Bytes.
 */
function sha256Text(data) {
  const encoded = Buffer.from(data, "utf8");
  return sha256Bytes(encoded);
}
|
|
284
|
+
/**
 * Hashes a file's full contents with SHA-256. The file is read synchronously.
 * @param {string} path
 * @returns {string} Same "sha256:<hex>" format as sha256Bytes.
 */
function sha256File(path) {
  const contents = readFileSync(path);
  return sha256Bytes(contents);
}
|
|
287
|
+
/**
 * Deletes a file, treating "file does not exist" as success.
 * @param {string} path
 * @throws Any unlink error other than ENOENT.
 */
function removeFileQuiet(path) {
  try {
    unlinkSync(path);
  } catch (error) {
    if (error?.code === "ENOENT") {
      return;
    }
    throw error;
  }
}
|
|
295
|
+
/**
 * Writes `value` as 2-space-indented JSON with a trailing newline, creating
 * parent directories first.
 * @param {string} path
 * @param {*} value
 */
function writeJson(path, value) {
  const parent = dirname(path);
  mkdirSync(parent, { recursive: true });
  const serialized = `${JSON.stringify(value, null, 2)}\n`;
  writeFileSync(path, serialized);
}
|
|
300
|
+
/**
 * Reads and parses a UTF-8 JSON file.
 * @param {string} path
 * @returns {*} Parsed value.
 * @throws On read failure or invalid JSON.
 */
function readJson(path) {
  const text = readFileSync(path, "utf8");
  return JSON.parse(text);
}
|
|
303
|
+
/**
 * Default include specs for a bundle: common source directories followed by
 * well-known top-level files.
 * @returns {string[]} A fresh array on every call.
 */
function defaultInclude() {
  const directories = ["src", "crates", "packages", "apps", "bins", "docs", "scripts", "tests", "xtask"];
  const files = ["AGENTS.md", "CLAUDE.md", "README.md", "Cargo.toml", "package.json", "justfile"];
  return [...directories, ...files];
}
|
|
306
|
+
/**
 * Default chunk groups: each has an `id` and the include globs assigned to it.
 * @returns {{id: string, include: string[]}[]} A fresh array on every call.
 */
function defaultGroups() {
  const table = [
    ["docs", ["AGENTS.md", "CLAUDE.md", "README.md", "docs/**"]],
    ["apps", ["apps/**"]],
    ["packages", ["packages/**"]],
    ["src", ["src/**", "crates/**", "bins/**", "xtask/**"]],
    ["tests", ["tests/**", "testdata/**"]],
    ["scripts", ["scripts/**"]]
  ];
  return table.map(([id, include]) => ({ id, include }));
}
|
|
316
|
+
/**
 * Converts text to a lowercase slug: runs of characters outside
 * `a-z 0-9 . _ -` become a single "-", and one leading/trailing "-" is removed.
 * @param {string} value
 * @returns {string} The slug, or "repo" when nothing is left.
 */
function slugify(value) {
  const lowered = value.trim().toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9._-]+/g, "-").replace(/-+/g, "-");
  const trimmed = dashed.replace(/^-|-$/g, "");
  return trimmed === "" ? "repo" : trimmed;
}
|
|
320
|
+
/**
 * Builds the default notebook title, "<prefix>:<project>".
 * @param {string} projectName
 * @param {string} [titlePrefix] Defaults to DEFAULT_NOTEBOOK_TITLE_PREFIX.
 * @returns {string}
 */
function defaultNotebookTitle(projectName, titlePrefix = DEFAULT_NOTEBOOK_TITLE_PREFIX) {
  const title = `${titlePrefix}:${projectName}`;
  return title;
}
|
|
323
|
+
/**
 * Default prefix for uploaded source titles.
 * @returns {string}
 */
function defaultShortSourceTitlePrefix() {
  const prefix = "memdex";
  return prefix;
}
|
|
326
|
+
/**
 * Builds the default configuration object for a repository.
 *
 * @param {string} repo Repository path; its basename is the default project name.
 * @param {string} [notebookId=""] NotebookLM notebook id to store.
 * @param {{projectName?: string, notebookTitlePrefix?: string, notebookTitle?: string}} [opts]
 *   Overrides for the project name and notebook title fields.
 * @returns {object} A fresh config object.
 */
function defaultConfig(repo, notebookId = "", opts = {}) {
  const project = opts.projectName || basename(repo);
  const prefix = opts.notebookTitlePrefix || DEFAULT_NOTEBOOK_TITLE_PREFIX;

  // Every never-upload pattern is listed twice: once as written (repo root)
  // and once prefixed with "**/" (any depth).
  const blockedDirs = [".git", "node_modules", "target", "dist", "build", "coverage", ".next", ".generated", "public"];
  const blockedExtensions = ["png", "jpg", "jpeg", "gif", "webp", "svg", "ico", "otf", "ttf", "woff", "woff2", "mp4", "mov", "zip", "tar", "gz"];
  const neverUpload = [
    ".env*",
    ...blockedDirs.map((dir) => `${dir}/**`),
    ...blockedExtensions.map((ext) => `*.${ext}`)
  ].flatMap((pattern) => [pattern, `**/${pattern}`]);

  const notebooklm = {
    notebook_id: notebookId,
    notebook_title_prefix: prefix,
    notebook_title: opts.notebookTitle || defaultNotebookTitle(project, prefix),
    source_title_prefix: defaultShortSourceTitlePrefix(),
    wait_after_upload: true,
    upload_parallelism: 4,
    wait_parallelism: 8,
    delete_parallelism: 4
  };
  const bundle = {
    tool: "repomix",
    mode: "chunked",
    include: defaultInclude(),
    output: `${CONFIG_DIR2}/cache/{prefix}-{timestamp}.txt`,
    style: "",
    compress: false,
    target_chunk_bytes: 524288,
    max_chunk_bytes: 900000,
    source_title_template: "{prefix}--{set}--{group}--{chunk}--{hash}.md",
    groups: defaultGroups(),
    default_group: { enabled: true, id: "misc" }
  };
  const refresh = {
    auto: true,
    mode: "replace",
    check_ttl_seconds: 300,
    min_upload_interval_seconds: 900,
    max_staleness_seconds: 86400,
    keep_previous_sources: 0,
    delete_previous_after_success: true
  };
  const safety = {
    require_user_approval_first_upload: true,
    never_upload: neverUpload
  };
  return {
    version: 1,
    project: { name: project },
    provider: "notebooklm",
    notebooklm,
    bundle,
    refresh,
    safety,
    retrieval: { line_numbers_require_local_verify: true, max_local_matches: 80 }
  };
}
|
|
425
|
+
/**
 * Locates the config file for a repo. Checks the memdex config directory and
 * then ".notebooklm", trying config.json, config.yaml and config.yml in each.
 * @param {string} repo
 * @returns {string} First existing candidate, or the default JSON path when
 *   none exists.
 */
function configPath(repo) {
  const directories = [CONFIG_DIR2, ".notebooklm"];
  const fileNames = [CONFIG_JSON, "config.yaml", "config.yml"];
  for (const directory of directories) {
    for (const fileName of fileNames) {
      const candidate = join(repo, directory, fileName);
      if (existsSync(candidate)) {
        return candidate;
      }
    }
  }
  return join(repo, CONFIG_DIR2, CONFIG_JSON);
}
|
|
436
|
+
/**
 * Loads the repo's config file (JSON when the path ends in ".json", otherwise YAML).
 * @param {string} repo
 * @param {string} [command] Command name mentioned in the "not initialized" error.
 * @returns {[object, string]} The parsed config (`{}` when the file parses to a
 *   falsy value) and the path it was read from.
 * @throws {MemdexError} When no config file exists.
 */
function loadConfig(repo, command = "") {
  const path = configPath(repo);
  if (!existsSync(path)) {
    die(missingConfigMessage(repo, path, command));
  }
  const text = readFileSync(path, "utf8");
  let data;
  if (path.endsWith(".json")) {
    data = JSON.parse(text);
  } else {
    data = YAML.parse(text);
  }
  return [data || {}, path];
}
|
|
444
|
+
/**
 * Loads the local state file that sits next to the config file.
 * @param {string} configFile
 * @returns {[object, string]} The state (`{ sources: [] }` when the file is
 *   missing) and the state file path.
 */
function loadState(configFile) {
  const statePath = join(dirname(configFile), STATE_JSON);
  const state = existsSync(statePath) ? readJson(statePath) : { sources: [] };
  return [state, statePath];
}
|
|
450
|
+
/**
 * Returns the normalized bundle include specs from config, falling back to the
 * defaults. Each spec is stringified, trimmed and stripped of one leading and
 * one trailing "/"; empty results are dropped.
 * @param {object} config
 * @returns {string[]}
 */
function includeSpecs(config) {
  const configured = config.bundle?.include || defaultInclude();
  return configured.flatMap((item) => {
    const spec = String(item).trim().replace(/^\/|\/$/g, "");
    return spec ? [spec] : [];
  });
}
|
|
454
|
+
/**
 * Returns a group's normalized include specs (stringified, trimmed, one
 * leading/trailing "/" removed, empties dropped).
 * @param {{include?: Array}} group
 * @returns {string[]}
 */
function groupSpecs(group) {
  const configured = group.include || [];
  return configured.flatMap((item) => {
    const spec = String(item).trim().replace(/^\/|\/$/g, "");
    return spec ? [spec] : [];
  });
}
|
|
457
|
+
/**
 * Returns the built-in never-upload patterns followed by any configured under
 * `safety.never_upload`. Entries are stringified and trimmed; empties are dropped.
 * @param {object} config
 * @returns {string[]}
 */
function neverUploadSpecs(config) {
  const builtIn = defaultConfig(process.cwd()).safety.never_upload;
  const custom = config.safety?.never_upload || [];
  return [...builtIn, ...custom].flatMap((item) => {
    const spec = String(item).trim();
    return spec ? [spec] : [];
  });
}
|
|
461
|
+
/**
 * Compiles a glob spec to an anchored RegExp.
 * "**" matches anything (including "/"), "*" matches within one path segment,
 * "?" matches one non-"/" character, and everything else is literal.
 * A leading "./" on the spec is ignored.
 * @param {string} spec
 * @returns {RegExp}
 */
function globRegex(spec) {
  const pattern = spec.trim().replace(/^\.\//, "");
  const pieces = [];
  let i = 0;
  while (i < pattern.length) {
    const ch = pattern[i];
    if (ch === "*") {
      if (pattern[i + 1] === "*") {
        pieces.push(".*");
        i += 2; // consume both stars
        continue;
      }
      pieces.push("[^/]*");
    } else if (ch === "?") {
      pieces.push("[^/]");
    } else {
      // Escape regex metacharacters so the character matches literally.
      pieces.push(ch.replace(/[.+^${}()|[\]\\]/g, "\\$&"));
    }
    i += 1;
  }
  return new RegExp(`^${pieces.join("")}$`);
}
|
|
480
|
+
/**
 * Tests whether a repo-relative path matches an include/ignore spec.
 * A spec matches when it is "." or "*", equals the path, is a directory prefix
 * of the path, or matches as a glob. A leading "./" is ignored on both sides.
 * @param {string} path
 * @param {string} spec
 * @returns {boolean} Always `false` for an empty spec.
 */
function pathMatchesSpec(path, spec) {
  const stripDotSlash = (text) => text.trim().replace(/^\.\//, "");
  const clean = stripDotSlash(path);
  const pattern = stripDotSlash(spec);
  if (!pattern) {
    return false;
  }
  if (pattern === "." || pattern === "*") {
    return true;
  }
  const directoryPrefix = `${pattern.replace(/\/$/, "")}/`;
  if (clean === pattern || clean.startsWith(directoryPrefix)) {
    return true;
  }
  const matcher = globRegex(pattern);
  return matcher.test(clean) || matcher.test(`./${clean}`);
}
|
|
491
|
+
/**
 * Reports whether `path` matches at least one include spec.
 * @param {string} path
 * @param {string[]} includes
 * @returns {boolean}
 */
function pathIsIncluded(path, includes) {
  const matchesPath = (spec) => pathMatchesSpec(path, spec);
  return includes.some(matchesPath);
}
|
|
494
|
+
/**
 * Reports whether `path` matches at least one ignore spec.
 * @param {string} path
 * @param {string[]} ignores
 * @returns {boolean}
 */
function pathIsIgnored(path, ignores) {
  const matchesPath = (spec) => pathMatchesSpec(path, spec);
  return ignores.some(matchesPath);
}
|
|
497
|
+
/**
 * Returns the configured bundle mode as a string.
 * @param {object} config
 * @returns {string} "chunked" when `bundle.mode` is missing or falsy.
 */
function bundleMode(config) {
  const configured = config.bundle?.mode;
  return String(configured || "chunked");
}
|
|
500
|
+
/**
 * Parses a byte size. Integers pass through unchanged; strings may carry a
 * case-insensitive unit of b, kb/kib (x1024) or mb/mib (x1024^2).
 * @param {number|string|null|undefined} value
 * @param {*} fallback Returned when `value` cannot be parsed.
 * @returns {number|*}
 */
function parseSizeBytes(value, fallback) {
  if (Number.isInteger(value)) {
    return value;
  }
  const text = String(value || "").trim().toLowerCase();
  const match = /^(\d+)(?:\s*(b|kb|kib|mb|mib))?$/.exec(text);
  if (match === null) {
    return fallback;
  }
  const multipliers = { b: 1, kb: 1024, kib: 1024, mb: 1024 * 1024, mib: 1024 * 1024 };
  return Number(match[1]) * multipliers[match[2] || "b"];
}
|
|
514
|
+
/**
 * Parses an integer setting and clamps it to [minimum, maximum].
 * @param {*} value Parsed with radix 10 after string conversion.
 * @param {number} fallback Used when `value` has no leading integer.
 * @param {number} [minimum=1]
 * @param {number} [maximum=32]
 * @returns {number}
 */
function positiveInt(value, fallback, minimum = 1, maximum = 32) {
  const parsed = Number.parseInt(String(value ?? ""), 10);
  let candidate = fallback;
  if (Number.isFinite(parsed)) {
    candidate = parsed;
  }
  const capped = Math.min(maximum, candidate);
  return Math.max(minimum, capped);
}
|
|
519
|
+
/**
 * Seconds elapsed since a timestamp string.
 * @param {string|null|undefined} value
 * @returns {number|null} `null` when `value` cannot be parsed.
 */
function secondsSince(value) {
  const then = parseIso(value);
  if (!then) {
    return null;
  }
  return (Date.now() - then.getTime()) / 1000;
}
|
|
523
|
+
/**
 * Converts platform path separators to "/".
 * @param {string} path
 * @returns {string} The input unchanged when the platform separator is already "/".
 */
function posixPath(path) {
  if (sep === "/") {
    return path;
  }
  return path.replaceAll(sep, "/");
}
|
|
526
|
+
/**
 * Prints a result to stdout.
 * With `asJson`, prints pretty JSON. Otherwise a non-array object prints one
 * "key: value" line per entry (object values as compact JSON), and anything
 * else is handed to console.log as is.
 * @param {*} data
 * @param {boolean} asJson
 */
function output(data, asJson) {
  if (asJson) {
    console.log(JSON.stringify(data, null, 2));
    return;
  }
  const isRecord = data && typeof data === "object" && !Array.isArray(data);
  if (!isRecord) {
    console.log(data);
    return;
  }
  Object.entries(data).forEach(([key, value]) => {
    const rendered = typeof value === "object" ? JSON.stringify(value) : value;
    console.log(`${key}: ${rendered}`);
  });
}
|
|
539
|
+
/**
 * Reports whether a path exists.
 *
 * The previous body awaited `access(path)`, but no `access` binding is among
 * the imports visible for this module, so the call threw a ReferenceError that
 * the bare `catch` swallowed and the function always resolved `false`. The
 * check now uses `existsSync`, which is already imported; the async signature
 * is kept for callers that await the result.
 *
 * @param {string} path
 * @returns {Promise<boolean>}
 */
async function fileExists(path) {
  return existsSync(path);
}
|
|
547
|
+
/**
 * Returns a file's size in bytes.
 * @param {string} path
 * @returns {number}
 * @throws When the path cannot be stat'ed.
 */
function fileSize(path) {
  const { size } = statSync(path);
  return size;
}
|
|
550
|
+
// Module-level bindings for src/common.ts. They stay `var` so the function
// declarations above can reference them; the three without an initializer are
// assigned inside init_common().
var MemdexError;
var CONFIG_DIR2 = ".memdex";
var CONFIG_JSON = "config.json";
var STATE_JSON = "state.local.json";
var PENDING_UPLOAD_JSON = "pending-upload.local.json";
var DEFAULT_NOTEBOOK_TITLE_PREFIX = "memdex";
var SCRIPT_CMD_ENV = "MEMDEX_CMD";
var LEGACY_SCRIPT_CMD_ENV = "CODEBASE_RETRIEVE_CMD";
var NOTEBOOKLM_PACKAGE = "git+https://github.com/teng-lin/notebooklm-py.git";
var NOTEBOOKLM_BIN_ENV = "NOTEBOOKLM_BIN";
var packageRoot;
var hooks;
|
|
551
|
+
// One-time initializer for src/common.ts: defines the MemdexError class,
// sets packageRoot to the parent of this file's directory, and creates the
// empty test-hook registry.
var init_common = __esm(() => {
  MemdexError = class MemdexError extends Error {
    // Numeric code passed by the thrower (defaults to 2).
    code;
    constructor(message, code = 2) {
      super(message);
      this.code = code;
      this.name = "MemdexError";
    }
  };
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  packageRoot = resolve(moduleDir, "..");
  hooks = {};
});
|
|
563
|
+
|
|
564
|
+
// src/cli.ts
|
|
565
|
+
init_common();
|
|
566
|
+
import { Command } from "commander";
|
|
567
|
+
|
|
568
|
+
// src/commands.ts
|
|
569
|
+
init_common();
|
|
570
|
+
import { existsSync as existsSync6, writeFileSync as writeFileSync2 } from "node:fs";
|
|
571
|
+
import { basename as basename2, isAbsolute, join as join5, resolve as resolve2 } from "node:path";
|
|
572
|
+
|
|
573
|
+
// src/chunking.ts
|
|
574
|
+
init_common();
|
|
575
|
+
import { existsSync as existsSync3, mkdirSync as mkdirSync3, readFileSync as readFileSync3, readdirSync, statSync as statSync2 } from "node:fs";
|
|
576
|
+
import { createHash as createHash3 } from "node:crypto";
|
|
577
|
+
import { dirname as dirname3, join as join3, relative } from "node:path";
|
|
578
|
+
|
|
579
|
+
// src/notebooklm.ts
|
|
580
|
+
init_common();
|
|
581
|
+
import { copyFileSync, existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2 } from "node:fs";
|
|
582
|
+
import { createHash as createHash2 } from "node:crypto";
|
|
583
|
+
import { dirname as dirname2, join as join2 } from "node:path";
|
|
584
|
+
/**
 * Returns the configured NotebookLM notebook id as a string.
 * @param {object} config
 * @returns {string}
 * @throws {MemdexError} When `notebooklm.notebook_id` is missing or empty.
 */
function notebookId(config) {
  const configured = config.notebooklm?.notebook_id;
  if (!configured) {
    die("notebooklm.notebook_id missing in config");
  }
  return String(configured);
}
|
|
590
|
+
/**
 * Returns the notebook title: the configured `notebooklm.notebook_title`, or
 * the default title built from the project name and title prefix.
 * @param {object} config
 * @returns {string}
 */
function notebookTitle(config) {
  const project = String(config.project?.name || "repo");
  const prefix = String(config.notebooklm?.notebook_title_prefix || "memdex");
  const explicit = config.notebooklm?.notebook_title;
  if (explicit) {
    return String(explicit);
  }
  return String(defaultNotebookTitle(project, prefix));
}
|
|
595
|
+
/**
 * Extracts a notebook id/title from CLI JSON output. Looks at the top-level
 * value, then its `notebook`, `data` and `result` fields, and uses the first
 * object that carries `id`, `notebook_id` or `notebookId`.
 * @param {string} stdout Raw JSON text.
 * @param {string} fallbackTitle Used when the object has no `title`/`name`.
 * @returns {{id: string, title: string}|null} `null` on invalid JSON or no id.
 */
function parseNotebookJson(stdout, fallbackTitle) {
  let payload;
  try {
    payload = JSON.parse(stdout);
  } catch {
    return null;
  }
  const candidates = [payload, payload?.notebook, payload?.data, payload?.result];
  for (const candidate of candidates) {
    const isObject = candidate && typeof candidate === "object";
    if (!isObject) {
      continue;
    }
    const id = candidate.id || candidate.notebook_id || candidate.notebookId;
    if (!id) {
      continue;
    }
    const title = candidate.title || candidate.name || fallbackTitle;
    return { id: String(id), title: String(title) };
  }
  return null;
}
|
|
613
|
+
/**
 * Lists notebooks by running `<notebooklm> list --json` with a 120 s timeout.
 * Accepts either `{ notebooks: [...] }` or a bare array.
 * @param {string} repo Working directory for the command.
 * @returns {Promise<object[]>} Only object entries are kept.
 * @throws {MemdexError} On a non-zero exit or unparseable output.
 */
async function listNotebooks(repo) {
  const argv = [...notebooklmCmd(), "list", "--json"];
  const result = await runCommand(argv, repo, { timeout: 120 });
  if (result.returncode !== 0) {
    die(`notebooklm list failed:\n${result.stdout}\n${result.stderr}`);
  }
  try {
    const data = JSON.parse(result.stdout);
    const notebooks = data.notebooks || (Array.isArray(data) ? data : []);
    return notebooks.filter((entry) => entry && typeof entry === "object");
  } catch (error) {
    die(`notebooklm list returned invalid JSON: ${error.message}`);
  }
}
|
|
627
|
+
/**
 * Finds the single notebook whose title equals `title` exactly.
 * @param {string} repo
 * @param {string} title
 * @returns {Promise<{id: string, title: string}|null>} `null` when none matches.
 * @throws {MemdexError} When more than one notebook has that title.
 */
async function findNotebookByTitle(repo, title) {
  const notebooks = await listNotebooks(repo);
  const matches = notebooks.filter((entry) => String(entry.title || "") === title);
  if (matches.length > 1) {
    const ids = matches.map((entry) => entry.id || "").join(", ");
    die(`multiple notebooks found with title ${JSON.stringify(title)}: ${ids}`);
  }
  if (matches.length === 0) {
    return null;
  }
  const [only] = matches;
  return { id: String(only.id || ""), title: String(only.title || title) };
}
|
|
635
|
+
/**
 * Creates a notebook via `<notebooklm> create <title> --json` (180 s timeout)
 * and resolves its id, first from the command output and then by looking the
 * title up in the notebook list.
 * @param {string} repo
 * @param {string} title
 * @returns {Promise<{id: string, title: string}>}
 * @throws {MemdexError} When the command fails or no id can be resolved.
 */
async function createNotebook(repo, title) {
  const argv = [...notebooklmCmd(), "create", title, "--json"];
  const result = await runCommand(argv, repo, { timeout: 180 });
  if (result.returncode !== 0) {
    die(`notebooklm create failed:\n${result.stdout}\n${result.stderr}`);
  }
  const resolved = parseNotebookJson(result.stdout, title) || await findNotebookByTitle(repo, title);
  if (resolved) {
    return resolved;
  }
  die(`created notebook but could not resolve notebook id for title ${JSON.stringify(title)}`);
}
|
|
649
|
+
/**
 * Lists a notebook's sources via `<notebooklm> source list -n <id> --json`
 * (120 s timeout). Best effort: a failed command or unparseable output yields
 * an empty list rather than an error.
 * @param {string} repo
 * @param {string} nbid Notebook id.
 * @returns {Promise<object[]>} Only object entries are kept.
 */
async function listSources(repo, nbid) {
  const argv = [...notebooklmCmd(), "source", "list", "-n", nbid, "--json"];
  const result = await runCommand(argv, repo, { timeout: 120 });
  if (result.returncode !== 0) {
    return [];
  }
  try {
    const data = JSON.parse(result.stdout);
    const sources = data.sources || (Array.isArray(data) ? data : []);
    return sources.filter((entry) => entry && typeof entry === "object");
  } catch {
    return [];
  }
}
|
|
661
|
+
/**
 * Finds the first source in a notebook that has an id and exactly this title.
 * @param {string} repo
 * @param {string} nbid Notebook id.
 * @param {string} title
 * @returns {Promise<{id: string, title: string}|null>}
 */
async function findSourceByTitle(repo, nbid, title) {
  const sources = await listSources(repo, nbid);
  const hit = sources.find((src) => String(src.title || "") === title && src.id);
  return hit ? { id: String(hit.id), title } : null;
}
|
|
668
|
+
/**
 * Extracts a source id/title from `source add` JSON output. Looks at the
 * top-level value, then its `source`, `data` and `result` fields, and uses the
 * first object that carries `id`, `source_id` or `sourceId`.
 * @param {string} stdout Raw JSON text.
 * @param {string} title Fallback title when the object has no `title`/`name`.
 * @returns {{id: string, title: string}|null} `null` on invalid JSON or no id.
 */
function sourceFromAddJson(stdout, title) {
  let payload;
  try {
    payload = JSON.parse(stdout);
  } catch {
    return null;
  }
  const candidates = [payload, payload?.source, payload?.data, payload?.result];
  for (const candidate of candidates) {
    const isObject = candidate && typeof candidate === "object";
    if (!isObject) {
      continue;
    }
    const id = candidate.id || candidate.source_id || candidate.sourceId;
    if (!id) {
      continue;
    }
    return { id: String(id), title: String(candidate.title || candidate.name || title) };
  }
  return null;
}
|
|
684
|
+
/**
 * Uploads a local text file to the configured notebook as a text source,
 * piping the content through stdin of `<notebooklm> source add -` (600 s timeout).
 *
 * The file is decoded strictly: `readFileSync(path, "utf8")` never throws on
 * malformed bytes (it substitutes U+FFFD), so the old "not valid UTF-8" guard
 * could only fire on I/O errors and corrupted text was uploaded silently.
 * Decoding with a fatal TextDecoder makes the guard do what its message says.
 * `ignoreBOM: true` keeps a leading BOM in the content, as before.
 *
 * @param {string} repo Working directory for the command.
 * @param {object} config Must contain `notebooklm.notebook_id`.
 * @param {string} path File to upload.
 * @param {string} title Source title.
 * @returns {Promise<{id: string, title: string}>} The created source.
 * @throws {MemdexError} When the file cannot be read or is not valid UTF-8, the
 *   upload fails, or the new source id cannot be resolved.
 */
async function uploadTextSourceFromFile(repo, config, path, title) {
  const nbid = notebookId(config);
  let content = "";
  try {
    const decoder = new TextDecoder("utf-8", { fatal: true, ignoreBOM: true });
    content = decoder.decode(readFileSync2(path));
  } catch (error) {
    die(`source is not valid UTF-8 text for ${title}: ${error.message}`);
  }
  const result = await runCommand([...notebooklmCmd(), "source", "add", "-", "-n", nbid, "--type", "text", "--title", title, "--json"], repo, { inputText: content, timeout: 600 });
  if (result.returncode !== 0)
    die(`notebooklm source add failed for ${title}:\n${result.stdout}\n${result.stderr}`);
  // Prefer the id in the command output; otherwise look the source up by title.
  const source = sourceFromAddJson(result.stdout, title) || await findSourceByTitle(repo, nbid, title);
  if (!source?.id)
    die(`uploaded source but could not resolve source id for ${title}`);
  return source;
}
|
|
702
|
+
/**
 * Runs `<notebooklm> source wait <sourceId> -n <nbid>` with a 600 s timeout.
 * @param {string} repo
 * @param {string} nbid Notebook id.
 * @param {string} sourceId
 * @returns {Promise<boolean>} `true` when the command exits with code 0.
 */
async function waitSourceReady(repo, nbid, sourceId) {
  const argv = [...notebooklmCmd(), "source", "wait", sourceId, "-n", nbid];
  const { returncode } = await runCommand(argv, repo, { timeout: 600 });
  return returncode === 0;
}
|
|
706
|
+
/**
 * Returns a record's content hash: the first truthy value among
 * `contentSha256`, `chunkSha256` and `bundleSha256`.
 * @param {object} value
 * @returns {string} Empty string when none is set.
 */
function sourceContentSha(value) {
  const sha = value.contentSha256 || value.chunkSha256 || value.bundleSha256 || "";
  return String(sha);
}
|
|
709
|
+
/**
 * Returns a record's file-list hash: `fileListSha256`, else `sha256`.
 * @param {object} value
 * @returns {string} Empty string when neither is set.
 */
function sourceFileListSha(value) {
  const sha = value.fileListSha256 || value.sha256 || "";
  return String(sha);
}
|
|
712
|
+
/**
 * Builds the "<group>/<chunk>" key for a chunk record.
 * @param {{group: *, chunk: *}} value
 * @returns {string}
 */
function chunkKey(value) {
  const { group, chunk } = value;
  return `${group}/${chunk}`;
}
|
|
715
|
+
/**
 * Merges an uploaded source record with the metadata of the bundle chunk it
 * holds. Reused sources get `reused`/`reusedAt`; new uploads get `uploadedAt`.
 * @param {object} source Base record (for example `{ id, title }`).
 * @param {object} bundle Chunk descriptor providing group, chunk, hashes and files.
 * @param {{status: *, reused?: boolean}} opts
 * @returns {object} A new record; `files` is a copy of the bundle's list.
 */
function sourceWithChunkMetadata(source, bundle, opts) {
  const stamp = opts.reused ? { reused: true, reusedAt: iso() } : { uploadedAt: iso() };
  return {
    ...source,
    group: bundle.group,
    chunk: bundle.chunk,
    chunkKey: chunkKey(bundle),
    chunkSha256: bundle.bundleSha256,
    contentSha256: bundle.contentSha256 || bundle.bundleSha256,
    fileListSha256: bundle.fileListSha256 || bundle.sha256,
    fileCount: bundle.fileCount,
    files: [...bundle.files || []],
    status: opts.status,
    ...stamp
  };
}
|
|
734
|
+
/**
 * Returns the active source records from state: `activeSourceSet.sources`
 * when it is an array, otherwise the top-level `sources` array.
 * @param {object} state
 * @returns {object[]} Only object entries; empty when neither list exists.
 */
function activeSources(state) {
  const isRecord = (entry) => entry && typeof entry === "object";
  const activeSet = state.activeSourceSet;
  if (activeSet && typeof activeSet === "object" && Array.isArray(activeSet.sources)) {
    return activeSet.sources.filter(isRecord);
  }
  if (!Array.isArray(state.sources)) {
    return [];
  }
  return state.sources.filter(isRecord);
}
|
|
740
|
+
/**
 * Returns the ids (as strings) of active sources whose status is "ready".
 * A source with no status is treated as ready; sources without an id are skipped.
 */
function activeReadySourceIds(state) {
  const ids = [];
  for (const src of activeSources(state)) {
    const isReady = String(src.status || "ready") === "ready";
    if (src.id && isReady) {
      ids.push(String(src.id));
    }
  }
  return ids;
}
|
|
743
|
+
/**
 * Reads `state.cleanupPendingSourceIds` as a de-duplicated list of non-empty
 * strings, keeping first-seen order. Returns [] when the field is not an array.
 */
function cleanupPendingSourceIds(state) {
  const queued = state.cleanupPendingSourceIds;
  if (!Array.isArray(queued)) {
    return [];
  }
  const ids = queued.map((value) => String(value)).filter((sid) => sid !== "");
  return Array.from(new Set(ids));
}
|
|
746
|
+
/**
 * Merges `sourceIds` into the cleanup queue stored on `state`.
 * Ids that are empty or belong to a currently active source are left out.
 * The `cleanupPendingSourceIds` field is removed when the queue ends up empty.
 *
 * @returns The resulting queue (possibly empty).
 */
function queueCleanupSourceIds(state, sourceIds) {
  const activeIds = new Set();
  for (const src of activeSources(state)) {
    const sid = String(src.id || "");
    if (sid) {
      activeIds.add(sid);
    }
  }
  const candidates = new Set([...cleanupPendingSourceIds(state), ...sourceIds]);
  const queue = [];
  for (const sid of candidates) {
    if (sid && !activeIds.has(sid)) {
      queue.push(sid);
    }
  }
  if (queue.length > 0) {
    state.cleanupPendingSourceIds = queue;
  } else {
    delete state.cleanupPendingSourceIds;
  }
  return queue;
}
|
|
755
|
+
/** Location of the pending-upload journal file inside the repo's config directory. */
function pendingUploadPath(repo) {
  const segments = [repo, CONFIG_DIR2, PENDING_UPLOAD_JSON];
  return join2(...segments);
}
|
|
758
|
+
/** Removes the pending-upload journal for `repo` via `removeFileQuiet`. */
function clearPendingUpload(repo) {
  const journalFile = pendingUploadPath(repo);
  removeFileQuiet(journalFile);
}
|
|
761
|
+
/**
 * Loads the pending-upload journal for `repo`.
 *
 * @returns `null` when no journal file exists; the parsed object when the file
 *   holds a JSON object; `{ sources: [] }` when the file is unreadable,
 *   unparsable, or parses to a non-object value.
 */
function readPendingUpload(repo) {
  const journalFile = pendingUploadPath(repo);
  if (!existsSync2(journalFile)) {
    return null;
  }
  const emptyJournal = () => ({ sources: [] });
  let parsed;
  try {
    parsed = JSON.parse(readFileSync2(journalFile, "utf8"));
  } catch {
    // A corrupt journal is treated as "nothing recorded" rather than fatal.
    return emptyJournal();
  }
  if (parsed && typeof parsed === "object") {
    return parsed;
  }
  return emptyJournal();
}
|
|
772
|
+
/** Persists `value` as the pending-upload journal for `repo`. */
function writePendingUpload(repo, value) {
  const journalFile = pendingUploadPath(repo);
  writeJson(journalFile, value);
}
|
|
775
|
+
/**
 * Runs `fn` over `items` with at most `workers` calls in flight.
 *
 * @param items Array of work items.
 * @param workers Requested concurrency. Values below 1 (or NaN) are treated as
 *   1 so a bad setting can never silently skip all work.
 * @param fn Async callback invoked once per item.
 * @param progress Optional `(done, total)` callback fired after each success.
 * @returns Results in completion order (NOT input order); callers that need
 *   input order must carry an index in the result and sort.
 * @throws The first rejection from `fn`. After a rejection no further items
 *   are dispatched; calls already in flight still run to completion.
 */
async function runPool(items, workers, fn, progress) {
  const results = [];
  if (!items.length) {
    return results;
  }
  const requested = Number(workers);
  const poolSize = Math.max(1, Math.min(items.length, Number.isNaN(requested) ? 1 : Math.floor(requested)));
  let next = 0;
  let done = 0;
  let failed = false;
  const worker = async () => {
    while (!failed && next < items.length) {
      const current = items[next++];
      let result;
      try {
        result = await fn(current);
      } catch (error) {
        // Stop the sibling workers from picking up more items.
        failed = true;
        throw error;
      }
      results.push(result);
      done += 1;
      progress?.(done, items.length);
    }
  };
  await Promise.all(Array.from({ length: poolSize }, () => worker()));
  return results;
}
|
|
791
|
+
/**
 * Deletes the given sources from a notebook through the CLI, several at a time.
 *
 * @param repo Working directory handed to `runCommand`.
 * @param nbid Notebook id passed via `-n`.
 * @param sourceIds Ids to delete; falsy entries and duplicates are ignored.
 * @param opts `parallelism` caps concurrent deletions (minimum 1).
 * @returns Ids whose delete command exited with code 0. Failures are logged
 *   as warnings on stderr and omitted from the result.
 */
async function deleteSourceIdsParallel(repo, nbid, sourceIds, opts) {
  const ids = Array.from(new Set(sourceIds.filter(Boolean)));
  if (ids.length === 0) {
    return [];
  }
  const poolSize = Math.min(ids.length, Math.max(1, opts.parallelism));
  const removeOne = async (sid) => {
    const argv = [...notebooklmCmd(), "source", "delete", sid, "-n", nbid, "--yes"];
    const { returncode } = await runCommand(argv, repo, { timeout: 120 });
    if (returncode === 0) {
      return sid;
    }
    console.error(`warning: failed to delete source ${sid}`);
    return "";
  };
  const reportProgress = (count, total) => console.error(`cleanup ${count}/${total}`);
  const outcomes = await runPool(ids, poolSize, removeOne, reportProgress);
  return outcomes.filter(Boolean);
}
|
|
805
|
+
/**
 * Retries deletion of source ids queued in `state.cleanupPendingSourceIds`.
 * Ids that match a currently active source are never deleted and are dropped
 * from the queue. The updated state is written to `statePath` whenever the
 * queue was non-empty on entry.
 *
 * @returns Ids deleted during this call.
 */
async function recoverPendingCleanup(repo, config, state, statePath) {
  const pending = cleanupPendingSourceIds(state);
  if (pending.length === 0) {
    return [];
  }
  const activeIds = new Set(activeSources(state).map((src) => String(src.id || "")).filter(Boolean));
  const isInactive = (sid) => !activeIds.has(sid);
  const deleteIds = pending.filter(isInactive);
  if (deleteIds.length === 0) {
    delete state.cleanupPendingSourceIds;
    writeJson(statePath, state);
    return [];
  }
  const nbid = notebookId(config);
  const parallelism = positiveInt(config.notebooklm?.delete_parallelism, 4);
  const deleted = await deleteSourceIdsParallel(repo, nbid, deleteIds, { parallelism });
  const deletedIds = new Set(deleted);
  // Whatever could not be deleted stays queued for the next run.
  const remaining = pending.filter((sid) => !deletedIds.has(sid) && isInactive(sid));
  if (remaining.length > 0) {
    state.cleanupPendingSourceIds = remaining;
  } else {
    delete state.cleanupPendingSourceIds;
  }
  writeJson(statePath, state);
  return deleted;
}
|
|
826
|
+
/**
 * Cleans up after an interrupted upload, using the pending-upload journal.
 * Sources listed in the journal that are not active in `state` are deleted.
 * The journal is cleared once nothing remains, or rewritten with the entries
 * that were not deleted.
 *
 * @returns Ids deleted during this call ([] when there is no usable journal).
 */
async function recoverPendingUpload(repo, config, state = {}) {
  const pending = readPendingUpload(repo);
  if (!pending) {
    return [];
  }
  if (!Array.isArray(pending.sources)) {
    // Malformed journal: nothing trustworthy to act on.
    clearPendingUpload(repo);
    return [];
  }
  const journaled = pending.sources;
  const activeIds = new Set(activeSources(state).map((src) => String(src.id || "")).filter(Boolean));
  const ids = journaled.filter((src) => src?.id).map((src) => String(src.id));
  const allAdopted = ids.length > 0 && activeIds.size > 0 && ids.every((sid) => activeIds.has(sid));
  if (allAdopted) {
    // Every journaled source already made it into the active set.
    clearPendingUpload(repo);
    return [];
  }
  const nbid = String(pending.notebookId || notebookId(config));
  const orphanIds = ids.filter((sid) => !activeIds.has(sid));
  const parallelism = positiveInt(config.notebooklm?.delete_parallelism, 4);
  const deleted = await deleteSourceIdsParallel(repo, nbid, orphanIds, { parallelism });
  const deletedIds = new Set(deleted);
  const remaining = journaled.filter((src) => src?.id && !deletedIds.has(String(src.id)));
  if (remaining.length > 0) {
    writePendingUpload(repo, { ...pending, sources: remaining });
  } else {
    clearPendingUpload(repo);
  }
  return deleted;
}
|
|
850
|
+
/**
 * Records an uploaded source (id and title only) in the in-memory journal and
 * writes the journal to disk straight away.
 */
function appendPendingSource(repo, journal, source) {
  if (!Array.isArray(journal.sources)) {
    journal.sources = [];
  }
  const { id, title } = source;
  journal.sources.push({ id, title });
  writePendingUpload(repo, journal);
}
|
|
855
|
+
/**
 * Finds a previously uploaded source whose content digest matches `bundle`.
 *
 * @param bundle Bundle record to match.
 * @param previous Candidate source records, scanned in order.
 * @param used Set of ids already claimed; the id of the match is added to it.
 * @returns The first ready, unclaimed source with the same content digest, or
 *   `null` when the bundle has no digest or nothing matches.
 */
function findReusableSource(bundle, previous, used) {
  const wanted = sourceContentSha(bundle);
  if (!wanted) {
    return null;
  }
  const idOf = (candidate) => String(candidate.id || "");
  const match = previous.find((candidate) => {
    const sid = idOf(candidate);
    if (!sid || used.has(sid)) {
      return false;
    }
    if (String(candidate.status || "ready") !== "ready") {
      return false;
    }
    return sourceContentSha(candidate) === wanted;
  });
  if (!match) {
    return null;
  }
  used.add(idOf(match));
  return match;
}
|
|
870
|
+
/**
 * Uploads a single chunk file and returns the resulting source record tagged
 * with the chunk's metadata and status "uploaded".
 */
async function uploadOneChunk(repo, config, bundle) {
  const filePath = String(bundle.path);
  const title = String(bundle.title);
  const source = await uploadTextSourceFromFile(repo, config, filePath, title);
  return sourceWithChunkMetadata(source, bundle, { status: "uploaded" });
}
|
|
874
|
+
/**
 * Uploads chunk bundles concurrently, journaling each uploaded source so an
 * interrupted run can be cleaned up later.
 *
 * @param repo Repository root.
 * @param config Loaded configuration (`notebooklm.upload_parallelism` and
 *   `notebooklm.delete_parallelism` are read here, default 4 each).
 * @param bundles Array of `[index, bundle]` pairs.
 * @param opts `setId` is recorded in the journal.
 * @returns `[index, source]` pairs sorted by index.
 * @throws Re-throws the first upload failure after rolling back.
 *
 * Rollback: sources are collected as each upload finishes, so a failure part
 * way through still knows which sources exist remotely. (Previously the list
 * was only filled after the whole pool succeeded, so the rollback deleted
 * nothing and then erased the journal, orphaning the uploaded sources.)
 * Sources whose deletion fails stay in the journal for a later recovery pass.
 */
async function uploadChunksParallel(repo, config, bundles, opts) {
  if (!bundles.length)
    return [];
  const nbid = notebookId(config);
  const journal = { version: 1, setId: opts.setId, notebookId: nbid, startedAt: iso(), sources: [] };
  writePendingUpload(repo, journal);
  const uploaded = [];
  try {
    await runPool(bundles, Math.min(bundles.length, positiveInt(config.notebooklm?.upload_parallelism, 4)), async ([index, bundle]) => {
      const source = await uploadOneChunk(repo, config, bundle);
      // Track before journaling so a journal write failure still rolls this source back.
      uploaded.push([index, source]);
      appendPendingSource(repo, journal, source);
      return [index, source];
    }, (count, total) => console.error(`upload ${count}/${total}`));
    return uploaded.sort((a, b) => a[0] - b[0]);
  } catch (error) {
    const ids = uploaded.map(([, source]) => String(source.id || "")).filter(Boolean);
    const deleted = new Set(await deleteSourceIdsParallel(repo, nbid, ids, { parallelism: positiveInt(config.notebooklm?.delete_parallelism, 4) }));
    const leftovers = uploaded.filter(([, source]) => source.id && !deleted.has(String(source.id))).map(([, source]) => ({ id: source.id, title: source.title }));
    // Keep the shared journal in sync so an upload still in flight appends to
    // the trimmed list rather than resurrecting ids that were just deleted.
    journal.sources = leftovers;
    if (leftovers.length)
      writePendingUpload(repo, journal);
    else
      clearPendingUpload(repo);
    throw error;
  }
}
|
|
895
|
+
/**
 * Waits, concurrently, for each uploaded source to finish processing.
 *
 * @param sources Array of `[index, source]` pairs.
 * @returns The input untouched when it is empty or when
 *   `config.notebooklm.wait_after_upload` is exactly `false`; otherwise new
 *   `[index, source]` pairs with `status: "ready"`, sorted by index.
 * @throws Error when a source has no id or its wait command fails.
 */
async function waitUploadedSourcesParallel(repo, config, sources) {
  const waitingDisabled = config.notebooklm?.wait_after_upload === false;
  if (sources.length === 0 || waitingDisabled) {
    return sources;
  }
  const nbid = notebookId(config);
  const poolSize = Math.min(sources.length, positiveInt(config.notebooklm?.wait_parallelism, 8));
  const awaitOne = async ([index, source]) => {
    const sid = String(source.id || "");
    if (!sid) {
      throw new Error(`missing source id for ${source.title}`);
    }
    const processed = await waitSourceReady(repo, nbid, sid);
    if (!processed) {
      throw new Error(`source processing failed for chunk ${source.title}: ${sid}`);
    }
    return [index, { ...source, status: "ready" }];
  };
  const reportProgress = (count, total) => console.error(`wait ${count}/${total}`);
  const ready = await runPool(sources, poolSize, awaitOne, reportProgress);
  return ready.sort((a, b) => a[0] - b[0]);
}
|
|
909
|
+
/**
 * Publishes a set of chunk bundles and describes the resulting source set.
 *
 * Steps: recover any interrupted upload; reuse previous sources whose content
 * digest matches a bundle; upload the rest; wait for the uploads to be ready
 * (deleting them and clearing the journal if waiting fails); then work out
 * which previous sources are retired.
 *
 * @param state Current state; read for the previously active sources.
 * @param bundles Bundle records, in chunk order.
 * @param opts `setId` becomes the source-set id.
 * @returns The source set. In "replace" mode, unless
 *   `refresh.delete_previous_after_success` is `false`, it also carries
 *   `_retiredSourceIds`: previous ids that are neither reused nor covered by
 *   `refresh.keep_previous_sources`.
 */
async function uploadBundleSet(repo, config, state, bundles, opts) {
  const nbid = notebookId(config);
  await recoverPendingUpload(repo, config, state);
  const previous = activeSources(state);
  const claimed = new Set();
  const slots = new Array(bundles.length).fill(null);
  const toUpload = [];
  bundles.forEach((bundle, index) => {
    const reusable = findReusableSource(bundle, previous, claimed);
    if (!reusable) {
      toUpload.push([index, bundle]);
      return;
    }
    slots[index] = sourceWithChunkMetadata(reusable, bundle, { status: "ready", reused: true });
  });
  const uploaded = await uploadChunksParallel(repo, config, toUpload, opts);
  let ready;
  try {
    ready = await waitUploadedSourcesParallel(repo, config, uploaded);
  } catch (error) {
    // Roll back everything uploaded in this run before surfacing the failure.
    const freshIds = uploaded.map(([, source]) => String(source.id || ""));
    const parallelism = positiveInt(config.notebooklm?.delete_parallelism, 4);
    await deleteSourceIdsParallel(repo, nbid, freshIds, { parallelism });
    clearPendingUpload(repo);
    throw error;
  }
  ready.forEach(([index, source]) => {
    slots[index] = source;
  });
  const sources = slots.filter(Boolean);
  const activeIds = new Set(sources.map((src) => String(src.id || "")).filter(Boolean));
  const previousIds = previous.map((src) => String(src.id || "")).filter(Boolean);
  const keepPrevious = Number(config.refresh?.keep_previous_sources || 0);
  // The last `keepPrevious` previous ids are spared from retirement.
  const keepIds = new Set(keepPrevious > 0 ? previousIds.slice(-keepPrevious) : []);
  const retiredIds = previousIds.filter((sid) => !(activeIds.has(sid) || keepIds.has(sid)));
  const sourceSet = {
    id: opts.setId,
    prefix: String(config.notebooklm?.source_title_prefix || defaultShortSourceTitlePrefix()),
    bundleSetSha256: sourceSetHash(bundles),
    uploadedAt: iso(),
    sources
  };
  const replaceMode = (config.refresh?.mode || "replace") === "replace";
  if (replaceMode && config.refresh?.delete_previous_after_success !== false) {
    sourceSet._retiredSourceIds = retiredIds;
  }
  return sourceSet;
}
|
|
951
|
+
/**
 * Digest identifying a whole bundle set.
 * One line per bundle (`group chunk contentSha fileListSha`), joined with
 * newlines, hashed with SHA-256 and returned as `sha256:<hex>`.
 */
function sourceSetHash(bundles) {
  const lines = bundles.map((bundle) => {
    const fields = [bundle.group, bundle.chunk, sourceContentSha(bundle), sourceFileListSha(bundle)];
    return `${fields[0]} ${fields[1]} ${fields[2]} ${fields[3]}`;
  });
  const hex = createHash2("sha256").update(lines.join("\n")).digest("hex");
  return `sha256:${hex}`;
}
|
|
955
|
+
/**
 * Title prefix for temporary sources: the slugified
 * `notebooklm.temporary_source_title_prefix` when configured, otherwise the
 * regular source title prefix (trimmed) with `tmp` appended.
 */
function tempSourcePrefix(config) {
  const explicit = String(config.notebooklm?.temporary_source_title_prefix || "").trim();
  if (explicit) {
    return slugify(explicit);
  }
  const base = String(config.notebooklm?.source_title_prefix || defaultShortSourceTitlePrefix()).trim();
  return `${base}tmp`;
}
|
|
959
|
+
/**
 * Builds the file-style title for a temporary source:
 * `<prefix>--<setId>--<kind>--<title>--<first 8 digest chars>.md`.
 * A `scheme:` prefix on `opts.contentSha` (e.g. `sha256:`) is stripped first.
 */
function tempSourceTitle(config, opts) {
  const pieces = opts.contentSha.split(":", 2);
  const digest = pieces.at(-1) || opts.contentSha;
  const segments = [
    tempSourcePrefix(config),
    opts.setId,
    slugify(opts.kind),
    slugify(opts.title),
    digest.slice(0, 8)
  ];
  return `${segments[0]}--${segments[1]}--${segments[2]}--${segments[3]}--${segments[4]}.md`;
}
|
|
963
|
+
/**
 * Copies `sourcePath` into the repo's cache directory under the name `title`,
 * creating the directory when needed.
 *
 * @returns Path of the staged copy.
 */
function stageTempSourceFile(repo, title, sourcePath) {
  const destination = join2(repo, CONFIG_DIR2, "cache", title);
  const parent = dirname2(destination);
  mkdirSync2(parent, { recursive: true });
  copyFileSync(sourcePath, destination);
  return destination;
}
|
|
969
|
+
/**
 * Returns the object entries of `state.temporarySourceSets`, or [] when that
 * field is not an array.
 */
function tempSourceSets(state) {
  const recorded = state.temporarySourceSets;
  if (!Array.isArray(recorded)) {
    return [];
  }
  return recorded.filter((item) => Boolean(item) && typeof item === "object");
}
|
|
972
|
+
/**
 * Expiry timestamp for a temporary source created now.
 *
 * @param ttlSeconds Lifetime in seconds.
 * @returns `iso()` of now + ttl when the ttl is positive, otherwise `null`.
 */
function tempSourceExpiresAt(ttlSeconds) {
  if (!(ttlSeconds > 0)) {
    return null;
  }
  const expiry = new Date(Date.now() + ttlSeconds * 1000);
  return iso(expiry);
}
|
|
975
|
+
/**
 * Tells whether a source set's `expiresAt` timestamp is now or in the past.
 * A missing `expiresAt`, or one that cannot be parsed as a date, counts as
 * not expired.
 */
function sourceIsExpired(sourceSet) {
  if (!sourceSet.expiresAt) {
    return false;
  }
  const expiryMs = new Date(String(sourceSet.expiresAt)).getTime();
  return expiryMs <= Date.now();
}
|
|
978
|
+
/** Runs `fn` through `repoLock` for `repo` and resolves with its result. */
async function withRepoLock(repo, fn) {
  const outcome = repoLock(repo, fn);
  return outcome;
}
|
|
981
|
+
|
|
982
|
+
// src/chunking.ts
|
|
983
|
+
/**
 * Lists candidate files in `repo` as sorted, repo-relative POSIX paths.
 * Uses `git ls-files -co --exclude-standard` when git succeeds; otherwise
 * walks the directory tree, skipping `.git`, and keeps regular files only.
 */
async function listGitFiles(repo) {
  const listing = await runCommand(["git", "ls-files", "-co", "--exclude-standard"], repo);
  if (listing.returncode === 0) {
    const names = listing.stdout.split("\n").map((line) => line.trim());
    return names.filter(Boolean).sort();
  }
  // Fallback for directories that are not git work trees (or when git fails).
  const found = [];
  const queue = [repo];
  while (queue.length > 0) {
    const dir = queue.pop();
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const full = join3(dir, entry.name);
      const rel = posixPath(relative(repo, full));
      const insideGitDir = rel === ".git" || rel.startsWith(".git/");
      if (insideGitDir) {
        continue;
      }
      if (entry.isDirectory()) {
        queue.push(full);
      } else if (entry.isFile()) {
        found.push(rel);
      }
    }
  }
  return found.sort();
}
|
|
1004
|
+
/**
 * Selects the files that belong in a bundle: listed by `listGitFiles`, matching
 * the include specs, not matching the never-upload specs, present on disk,
 * and a regular file that is not a symbolic link.
 *
 * The file type is read with `lstatSync`. The previous `statSync` follows
 * links, so its `isSymbolicLink()` was always false and symlinked files
 * slipped through the intended filter.
 *
 * @returns Sorted, de-duplicated repo-relative paths.
 */
async function collectBundleFiles(repo, config) {
  // Function-scope import: this block cannot see the file's import list.
  const { lstatSync } = await import("node:fs");
  const includes = includeSpecs(config);
  const ignores = neverUploadSpecs(config);
  const files = [];
  for (const path of await listGitFiles(repo)) {
    if (!pathIsIncluded(path, includes) || pathIsIgnored(path, ignores))
      continue;
    const full = join3(repo, path);
    // existsSync follows links, so dangling symlinks are dropped here too.
    if (!existsSync3(full))
      continue;
    const stat = lstatSync(full);
    if (stat.isSymbolicLink() || !stat.isFile())
      continue;
    files.push(path);
  }
  return [...new Set(files)].sort();
}
|
|
1021
|
+
/**
 * Estimated bytes a file adds to a chunk: its size on disk, plus the UTF-8
 * length of its path, plus a fixed 64-byte allowance per file.
 */
function chunkFileSize(repo, path) {
  const contentBytes = statSync2(join3(repo, path)).size;
  const pathBytes = Buffer.byteLength(path, "utf8");
  return contentBytes + pathBytes + 64;
}
|
|
1024
|
+
/**
 * Directory bucket used to keep related files next to each other when sorting.
 * Paths under `apps/`, `packages/` or `crates/` with at least three segments
 * bucket on their first three segments; other nested paths on their first
 * two; a bare file name is its own bucket.
 */
function fileBucket(path) {
  const segments = path.split("/");
  const workspaceRoots = ["apps", "packages", "crates"];
  let depth = 1;
  if (segments.length >= 3 && workspaceRoots.includes(segments[0])) {
    depth = 3;
  } else if (segments.length >= 2) {
    depth = 2;
  }
  if (depth === 1) {
    return segments[0];
  }
  return segments.slice(0, depth).join("/");
}
|
|
1032
|
+
/**
 * Renders the source title for a chunk from `bundle.source_title_template`
 * (default `{prefix}--{set}--{group}--{chunk}--{hash}.md`).
 *
 * Placeholders: `{prefix}`, `{set}`/`{set_id}`, `{group}`, `{chunk}`/`{idx}`
 * (index zero-padded to 3 digits) and `{hash}` (first 8 chars of the chunk
 * hash). An unset prefix, or one starting with `codebase-retrieve-`, is
 * replaced by the default short prefix.
 */
function sourceTitleForChunk(config, opts) {
  const configured = String(config.notebooklm?.source_title_prefix || "").trim();
  const useDefault = !configured || configured.startsWith("codebase-retrieve-");
  const prefix = useDefault ? defaultShortSourceTitlePrefix() : configured;
  const template = String(config.bundle?.source_title_template || "{prefix}--{set}--{group}--{chunk}--{hash}.md");
  const paddedIndex = String(opts.index).padStart(3, "0");
  // Applied strictly in this order, one placeholder at a time.
  const substitutions = [
    ["{prefix}", slugify(prefix)],
    ["{set}", opts.setId],
    ["{set_id}", opts.setId],
    ["{group}", slugify(opts.group)],
    ["{chunk}", paddedIndex],
    ["{idx}", paddedIndex],
    ["{hash}", opts.chunkHash.slice(0, 8)]
  ];
  let title = template;
  for (const [token, value] of substitutions) {
    title = title.replaceAll(token, value);
  }
  return title;
}
|
|
1038
|
+
/**
 * SHA-256 (hex) over a list of files, covering both names and contents.
 * For each file, in order: the path, a NUL byte, the file's bytes (only when
 * it exists as a regular file), and another NUL byte.
 */
function chunkHashForFiles(repo, files) {
  const hasher = createHash3("sha256");
  const separator = "\x00";
  files.forEach((file) => {
    hasher.update(file);
    hasher.update(separator);
    const absolute = join3(repo, file);
    const readable = existsSync3(absolute) && statSync2(absolute).isFile();
    if (readable) {
      hasher.update(readFileSync3(absolute));
    }
    hasher.update(separator);
  });
  return hasher.digest("hex");
}
|
|
1050
|
+
/**
 * Assigns each file to at most one bundle group.
 *
 * Groups come from `bundle.groups` when that key is present (else the default
 * groups) and are tried in order; a file goes to the first group whose specs
 * include it. Leftover files go to the default group when it is enabled
 * (`bundle.default_group`, defaulting to an enabled `misc`). With the default
 * group disabled and no groups defined, every file is placed in `repo`.
 *
 * @returns Array of `[groupId, path]` pairs.
 */
function assignFilesToGroups(files, config) {
  const bundle = config.bundle || {};
  const groups = "groups" in bundle ? bundle.groups || [] : defaultGroups();
  const assigned = [];
  const taken = new Set();
  for (const group of groups) {
    const gid = slugify(String(group.id || "group"));
    const specs = groupSpecs(group);
    if (!specs.length) {
      continue;
    }
    for (const path of files) {
      if (!taken.has(path) && pathIsIncluded(path, specs)) {
        assigned.push([gid, path]);
        taken.add(path);
      }
    }
  }
  const fallback = "default_group" in bundle ? bundle.default_group || {} : { enabled: true, id: "misc" };
  if (fallback.enabled) {
    const gid = slugify(String(fallback.id || "misc"));
    files.filter((path) => !taken.has(path)).forEach((path) => assigned.push([gid, path]));
    return assigned;
  }
  if (!groups.length) {
    files.forEach((path) => assigned.push(["repo", path]));
  }
  return assigned;
}
|
|
1079
|
+
/**
 * Appends one planned chunk record to `chunks`; does nothing for an empty
 * file list. The record carries the group, the zero-padded chunk label, a
 * copy of the file list, the size estimate, the file hash and the title.
 *
 * @param total Estimated size in bytes, stored as `estimatedBytes`.
 */
function flushChunk(chunks, repo, config, setId, group, index, files, total) {
  if (files.length === 0) {
    return;
  }
  const digest = chunkHashForFiles(repo, files);
  const label = String(index).padStart(3, "0");
  const record = {
    group,
    chunk: label,
    index,
    files: [...files],
    estimatedBytes: total,
    sha256: `sha256:${digest}`,
    title: sourceTitleForChunk(config, { setId, group, index, chunkHash: digest })
  };
  chunks.push(record);
}
|
|
1093
|
+
/**
 * Returns the file lists of the active sources in `group`, ordered by their
 * numeric chunk label. Sources without a non-empty `files` array are skipped;
 * an unparsable chunk label sorts as 0.
 *
 * @returns Array of file-name arrays ([] when `state` is falsy).
 */
function activeChunkFileMembers(state, group) {
  if (!state) {
    return [];
  }
  const ranked = [];
  for (const source of activeSources(state)) {
    if (String(source.group || "") !== group) {
      continue;
    }
    if (!Array.isArray(source.files) || !source.files.length) {
      continue;
    }
    const order = Number.parseInt(String(source.chunk || "0"), 10) || 0;
    const members = source.files.map(String).filter(Boolean);
    ranked.push({ order, members });
  }
  ranked.sort((a, b) => a.order - b.order);
  return ranked.map((entry) => entry.members);
}
|
|
1098
|
+
/**
 * Packs `opts.files`, in order, into chunks and appends them to `chunks`.
 * A chunk is closed as soon as adding the next file would push its estimated
 * size past `opts.target`. Chunk numbering starts at `opts.startIndex`.
 * A single file larger than `opts.maxBytes` is reported through `die`.
 */
function appendGreedyChunks(chunks, repo, config, opts) {
  let index = opts.startIndex;
  let batch = [];
  let batchBytes = 0;
  const emit = () => flushChunk(chunks, repo, config, opts.setId, opts.group, index, batch, batchBytes);
  for (const path of opts.files) {
    const size = chunkFileSize(repo, path);
    if (size > opts.maxBytes) {
      die(`file exceeds max chunk size (${opts.maxBytes} bytes): ${path} (${size} bytes)`);
    }
    const wouldOverflow = batch.length > 0 && batchBytes + size > opts.target;
    if (wouldOverflow) {
      emit();
      batch = [];
      batchBytes = 0;
      index += 1;
    }
    batch.push(path);
    batchBytes += size;
  }
  if (batch.length > 0) {
    emit();
  }
}
|
|
1118
|
+
/**
 * Plans the chunks for one group and appends them to `chunks`.
 *
 * A previously active chunk is kept as-is (minus files that no longer exist in
 * the group) when what survives still fits `opts.target`, or is a single
 * file. Kept chunks are numbered first, starting at 1; every other file is
 * then packed greedily in bucket-then-path order.
 */
function planGroupChunks(chunks, repo, config, opts) {
  const sortKey = (path) => `${fileBucket(path)}\x00${path}`;
  const ordered = [...opts.files].sort((a, b) => sortKey(a).localeCompare(sortKey(b)));
  const unplaced = new Set(ordered);
  const carriedOver = [];
  for (const previousFiles of activeChunkFileMembers(opts.state, opts.group)) {
    const survivors = previousFiles.filter((path) => unplaced.has(path));
    if (survivors.length === 0) {
      continue;
    }
    const measured = survivors.map((path) => ({ path, size: chunkFileSize(repo, path) }));
    for (const { path, size } of measured) {
      if (size > opts.maxBytes) {
        die(`file exceeds max chunk size (${opts.maxBytes} bytes): ${path} (${size} bytes)`);
      }
    }
    let total = 0;
    for (const { size } of measured) {
      total += size;
    }
    const stillFits = total <= opts.target || survivors.length === 1;
    if (!stillFits) {
      // Too big now: release its files to the greedy pass below.
      continue;
    }
    carriedOver.push(survivors);
    survivors.forEach((path) => unplaced.delete(path));
  }
  let index = 1;
  for (const files of carriedOver) {
    let bytes = 0;
    for (const path of files) {
      bytes += chunkFileSize(repo, path);
    }
    flushChunk(chunks, repo, config, opts.setId, opts.group, index, files, bytes);
    index += 1;
  }
  const leftovers = ordered.filter((path) => unplaced.has(path));
  appendGreedyChunks(chunks, repo, config, { ...opts, startIndex: index, files: leftovers });
}
|
|
1144
|
+
/**
 * Plans all chunks for a bundle set.
 * Collects the bundle files, assigns them to groups, then plans each group in
 * sorted group-id order. The target chunk size (`bundle.target_chunk_bytes`,
 * default 524288) is capped at the maximum (`bundle.max_chunk_bytes`,
 * default 900000).
 *
 * @param opts `setId` and `state` are forwarded to the per-group planner.
 * @returns Array of planned chunk records.
 */
async function planBundleChunks(repo, config, opts) {
  const bundle = config.bundle || {};
  const requestedTarget = parseSizeBytes(bundle.target_chunk_bytes, 524288);
  const maxBytes = parseSizeBytes(bundle.max_chunk_bytes, 900000);
  const target = requestedTarget > maxBytes ? maxBytes : requestedTarget;
  const assigned = assignFilesToGroups(await collectBundleFiles(repo, config), config);
  const byGroup = new Map();
  for (const [group, path] of assigned) {
    if (!byGroup.has(group)) {
      byGroup.set(group, []);
    }
    byGroup.get(group).push(path);
  }
  const chunks = [];
  const groupIds = Array.from(byGroup.keys()).sort();
  for (const group of groupIds) {
    const files = byGroup.get(group) || [];
    planGroupChunks(chunks, repo, config, { setId: opts.setId, group, files, target, maxBytes, state: opts.state });
  }
  return chunks;
}
|
|
1160
|
+
/**
 * Resolves the current commit via `git rev-parse HEAD`.
 *
 * @returns The trimmed command output, or the literal "no-git-head" when the
 *   command fails.
 */
async function gitHead(repo) {
  const { returncode, stdout } = await runCommand(["git", "rev-parse", "HEAD"], repo);
  if (returncode !== 0) {
    return "no-git-head";
  }
  return stdout.trim();
}
|
|
1164
|
+
/**
 * Parses `git status --porcelain=v1 -z --untracked-files=all` into
 * `[status, path]` pairs.
 *
 * With `-z`, a rename or copy entry is followed by one extra NUL-terminated
 * field holding the original path. That field is skipped whenever EITHER
 * status column is `R` or `C`: git can also report a rename/copy in the
 * work-tree (second) column, and checking only the first column would parse
 * the original-path field as a bogus record.
 *
 * @returns Array of `[XY status, path]`; [] when the git command fails.
 */
async function gitStatusRecords(repo) {
  const result = await runCommand(["git", "status", "--porcelain=v1", "-z", "--untracked-files=all"], repo);
  if (result.returncode !== 0)
    return [];
  const fields = result.stdout.split("\x00").filter(Boolean);
  const records = [];
  for (let i = 0; i < fields.length; i += 1) {
    const entry = fields[i];
    const status = entry.slice(0, 2);
    // Entry layout is "XY <path>": two status characters, a space, the path.
    records.push([status, entry.slice(3)]);
    if (status.includes("R") || status.includes("C"))
      i += 1;
  }
  return records;
}
|
|
1184
|
+
/**
 * Computes a cheap fingerprint of the repo state relevant to bundling.
 * It hashes the git HEAD, the config file's digest, and one line per changed
 * path reported by `git status` (after include/never-upload filtering) made of
 * the status, the path, and the file digest (`missing` when the path is
 * absent, `dir` when it exists but is not a regular file).
 *
 * @returns `[fingerprint, relevantChangedPaths]`.
 */
async function fastFingerprint(repo, config, configFile) {
  const includes = includeSpecs(config);
  const ignores = neverUploadSpecs(config);
  const head = await gitHead(repo);
  const parts = [`head=${head}`, `config=${sha256File(configFile)}`];
  const relevant = [];
  const describe = (full) => {
    if (!existsSync3(full)) {
      return "missing";
    }
    return statSync2(full).isFile() ? sha256File(full) : "dir";
  };
  for (const [status, path] of await gitStatusRecords(repo)) {
    const wanted = pathIsIncluded(path, includes) && !pathIsIgnored(path, ignores);
    if (!wanted) {
      continue;
    }
    relevant.push(path);
    parts.push(`${status} ${path} ${describe(join3(repo, path))}`);
  }
  return [sha256Text(parts.join("\n")), relevant];
}
|
|
1204
|
+
/**
 * Resolves the output path of a single-file bundle from `bundle.output`
 * (default `<config dir>/cache/{prefix}-{timestamp}.txt`), filling in
 * `{prefix}` and `{timestamp}`. The timestamp is `iso()` with `-` and `:`
 * removed.
 */
function expandBundlePath(repo, config) {
  const prefix = config.notebooklm?.source_title_prefix || `${repo}-repo`;
  const timestamp = iso().replace(/[-:]/g, "");
  const template = config.bundle?.output || `${CONFIG_DIR}/cache/{prefix}-{timestamp}.txt`;
  const relativePath = template.replaceAll("{prefix}", prefix).replaceAll("{timestamp}", timestamp);
  return join3(repo, relativePath);
}
|
|
1210
|
+
/**
 * Resolves the output path for one rendered chunk.
 * When the `bundle.output` template (default `<config dir>/cache/{title}`)
 * contains `{title}`, it is expanded along with `{prefix}` and `{timestamp}`.
 * Otherwise the chunk is written as `title` in the template's directory.
 */
function expandChunkPath(repo, config, title) {
  const template = config.bundle?.output || `${CONFIG_DIR}/cache/{title}`;
  if (!template.includes("{title}")) {
    const templateDir = dirname3(join3(repo, template));
    return join3(templateDir, title);
  }
  const withTitle = template.replaceAll("{title}", title);
  const withPrefix = withTitle.replaceAll("{prefix}", config.notebooklm?.source_title_prefix || defaultShortSourceTitlePrefix());
  const expanded = withPrefix.replaceAll("{timestamp}", yymmddhhmm());
  return join3(repo, expanded);
}
|
|
1217
|
+
/**
 * Builds the repomix command line shared by all bundle builds: the base
 * command, then `--style` (when `bundle.style` is non-blank), `--compress`
 * (when `bundle.compress` is truthy) and `--ignore` with the comma-joined
 * never-upload specs (when there are any).
 */
function repomixBaseArgv(config) {
  const argv = repomixCmd();
  const bundle = config.bundle || {};
  const style = String(bundle.style || "").trim();
  if (style) {
    argv.push("--style", String(bundle.style).trim());
  }
  if (bundle.compress) {
    argv.push("--compress");
  }
  const ignored = neverUploadSpecs(config).join(",");
  if (ignored) {
    argv.push("--ignore", ignored);
  }
  return argv;
}
|
|
1229
|
+
/**
 * Renders the whole repo into one bundle file with repomix.
 *
 * @returns Path of the written bundle.
 * Reports a non-zero repomix exit through `die`, including its stdout/stderr.
 */
async function buildBundle(repo, config) {
  const out = expandBundlePath(repo, config);
  mkdirSync3(dirname3(out), { recursive: true });
  const argv = [...repomixBaseArgv(config), "--include", includeSpecs(config).join(","), "--output", out];
  const { returncode, stdout, stderr } = await runCommand(argv, repo, { timeout: 600 });
  if (returncode !== 0) {
    die(`repomix failed:\n${stdout}\n${stderr}`);
  }
  return out;
}
|
|
1239
|
+
/**
 * Renders every planned chunk to its own file with repomix.
 *
 * @param opts Forwarded to `planBundleChunks` (`setId`, `state`).
 * @returns One bundle record per chunk: the planned chunk fields plus `path`,
 *   `bundleSha256`, `contentSha256`, `fileListSha256`, `actualBytes` and
 *   `fileCount`.
 *
 * Failure handling: a repomix failure or an oversized rendered chunk is
 * reported through `die`. Every output path is tracked as soon as it is
 * chosen, so the error path also removes the file of the chunk that failed
 * (previously only files of already-completed chunks were removed, leaving an
 * oversized file behind in the cache). The file digest is computed once and
 * reused for both digest fields.
 */
async function buildBundleSet(repo, config, opts) {
  const maxBytes = parseSizeBytes(config.bundle?.max_chunk_bytes, 900000);
  const chunks = await planBundleChunks(repo, config, opts);
  const bundles = [];
  const outputs = [];
  try {
    for (const chunk of chunks) {
      const title = String(chunk.title);
      const out = expandChunkPath(repo, config, title);
      outputs.push(out);
      mkdirSync3(dirname3(out), { recursive: true });
      // repomix reads the file list from stdin, one path per line.
      const inputText = `${chunk.files.map(String).join("\n")}\n`;
      const result = await runCommand([...repomixBaseArgv(config), "--stdin", "--output", out], repo, { inputText, timeout: 600 });
      if (result.returncode !== 0)
        die(`repomix failed for chunk ${title}:\n${result.stdout}\n${result.stderr}`);
      const actualBytes = statSync2(out).size;
      if (actualBytes > maxBytes)
        die(`rendered chunk exceeds max size (${maxBytes} bytes): ${title} (${actualBytes} bytes)`);
      const digest = sha256File(out);
      bundles.push({ ...chunk, path: out, bundleSha256: digest, contentSha256: digest, fileListSha256: chunk.sha256, actualBytes, fileCount: chunk.files.length });
    }
  } catch (error) {
    // NOTE(review): assumes removeFileQuiet tolerates a path that was never written — confirm.
    for (const out of outputs)
      removeFileQuiet(String(out));
    throw error;
  }
  return bundles;
}
|
|
1269
|
+
|
|
1270
|
+
// src/ensure.ts
|
|
1271
|
+
init_common();
|
|
1272
|
+
import { existsSync as existsSync4 } from "node:fs";
|
|
1273
|
+
/** Fingerprint recorded at the last upload, or `null` when none is stored. */
function stateUploadedFingerprint(state) {
  const { lastUploadedFastFingerprint: fingerprint } = state;
  return fingerprint ? fingerprint : null;
}
|
|
1276
|
+
/**
 * Entry point for bringing the index up to date.
 * Without a config file it returns the uninitialized status when
 * `opts.jsonOutput` or `opts.returnUninitialized` is set, and otherwise
 * reports the missing config through `die`. With a config file, the locked
 * worker runs under the repo lock.
 *
 * @param opts Optional flags; `command` (default "ensure") names the command
 *   in the missing-config message.
 */
async function ensureIndex(repo, opts = {}) {
  const cfg = configPath(repo);
  const missing = !existsSync4(cfg);
  if (missing && (opts.jsonOutput || opts.returnUninitialized)) {
    return uninitializedStatus(repo, cfg);
  }
  if (missing) {
    die(missingConfigMessage(repo, cfg, opts.command || "ensure"));
  }
  return repoLock(repo, () => ensureIndexLocked(repo, opts));
}
|
|
1285
|
+
async function ensureIndexLocked(repo, opts = {}) {
|
|
1286
|
+
const [config, cfgPath] = loadConfig(repo, opts.command || "ensure");
|
|
1287
|
+
const [state, statePath] = loadState(cfgPath);
|
|
1288
|
+
await recoverPendingUpload(repo, config, state);
|
|
1289
|
+
await recoverPendingCleanup(repo, config, state, statePath);
|
|
1290
|
+
const [fastHash, relevantPaths] = await fastFingerprint(repo, config, cfgPath);
|
|
1291
|
+
const refresh = config.refresh || {};
|
|
1292
|
+
const checkedAge = secondsSince(state.lastCheckedAt);
|
|
1293
|
+
const uploadedAge = secondsSince(state.lastUploadedAt);
|
|
1294
|
+
const uploadedFingerprint = stateUploadedFingerprint(state);
|
|
1295
|
+
const result = {
|
|
1296
|
+
status: "unknown",
|
|
1297
|
+
config: cfgPath,
|
|
1298
|
+
state: statePath,
|
|
1299
|
+
relevant_changed_paths: relevantPaths,
|
|
1300
|
+
fast_fingerprint: fastHash
|
|
1301
|
+
};
|
|
1302
|
+
if (!opts.force && checkedAge !== null && checkedAge < Number(refresh.check_ttl_seconds ?? 300) && uploadedFingerprint === fastHash) {
|
|
1303
|
+
Object.assign(state, { lastCheckedAt: iso(), lastCheckedFastFingerprint: fastHash, lastBundlePath: null });
|
|
1304
|
+
writeJson(statePath, state);
|
|
1305
|
+
return { ...result, status: "fresh-ttl", checked_age_seconds: checkedAge };
|
|
1306
|
+
}
|
|
1307
|
+
if (!opts.force && uploadedFingerprint === fastHash && state.lastUploadedAt) {
|
|
1308
|
+
Object.assign(state, { lastCheckedAt: iso(), lastCheckedFastFingerprint: fastHash, lastBundlePath: null });
|
|
1309
|
+
writeJson(statePath, state);
|
|
1310
|
+
return { ...result, status: "fresh-fingerprint" };
|
|
1311
|
+
}
|
|
1312
|
+
const firstUpload = activeSources(state).length === 0;
|
|
1313
|
+
if (firstUpload && config.safety?.require_user_approval_first_upload !== false && !opts.yes && !opts.force) {
|
|
1314
|
+
return { ...result, status: "needs-first-upload-approval" };
|
|
1315
|
+
}
|
|
1316
|
+
const minInterval = Number(refresh.min_upload_interval_seconds ?? 900);
|
|
1317
|
+
const maxStaleness = Number(refresh.max_staleness_seconds ?? 86400);
|
|
1318
|
+
if (!opts.force && uploadedAge !== null && uploadedAge < minInterval && uploadedAge < maxStaleness) {
|
|
1319
|
+
Object.assign(state, { lastCheckedAt: iso(), lastCheckedFastFingerprint: fastHash, lastBundlePath: null });
|
|
1320
|
+
writeJson(statePath, state);
|
|
1321
|
+
return { ...result, status: "stale-throttled", uploaded_age_seconds: uploadedAge };
|
|
1322
|
+
}
|
|
1323
|
+
if (!opts.force && refresh.auto === false) {
|
|
1324
|
+
Object.assign(state, { lastCheckedAt: iso(), lastCheckedFastFingerprint: fastHash, lastBundlePath: null });
|
|
1325
|
+
writeJson(statePath, state);
|
|
1326
|
+
return { ...result, status: "auto-refresh-disabled" };
|
|
1327
|
+
}
|
|
1328
|
+
if (bundleMode(config) === "chunked") {
|
|
1329
|
+
const setId = yymmddhhmm();
|
|
1330
|
+
const bundles = await buildBundleSet(repo, config, { setId, state });
|
|
1331
|
+
try {
|
|
1332
|
+
const bundleSetSha = sourceSetHash(bundles);
|
|
1333
|
+
if (!opts.force && state.lastBundleSetSha256 === bundleSetSha) {
|
|
1334
|
+
Object.assign(state, { lastCheckedAt: iso(), lastCheckedFastFingerprint: fastHash, lastBundlePath: null });
|
|
1335
|
+
writeJson(statePath, state);
|
|
1336
|
+
return { ...result, status: "fresh-bundle-hash", bundleSetSha256: bundleSetSha, bundleDeleted: true };
|
|
1337
|
+
}
|
|
1338
|
+
const sourceSet = await uploadBundleSet(repo, config, state, bundles, { setId });
|
|
1339
|
+
const retiredIds = (sourceSet._retiredSourceIds || []).map(String).filter(Boolean);
|
|
1340
|
+
delete sourceSet._retiredSourceIds;
|
|
1341
|
+
Object.assign(state, {
|
|
1342
|
+
lastCheckedAt: iso(),
|
|
1343
|
+
lastUploadedAt: iso(),
|
|
1344
|
+
lastConfigSha256: sha256File(cfgPath),
|
|
1345
|
+
lastCheckedFastFingerprint: fastHash,
|
|
1346
|
+
lastUploadedFastFingerprint: fastHash,
|
|
1347
|
+
lastFastFingerprint: fastHash,
|
|
1348
|
+
lastBundleSetSha256: bundleSetSha,
|
|
1349
|
+
lastBundleSha256: bundleSetSha,
|
|
1350
|
+
lastBundlePath: null,
|
|
1351
|
+
activeSourceSet: sourceSet,
|
|
1352
|
+
sources: (sourceSet.sources || []).filter((src) => src && typeof src === "object")
|
|
1353
|
+
});
|
|
1354
|
+
const cleanupPendingSourceIds2 = queueCleanupSourceIds(state, retiredIds);
|
|
1355
|
+
writeJson(statePath, state);
|
|
1356
|
+
clearPendingUpload(repo);
|
|
1357
|
+
return { ...result, status: "uploaded", bundleSetSha256: bundleSetSha, bundleDeleted: true, sourceSet, cleanupPendingSourceIds: cleanupPendingSourceIds2 };
|
|
1358
|
+
} finally {
|
|
1359
|
+
for (const bundle2 of bundles)
|
|
1360
|
+
if (bundle2.path)
|
|
1361
|
+
removeFileQuiet(String(bundle2.path));
|
|
1362
|
+
}
|
|
1363
|
+
}
|
|
1364
|
+
const bundle = await buildBundle(repo, config);
|
|
1365
|
+
try {
|
|
1366
|
+
const bundleSha = sha256File(bundle);
|
|
1367
|
+
if (!opts.force && state.lastBundleSha256 === bundleSha) {
|
|
1368
|
+
Object.assign(state, { lastCheckedAt: iso(), lastCheckedFastFingerprint: fastHash, lastBundlePath: null });
|
|
1369
|
+
writeJson(statePath, state);
|
|
1370
|
+
return { ...result, status: "fresh-bundle-hash", bundleSha256: bundleSha, bundleDeleted: true };
|
|
1371
|
+
}
|
|
1372
|
+
const source = await uploadTextSourceFromFile(repo, config, bundle, bundle.split("/").at(-1) || "bundle.txt");
|
|
1373
|
+
source.bundleSha256 = bundleSha;
|
|
1374
|
+
source.uploadedAt = iso();
|
|
1375
|
+
if (config.notebooklm?.wait_after_upload && source.id) {
|
|
1376
|
+
if (!await waitSourceReady(repo, notebookId(config), String(source.id)))
|
|
1377
|
+
console.error(`warning: source wait failed for ${source.id}`);
|
|
1378
|
+
}
|
|
1379
|
+
state.sources = [...state.sources || [], source];
|
|
1380
|
+
Object.assign(state, {
|
|
1381
|
+
lastCheckedAt: iso(),
|
|
1382
|
+
lastUploadedAt: iso(),
|
|
1383
|
+
lastConfigSha256: sha256File(cfgPath),
|
|
1384
|
+
lastCheckedFastFingerprint: fastHash,
|
|
1385
|
+
lastUploadedFastFingerprint: fastHash,
|
|
1386
|
+
lastFastFingerprint: fastHash,
|
|
1387
|
+
lastBundleSha256: bundleSha,
|
|
1388
|
+
lastBundlePath: null
|
|
1389
|
+
});
|
|
1390
|
+
writeJson(statePath, state);
|
|
1391
|
+
return { ...result, status: "uploaded", bundleSha256: bundleSha, bundleDeleted: true, source };
|
|
1392
|
+
} finally {
|
|
1393
|
+
removeFileQuiet(bundle);
|
|
1394
|
+
}
|
|
1395
|
+
}
|
|
1396
|
+
|
|
1397
|
+
// src/retrieval.ts
|
|
1398
|
+
init_common();
|
|
1399
|
+
import { existsSync as existsSync5, readFileSync as readFileSync4, statSync as statSync3 } from "node:fs";
|
|
1400
|
+
import { join as join4 } from "node:path";
|
|
1401
|
+
// Matches repo-relative paths of the form "dir/sub/file.ext" whose extension is one of
// the listed source/doc types. At least one directory segment is required.
// The trailing \b keeps a short extension from matching as a prefix of a longer one:
// without it "config/app.json" was extracted as "config/app.js" and "src/App.tsx" as
// "src/App.ts", because the alternation accepts the first alternative that fits.
var PATH_RE = /(?:(?:[\w.-]+\/)+[\w.@+-]+\.(?:rs|ts|tsx|js|jsx|py|go|java|kt|md|toml|yaml|yml|json|sh|sql|css|scss|html))\b/g;
|
|
1402
|
+
// Candidate search terms: either an identifier-like word of 4+ characters, or a
// 5+ character token that may also contain hyphens.
var TERM_RE = /[A-Za-z_][A-Za-z0-9_]{3,}|[A-Za-z0-9][A-Za-z0-9_-]{4,}/g;

// Lower-cased words that are never used as search terms. Callers lower-case a term
// before testing membership.
var STOP_TERMS = new Set([
  "agent", "authority", "btreemap", "bundle", "codex", "command", "docs", "fixture",
  "gate", "justfile", "keywords", "local", "names", "paths", "postgres", "postgresql",
  "real", "refs", "repo", "shell", "test", "trigger", "where", "which", "what", "when",
  "implemented", "implementation", "function", "tests", "files", "return", "likely",
  "line", "numbers", "source", "notebooklm"
]);
|
|
1442
|
+
/**
 * Ask the NotebookLM CLI a question, scoped to the active ready sources recorded in state.
 *
 * @param {string} repo - Project root; used to load config/state and as the command's working directory argument.
 * @param {string} question - Question text passed to the `ask` subcommand.
 * @returns {Promise<object>} Parsed JSON from stdout; `{ answer: stdout }` when stdout is not
 *   valid JSON; `{ error: true, stdout, stderr }` when the command exits non-zero.
 */
async function askProvider(repo, question) {
  const [config, cfgPath] = loadConfig(repo, "ask");
  const [state] = loadState(cfgPath);
  const notebook = notebookId(config);
  // The common module is resolved lazily here, as in the bundler-generated original.
  const { notebooklmCmd: resolveCli } = await Promise.resolve().then(() => (init_common(), exports_common));
  const argv = [...resolveCli(), "ask", question, "-n", notebook];
  for (const sourceId of activeReadySourceIds(state)) {
    argv.push("-s", sourceId);
  }
  argv.push("--json");
  const { returncode, stdout, stderr } = await runCommand(argv, repo, { timeout: 180 });
  if (returncode !== 0) {
    return { error: true, stdout, stderr };
  }
  try {
    return JSON.parse(stdout);
  } catch {
    // Non-JSON output is passed through as the plain answer text.
    return { answer: stdout };
  }
}
|
|
1460
|
+
/**
 * Reduce a provider response to displayable text.
 *
 * @param {*} data - Provider payload; normally an object, but parsed JSON may be any value
 *   (including `null`), so property access is guarded.
 * @returns {string} `data.answer` when it is a string, otherwise a JSON rendering of `data`
 *   (or `String(data)` when the value has no JSON form, e.g. `undefined`).
 */
function answerText(data) {
  if (typeof data?.answer === "string") {
    return data.answer;
  }
  // JSON.stringify returns undefined for values with no JSON representation.
  return JSON.stringify(data) ?? String(data);
}
|
|
1463
|
+
/**
 * Index the active sources recorded in local state by their stringified id.
 *
 * @param {string} repo - Project root used to locate config and state.
 * @returns {Map<string, object>} Source records keyed by id; records with a falsy id are skipped.
 */
function activeSourcesById(repo) {
  const [, cfg] = loadConfig(repo, "ask");
  const [state] = loadState(cfg);
  const entries = [];
  for (const source of activeSources(state)) {
    if (source.id) {
      entries.push([String(source.id), source]);
    }
  }
  return new Map(entries);
}
|
|
1472
|
+
/**
 * Map a cited text fragment back to files of the source it was cited from.
 *
 * First pass: paths mentioned literally in `text` that belong to `source.files` and exist
 * on disk (deduplicated, at most 5, no line numbers). If none are found, second pass:
 * search the contents of the source's files for the cited text.
 *
 * @param {string} repo - Project root that `source.files` entries are joined onto.
 * @param {object} source - Source record; `files` is used when it is an array.
 * @param {string} text - Cited text from the provider.
 * @returns {Array<[string, number|null]>} `[path, line]` pairs; `line` is the 1-based line of
 *   an exact match, or `null` for path mentions and whitespace-normalized matches.
 */
function referencePathCandidates(repo, source, text) {
  const files = Array.isArray(source.files) ? source.files.map(String).filter(Boolean) : [];
  const known = new Set(files);

  // Pass 1: explicit path mentions, keyed the same way as `[path, null].join(":")`.
  const mentioned = new Map();
  for (const hit of text.matchAll(PATH_RE)) {
    const candidate = hit[0].replace(/[`'".,;:()[\]{}<>]+$/g, "");
    if (known.has(candidate) && existsSync5(join4(repo, candidate))) {
      mentioned.set(`${candidate}:`, [candidate, null]);
    }
  }
  if (mentioned.size) {
    return [...mentioned.values()].slice(0, 5);
  }

  // Pass 2: content search, skipped for very short/long fragments and directory listings.
  const snippet = text.split(/\s+/).join(" ");
  const tooShort = snippet.length < 4;
  const tooLong = snippet.length > 240;
  if (tooShort || tooLong || text.includes("<directory_structure>")) {
    return [];
  }
  const found = [];
  for (const relPath of files) {
    const absPath = join4(repo, relPath);
    if (!existsSync5(absPath) || statSync3(absPath).size > 2000000) {
      continue;
    }
    const content = readFileSync4(absPath, "utf8");
    const offset = content.indexOf(text);
    if (offset >= 0) {
      // Line number = count of lines in everything before the match.
      found.push([relPath, content.slice(0, offset).split("\n").length]);
    } else if (content.split(/\s+/).join(" ").includes(snippet)) {
      found.push([relPath, null]);
    }
    if (found.length >= 5) {
      break;
    }
  }
  return found;
}
|
|
1502
|
+
/**
 * Render up to three `[path, line]` pairs as a comma-separated list.
 *
 * @param {Array<[string, number|null]>} paths - Reference candidates.
 * @returns {string} `path:line` (or bare `path` when line is falsy) entries joined by ", ",
 *   followed by `, ...(+N)` when more than three were supplied.
 */
function formatReferencePaths(paths) {
  const shown = [];
  for (const [path, line] of paths.slice(0, 3)) {
    shown.push(line ? `${path}:${line}` : path);
  }
  const hidden = paths.length - 3;
  const suffix = hidden > 0 ? `, ...(+${hidden})` : "";
  return shown.join(", ") + suffix;
}
|
|
1506
|
+
/**
 * Print a compact "references:" section mapping citation numbers to local file paths.
 * Prints nothing when the answer has no references or none can be mapped to a file.
 *
 * @param {string} repo - Project root.
 * @param {object} answer - Provider answer; `references` is used when it is a non-empty array.
 */
function printCompactReferences(repo, answer) {
  if (!Array.isArray(answer.references) || answer.references.length === 0) {
    return;
  }
  const sources = activeSourcesById(repo);
  const handled = new Set();
  const rows = [];
  for (const ref of answer.references) {
    if (!ref || typeof ref !== "object") {
      continue;
    }
    const num = String(ref.citation_number || "").trim();
    // Only the first reference for each citation number is reported.
    if (!num || handled.has(num)) {
      continue;
    }
    handled.add(num);
    const source = sources.get(String(ref.source_id || ""));
    if (!source) {
      continue;
    }
    const paths = referencePathCandidates(repo, source, String(ref.cited_text || ""));
    if (paths.length) {
      rows.push(`[${num}] ${formatReferencePaths(paths)}`);
    }
  }
  if (rows.length) {
    console.log(`\nreferences:\n${rows.join("\n")}`);
  }
}
|
|
1530
|
+
/**
 * Pull candidate file paths and search terms out of a provider answer and the user's query.
 *
 * @param {string} text - Provider answer text; the only input scanned for paths.
 * @param {string} query - Original query; scanned for terms together with `text`.
 * @returns {[string[], string[]]} Sorted unique paths, and up to 24 sorted unique terms
 *   (4+ characters, not a stop term, containing neither "/" nor ".").
 */
function extractCandidates(text, query) {
  const pathSet = new Set();
  for (const hit of text.matchAll(PATH_RE)) {
    pathSet.add(hit[0]);
  }

  const isUsable = (term) =>
    term.length >= 4 &&
    !STOP_TERMS.has(term.toLowerCase()) &&
    !term.includes("/") &&
    !term.includes(".");
  const haystack = `${text}\n${query}`;
  const termSet = new Set(
    Array.from(haystack.matchAll(TERM_RE), (hit) => hit[0].replace(/^[`'"]|[`'"]$/g, "")).filter(isUsable)
  );

  return [[...pathSet].sort(), [...termSet].sort().slice(0, 24)];
}
|
|
1542
|
+
/**
 * Prefer distinctive terms for searching.
 *
 * A term is distinctive when it is not a stop term and contains "_" or "-", has an
 * upper-case letter after its first character, or is at least 14 characters long.
 *
 * @param {string[]} terms - Candidate terms.
 * @returns {string[]} All distinctive terms, or, when there are none, the first 8 non-stop terms.
 */
function highSignalTerms(terms) {
  const notStop = (term) => !STOP_TERMS.has(term.toLowerCase());
  const distinctive = (term) =>
    term.includes("_") || term.includes("-") || /[A-Z]/.test(term.slice(1)) || term.length >= 14;
  const selected = terms.filter((term) => notStop(term) && distinctive(term));
  if (selected.length) {
    return selected;
  }
  return terms.filter((term) => notStop(term)).slice(0, 8);
}
|
|
1546
|
+
/**
 * Build the ordered list of root groups to search, one rg invocation per group.
 *
 * @param {string} repo - Project root that relative paths are checked against.
 * @param {object} config - Project config; its include specs supply the fallback roots.
 * @param {string[]} candidates - Candidate paths suggested by the provider.
 * @returns {string[][]} The existing candidates first (omitted when none exist), then the
 *   existing include specs, or `["."]` when none of those exist.
 */
function rgRoots(repo, config, candidates) {
  const present = (rel) => existsSync5(join4(repo, rel));
  const candidateRoots = candidates.filter((path) => present(path));
  const includeRoots = includeSpecs(config).filter((spec) => present(spec));
  const groups = [];
  if (candidateRoots.length) {
    groups.push(candidateRoots);
  }
  groups.push(includeRoots.length ? includeRoots : ["."]);
  return groups;
}
|
|
1551
|
+
/**
 * Parse `path:line:text` output lines into match records.
 *
 * Only the first two colons are treated as separators, so matched text that itself
 * contains ":" is kept whole. (`String.prototype.split` with a limit discards the
 * remainder instead of keeping it, which previously truncated such text.)
 *
 * @param {string} stdout - Raw search output, one match per line.
 * @param {Set<string>} seen - Keys of matches already reported; updated in place so
 *   duplicates are suppressed across calls.
 * @param {number} remaining - Maximum number of matches to return from this call.
 * @returns {Array<{path: string, line: number|string, text: string}>} New, de-duplicated
 *   matches; `line` is a number when the field is all digits, otherwise the raw string.
 */
function parseRgMatches(stdout, seen, remaining) {
  const matches = [];
  for (const line of stdout.split("\n")) {
    if (matches.length >= remaining) {
      break;
    }
    const firstColon = line.indexOf(":");
    const secondColon = firstColon < 0 ? -1 : line.indexOf(":", firstColon + 1);
    if (secondColon < 0) {
      // Not a path:line:text record (blank line, context separator, ...).
      continue;
    }
    const path = line.slice(0, firstColon);
    const lineNo = line.slice(firstColon + 1, secondColon);
    const text = line.slice(secondColon + 1).trim();
    const key = `${path}\x00${lineNo}\x00${text}`;
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    matches.push({ path, line: /^\d+$/.test(lineNo) ? Number(lineNo) : lineNo, text });
  }
  return matches;
}
|
|
1569
|
+
/**
 * Search the local tree with ripgrep for the most distinctive terms.
 *
 * Root groups from `rgRoots` are searched in order until `retrieval.max_local_matches`
 * (default 80) matches have been collected.
 *
 * @param {string} repo - Project root; passed to `runCommand` for each rg invocation.
 * @param {object} config - Project config (`retrieval.max_local_matches`, include specs).
 * @param {string[]} terms - Candidate search terms.
 * @param {string[]} [candidatePaths=[]] - Existing paths to search before the include roots.
 * @returns {Promise<Array<object>>} Match records, `[]` when there is nothing to search or
 *   rg is unavailable, or `[{ error }]` when rg exits with a code other than 0 or 1.
 */
async function localRg(repo, config, terms, candidatePaths = []) {
  if (!terms.length || !which("rg")) {
    return [];
  }
  const pattern = highSignalTerms(terms)
    .filter(Boolean)
    .slice(0, 16)
    .map((term) => term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  if (!pattern) {
    // An empty regex would match every line of every file.
    return [];
  }
  const maxMatches = Number(config.retrieval?.max_local_matches || 80);
  const matches = [];
  const seen = new Set();
  for (const roots of rgRoots(repo, config, candidatePaths)) {
    const remaining = maxMatches - matches.length;
    if (remaining <= 0) {
      break;
    }
    // --with-filename: rg omits the path when searching a single file, which would break
    // the path:line:text parsing. --no-heading keeps one self-contained record per line.
    const argv = ["rg", "-n", "-S", "--with-filename", "--no-heading", "-e", pattern, "--", ...roots];
    const result = await runCommand(argv, repo, { timeout: 120 });
    // Exit code 1 means "no matches"; anything other than 0/1 is a real failure.
    if (![0, 1].includes(result.returncode)) {
      return [{ error: result.stderr.trim() }];
    }
    matches.push(...parseRgMatches(result.stdout, seen, remaining));
  }
  return matches;
}
|
|
1588
|
+
/**
 * Produce a human-readable warning for freshness states in which the provider answer
 * may not reflect local changes.
 *
 * @param {object} freshness - Freshness report; `status`, `relevant_changed_paths` and
 *   `uploaded_age_seconds` are read.
 * @returns {string|null} Warning text, or `null` when the status needs no warning.
 */
function freshnessWarning(freshness) {
  switch (String(freshness.status || "")) {
    case "stale-throttled": {
      const changed = Array.isArray(freshness.relevant_changed_paths) ? freshness.relevant_changed_paths : [];
      let preview = "";
      if (changed.length) {
        // Show at most five changed paths, then a count of the rest.
        const overflow = changed.length <= 5 ? "" : `, ...(+${changed.length - 5})`;
        preview = `; changed=${changed.slice(0, 5).join(", ")}${overflow}`;
      }
      const age = freshness.uploaded_age_seconds !== undefined ? `; uploaded_age_seconds=${freshness.uploaded_age_seconds}` : "";
      return `warning: index is stale-throttled${age}${preview}; provider answer may lag local changes. Use --force-refresh or refresh --force if needed.`;
    }
    case "needs-first-upload-approval":
      return "warning: first broad upload requires approval; rerun with --yes or run refresh explicitly.";
    case "auto-refresh-disabled":
      return "warning: auto refresh is disabled; provider answer may lag local changes.";
    default:
      return null;
  }
}
|
|
1602
|
+
/**
 * Explain why the provider must not be called for the given freshness status.
 *
 * @param {object} freshness - Freshness report; only `status` is read.
 * @returns {string|null} Skip message for blocking statuses, otherwise `null`.
 */
function providerBlockMessage(freshness) {
  switch (freshness.status) {
    case "not-initialized":
      return "skipped; project is not initialized for project retrieval.";
    case "needs-first-upload-approval":
      return "skipped; first broad upload requires approval. Rerun ask/locate with --yes or run refresh explicitly.";
    default:
      return null;
  }
}
|
|
1609
|
+
/**
 * Suggest follow-up command lines when a first upload still needs approval.
 *
 * @param {string} repo - Project root passed through to `commandLine`.
 * @param {string} command - Command being run (e.g. "ask" or "locate"); also prefixes the first key.
 * @param {string} query - The user's question/query, repeated in the suggested command.
 * @returns {object} `{ <command>WithFirstUploadApproval, refresh }` command-line suggestions.
 */
function firstUploadNext(repo, command, query) {
  const suggestions = {};
  suggestions[`${command}WithFirstUploadApproval`] = commandLine(repo, command, "--yes", query);
  suggestions.refresh = commandLine(repo, "refresh", "--force");
  return suggestions;
}
|
|
1615
|
+
/**
 * Build the stand-in provider answer used when the provider call is skipped.
 *
 * @param {object} freshness - Freshness report; `freshness.next` takes precedence over `nextSteps`.
 * @param {object} [nextSteps] - Fallback follow-up suggestions.
 * @returns {object} `{ error: true, message }`, plus `next` when any suggestions exist.
 */
function providerBlockPayload(freshness, nextSteps) {
  const payload = { error: true, message: providerBlockMessage(freshness) || "skipped" };
  const next = freshness.next || nextSteps;
  if (next) {
    payload.next = next;
  }
  return payload;
}
|
|
1619
|
+
/**
 * Locate code for a query: ask the provider for candidate paths/terms, then verify them
 * locally with rg.
 *
 * @param {string} repo - Project root.
 * @param {string} query - Natural-language description of what to find.
 * @param {object} opts - `forceRefresh`, `yes`, `json`, `includeProviderAnswer`.
 * @returns {Promise<object>} Report containing freshness, provider candidates, local line
 *   refs, and provider paths that do not exist locally. When the preflight is blocked the
 *   provider is not called and the candidate lists are empty.
 */
async function locate(repo, query, opts) {
  const freshness = await ensureIndex(repo, {
    force: opts.forceRefresh,
    yes: opts.yes,
    jsonOutput: opts.json,
    command: "locate",
    returnUninitialized: true
  });

  const blocked = providerBlockMessage(freshness);
  if (blocked) {
    let next = freshness.next;
    if (!next && freshness.status === "needs-first-upload-approval") {
      next = firstUploadNext(repo, "locate", query);
    }
    const skipped = {
      freshness,
      notebooklm_candidates: { paths: [], existing_paths: [], terms: [] },
      local_line_refs: [],
      provider_misses_or_stale_paths: [],
      provider_answer: `(${blocked})`,
      claim_boundary: "Semantic provider was not called because retrieval preflight is blocked."
    };
    if (next) {
      skipped.next = next;
    }
    return skipped;
  }

  const prompt = `Find the code location for this repository question. Return likely repo paths, function names, test names, command names, and keywords for rg. If exact line numbers are unavailable, say so. Question: ${query}`;
  const provider = await askProvider(repo, prompt);
  const [paths, terms] = extractCandidates(answerText(provider), query);
  const [config] = loadConfig(repo, "locate");
  const onDisk = (path) => existsSync5(join4(repo, path));
  const existing = paths.filter((path) => onDisk(path));
  const lineRefs = await localRg(repo, config, terms, existing);
  const missing = paths.filter((path) => !onDisk(path));
  return {
    freshness,
    notebooklm_candidates: { paths, existing_paths: existing, terms },
    local_line_refs: lineRefs,
    provider_misses_or_stale_paths: missing,
    provider_answer: opts.includeProviderAnswer ? provider : "(hidden; pass --include-provider-answer)",
    claim_boundary: "Line refs come from local rg results, not NotebookLM."
  };
}
|
|
1648
|
+
|
|
1649
|
+
// src/commands.ts
|
|
1650
|
+
/**
 * `init` command: write the project config and a `.gitignore` for local state files,
 * optionally resolving or creating the NotebookLM notebook to bind.
 *
 * @param {object} opts - Parsed CLI options (`repo`, `force`, `projectName`,
 *   `notebookTitlePrefix`, `notebookTitle`, `notebookId`, `reuseExistingNotebook`,
 *   `createNotebook`, `include`, `sourceTitlePrefix`).
 */
async function cmdInit(opts) {
  const repo = resolve2(String(opts.repo || "."));
  const cfgDir = join5(repo, CONFIG_DIR2);
  const cfg = join5(cfgDir, CONFIG_JSON);
  if (existsSync6(cfg) && !opts.force) {
    die(`config already exists: ${cfg}`);
  }

  const projectName = opts.projectName || basename2(repo);
  const titlePrefix = opts.notebookTitlePrefix || "memdex";
  const title = opts.notebookTitle || defaultNotebookTitle(projectName, titlePrefix);

  // Without an explicit id, look the notebook up by exact title (and create it if asked to).
  let notebookIdValue = opts.notebookId || "";
  let resolvedNotebook = null;
  const wantsLookup = opts.reuseExistingNotebook || opts.createNotebook;
  if (!notebookIdValue && wantsLookup) {
    resolvedNotebook = await findNotebookByTitle(repo, title);
    if (!resolvedNotebook && opts.createNotebook) {
      resolvedNotebook = await createNotebook(repo, title);
    }
    if (!resolvedNotebook) {
      die(`no NotebookLM notebook found with title ${JSON.stringify(title)}; pass --create-notebook or --notebook-id`);
    }
    notebookIdValue = String(resolvedNotebook.id || "");
  }

  const config = defaultConfig(repo, notebookIdValue, { projectName, notebookTitlePrefix: titlePrefix, notebookTitle: title });
  if (opts.include) {
    config.bundle.include = String(opts.include)
      .split(",")
      .map((part) => part.trim())
      .filter(Boolean);
  }
  if (opts.sourceTitlePrefix) {
    config.notebooklm.source_title_prefix = opts.sourceTitlePrefix;
  }
  writeJson(cfg, config);

  // Keep machine-local state, pending uploads, caches and lock files out of version control.
  const ignoreFile = join5(cfgDir, ".gitignore");
  const ignoreEntries = ["state.local.json", "state.local.*.json", "pending-upload.local.json", "cache/", "*.lock"];
  writeFileSync2(ignoreFile, `${ignoreEntries.join("\n")}\n`);

  console.log(`created: ${cfg}`);
  console.log(`created: ${ignoreFile}`);
  console.log(`notebook_title: ${title}`);
  if (resolvedNotebook) {
    console.log(`notebook_id: ${notebookIdValue}`);
  }
  console.log("next:");
  if (!notebookIdValue) {
    console.log(" set notebooklm.notebook_id in the config, or rerun init with --create-notebook / --reuse-existing-notebook / --notebook-id");
    return;
  }
  console.log(` ${commandLine(repo, "ensure", "--yes")}`);
  console.log(` ${commandLine(repo, "ask", "your question")}`);
}
|
|
1694
|
+
/**
 * `status` command: report config location, notebook binding, recorded upload state and
 * the current fast fingerprint. Prints the uninitialized status when no config exists.
 *
 * @param {object} opts - Parsed CLI options (`repo`, `json`).
 */
async function cmdStatus(opts) {
  const repo = resolve2(String(opts.repo || "."));
  const cfgCandidate = configPath(repo);
  if (!existsSync6(cfgCandidate)) {
    output(uninitializedStatus(repo, cfgCandidate), opts.json);
    return;
  }

  const [config, cfgPath] = loadConfig(repo, "status");
  const [state, statePath] = loadState(cfgPath);
  const [fastHash, changed] = await fastFingerprint(repo, config, cfgPath);
  const notebook = config.notebooklm;

  const report = {
    initialized: true,
    config: cfgPath,
    state: statePath,
    provider: config.provider,
    projectName: config.project?.name,
    notebook_id: notebook?.notebook_id,
    notebookTitle: notebookTitle(config),
    sourceTitlePrefix: notebook?.source_title_prefix,
    lastCheckedAt: state.lastCheckedAt,
    lastUploadedAt: state.lastUploadedAt,
    lastBundleSha256: state.lastBundleSha256,
    fastFingerprint: fastHash,
    stateCheckedFastFingerprint: state.lastCheckedFastFingerprint,
    stateUploadedFastFingerprint: stateUploadedFingerprint(state),
    stateFastFingerprint: state.lastFastFingerprint,
    relevantChangedPaths: changed,
    sources: state.sources || []
  };
  output(report, opts.json);
}
|
|
1724
|
+
/**
 * `pack` command: plan the bundle chunks and either print the plan (`--dry-run`) or
 * build the bundle set and print a summary of each bundle.
 *
 * @param {object} opts - Parsed CLI options (`repo`, `setId`, `dryRun`, `includeFiles`, `json`).
 */
async function cmdPack(opts) {
  const repo = resolve2(String(opts.repo || "."));
  const [config, cfgPath] = loadConfig(repo, "pack");
  const [state] = loadState(cfgPath);
  const setId = opts.setId || yymmddhhmm();
  // The plan is computed in both modes, before the dry-run check.
  const chunks = await planBundleChunks(repo, config, { setId, state });

  if (opts.dryRun) {
    const describeChunk = (chunk) => {
      const summary = {
        group: chunk.group,
        chunk: chunk.chunk,
        title: chunk.title,
        estimatedBytes: chunk.estimatedBytes,
        fileCount: (chunk.files || []).length
      };
      if (opts.includeFiles) {
        summary.files = chunk.files || [];
      }
      return summary;
    };
    output(
      { setId, mode: "chunked", chunkCount: chunks.length, chunks: chunks.map((chunk) => describeChunk(chunk)) },
      opts.json
    );
    return;
  }

  const bundles = await buildBundleSet(repo, config, { setId, state });
  const describeBundle = ({ group, chunk, title, path, fileCount, bundleSha256, contentSha256 }) => ({
    group,
    chunk,
    title,
    path,
    fileCount,
    bundleSha256,
    contentSha256
  });
  output(
    { setId, bundleCount: bundles.length, bundles: bundles.map((bundle) => describeBundle(bundle)) },
    opts.json
  );
}
|
|
1761
|
+
/**
 * `ensure` command: run the index preflight with the caller's `--force` / `--yes`
 * choices and print its result.
 *
 * @param {object} opts - Parsed CLI options (`repo`, `force`, `yes`, `json`).
 */
async function cmdEnsure(opts) {
  const repo = resolve2(String(opts.repo || "."));
  const result = await ensureIndex(repo, {
    force: opts.force,
    yes: opts.yes,
    jsonOutput: opts.json,
    command: "ensure"
  });
  output(result, opts.json);
}
|
|
1764
|
+
/**
 * `refresh` command: run the index preflight with force and first-upload approval
 * always enabled, then print its result.
 *
 * @param {object} opts - Parsed CLI options (`repo`, `json`).
 */
async function cmdRefresh(opts) {
  const repo = resolve2(String(opts.repo || "."));
  const result = await ensureIndex(repo, {
    force: true,
    yes: true,
    jsonOutput: opts.json,
    command: "refresh"
  });
  output(result, opts.json);
}
|
|
1767
|
+
/**
 * Print the result of an `ask`.
 *
 * JSON mode emits `{ freshness, provider_answer }` and nothing else. Text mode prints an
 * optional freshness warning, optional verbose metadata, the answer text and a compact
 * reference list.
 *
 * @param {string} repo - Project root, used to resolve references to local files.
 * @param {object} freshness - Freshness report from the index preflight.
 * @param {object} answer - Provider answer (or the block payload when the provider was skipped).
 * @param {object} opts - `json` and `verbose` flags.
 */
function printAskResult(repo, freshness, answer, opts) {
  if (opts.json) {
    output({ freshness, provider_answer: answer }, true);
    return;
  }

  const warning = freshnessWarning(freshness);
  if (warning) {
    console.log(warning);
  }

  if (opts.verbose) {
    console.log(`freshness: ${JSON.stringify(freshness)}`);
    const metadata = Object.fromEntries(
      ["conversation_id", "turn_number", "is_follow_up"]
        .filter((key) => key in answer)
        .map((key) => [key, answer[key]])
    );
    if (Array.isArray(answer.references)) {
      metadata.references_count = answer.references.length;
    }
    if (Object.keys(metadata).length) {
      console.log(`provider: ${JSON.stringify(metadata)}`);
    }
  }

  console.log(answerText(answer));
  printCompactReferences(repo, answer);
}
|
|
1789
|
+
/**
 * `ask` command: run the index preflight, then ask the provider unless the preflight
 * blocks it, and print the outcome.
 *
 * @param {string} question - Question to ask over the source set.
 * @param {object} opts - Parsed CLI options (`repo`, `forceRefresh`, `yes`, `json`, `verbose`).
 */
async function cmdAsk(question, opts) {
  const repo = resolve2(String(opts.repo || "."));
  const freshness = await ensureIndex(repo, {
    force: opts.forceRefresh,
    yes: opts.yes,
    jsonOutput: opts.json,
    command: "ask",
    returnUninitialized: true
  });

  if (!providerBlockMessage(freshness)) {
    const answer = await askProvider(repo, question);
    printAskResult(repo, freshness, answer, opts);
    return;
  }

  // Blocked: print a stand-in payload instead of calling the provider.
  let next;
  if (freshness.status === "needs-first-upload-approval") {
    next = firstUploadNext(repo, "ask", question);
  }
  printAskResult(repo, freshness, providerBlockPayload(freshness, next), opts);
}
|
|
1800
|
+
/**
 * Print the result of a `locate`.
 *
 * JSON mode emits the full result. Text mode prints an optional freshness warning,
 * optional verbose freshness, then the result without its `freshness` field.
 *
 * @param {object} result - Report returned by `locate`.
 * @param {object} opts - `json` and `verbose` flags.
 */
function printLocateResult(result, opts) {
  if (opts.json) {
    output(result, true);
    return;
  }

  const { freshness, ...visible } = result;
  const shownFreshness = freshness || {};
  const warning = freshnessWarning(shownFreshness);
  if (warning) {
    console.log(warning);
  }
  if (opts.verbose) {
    console.log(`freshness: ${JSON.stringify(shownFreshness)}`);
  }
  output(visible, false);
}
|
|
1814
|
+
/**
 * `locate` command: run `locate` for the resolved repo and print its report.
 *
 * @param {string} query - Natural-language description of what to find.
 * @param {object} opts - Parsed CLI options (`repo`, `forceRefresh`, `yes`, `json`,
 *   `includeProviderAnswer`, `verbose`).
 */
async function cmdLocate(query, opts) {
  const repo = resolve2(String(opts.repo || "."));
  const { forceRefresh, yes, json, includeProviderAnswer } = opts;
  const report = await locate(repo, query, { forceRefresh, yes, json, includeProviderAnswer });
  printLocateResult(report, opts);
}
|
|
1817
|
+
/**
 * `temp-source upload` command: stage a local file, upload it as a temporary source,
 * optionally wait for it to become ready, and record it in local state.
 *
 * @param {object} opts - Parsed CLI options (`repo`, `file`, `kind`, `title`,
 *   `originChunk`, `originFile`, `ttlSeconds`, `json`).
 */
async function cmdTempSourceUpload(opts) {
  const repo = resolve2(String(opts.repo || "."));
  const [config, cfgPath] = loadConfig(repo, "temp-source upload");

  // Relative --file paths are resolved against the repo root.
  const requested = String(opts.file || "");
  const sourcePath = isAbsolute(requested) ? requested : resolve2(repo, requested);
  if (!existsSync6(sourcePath)) {
    die(`temp source file not found: ${sourcePath}`);
  }

  const setId = yymmddhhmm();
  const contentSha = sha256File(sourcePath);
  const title = tempSourceTitle(config, { setId, kind: opts.kind, title: opts.title, contentSha });
  const staged = stageTempSourceFile(repo, title, sourcePath);

  let sourceSet = {};
  let item = {};
  const uploadAndRecord = async () => {
    try {
      const [state, statePath] = loadState(cfgPath);
      const source = await uploadTextSourceFromFile(repo, config, staged, title);

      let status = "uploaded";
      if (config.notebooklm?.wait_after_upload !== false && source.id) {
        const ready = await waitSourceReady(repo, notebookId(config), String(source.id));
        status = ready ? "ready" : "error";
        if (status !== "ready") {
          // Remove the failed source from the notebook before reporting the failure.
          await deleteSourceIdsParallel(repo, notebookId(config), [String(source.id || "")], {
            parallelism: Number(config.notebooklm?.delete_parallelism || 4)
          });
          die(`source processing failed for temp source ${title}: ${source.id}`);
        }
      }

      const active = state.activeSourceSet && typeof state.activeSourceSet === "object" ? state.activeSourceSet : {};
      item = {
        id: source.id,
        title: source.title || title,
        contentSha256: contentSha,
        uploadedAt: iso(),
        status,
        origin: {
          activeSourceSetId: active.id,
          chunkKeys: opts.originChunk || [],
          filePaths: opts.originFile || []
        }
      };
      sourceSet = {
        id: setId,
        kind: slugify(opts.kind),
        purpose: opts.title,
        createdAt: iso(),
        expiresAt: tempSourceExpiresAt(Number(opts.ttlSeconds || 0)),
        sources: [item]
      };
      state.temporarySourceSets = [...tempSourceSets(state), sourceSet];
      writeJson(statePath, state);
    } finally {
      // The staged copy is removed whether or not the upload succeeded.
      removeFileQuiet(staged);
    }
  };
  await withRepoLock(repo, uploadAndRecord);

  output({ sourceSet, source: item }, opts.json);
}
|
|
1860
|
+
/**
 * `temp-source list` command: print recorded temporary source sets (optionally filtered
 * by kind) plus provider sources that carry the temp-source title prefix but are not
 * recorded in local state.
 *
 * @param {object} opts - Parsed CLI options (`repo`, `kind`, `json`).
 */
async function cmdTempSourceList(opts) {
  const repo = resolve2(String(opts.repo || "."));
  const [config, cfgPath] = loadConfig(repo, "temp-source list");
  const [state] = loadState(cfgPath);

  let sets = tempSourceSets(state);
  if (opts.kind) {
    sets = sets.filter((item) => String(item.kind || "") === slugify(opts.kind));
  }

  const prefix = tempSourcePrefix(config);
  const remote = await listSources(repo, notebookId(config));
  const prefixed = remote.filter((src) => String(src.title || "").startsWith(`${prefix}--`));

  // Tracked ids are collected from every recorded set, not just the kind-filtered ones.
  const tracked = new Set();
  for (const set of tempSourceSets(state)) {
    for (const src of set.sources || []) {
      const id = String(src.id || "");
      if (id) {
        tracked.add(id);
      }
    }
  }

  const untrackedPrefixMatches = prefixed.filter((src) => !tracked.has(String(src.id || "")));
  output({ temporarySourceSets: sets, untrackedPrefixMatches }, opts.json);
}
|
|
1872
|
+
/**
 * `temp-source cleanup` command: delete the recorded temporary sources selected by the
 * filters, update local state, and optionally delete untracked provider sources that
 * carry the temp-source title prefix.
 *
 * @param {object} opts - Parsed CLI options (`repo`, `kind`, `setId`, `expired`,
 *   `includeUntrackedPrefix`, `yes`, `json`).
 */
async function cmdTempSourceCleanup(opts) {
  const repo = resolve2(String(opts.repo || "."));
  const [config, cfgPath] = loadConfig(repo, "temp-source cleanup");
  const deleteOptions = () => ({ parallelism: Number(config.notebooklm?.delete_parallelism || 4) });
  const idsOf = (sets) =>
    sets.flatMap((set) => (set.sources || []).map((src) => String(src.id || "")).filter(Boolean));

  let deleted = [];
  let untracked = [];
  await withRepoLock(repo, async () => {
    const [state, statePath] = loadState(cfgPath);
    const wantedKind = opts.kind ? slugify(opts.kind) : "";

    // Partition recorded sets into those matching every supplied filter and the rest.
    const selected = [];
    const kept = [];
    for (const sourceSet of tempSourceSets(state)) {
      const wrongSet = Boolean(opts.setId) && String(sourceSet.id || "") !== String(opts.setId);
      const wrongKind = Boolean(wantedKind) && String(sourceSet.kind || "") !== wantedKind;
      const notExpired = Boolean(opts.expired) && !sourceIsExpired(sourceSet);
      if (wrongSet || wrongKind || notExpired) {
        kept.push(sourceSet);
      } else {
        selected.push(sourceSet);
      }
    }
    if (!opts.yes) {
      die("cleanup requires --yes");
    }

    deleted = await deleteSourceIdsParallel(repo, notebookId(config), idsOf(selected), deleteOptions());
    const deletedSet = new Set(deleted);

    // Sources that were not reported as deleted stay recorded so a later run can retry them.
    const remainingSelected = [];
    for (const set of selected) {
      const sources = (set.sources || []).filter((src) => !deletedSet.has(String(src.id || "")));
      if (sources.length) {
        remainingSelected.push({ ...set, sources });
      }
    }
    state.temporarySourceSets = [...kept, ...remainingSelected];
    writeJson(statePath, state);

    const prefix = tempSourcePrefix(config);
    const remote = await listSources(repo, notebookId(config));
    const providerMatches = remote.filter((src) => String(src.title || "").startsWith(`${prefix}--`));
    const tracked = new Set(idsOf(tempSourceSets(state)));
    untracked = providerMatches.filter((src) => {
      const id = String(src.id || "");
      return !tracked.has(id) && !deletedSet.has(id);
    });

    if (opts.includeUntrackedPrefix) {
      const untrackedIds = untracked.map((src) => String(src.id || "")).filter(Boolean);
      const extra = await deleteSourceIdsParallel(repo, notebookId(config), untrackedIds, deleteOptions());
      for (const id of extra) {
        deleted.push(id);
      }
      const extraSet = new Set(extra);
      untracked = untracked.filter((src) => !extraSet.has(String(src.id || "")));
    }
  });

  output({ deletedSourceIds: deleted, untrackedPrefixMatches: untracked }, opts.json);
}
|
|
1913
|
+
|
|
1914
|
+
// src/cli.ts
|
|
1915
|
+
/**
 * Build the `memdex` command-line program: top-level description and help text, the
 * ask/locate/init/status/pack/ensure/refresh commands, and the `temp-source`
 * upload/list/cleanup subcommands, each wired to its `cmd*` handler.
 *
 * @returns {Command} The configured program; the caller parses argv with it.
 */
function buildProgram() {
  const cli = new Command();

  const summary = [
    "Agent-facing semantic retrieval for projects and source sets.",
    "",
    "Memdex uses NotebookLM as a semantic locator, then treats local files,",
    "command output, and project docs as authority for exact evidence."
  ].join("\n");

  // NOTE(review): whitespace inside this help text looks collapsed in the copy this was
  // reviewed from (no indentation or column alignment) - confirm against upstream.
  const examples = `

Common agent paths:
memdex init --repo . --create-notebook
memdex ask --repo . "Where is retry/backfill documented?"
memdex locate --repo . "invoice export retry command"
memdex ask --repo . --yes "question"

Command routing:
ask answer architecture/docs/status questions over the source set
locate find likely files or symbols and return local line refs
init create .memdex/config.json and bind a NotebookLM notebook
status inspect local config, freshness, and recorded source state
ensure prewarm or refresh the index when policy allows
refresh force a source replacement
pack preview deterministic repomix chunks without provider Q&A
`;

  cli.name("memdex").description(summary).showHelpAfterError().addHelpText("after", examples);

  cli
    .command("ask")
    .description("answer semantic project questions with freshness preflight")
    .argument("<question>", "natural-language question to ask over the source set")
    .option("--repo <repo>", "project root", ".")
    .option("--yes", "approve first broad upload if setup is otherwise ready")
    .option("--force-refresh", "refresh managed sources before asking")
    .option("--json", "print machine-readable JSON")
    .option("--verbose", "include freshness and provider metadata")
    .action((question, opts) => cmdAsk(question, opts));

  cli
    .command("locate")
    .description("find likely files or symbols and verify local line refs")
    .argument("<query>", "natural-language thing to find")
    .option("--repo <repo>", "project root", ".")
    .option("--yes", "approve first broad upload if setup is otherwise ready")
    .option("--force-refresh", "refresh managed sources before locating")
    .option("--include-provider-answer", "include the raw provider answer in output")
    .option("--json", "print machine-readable JSON")
    .option("--verbose", "include freshness metadata")
    .action((query, opts) => cmdLocate(query, opts));

  cli
    .command("init")
    .description("create .memdex/config.json and bind a NotebookLM notebook")
    .option("--repo <repo>", "project, repo, vault, or source-set root", ".")
    .option("--notebook-id <id>", "bind an existing NotebookLM notebook by ID", "")
    .option("--project-name <name>", "stable project key for notebook and source titles", "")
    .option("--notebook-title-prefix <prefix>", "NotebookLM title prefix", "memdex")
    .option("--notebook-title <title>", "exact NotebookLM title to create or reuse", "")
    .option("--reuse-existing-notebook", "reuse an exact title match; do not create cloud state")
    .option("--create-notebook", "create the NotebookLM notebook when no exact title match exists")
    .option("--source-title-prefix <prefix>", "prefix for managed NotebookLM source titles", "")
    .option("--include <specs>", "comma-separated include roots or files for the source set", "")
    .option("--force", "overwrite existing .memdex/config.json")
    .action((opts) => cmdInit(opts));

  cli
    .command("status")
    .description("inspect config, freshness, and recorded source state")
    .option("--repo <repo>", "project root", ".")
    .option("--json", "print machine-readable JSON")
    .action((opts) => cmdStatus(opts));

  cli
    .command("pack")
    .description("preview deterministic repomix chunks")
    .option("--repo <repo>", "project root", ".")
    .option("--set-id <id>", "stable source-set ID for rendered chunk titles", "")
    .option("--dry-run", "show planned chunks without running repomix")
    .option("--include-files", "include per-chunk file lists in output")
    .option("--json", "print machine-readable JSON")
    .action((opts) => cmdPack(opts));

  cli
    .command("ensure")
    .description("prewarm or refresh the index when policy allows")
    .option("--repo <repo>", "project root", ".")
    .option("--force", "bypass freshness TTL and rebuild source state")
    .option("--yes", "approve the first broad upload for this run")
    .option("--json", "print machine-readable JSON")
    .action((opts) => cmdEnsure(opts));

  cli
    .command("refresh")
    .description("force source replacement")
    .option("--repo <repo>", "project root", ".")
    .option("--force", "force refresh even when freshness checks would skip it")
    .option("--json", "print machine-readable JSON")
    .action((opts) => cmdRefresh(opts));

  const temp = cli.command("temp-source").description("manage temporary derived NotebookLM sources");

  temp
    .command("upload")
    .description("upload a temporary source file")
    .option("--repo <repo>", "project root", ".")
    .requiredOption("--kind <kind>", "temporary source kind")
    .requiredOption("--title <title>", "human-readable title slug")
    .requiredOption("--file <file>", "local markdown/text file to upload")
    .option("--origin-chunk <chunk>", "origin active chunk key; repeatable", collect, [])
    .option("--origin-file <file>", "origin local file path; repeatable", collect, [])
    .option("--ttl-seconds <seconds>", "optional expiry TTL in seconds", "0")
    .option("--json", "print machine-readable JSON")
    // The TTL arrives as a string from the CLI and is converted to a number here.
    .action((opts) => cmdTempSourceUpload({ ...opts, ttlSeconds: Number(opts.ttlSeconds || 0) }));

  temp
    .command("list")
    .description("list recorded temporary sources")
    .option("--repo <repo>", "project root", ".")
    .option("--kind <kind>", "filter by temporary source kind", "")
    .option("--json", "print machine-readable JSON")
    .action((opts) => cmdTempSourceList(opts));

  temp
    .command("cleanup")
    .description("delete recorded temporary sources")
    .option("--repo <repo>", "project root", ".")
    .option("--kind <kind>", "filter by temporary source kind", "")
    .option("--set-id <id>", "filter by temporary source-set ID", "")
    .option("--expired", "clean only expired temporary sources")
    .option("--include-untracked-prefix", "also delete untracked prefix matches; requires --yes")
    .option("--yes", "confirm deletion")
    .option("--json", "print machine-readable JSON")
    .action((opts) => cmdTempSourceCleanup(opts));

  return cli;
}
|
|
1953
|
+
/**
 * Accumulator for repeatable commander options (used as the custom option
 * processor for `--origin-chunk` and `--origin-file`).
 *
 * Returns a NEW array instead of pushing into `previous`: on the first
 * occurrence `previous` is the default array object registered with the
 * option, and mutating it in place would silently change that default.
 *
 * @param {string} value - The option argument parsed from the command line.
 * @param {string[] | undefined} previous - Values collected so far (or the
 *   option default); treated as empty when undefined.
 * @returns {string[]} A fresh array with `value` appended.
 */
function collect(value, previous) {
  return [...(previous ?? []), value];
}
|
|
1957
|
+
/**
 * CLI entry point: builds the commander program and parses `argv` with it.
 *
 * A `MemdexError` is reported on stderr as `error: <message>` and its `code`
 * is stored in `process.exitCode`; the function then returns normally.
 * Any other error is rethrown unchanged.
 *
 * @param {string[]} [argv=process.argv] - Argument vector handed to
 *   `parseAsync`.
 * @returns {Promise<void>}
 */
async function main(argv = process.argv) {
  try {
    const program = buildProgram();
    await program.parseAsync(argv);
  } catch (failure) {
    // Only domain errors are turned into an exit code; everything else
    // propagates to the caller untouched.
    if (!(failure instanceof MemdexError)) {
      throw failure;
    }
    console.error(`error: ${failure.message}`);
    process.exitCode = failure.code;
  }
}
|
|
1969
|
+
// Run the CLI only when this module is the process entry point (not when it
// is imported, e.g. by a wrapper script or a test).
//
// A raw `file://${process.argv[1]}` comparison misses common launches:
// symlinked bin shims (argv[1] is the link, import.meta.url the real file),
// Windows paths (`file:///C:/...`), and paths needing percent-encoding.
// So argv[1] is resolved through realpath and converted with pathToFileURL;
// the original literal comparison is kept as a fallback so every case that
// matched before still matches.
{
  const { realpathSync } = await import("node:fs");
  const { pathToFileURL } = await import("node:url");
  const entryArg = process.argv[1];
  let resolvedEntryUrl = "";
  try {
    if (entryArg) {
      resolvedEntryUrl = pathToFileURL(realpathSync(entryArg)).href;
    }
  } catch {
    // argv[1] does not name an existing file: leave resolvedEntryUrl empty
    // so only the literal fallback comparison below can match.
  }
  const isEntryPoint =
    import.meta.url === resolvedEntryUrl ||
    import.meta.url === `file://${entryArg}`;
  if (isEntryPoint) {
    await main();
  }
}
|
|
1972
|
+
export {
|
|
1973
|
+
main,
|
|
1974
|
+
buildProgram
|
|
1975
|
+
};
|