@bodhi-ventures/aiocs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4535 @@
1
+ // src/errors.ts
2
// Stable, machine-readable error codes carried by AiocsError.
// Callers are expected to match on these strings, never on error messages.
var AIOCS_ERROR_CODES = {
  // Input validation
  invalidArgument: "INVALID_ARGUMENT",
  // Catalog lookups
  sourceNotFound: "SOURCE_NOT_FOUND",
  snapshotNotFound: "SNAPSHOT_NOT_FOUND",
  snapshotDiffBaseNotFound: "SNAPSHOT_DIFF_BASE_NOT_FOUND",
  // Fetching / scoping
  noPagesFetched: "NO_PAGES_FETCHED",
  noProjectScope: "NO_PROJECT_SCOPE",
  chunkNotFound: "CHUNK_NOT_FOUND",
  // Reference files
  referenceFileNotFound: "REFERENCE_FILE_NOT_FOUND",
  invalidReferenceFile: "INVALID_REFERENCE_FILE",
  // Auth / health checks
  authEnvMissing: "AUTH_ENV_MISSING",
  canaryFailed: "CANARY_FAILED",
  // Backup / restore
  backupConflict: "BACKUP_CONFLICT",
  backupInvalid: "BACKUP_INVALID",
  backupSourceMissing: "BACKUP_SOURCE_MISSING",
  // Embedding pipeline
  embeddingConfigInvalid: "EMBEDDING_CONFIG_INVALID",
  embeddingProviderUnavailable: "EMBEDDING_PROVIDER_UNAVAILABLE",
  vectorStoreUnavailable: "VECTOR_STORE_UNAVAILABLE",
  embeddingJobNotFound: "EMBEDDING_JOB_NOT_FOUND",
  // Catch-all for unexpected failures (see toAiocsError)
  internalError: "INTERNAL_ERROR"
};
23
// Error type that pairs a human-readable message with a stable code from
// AIOCS_ERROR_CODES and optional structured details for programmatic handling.
var AiocsError = class extends Error {
  code;
  details;
  constructor(code, message, details) {
    super(message);
    Object.assign(this, { name: "AiocsError", code, details });
  }
};
33
// Type guard: true exactly when the value is an AiocsError instance.
function isAiocsError(error) {
  if (error instanceof AiocsError) {
    return true;
  }
  return false;
}
36
// Coerces any thrown value into an AiocsError without losing information:
// AiocsError passes through, Error keeps its message, everything else is
// stringified. Non-AiocsError inputs are tagged INTERNAL_ERROR.
function toAiocsError(error) {
  if (isAiocsError(error)) {
    return error;
  }
  const message = error instanceof Error ? error.message : String(error);
  return new AiocsError(AIOCS_ERROR_CODES.internalError, message);
}
45
+
46
+ // src/catalog/catalog.ts
47
+ import { mkdirSync } from "fs";
48
+ import { join, resolve as resolve2 } from "path";
49
+ import { randomUUID } from "crypto";
50
+ import Database from "better-sqlite3";
51
+
52
+ // src/catalog/chunking.ts
53
// Upper bound, in UTF-8 bytes, for a single search chunk.
var MAX_CHUNK_BYTES = 16384;
// ATX markdown heading: group 1 is the '#' run (level), group 2 the title text.
var HEADING_PATTERN = /^(#{1,6})\s+(.*)$/;
55
// UTF-8 byte length of a string — chunk limits are byte-based, not
// character-based, so multi-byte characters count at their encoded size.
function byteLength(value) {
  const encodedSize = Buffer.byteLength(value, "utf8");
  return encodedSize;
}
58
/**
 * Greedily packs the lines of an oversized section into chunks of at most
 * MAX_CHUNK_BYTES (UTF-8). Chunk text is trimmed before emission, and an
 * accumulation that trims to nothing is dropped without consuming an order
 * slot. A single line longer than the limit still becomes its own
 * (oversized) chunk — lines are never split internally.
 */
function splitLargeSection(sectionTitle, markdown, startOrder) {
  const chunks = [];
  let buffer = "";
  let nextOrder = startOrder;
  const emit = () => {
    const text = buffer.trim();
    buffer = "";
    if (text) {
      chunks.push({ sectionTitle, markdown: text, chunkOrder: nextOrder });
      nextOrder += 1;
    }
  };
  for (const line of markdown.split("\n")) {
    // Flush first when appending this line would push us past the limit.
    if (buffer && byteLength(`${buffer}\n${line}`) > MAX_CHUNK_BYTES) {
      emit();
    }
    buffer = buffer ? `${buffer}\n${line}` : line;
  }
  emit();
  return chunks;
}
89
/**
 * Splits a page's markdown into search chunks.
 *
 * Small pages (<= MAX_CHUNK_BYTES) become a single chunk titled with the
 * page title. Larger pages are carved into sections at level-2+ headings
 * (the leading untitled run keeps the page title), and any section still
 * over the limit is packed line-by-line via splitLargeSection. Chunk orders
 * are consecutive across the whole page.
 */
function chunkMarkdown(pageTitle, markdown) {
  const body = markdown.trim();
  if (!body) {
    return [];
  }
  if (byteLength(body) <= MAX_CHUNK_BYTES) {
    return [{ sectionTitle: pageTitle, markdown: body, chunkOrder: 0 }];
  }
  // Pass 1: partition into titled sections at '##'-or-deeper headings.
  const sections = [];
  let title = pageTitle;
  let buffered = [];
  const closeSection = () => {
    const text = buffered.join("\n").trim();
    buffered = [];
    if (text) {
      sections.push({ title, markdown: text });
    }
  };
  for (const line of body.split("\n")) {
    const heading = line.trim().match(HEADING_PATTERN);
    if (heading && heading[1].length >= 2) {
      closeSection();
      // An empty heading text falls back to the page title.
      title = heading[2].trim() || pageTitle;
    }
    buffered.push(line); // heading line belongs to the section it opens
  }
  closeSection();
  // Pass 2: emit chunks; chunks.length doubles as the next chunk order.
  const chunks = [];
  for (const section of sections) {
    if (byteLength(section.markdown) <= MAX_CHUNK_BYTES) {
      chunks.push({
        sectionTitle: section.title,
        markdown: section.markdown,
        chunkOrder: chunks.length
      });
    } else {
      chunks.push(...splitLargeSection(section.title, section.markdown, chunks.length));
    }
  }
  return chunks;
}
137
+
138
+ // src/catalog/fingerprint.ts
139
+ import { createHash } from "crypto";
140
// Hex-encoded SHA-256 digest of the given value.
function sha256(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
143
/**
 * Deterministic fingerprint for a snapshot. Pages are sorted by URL so the
 * result does not depend on crawl order, then hashed together with the
 * source id and the config hash (a config change yields a new fingerprint
 * even for identical page content).
 */
function buildSnapshotFingerprint(input) {
  const pages = [...input.pages].sort((a, b) => a.url.localeCompare(b.url));
  const payload = {
    sourceId: input.sourceId,
    configHash: input.configHash,
    pages
  };
  return sha256(JSON.stringify(payload));
}
152
+
153
+ // src/catalog/project-scope.ts
154
+ import { realpathSync } from "fs";
155
+ import { resolve } from "path";
156
/**
 * True when `candidate` equals `root` or lies underneath it.
 *
 * Fix: the original only recognized the POSIX "/" separator, so on Windows
 * — where canonicalizeProjectPath (via realpathSync.native/resolve) yields
 * backslash-separated paths — no child directory ever matched its linked
 * project root. Both separators are now accepted. Appending a separator to
 * the root before the prefix test still prevents sibling confusion
 * (e.g. "/ab" is not within "/a").
 */
function isWithin(candidate, root) {
  if (candidate === root) {
    return true;
  }
  return candidate.startsWith(`${root}/`) || candidate.startsWith(`${root}\\`);
}
159
/**
 * Resolves a path to an absolute, symlink-free form. When realpath fails
 * (typically because the path does not exist yet), the plain resolved path
 * is returned instead of throwing.
 */
function canonicalizeProjectPath(path) {
  const absolute = resolve(path);
  let canonical;
  try {
    canonical = realpathSync.native(absolute);
  } catch {
    canonical = absolute;
  }
  return canonical;
}
167
/**
 * Picks the most specific linked project scope containing `cwd`.
 * Paths are canonicalized before comparison; among matching scopes the one
 * with the longest project path (deepest directory) wins. Returns null when
 * no scope contains `cwd`. Source id arrays are copied so callers cannot
 * mutate the input scopes through the result.
 */
function resolveProjectScope(cwd, scopes) {
  const here = canonicalizeProjectPath(cwd);
  const matches = [];
  for (const scope of scopes) {
    const projectPath = canonicalizeProjectPath(scope.projectPath);
    if (isWithin(here, projectPath)) {
      matches.push({ projectPath, sourceIds: [...scope.sourceIds] });
    }
  }
  matches.sort((a, b) => b.projectPath.length - a.projectPath.length);
  return matches.length > 0 ? matches[0] : null;
}
175
+
176
+ // src/spec/source-spec.ts
177
+ import { readFile } from "fs/promises";
178
+ import { extname } from "path";
179
+ import YAML from "yaml";
180
+ import { z } from "zod";
181
// --- Source spec validation (zod) ---

// Non-empty URL pattern used by discovery include/exclude lists and
// auth header include lists.
var patternSchema = z.string().min(1);

// One scripted browser step, discriminated on `action`. Used by the
// clipboard extraction strategy to drive the page before reading content.
var interactionSchema = z.discriminatedUnion("action", [
  z.object({
    action: z.literal("hover"),
    selector: z.string().min(1),
    timeoutMs: z.number().int().positive().optional()
  }),
  z.object({
    action: z.literal("click"),
    selector: z.string().min(1),
    timeoutMs: z.number().int().positive().optional()
  }),
  z.object({
    action: z.literal("press"),
    key: z.string().min(1)
  }),
  z.object({
    action: z.literal("wait"),
    timeoutMs: z.number().int().positive()
  })
]);

// Extraction via a page's "copy" button: run interactions, then read the
// clipboard (default wait 10s).
var clipboardExtractSchema = z.object({
  strategy: z.literal("clipboardButton"),
  interactions: z.array(interactionSchema).min(1),
  clipboardTimeoutMs: z.number().int().positive().default(1e4)
});

// Extraction by taking the content of a single CSS selector.
var selectorExtractSchema = z.object({
  strategy: z.literal("selector"),
  selector: z.string().min(1)
});

// Extraction via a readability-style main-content heuristic (no options).
var readabilityExtractSchema = z.object({
  strategy: z.literal("readability")
});

// HTTP header whose value is read from an environment variable at fetch
// time. Optional `hosts`/`include` restrict where the header is sent.
var authHeaderSchema = z.object({
  name: z.string().min(1),
  valueFromEnv: z.string().min(1),
  hosts: z.array(z.string().min(1)).min(1).optional(),
  include: z.array(patternSchema).min(1).optional()
});

// Cookie whose value is read from an environment variable; `path` defaults
// to "/" like a browser-set cookie.
var authCookieSchema = z.object({
  name: z.string().min(1),
  valueFromEnv: z.string().min(1),
  domain: z.string().min(1),
  path: z.string().min(1).default("/"),
  secure: z.boolean().optional(),
  httpOnly: z.boolean().optional(),
  sameSite: z.enum(["Strict", "Lax", "None"]).optional()
});

// One canary health check: fetch `url` and verify title/text/markdown size
// (minimum markdown length defaults to 40 characters).
var canaryCheckSchema = z.object({
  url: z.string().url(),
  expectedTitle: z.string().min(1).optional(),
  expectedText: z.string().min(1).optional(),
  minMarkdownLength: z.number().int().positive().default(40)
});

// Full source specification. `id` is constrained to lowercase slug form
// because it is used in file paths and SQL keys.
var sourceSpecSchema = z.object({
  id: z.string().min(1).regex(/^[a-z0-9-]+$/),
  label: z.string().min(1),
  startUrls: z.array(z.string().url()).min(1),
  allowedHosts: z.array(z.string().min(1)).min(1),
  discovery: z.object({
    include: z.array(patternSchema).min(1),
    exclude: z.array(patternSchema),
    maxPages: z.number().int().positive()
  }),
  extract: z.discriminatedUnion("strategy", [
    clipboardExtractSchema,
    selectorExtractSchema,
    readabilityExtractSchema
  ]),
  normalize: z.object({
    prependSourceComment: z.boolean().default(true)
  }),
  schedule: z.object({
    everyHours: z.number().int().positive()
  }),
  auth: z.object({
    headers: z.array(authHeaderSchema).default([]),
    cookies: z.array(authCookieSchema).default([])
  }).optional(),
  canary: z.object({
    everyHours: z.number().int().positive().optional(),
    checks: z.array(canaryCheckSchema).min(1)
  }).optional()
}).superRefine((spec, context) => {
  // Cross-field rule: a header must not be scoped to a host the crawler is
  // not allowed to visit, otherwise the credential would never be sent.
  for (const [index, header] of (spec.auth?.headers ?? []).entries()) {
    if (!header.hosts) {
      continue;
    }
    for (const host of header.hosts) {
      if (!spec.allowedHosts.includes(host)) {
        context.addIssue({
          code: z.ZodIssueCode.custom,
          path: ["auth", "headers", index, "hosts"],
          message: `Authenticated header host '${host}' must be included in allowedHosts`
        });
      }
    }
  }
});
280
// Parses a raw spec document: strict JSON for ".json", YAML for any other
// extension (YAML also accepts JSON, but ".json" gets the stricter parser).
function parseSourceSpec(raw, ext) {
  return ext === ".json" ? JSON.parse(raw) : YAML.parse(raw);
}
286
// Reads a spec file from disk, parses it by extension, and validates it
// against sourceSpecSchema (throws ZodError on an invalid spec).
async function loadSourceSpec(path) {
  const ext = extname(path).toLowerCase();
  const raw = await readFile(path, "utf8");
  return sourceSpecSchema.parse(parseSourceSpec(raw, ext));
}
291
/**
 * Effective canary settings for a source. Explicit canary config wins;
 * otherwise the cadence follows the fetch schedule clamped to [1, 6] hours,
 * and a single default check against the first start URL is used.
 */
function resolveSourceCanary(spec) {
  const fallbackEveryHours = Math.max(1, Math.min(spec.schedule.everyHours, 6));
  const fallbackChecks = [
    {
      url: spec.startUrls[0],
      minMarkdownLength: 40
    }
  ];
  const canary = spec.canary;
  return {
    everyHours: canary?.everyHours ?? fallbackEveryHours,
    checks: canary?.checks ?? fallbackChecks
  };
}
302
+
303
+ // src/catalog/catalog.ts
304
// Creates the full catalog schema on first open and applies additive column
// migrations for databases created by older versions. All DDL uses
// IF NOT EXISTS, so the function is idempotent and safe to run on every open.
function initSchema(db) {
  db.exec(`
    PRAGMA foreign_keys = ON;

    CREATE TABLE IF NOT EXISTS sources (
      id TEXT PRIMARY KEY,
      label TEXT NOT NULL,
      spec_json TEXT NOT NULL,
      spec_path TEXT,
      config_hash TEXT NOT NULL,
      created_at TEXT NOT NULL,
      updated_at TEXT NOT NULL,
      last_checked_at TEXT,
      last_successful_snapshot_at TEXT,
      last_successful_snapshot_id TEXT,
      last_canary_checked_at TEXT,
      last_successful_canary_at TEXT,
      last_canary_status TEXT,
      next_canary_due_at TEXT,
      next_due_at TEXT NOT NULL
    );

    CREATE TABLE IF NOT EXISTS snapshots (
      id TEXT PRIMARY KEY,
      source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
      fingerprint TEXT NOT NULL,
      config_hash TEXT NOT NULL,
      detected_version TEXT,
      page_count INTEGER NOT NULL,
      created_at TEXT NOT NULL,
      UNIQUE(source_id, fingerprint)
    );

    CREATE TABLE IF NOT EXISTS pages (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      snapshot_id TEXT NOT NULL REFERENCES snapshots(id) ON DELETE CASCADE,
      url TEXT NOT NULL,
      title TEXT NOT NULL,
      markdown TEXT NOT NULL,
      content_hash TEXT NOT NULL,
      UNIQUE(snapshot_id, url)
    );

    CREATE TABLE IF NOT EXISTS chunks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
      snapshot_id TEXT NOT NULL REFERENCES snapshots(id) ON DELETE CASCADE,
      page_id INTEGER NOT NULL REFERENCES pages(id) ON DELETE CASCADE,
      page_url TEXT NOT NULL,
      page_title TEXT NOT NULL,
      section_title TEXT NOT NULL,
      chunk_order INTEGER NOT NULL,
      markdown TEXT NOT NULL
    );

    CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
      page_title,
      section_title,
      markdown,
      content=chunks,
      content_rowid=id,
      tokenize='porter unicode61'
    );

    CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
      INSERT INTO chunks_fts(rowid, page_title, section_title, markdown)
      VALUES (new.id, new.page_title, new.section_title, new.markdown);
    END;

    CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
      INSERT INTO chunks_fts(chunks_fts, rowid, page_title, section_title, markdown)
      VALUES ('delete', old.id, old.page_title, old.section_title, old.markdown);
    END;

    CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
      INSERT INTO chunks_fts(chunks_fts, rowid, page_title, section_title, markdown)
      VALUES ('delete', old.id, old.page_title, old.section_title, old.markdown);
      INSERT INTO chunks_fts(rowid, page_title, section_title, markdown)
      VALUES (new.id, new.page_title, new.section_title, new.markdown);
    END;

    CREATE TABLE IF NOT EXISTS fetch_runs (
      id TEXT PRIMARY KEY,
      source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
      status TEXT NOT NULL CHECK(status IN ('success', 'failed')),
      error_message TEXT,
      snapshot_id TEXT REFERENCES snapshots(id) ON DELETE SET NULL,
      started_at TEXT NOT NULL,
      finished_at TEXT NOT NULL
    );

    CREATE TABLE IF NOT EXISTS canary_runs (
      id TEXT PRIMARY KEY,
      source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
      status TEXT NOT NULL CHECK(status IN ('pass', 'fail')),
      checked_at TEXT NOT NULL,
      details_json TEXT NOT NULL
    );

    CREATE TABLE IF NOT EXISTS project_links (
      project_path TEXT NOT NULL,
      source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
      created_at TEXT NOT NULL,
      PRIMARY KEY(project_path, source_id)
    );

    CREATE TABLE IF NOT EXISTS daemon_state (
      singleton_id INTEGER PRIMARY KEY CHECK(singleton_id = 1),
      last_started_at TEXT,
      last_cycle_started_at TEXT,
      last_cycle_completed_at TEXT,
      last_cycle_status TEXT,
      interval_minutes INTEGER,
      fetch_on_start INTEGER
    );

    CREATE TABLE IF NOT EXISTS embedding_state (
      chunk_id INTEGER PRIMARY KEY REFERENCES chunks(id) ON DELETE CASCADE,
      source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
      snapshot_id TEXT NOT NULL REFERENCES snapshots(id) ON DELETE CASCADE,
      content_hash TEXT NOT NULL,
      model_key TEXT,
      status TEXT NOT NULL CHECK(status IN ('pending', 'indexed', 'failed', 'stale')),
      vector_point_id TEXT,
      last_attempted_at TEXT,
      indexed_at TEXT,
      error_message TEXT
    );

    CREATE TABLE IF NOT EXISTS embedding_jobs (
      source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
      snapshot_id TEXT NOT NULL REFERENCES snapshots(id) ON DELETE CASCADE,
      status TEXT NOT NULL CHECK(status IN ('pending', 'running', 'succeeded', 'failed')),
      attempt_count INTEGER NOT NULL DEFAULT 0,
      chunk_count INTEGER NOT NULL,
      created_at TEXT NOT NULL,
      updated_at TEXT NOT NULL,
      claimed_at TEXT,
      completed_at TEXT,
      error_message TEXT,
      PRIMARY KEY(source_id, snapshot_id)
    );

    CREATE INDEX IF NOT EXISTS idx_embedding_jobs_status_updated
    ON embedding_jobs(status, updated_at, source_id, snapshot_id);

    CREATE INDEX IF NOT EXISTS idx_embedding_state_source_snapshot
    ON embedding_state(source_id, snapshot_id, status);
  `);
  // Additive migrations: databases created before these columns existed get
  // them via ALTER TABLE. The CREATE TABLE IF NOT EXISTS above is a no-op on
  // such databases, so table_info reflects the on-disk (possibly old) schema.
  const sourceColumns = db.prepare("PRAGMA table_info(sources)").all();
  if (!sourceColumns.some((column) => column.name === "spec_path")) {
    db.exec("ALTER TABLE sources ADD COLUMN spec_path TEXT");
  }
  if (!sourceColumns.some((column) => column.name === "last_successful_snapshot_at")) {
    db.exec("ALTER TABLE sources ADD COLUMN last_successful_snapshot_at TEXT");
  }
  if (!sourceColumns.some((column) => column.name === "last_canary_checked_at")) {
    db.exec("ALTER TABLE sources ADD COLUMN last_canary_checked_at TEXT");
  }
  if (!sourceColumns.some((column) => column.name === "last_successful_canary_at")) {
    db.exec("ALTER TABLE sources ADD COLUMN last_successful_canary_at TEXT");
  }
  if (!sourceColumns.some((column) => column.name === "last_canary_status")) {
    db.exec("ALTER TABLE sources ADD COLUMN last_canary_status TEXT");
  }
  if (!sourceColumns.some((column) => column.name === "next_canary_due_at")) {
    db.exec("ALTER TABLE sources ADD COLUMN next_canary_due_at TEXT");
  }
}
473
// Current time as an ISO-8601 UTC string (the catalog stores all
// timestamps in this format).
function nowIso() {
  return new Date().toISOString();
}
476
// ISO-8601 UTC timestamp `hours` hours from now.
function addHoursIso(hours) {
  const HOUR_MS = 60 * 60 * 1e3;
  const due = new Date(Date.now() + hours * HOUR_MS);
  return due.toISOString();
}
479
/**
 * JSON serialization with object keys emitted in locale-sorted order, so
 * structurally equal values always serialize (and therefore hash) the same
 * way regardless of property insertion order. Arrays keep their order;
 * primitives and null defer to JSON.stringify.
 */
function stableStringify(value) {
  if (Array.isArray(value)) {
    const parts = value.map((entry) => stableStringify(entry));
    return `[${parts.join(",")}]`;
  }
  if (value !== null && typeof value === "object") {
    const keys = Object.keys(value).sort((a, b) => a.localeCompare(b));
    const parts = keys.map(
      (key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`
    );
    return `{${parts.join(",")}}`;
  }
  return JSON.stringify(value);
}
491
// Collapses a raw user query into space-separated alphanumeric words
// (Unicode letters/digits only), stripping punctuation that would otherwise
// be interpreted as FTS5 query syntax.
function normalizeQuery(query) {
  const tokens = query
    .replace(/[^\p{L}\p{N}]+/gu, " ")
    .split(/\s+/)
    .filter((token) => token.length > 0);
  return tokens.join(" ");
}
495
/**
 * Validates an optional pagination field. Returns `fallback` when the value
 * is absent; otherwise the value must be a non-negative integer, and for
 * the "limit" field specifically it must also be non-zero.
 * Throws AiocsError(INVALID_ARGUMENT) on violation.
 */
function assertPaginationValue(value, field, fallback) {
  if (value === void 0) {
    return fallback;
  }
  const isNonNegativeInteger = Number.isInteger(value) && value >= 0;
  if (!isNonNegativeInteger) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.invalidArgument,
      `${field} must be a non-negative integer`
    );
  }
  if (value === 0 && field === "limit") {
    throw new AiocsError(
      AIOCS_ERROR_CODES.invalidArgument,
      "limit must be greater than zero"
    );
  }
  return value;
}
513
+ function openCatalog(options) {
514
+ const dataDir = resolve2(options.dataDir);
515
+ mkdirSync(dataDir, { recursive: true });
516
+ const db = new Database(join(dataDir, "catalog.sqlite"));
517
+ initSchema(db);
518
+ const listProjectLinks = () => {
519
+ const rows = db.prepare("SELECT project_path, source_id FROM project_links ORDER BY project_path, source_id").all();
520
+ const grouped = /* @__PURE__ */ new Map();
521
+ for (const row of rows) {
522
+ const current = grouped.get(row.project_path) ?? [];
523
+ current.push(row.source_id);
524
+ grouped.set(row.project_path, current);
525
+ }
526
+ return [...grouped.entries()].map(([projectPath, sourceIds]) => ({ projectPath, sourceIds }));
527
+ };
528
+ const resolveSearchScope = (input) => {
529
+ const limit = assertPaginationValue(input.limit, "limit", 20);
530
+ const offset = assertPaginationValue(input.offset, "offset", 0);
531
+ let sourceIds = input.sourceIds ? [...input.sourceIds] : void 0;
532
+ if (!sourceIds || sourceIds.length === 0) {
533
+ if (input.cwd) {
534
+ const scope = resolveProjectScope(
535
+ input.cwd,
536
+ listProjectLinks().map((link) => ({
537
+ projectPath: link.projectPath,
538
+ sourceIds: link.sourceIds
539
+ }))
540
+ );
541
+ if (scope) {
542
+ sourceIds = scope.sourceIds;
543
+ }
544
+ }
545
+ }
546
+ if ((!sourceIds || sourceIds.length === 0) && !input.all) {
547
+ return {
548
+ limit,
549
+ offset,
550
+ sourceIds: null,
551
+ snapshotIds: []
552
+ };
553
+ }
554
+ const filterSourceIds = sourceIds && sourceIds.length > 0 ? [...new Set(sourceIds)] : null;
555
+ const latestSnapshotIds = input.snapshotId ? [input.snapshotId] : db.prepare(`
556
+ SELECT last_successful_snapshot_id AS snapshot_id
557
+ FROM sources
558
+ WHERE last_successful_snapshot_id IS NOT NULL
559
+ ${filterSourceIds ? `AND id IN (${filterSourceIds.map(() => "?").join(",")})` : ""}
560
+ `).all(...filterSourceIds ?? []).map((row) => row.snapshot_id);
561
+ return {
562
+ limit,
563
+ offset,
564
+ sourceIds: filterSourceIds,
565
+ snapshotIds: latestSnapshotIds
566
+ };
567
+ };
568
+ const searchLexicalByScope = (input) => {
569
+ const normalized = normalizeQuery(input.query);
570
+ const limit = assertPaginationValue(input.limit, "limit", input.scope.limit);
571
+ const offset = assertPaginationValue(input.offset, "offset", input.scope.offset);
572
+ if (!normalized || input.scope.snapshotIds.length === 0) {
573
+ return {
574
+ total: 0,
575
+ limit,
576
+ offset,
577
+ hasMore: false,
578
+ results: []
579
+ };
580
+ }
581
+ const whereSnapshotPlaceholders = input.scope.snapshotIds.map(() => "?").join(",");
582
+ const sourceSql = input.scope.sourceIds ? `AND c.source_id IN (${input.scope.sourceIds.map(() => "?").join(",")})` : "";
583
+ const queryArgs = [
584
+ normalized,
585
+ ...input.scope.snapshotIds,
586
+ ...input.scope.sourceIds ?? []
587
+ ];
588
+ const totalRow = db.prepare(`
589
+ SELECT COUNT(*) AS total
590
+ FROM chunks_fts
591
+ JOIN chunks c ON c.id = chunks_fts.rowid
592
+ WHERE chunks_fts MATCH ?
593
+ AND c.snapshot_id IN (${whereSnapshotPlaceholders})
594
+ ${sourceSql}
595
+ `).get(...queryArgs);
596
+ const rows = db.prepare(`
597
+ SELECT
598
+ c.id AS chunk_id,
599
+ c.source_id,
600
+ c.snapshot_id,
601
+ c.page_url,
602
+ c.page_title,
603
+ c.section_title,
604
+ c.markdown
605
+ FROM chunks_fts
606
+ JOIN chunks c ON c.id = chunks_fts.rowid
607
+ WHERE chunks_fts MATCH ?
608
+ AND c.snapshot_id IN (${whereSnapshotPlaceholders})
609
+ ${sourceSql}
610
+ ORDER BY bm25(chunks_fts), c.id
611
+ LIMIT ?
612
+ OFFSET ?
613
+ `).all(...queryArgs, limit, offset);
614
+ const results = rows.map((row) => ({
615
+ chunkId: row.chunk_id,
616
+ sourceId: row.source_id,
617
+ snapshotId: row.snapshot_id,
618
+ pageUrl: row.page_url,
619
+ pageTitle: row.page_title,
620
+ sectionTitle: row.section_title,
621
+ markdown: row.markdown
622
+ }));
623
+ return {
624
+ total: totalRow.total,
625
+ limit,
626
+ offset,
627
+ hasMore: offset + results.length < totalRow.total,
628
+ results
629
+ };
630
+ };
631
+ const listLatestSnapshots = (sourceIds) => {
632
+ const filterSourceIds = sourceIds && sourceIds.length > 0 ? [...new Set(sourceIds)] : null;
633
+ const rows = db.prepare(`
634
+ SELECT id AS source_id, last_successful_snapshot_id AS snapshot_id
635
+ FROM sources
636
+ WHERE last_successful_snapshot_id IS NOT NULL
637
+ ${filterSourceIds ? `AND id IN (${filterSourceIds.map(() => "?").join(",")})` : ""}
638
+ ORDER BY id
639
+ `).all(...filterSourceIds ?? []);
640
+ return rows.map((row) => ({
641
+ sourceId: row.source_id,
642
+ snapshotId: row.snapshot_id
643
+ }));
644
+ };
645
+ const queueEmbeddingJobForSnapshot = (sourceId, snapshotId, previousLatestSnapshotId) => {
646
+ const timestamp = nowIso();
647
+ if (previousLatestSnapshotId && previousLatestSnapshotId !== snapshotId) {
648
+ db.prepare(`
649
+ UPDATE embedding_state
650
+ SET
651
+ status = 'stale',
652
+ vector_point_id = NULL,
653
+ indexed_at = NULL,
654
+ error_message = NULL
655
+ WHERE source_id = ?
656
+ AND snapshot_id = ?
657
+ `).run(sourceId, previousLatestSnapshotId);
658
+ db.prepare(`
659
+ DELETE FROM embedding_jobs
660
+ WHERE source_id = ?
661
+ AND snapshot_id = ?
662
+ `).run(sourceId, previousLatestSnapshotId);
663
+ }
664
+ const chunkRows = db.prepare(`
665
+ SELECT id, markdown
666
+ FROM chunks
667
+ WHERE source_id = ?
668
+ AND snapshot_id = ?
669
+ ORDER BY id
670
+ `).all(sourceId, snapshotId);
671
+ const upsertState = db.prepare(`
672
+ INSERT INTO embedding_state (
673
+ chunk_id,
674
+ source_id,
675
+ snapshot_id,
676
+ content_hash,
677
+ model_key,
678
+ status,
679
+ vector_point_id,
680
+ last_attempted_at,
681
+ indexed_at,
682
+ error_message
683
+ ) VALUES (?, ?, ?, ?, NULL, 'pending', NULL, NULL, NULL, NULL)
684
+ ON CONFLICT(chunk_id) DO UPDATE SET
685
+ source_id = excluded.source_id,
686
+ snapshot_id = excluded.snapshot_id,
687
+ content_hash = excluded.content_hash,
688
+ model_key = CASE
689
+ WHEN embedding_state.status = 'indexed' AND embedding_state.content_hash = excluded.content_hash
690
+ THEN embedding_state.model_key
691
+ ELSE NULL
692
+ END,
693
+ status = CASE
694
+ WHEN embedding_state.status = 'indexed' AND embedding_state.content_hash = excluded.content_hash
695
+ THEN 'indexed'
696
+ ELSE 'pending'
697
+ END,
698
+ vector_point_id = CASE
699
+ WHEN embedding_state.status = 'indexed' AND embedding_state.content_hash = excluded.content_hash
700
+ THEN embedding_state.vector_point_id
701
+ ELSE NULL
702
+ END,
703
+ last_attempted_at = CASE
704
+ WHEN embedding_state.status = 'indexed' AND embedding_state.content_hash = excluded.content_hash
705
+ THEN embedding_state.last_attempted_at
706
+ ELSE NULL
707
+ END,
708
+ indexed_at = CASE
709
+ WHEN embedding_state.status = 'indexed' AND embedding_state.content_hash = excluded.content_hash
710
+ THEN embedding_state.indexed_at
711
+ ELSE NULL
712
+ END,
713
+ error_message = CASE
714
+ WHEN embedding_state.status = 'indexed' AND embedding_state.content_hash = excluded.content_hash
715
+ THEN embedding_state.error_message
716
+ ELSE NULL
717
+ END
718
+ `);
719
+ const transaction = db.transaction(() => {
720
+ for (const chunk of chunkRows) {
721
+ upsertState.run(
722
+ chunk.id,
723
+ sourceId,
724
+ snapshotId,
725
+ sha256(chunk.markdown)
726
+ );
727
+ }
728
+ });
729
+ transaction();
730
+ const pendingRow = db.prepare(`
731
+ SELECT COUNT(*) AS pending_count
732
+ FROM embedding_state
733
+ WHERE source_id = ?
734
+ AND snapshot_id = ?
735
+ AND status != 'indexed'
736
+ `).get(sourceId, snapshotId);
737
+ if (pendingRow.pending_count === 0) {
738
+ db.prepare(`
739
+ INSERT INTO embedding_jobs (
740
+ source_id,
741
+ snapshot_id,
742
+ status,
743
+ attempt_count,
744
+ chunk_count,
745
+ created_at,
746
+ updated_at,
747
+ claimed_at,
748
+ completed_at,
749
+ error_message
750
+ ) VALUES (?, ?, 'succeeded', 0, ?, ?, ?, NULL, ?, NULL)
751
+ ON CONFLICT(source_id, snapshot_id) DO UPDATE SET
752
+ status = 'succeeded',
753
+ chunk_count = excluded.chunk_count,
754
+ updated_at = excluded.updated_at,
755
+ claimed_at = NULL,
756
+ completed_at = excluded.completed_at,
757
+ error_message = NULL
758
+ `).run(sourceId, snapshotId, chunkRows.length, timestamp, timestamp, timestamp);
759
+ return;
760
+ }
761
+ db.prepare(`
762
+ INSERT INTO embedding_jobs (
763
+ source_id,
764
+ snapshot_id,
765
+ status,
766
+ attempt_count,
767
+ chunk_count,
768
+ created_at,
769
+ updated_at,
770
+ claimed_at,
771
+ completed_at,
772
+ error_message
773
+ ) VALUES (?, ?, 'pending', 0, ?, ?, ?, NULL, NULL, NULL)
774
+ ON CONFLICT(source_id, snapshot_id) DO UPDATE SET
775
+ status = 'pending',
776
+ chunk_count = excluded.chunk_count,
777
+ updated_at = excluded.updated_at,
778
+ claimed_at = NULL,
779
+ completed_at = NULL,
780
+ error_message = NULL
781
+ `).run(sourceId, snapshotId, chunkRows.length, timestamp, timestamp);
782
+ };
783
+ return {
784
+ close() {
785
+ db.close();
786
+ },
787
+ upsertSource(spec, options2) {
788
+ const timestamp = nowIso();
789
+ const configHash = sha256(stableStringify(spec));
790
+ const existing = db.prepare("SELECT id, created_at, next_due_at, next_canary_due_at, config_hash FROM sources WHERE id = ?").get(spec.id);
791
+ const resolvedSpecPath = options2?.specPath ? resolve2(options2.specPath) : null;
792
+ const nextDueAt = !existing ? timestamp : existing.config_hash === configHash ? existing.next_due_at : timestamp;
793
+ const canaryConfig = resolveSourceCanary(spec);
794
+ const nextCanaryDueAt = !existing ? timestamp : existing.config_hash === configHash ? existing.next_canary_due_at ?? addHoursIso(canaryConfig.everyHours) : timestamp;
795
+ const configChanged = Boolean(existing && existing.config_hash !== configHash);
796
+ db.prepare(`
797
+ INSERT INTO sources (
798
+ id, label, spec_json, spec_path, config_hash, created_at, updated_at, next_due_at, next_canary_due_at
799
+ ) VALUES (
800
+ @id, @label, @specJson, @specPath, @configHash, @createdAt, @updatedAt, @nextDueAt, @nextCanaryDueAt
801
+ )
802
+ ON CONFLICT(id) DO UPDATE SET
803
+ label = excluded.label,
804
+ spec_json = excluded.spec_json,
805
+ spec_path = excluded.spec_path,
806
+ config_hash = excluded.config_hash,
807
+ updated_at = excluded.updated_at,
808
+ next_due_at = excluded.next_due_at,
809
+ next_canary_due_at = excluded.next_canary_due_at
810
+ `).run({
811
+ id: spec.id,
812
+ label: spec.label,
813
+ specJson: JSON.stringify(spec),
814
+ specPath: resolvedSpecPath,
815
+ configHash,
816
+ createdAt: existing?.created_at ?? timestamp,
817
+ updatedAt: timestamp,
818
+ nextDueAt,
819
+ nextCanaryDueAt
820
+ });
821
+ return {
822
+ sourceId: spec.id,
823
+ configHash,
824
+ configChanged
825
+ };
826
+ },
827
+ getSourceSpec(sourceId) {
828
+ const row = db.prepare("SELECT spec_json FROM sources WHERE id = ?").get(sourceId);
829
+ if (!row) {
830
+ return null;
831
+ }
832
+ return JSON.parse(row.spec_json);
833
+ },
834
+ listSources() {
835
+ const rows = db.prepare(`
836
+ SELECT
837
+ id,
838
+ label,
839
+ spec_path,
840
+ next_due_at,
841
+ next_canary_due_at,
842
+ last_checked_at,
843
+ last_successful_snapshot_at,
844
+ last_successful_snapshot_id,
845
+ last_canary_checked_at,
846
+ last_successful_canary_at,
847
+ last_canary_status
848
+ FROM sources
849
+ ORDER BY id
850
+ `).all();
851
+ return rows.map((row) => ({
852
+ id: row.id,
853
+ label: row.label,
854
+ specPath: row.spec_path,
855
+ nextDueAt: row.next_due_at,
856
+ isDue: Date.parse(row.next_due_at) <= Date.now(),
857
+ nextCanaryDueAt: row.next_canary_due_at,
858
+ isCanaryDue: row.next_canary_due_at ? Date.parse(row.next_canary_due_at) <= Date.now() : false,
859
+ lastCheckedAt: row.last_checked_at,
860
+ lastSuccessfulSnapshotAt: row.last_successful_snapshot_at,
861
+ lastSuccessfulSnapshotId: row.last_successful_snapshot_id,
862
+ lastCanaryCheckedAt: row.last_canary_checked_at,
863
+ lastSuccessfulCanaryAt: row.last_successful_canary_at,
864
+ lastCanaryStatus: row.last_canary_status
865
+ }));
866
+ },
867
+ listDueSourceIds(referenceTime = nowIso()) {
868
+ const rows = db.prepare(`
869
+ SELECT id
870
+ FROM sources
871
+ WHERE next_due_at <= ?
872
+ ORDER BY next_due_at, id
873
+ `).all(referenceTime);
874
+ return rows.map((row) => row.id);
875
+ },
876
+ listCanaryDueSourceIds(referenceTime = nowIso()) {
877
+ const rows = db.prepare(`
878
+ SELECT id
879
+ FROM sources
880
+ WHERE next_canary_due_at IS NOT NULL
881
+ AND next_canary_due_at <= ?
882
+ ORDER BY next_canary_due_at, id
883
+ `).all(referenceTime);
884
+ return rows.map((row) => row.id);
885
+ },
886
+ linkProject(projectPath, sourceIds) {
887
+ const normalizedPath = canonicalizeProjectPath(projectPath);
888
+ const timestamp = nowIso();
889
+ const insert = db.prepare(`
890
+ INSERT INTO project_links (project_path, source_id, created_at)
891
+ VALUES (?, ?, ?)
892
+ ON CONFLICT(project_path, source_id) DO NOTHING
893
+ `);
894
+ const transaction = db.transaction((ids) => {
895
+ for (const sourceId of ids) {
896
+ insert.run(normalizedPath, sourceId, timestamp);
897
+ }
898
+ });
899
+ transaction(sourceIds);
900
+ },
901
+ unlinkProject(projectPath, sourceIds) {
902
+ const normalizedPath = canonicalizeProjectPath(projectPath);
903
+ if (!sourceIds || sourceIds.length === 0) {
904
+ db.prepare("DELETE FROM project_links WHERE project_path = ?").run(normalizedPath);
905
+ return;
906
+ }
907
+ const statement = db.prepare("DELETE FROM project_links WHERE project_path = ? AND source_id = ?");
908
+ const transaction = db.transaction((ids) => {
909
+ for (const sourceId of ids) {
910
+ statement.run(normalizedPath, sourceId);
911
+ }
912
+ });
913
+ transaction(sourceIds);
914
+ },
915
+ recordSuccessfulSnapshot(input) {
916
+ const sourceRow = db.prepare("SELECT config_hash, spec_json, last_successful_snapshot_id FROM sources WHERE id = ?").get(input.sourceId);
917
+ if (!sourceRow) {
918
+ throw new AiocsError(
919
+ AIOCS_ERROR_CODES.sourceNotFound,
920
+ `Unknown source '${input.sourceId}'`
921
+ );
922
+ }
923
+ const pagesWithHashes = input.pages.map((page) => ({
924
+ ...page,
925
+ markdown: page.markdown.trim(),
926
+ contentHash: sha256(page.markdown.trim())
927
+ }));
928
+ const fingerprint = buildSnapshotFingerprint({
929
+ sourceId: input.sourceId,
930
+ configHash: sourceRow.config_hash,
931
+ pages: pagesWithHashes.map((page) => ({
932
+ url: page.url,
933
+ contentHash: page.contentHash
934
+ }))
935
+ });
936
+ const existing = db.prepare("SELECT id FROM snapshots WHERE source_id = ? AND fingerprint = ?").get(input.sourceId, fingerprint);
937
+ const spec = JSON.parse(sourceRow.spec_json);
938
+ const checkedAt = nowIso();
939
+ const nextDueAt = addHoursIso(spec.schedule.everyHours);
940
+ if (existing) {
941
+ db.prepare(`
942
+ UPDATE sources
943
+ SET last_checked_at = ?, last_successful_snapshot_at = ?, last_successful_snapshot_id = ?, next_due_at = ?, updated_at = ?
944
+ WHERE id = ?
945
+ `).run(checkedAt, checkedAt, existing.id, nextDueAt, checkedAt, input.sourceId);
946
+ queueEmbeddingJobForSnapshot(
947
+ input.sourceId,
948
+ existing.id,
949
+ sourceRow.last_successful_snapshot_id
950
+ );
951
+ db.prepare(`
952
+ INSERT INTO fetch_runs (id, source_id, status, snapshot_id, started_at, finished_at)
953
+ VALUES (?, ?, 'success', ?, ?, ?)
954
+ `).run(randomUUID(), input.sourceId, existing.id, checkedAt, checkedAt);
955
+ return {
956
+ snapshotId: existing.id,
957
+ reused: true
958
+ };
959
+ }
960
+ const snapshotId = `snp_${checkedAt.replace(/[-:.TZ]/g, "")}_${fingerprint.slice(0, 12)}`;
961
+ const insertSnapshot = db.prepare(`
962
+ INSERT INTO snapshots (
963
+ id, source_id, fingerprint, config_hash, detected_version, page_count, created_at
964
+ ) VALUES (?, ?, ?, ?, ?, ?, ?)
965
+ `);
966
+ const insertPage = db.prepare(`
967
+ INSERT INTO pages (snapshot_id, url, title, markdown, content_hash)
968
+ VALUES (?, ?, ?, ?, ?)
969
+ `);
970
+ const insertChunk = db.prepare(`
971
+ INSERT INTO chunks (
972
+ source_id, snapshot_id, page_id, page_url, page_title, section_title, chunk_order, markdown
973
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
974
+ `);
975
+ const insertRun = db.prepare(`
976
+ INSERT INTO fetch_runs (id, source_id, status, snapshot_id, started_at, finished_at)
977
+ VALUES (?, ?, 'success', ?, ?, ?)
978
+ `);
979
+ const transaction = db.transaction(() => {
980
+ insertSnapshot.run(
981
+ snapshotId,
982
+ input.sourceId,
983
+ fingerprint,
984
+ sourceRow.config_hash,
985
+ input.detectedVersion ?? null,
986
+ pagesWithHashes.length,
987
+ checkedAt
988
+ );
989
+ for (const page of pagesWithHashes) {
990
+ const pageInsert = insertPage.run(snapshotId, page.url, page.title, page.markdown, page.contentHash);
991
+ const pageId = Number(pageInsert.lastInsertRowid);
992
+ const chunks = chunkMarkdown(page.title, page.markdown);
993
+ for (const chunk of chunks) {
994
+ insertChunk.run(
995
+ input.sourceId,
996
+ snapshotId,
997
+ pageId,
998
+ page.url,
999
+ page.title,
1000
+ chunk.sectionTitle,
1001
+ chunk.chunkOrder,
1002
+ chunk.markdown
1003
+ );
1004
+ }
1005
+ }
1006
+ db.prepare(`
1007
+ UPDATE sources
1008
+ SET last_checked_at = ?, last_successful_snapshot_at = ?, last_successful_snapshot_id = ?, next_due_at = ?, updated_at = ?
1009
+ WHERE id = ?
1010
+ `).run(checkedAt, checkedAt, snapshotId, nextDueAt, checkedAt, input.sourceId);
1011
+ queueEmbeddingJobForSnapshot(
1012
+ input.sourceId,
1013
+ snapshotId,
1014
+ sourceRow.last_successful_snapshot_id
1015
+ );
1016
+ insertRun.run(randomUUID(), input.sourceId, snapshotId, checkedAt, checkedAt);
1017
+ });
1018
+ transaction();
1019
+ return {
1020
+ snapshotId,
1021
+ reused: false
1022
+ };
1023
+ },
1024
+ recordFailedFetchRun(input) {
1025
+ const sourceRow = db.prepare("SELECT spec_json FROM sources WHERE id = ?").get(input.sourceId);
1026
+ if (!sourceRow) {
1027
+ throw new AiocsError(
1028
+ AIOCS_ERROR_CODES.sourceNotFound,
1029
+ `Unknown source '${input.sourceId}'`
1030
+ );
1031
+ }
1032
+ const spec = JSON.parse(sourceRow.spec_json);
1033
+ const timestamp = nowIso();
1034
+ db.prepare(`
1035
+ INSERT INTO fetch_runs (id, source_id, status, error_message, started_at, finished_at)
1036
+ VALUES (?, ?, 'failed', ?, ?, ?)
1037
+ `).run(randomUUID(), input.sourceId, input.errorMessage, timestamp, timestamp);
1038
+ db.prepare(`
1039
+ UPDATE sources
1040
+ SET last_checked_at = ?, next_due_at = ?, updated_at = ?
1041
+ WHERE id = ?
1042
+ `).run(timestamp, addHoursIso(spec.schedule.everyHours), timestamp, input.sourceId);
1043
+ },
1044
recordCanaryRun(input) {
  // Persist one canary-check result and roll the source's canary schedule
  // forward. Throws SOURCE_NOT_FOUND for unknown sources.
  const sourceRow = db.prepare("SELECT spec_json FROM sources WHERE id = ?").get(input.sourceId);
  if (!sourceRow) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.sourceNotFound,
      `Unknown source '${input.sourceId}'`
    );
  }
  const spec = JSON.parse(sourceRow.spec_json);
  const canary = resolveSourceCanary(spec);
  db.prepare(`
    INSERT INTO canary_runs (id, source_id, status, checked_at, details_json)
    VALUES (?, ?, ?, ?, ?)
  `).run(
    randomUUID(),
    input.sourceId,
    input.status,
    input.checkedAt,
    JSON.stringify(input.details)
  );
  // last_successful_canary_at only advances on a 'pass'; positional args
  // below are: checked_at, status (CASE test), checked_at (CASE result),
  // status, next due, updated_at, id — order matters.
  db.prepare(`
    UPDATE sources
    SET
      last_canary_checked_at = ?,
      last_successful_canary_at = CASE WHEN ? = 'pass' THEN ? ELSE last_successful_canary_at END,
      last_canary_status = ?,
      next_canary_due_at = ?,
      updated_at = ?
    WHERE id = ?
  `).run(
    input.checkedAt,
    input.status,
    input.checkedAt,
    input.status,
    addHoursIso(canary.everyHours),
    input.checkedAt,
    input.sourceId
  );
},
// Re-exported module-level helper (defined elsewhere in this file).
listProjectLinks,
1084
removeManagedSources(input) {
  // Delete sources whose spec files live under one of the managed roots
  // but are no longer present in the active source set. Returns the ids
  // that were deleted.
  if (input.managedRoots.length === 0) {
    return [];
  }
  // Key pairs id with the absolute spec path so a source whose spec file
  // moved counts as stale.
  const activeSourceKeys = new Set(
    input.activeSources.map((source) => `${source.sourceId}::${resolve2(source.specPath)}`)
  );
  const rows = db.prepare(`
    SELECT id, spec_path
    FROM sources
    WHERE spec_path IS NOT NULL
    ORDER BY id
  `).all();
  const toDelete = rows.filter((row) => {
    if (!row.spec_path) {
      return false;
    }
    const normalizedSpecPath = resolve2(row.spec_path);
    // NOTE(review): the prefix test hard-codes '/' as the separator, which
    // assumes POSIX-style output from resolve(); confirm Windows behavior.
    return input.managedRoots.some(
      (managedRoot) => normalizedSpecPath === managedRoot || normalizedSpecPath.startsWith(`${managedRoot}/`)
    ) && !activeSourceKeys.has(`${row.id}::${normalizedSpecPath}`);
  }).map((row) => row.id);
  if (toDelete.length === 0) {
    return [];
  }
  const deleteStatement = db.prepare("DELETE FROM sources WHERE id = ?");
  const transaction = db.transaction((sourceIds) => {
    for (const sourceId of sourceIds) {
      deleteStatement.run(sourceId);
    }
  });
  transaction(toDelete);
  return toDelete;
},
1118
+ listSnapshots(sourceId) {
1119
+ const rows = db.prepare(`
1120
+ SELECT id, source_id, detected_version, created_at, page_count
1121
+ FROM snapshots
1122
+ WHERE source_id = ?
1123
+ ORDER BY rowid DESC
1124
+ `).all(sourceId);
1125
+ return rows.map((row) => ({
1126
+ snapshotId: row.id,
1127
+ sourceId: row.source_id,
1128
+ detectedVersion: row.detected_version,
1129
+ createdAt: row.created_at,
1130
+ pageCount: row.page_count
1131
+ }));
1132
+ },
1133
diffSnapshots(input) {
  // Diff two snapshots of a source at page granularity.
  // Defaults: `to` = newest snapshot, `from` = the one just before `to`.
  // Returns added/removed/changed page lists plus a summary with counts.
  const snapshots = this.listSnapshots(input.sourceId);
  if (snapshots.length === 0) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.snapshotNotFound,
      `No successful snapshot found for source '${input.sourceId}'`
    );
  }
  // snapshots is newest-first, so index 0 is the latest.
  const toSnapshot = input.toSnapshotId ? snapshots.find((snapshot) => snapshot.snapshotId === input.toSnapshotId) : snapshots[0];
  if (!toSnapshot) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.snapshotNotFound,
      `Snapshot '${input.toSnapshotId}' not found for source '${input.sourceId}'`
    );
  }
  const toSnapshotIndex = snapshots.findIndex((snapshot) => snapshot.snapshotId === toSnapshot.snapshotId);
  // Default base is the snapshot immediately older than `to`.
  const fromSnapshot = input.fromSnapshotId ? snapshots.find((snapshot) => snapshot.snapshotId === input.fromSnapshotId) : snapshots[toSnapshotIndex + 1];
  if (!fromSnapshot) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.snapshotDiffBaseNotFound,
      `No base snapshot available to diff source '${input.sourceId}'`
    );
  }
  const loadSnapshotPages = (snapshotId) => db.prepare(`
    SELECT url, title, markdown, content_hash
    FROM pages
    WHERE snapshot_id = ?
    ORDER BY url
  `).all(snapshotId);
  const beforePages = loadSnapshotPages(fromSnapshot.snapshotId);
  const afterPages = loadSnapshotPages(toSnapshot.snapshotId);
  // Pages are matched across snapshots by URL.
  const beforeMap = new Map(beforePages.map((page) => [page.url, page]));
  const afterMap = new Map(afterPages.map((page) => [page.url, page]));
  const addedPages = afterPages.filter((page) => !beforeMap.has(page.url)).map((page) => ({
    url: page.url,
    title: page.title
  }));
  const removedPages = beforePages.filter((page) => !afterMap.has(page.url)).map((page) => ({
    url: page.url,
    title: page.title
  }));
  // Cheap line diff: strip the common prefix and suffix, count the rest
  // as added/removed. Not a minimal edit script, just a magnitude summary.
  const summarizeLineDiff = (beforeMarkdown, afterMarkdown) => {
    const beforeLines = beforeMarkdown.split("\n");
    const afterLines = afterMarkdown.split("\n");
    let prefix = 0;
    while (prefix < beforeLines.length && prefix < afterLines.length && beforeLines[prefix] === afterLines[prefix]) {
      prefix += 1;
    }
    // Suffix scan is bounded by `length - prefix` so the two regions never
    // overlap.
    let suffix = 0;
    while (suffix < beforeLines.length - prefix && suffix < afterLines.length - prefix && beforeLines[beforeLines.length - 1 - suffix] === afterLines[afterLines.length - 1 - suffix]) {
      suffix += 1;
    }
    return {
      addedLineCount: Math.max(0, afterLines.length - prefix - suffix),
      removedLineCount: Math.max(0, beforeLines.length - prefix - suffix)
    };
  };
  // Changed = same URL in both, differing content hash or title.
  const changedPages = beforePages.filter((page) => afterMap.has(page.url)).map((page) => ({
    before: page,
    after: afterMap.get(page.url)
  })).filter(({ before, after }) => before.content_hash !== after.content_hash || before.title !== after.title).map(({ before, after }) => ({
    url: before.url,
    beforeTitle: before.title,
    afterTitle: after.title,
    lineSummary: summarizeLineDiff(before.markdown, after.markdown)
  }));
  const unchangedPageCount = beforePages.filter((page) => {
    const next = afterMap.get(page.url);
    return next && next.content_hash === page.content_hash && next.title === page.title;
  }).length;
  return {
    sourceId: input.sourceId,
    fromSnapshotId: fromSnapshot.snapshotId,
    toSnapshotId: toSnapshot.snapshotId,
    summary: {
      addedPageCount: addedPages.length,
      removedPageCount: removedPages.length,
      changedPageCount: changedPages.length,
      unchangedPageCount
    },
    addedPages,
    removedPages,
    changedPages
  };
},
1218
resolveSearchScope(input) {
  // Delegates to the module-level scope resolver (the method name does not
  // shadow it — object methods do not bind their own name).
  return resolveSearchScope(input);
},
searchLexical(input) {
  // Lexical search over an already-resolved scope.
  return searchLexicalByScope(input);
},
search(input) {
  // Convenience wrapper: resolve the scope, then run the lexical search.
  return searchLexicalByScope({
    query: input.query,
    scope: resolveSearchScope(input)
  });
},
listLatestSnapshots(sourceIds) {
  // Latest successful snapshot per source; module-level helper.
  return listLatestSnapshots(sourceIds);
},
1233
+ listSnapshotChunks(input) {
1234
+ const rows = db.prepare(`
1235
+ SELECT
1236
+ c.id AS chunk_id,
1237
+ c.source_id,
1238
+ c.snapshot_id,
1239
+ c.page_url,
1240
+ c.page_title,
1241
+ c.section_title,
1242
+ c.markdown
1243
+ FROM chunks c
1244
+ WHERE c.source_id = ?
1245
+ AND c.snapshot_id = ?
1246
+ ORDER BY c.id
1247
+ `).all(input.sourceId, input.snapshotId);
1248
+ return rows.map((row) => ({
1249
+ chunkId: row.chunk_id,
1250
+ sourceId: row.source_id,
1251
+ snapshotId: row.snapshot_id,
1252
+ pageUrl: row.page_url,
1253
+ pageTitle: row.page_title,
1254
+ sectionTitle: row.section_title,
1255
+ markdown: row.markdown,
1256
+ contentHash: sha256(row.markdown)
1257
+ }));
1258
+ },
1259
+ getSnapshotEmbeddingState(input) {
1260
+ const rows = db.prepare(`
1261
+ SELECT chunk_id, status, model_key, content_hash
1262
+ FROM embedding_state
1263
+ WHERE source_id = ?
1264
+ AND snapshot_id = ?
1265
+ ORDER BY chunk_id
1266
+ `).all(input.sourceId, input.snapshotId);
1267
+ return rows.map((row) => ({
1268
+ chunkId: row.chunk_id,
1269
+ status: row.status,
1270
+ modelKey: row.model_key,
1271
+ contentHash: row.content_hash
1272
+ }));
1273
+ },
1274
+ listStaleEmbeddingChunkIds(sourceId) {
1275
+ const rows = db.prepare(`
1276
+ SELECT chunk_id
1277
+ FROM embedding_state
1278
+ WHERE source_id = ?
1279
+ AND status = 'stale'
1280
+ ORDER BY chunk_id
1281
+ `).all(sourceId);
1282
+ return rows.map((row) => row.chunk_id);
1283
+ },
1284
+ listEmbeddingChunkIds(sourceIds) {
1285
+ const filterSourceIds = sourceIds && sourceIds.length > 0 ? [...new Set(sourceIds)] : null;
1286
+ const rows = db.prepare(`
1287
+ SELECT chunk_id
1288
+ FROM embedding_state
1289
+ ${filterSourceIds ? `WHERE source_id IN (${filterSourceIds.map(() => "?").join(",")})` : ""}
1290
+ ORDER BY chunk_id
1291
+ `).all(...filterSourceIds ?? []);
1292
+ return rows.map((row) => row.chunk_id);
1293
+ },
1294
+ getChunksByIds(chunkIds) {
1295
+ if (chunkIds.length === 0) {
1296
+ return [];
1297
+ }
1298
+ const rows = db.prepare(`
1299
+ SELECT
1300
+ c.id AS chunk_id,
1301
+ c.source_id,
1302
+ c.snapshot_id,
1303
+ c.page_url,
1304
+ c.page_title,
1305
+ c.section_title,
1306
+ c.markdown
1307
+ FROM chunks c
1308
+ WHERE c.id IN (${chunkIds.map(() => "?").join(",")})
1309
+ `).all(...chunkIds);
1310
+ return rows.map((row) => ({
1311
+ chunkId: row.chunk_id,
1312
+ sourceId: row.source_id,
1313
+ snapshotId: row.snapshot_id,
1314
+ pageUrl: row.page_url,
1315
+ pageTitle: row.page_title,
1316
+ sectionTitle: row.section_title,
1317
+ markdown: row.markdown
1318
+ }));
1319
+ },
1320
+ queueLatestEmbeddingJobs(sourceIds) {
1321
+ const latestSnapshots = listLatestSnapshots(sourceIds);
1322
+ const transaction = db.transaction((snapshots) => {
1323
+ for (const snapshot of snapshots) {
1324
+ queueEmbeddingJobForSnapshot(snapshot.sourceId, snapshot.snapshotId);
1325
+ }
1326
+ });
1327
+ transaction(latestSnapshots);
1328
+ return {
1329
+ queuedJobs: latestSnapshots.length
1330
+ };
1331
+ },
1332
+ requeueLatestEmbeddingJobs(sourceIds) {
1333
+ const latestSnapshots = listLatestSnapshots(sourceIds);
1334
+ const transaction = db.transaction((snapshots) => {
1335
+ for (const snapshot of snapshots) {
1336
+ db.prepare(`
1337
+ UPDATE embedding_state
1338
+ SET
1339
+ status = 'pending',
1340
+ model_key = NULL,
1341
+ vector_point_id = NULL,
1342
+ last_attempted_at = NULL,
1343
+ indexed_at = NULL,
1344
+ error_message = NULL
1345
+ WHERE source_id = ?
1346
+ AND snapshot_id = ?
1347
+ `).run(snapshot.sourceId, snapshot.snapshotId);
1348
+ queueEmbeddingJobForSnapshot(snapshot.sourceId, snapshot.snapshotId);
1349
+ }
1350
+ });
1351
+ transaction(latestSnapshots);
1352
+ return {
1353
+ queuedJobs: latestSnapshots.length
1354
+ };
1355
+ },
1356
+ resetEmbeddingsAfterImport() {
1357
+ const transaction = db.transaction(() => {
1358
+ db.prepare("DELETE FROM embedding_jobs").run();
1359
+ db.prepare("DELETE FROM embedding_state").run();
1360
+ });
1361
+ transaction();
1362
+ const latestSnapshots = listLatestSnapshots();
1363
+ const queueTransaction = db.transaction((snapshots) => {
1364
+ for (const snapshot of snapshots) {
1365
+ queueEmbeddingJobForSnapshot(snapshot.sourceId, snapshot.snapshotId);
1366
+ }
1367
+ });
1368
+ queueTransaction(latestSnapshots);
1369
+ return {
1370
+ queuedJobs: latestSnapshots.length
1371
+ };
1372
+ },
1373
+ resetRunningEmbeddingJobs() {
1374
+ const result = db.prepare(`
1375
+ UPDATE embedding_jobs
1376
+ SET
1377
+ status = 'pending',
1378
+ updated_at = ?,
1379
+ claimed_at = NULL,
1380
+ error_message = NULL
1381
+ WHERE status = 'running'
1382
+ `).run(nowIso());
1383
+ return result.changes;
1384
+ },
1385
claimEmbeddingJobs(limit) {
  // Atomically claim up to `limit` pending embedding jobs: mark them
  // 'running', bump attempt_count, and return the claimed jobs.
  // NOTE(review): assertPaginationValue receives `limit` as both the first
  // and third argument — confirm the helper's (value, name, ...) signature.
  const normalizedLimit = assertPaginationValue(limit, "limit", limit);
  if (normalizedLimit === 0) {
    return [];
  }
  const claimedAt = nowIso();
  // Select + update share one transaction so two claimers in the same
  // process cannot grab the same job.
  const transaction = db.transaction(() => {
    const pending = db.prepare(`
      SELECT
        source_id,
        snapshot_id,
        status,
        attempt_count,
        chunk_count,
        created_at,
        updated_at,
        claimed_at,
        completed_at,
        error_message
      FROM embedding_jobs
      WHERE status = 'pending'
      ORDER BY updated_at, source_id, snapshot_id
      LIMIT ?
    `).all(normalizedLimit);
    const claim = db.prepare(`
      UPDATE embedding_jobs
      SET
        status = 'running',
        attempt_count = attempt_count + 1,
        updated_at = ?,
        claimed_at = ?,
        error_message = NULL
      WHERE source_id = ?
        AND snapshot_id = ?
    `);
    for (const job of pending) {
      claim.run(claimedAt, claimedAt, job.source_id, job.snapshot_id);
    }
    // Mirror the database mutation in the returned objects so callers see
    // post-claim state without re-querying.
    return pending.map((job) => ({
      sourceId: job.source_id,
      snapshotId: job.snapshot_id,
      status: "running",
      attemptCount: job.attempt_count + 1,
      chunkCount: job.chunk_count,
      createdAt: job.created_at,
      updatedAt: claimedAt,
      claimedAt,
      completedAt: job.completed_at,
      errorMessage: null
    }));
  });
  return transaction();
},
1438
markEmbeddingJobSucceeded(input) {
  // Finalize a successful embedding run in one transaction, in order:
  //  1. mark the indexed chunks 'indexed' under input.modelKey,
  //  2. mark every remaining (skipped) chunk of the snapshot 'failed',
  //  3. delete state rows for stale chunk ids,
  //  4. mark the job itself 'succeeded'.
  const timestamp = nowIso();
  // Dedupe id lists before building IN (...) placeholder strings.
  const staleChunkIds = [...new Set(input.staleChunkIds ?? [])];
  const indexedChunkIds = [...new Set(input.indexedChunkIds)];
  const indexedPlaceholders = indexedChunkIds.length > 0 ? indexedChunkIds.map(() => "?").join(",") : null;
  const stalePlaceholders = staleChunkIds.length > 0 ? staleChunkIds.map(() => "?").join(",") : null;
  const transaction = db.transaction(() => {
    if (indexedPlaceholders) {
      db.prepare(`
        UPDATE embedding_state
        SET
          status = 'indexed',
          model_key = ?,
          vector_point_id = CAST(chunk_id AS TEXT),
          last_attempted_at = ?,
          indexed_at = ?,
          error_message = NULL
        WHERE chunk_id IN (${indexedPlaceholders})
      `).run(input.modelKey, timestamp, timestamp, ...indexedChunkIds);
    }
    // Runs after step 1, so only chunks that did NOT just become 'indexed'
    // are marked failed.
    db.prepare(`
      UPDATE embedding_state
      SET
        status = 'failed',
        model_key = NULL,
        vector_point_id = NULL,
        last_attempted_at = ?,
        indexed_at = NULL,
        error_message = 'Chunk was not indexed during the latest embedding run'
      WHERE source_id = ?
        AND snapshot_id = ?
        AND status != 'indexed'
    `).run(timestamp, input.sourceId, input.snapshotId);
    if (stalePlaceholders) {
      db.prepare(`
        DELETE FROM embedding_state
        WHERE chunk_id IN (${stalePlaceholders})
      `).run(...staleChunkIds);
    }
    db.prepare(`
      UPDATE embedding_jobs
      SET
        status = 'succeeded',
        updated_at = ?,
        completed_at = ?,
        claimed_at = NULL,
        error_message = NULL
      WHERE source_id = ?
        AND snapshot_id = ?
    `).run(timestamp, timestamp, input.sourceId, input.snapshotId);
  });
  transaction();
},
1491
+ markEmbeddingJobFailed(input) {
1492
+ const timestamp = nowIso();
1493
+ const transaction = db.transaction(() => {
1494
+ db.prepare(`
1495
+ UPDATE embedding_state
1496
+ SET
1497
+ status = 'failed',
1498
+ model_key = NULL,
1499
+ vector_point_id = NULL,
1500
+ last_attempted_at = ?,
1501
+ indexed_at = NULL,
1502
+ error_message = ?
1503
+ WHERE source_id = ?
1504
+ AND snapshot_id = ?
1505
+ AND status != 'indexed'
1506
+ `).run(timestamp, input.errorMessage, input.sourceId, input.snapshotId);
1507
+ db.prepare(`
1508
+ UPDATE embedding_jobs
1509
+ SET
1510
+ status = 'failed',
1511
+ updated_at = ?,
1512
+ completed_at = ?,
1513
+ claimed_at = NULL,
1514
+ error_message = ?
1515
+ WHERE source_id = ?
1516
+ AND snapshot_id = ?
1517
+ `).run(timestamp, timestamp, input.errorMessage, input.sourceId, input.snapshotId);
1518
+ });
1519
+ transaction();
1520
+ },
1521
+ clearEmbeddings(sourceIds) {
1522
+ const latestSnapshots = listLatestSnapshots(sourceIds);
1523
+ const clearedSources = latestSnapshots.map((snapshot) => snapshot.sourceId);
1524
+ const filterSourceIds = sourceIds && sourceIds.length > 0 ? [...new Set(sourceIds)] : null;
1525
+ const transaction = db.transaction(() => {
1526
+ if (filterSourceIds && filterSourceIds.length > 0) {
1527
+ db.prepare(`
1528
+ DELETE FROM embedding_jobs
1529
+ WHERE source_id IN (${filterSourceIds.map(() => "?").join(",")})
1530
+ `).run(...filterSourceIds);
1531
+ db.prepare(`
1532
+ DELETE FROM embedding_state
1533
+ WHERE source_id IN (${filterSourceIds.map(() => "?").join(",")})
1534
+ `).run(...filterSourceIds);
1535
+ } else {
1536
+ db.prepare("DELETE FROM embedding_jobs").run();
1537
+ db.prepare("DELETE FROM embedding_state").run();
1538
+ }
1539
+ });
1540
+ transaction();
1541
+ return {
1542
+ clearedSources
1543
+ };
1544
+ },
1545
getEmbeddingOverview() {
  // Aggregate view of embedding progress: global queue counts plus
  // per-source chunk status tallies for each source's latest snapshot.
  // SUM over an empty embedding_jobs table yields NULL, hence the ?? 0.
  const queueCounts = db.prepare(`
    SELECT
      SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) AS pending_jobs,
      SUM(CASE WHEN status = 'running' THEN 1 ELSE 0 END) AS running_jobs,
      SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) AS failed_jobs
    FROM embedding_jobs
  `).get();
  // LEFT JOINs keep sources with no snapshot/chunks/state in the result
  // (their counts come out as 0).
  const rows = db.prepare(`
    SELECT
      s.id AS source_id,
      s.last_successful_snapshot_id AS snapshot_id,
      COUNT(c.id) AS total_chunks,
      SUM(CASE WHEN es.status = 'indexed' THEN 1 ELSE 0 END) AS indexed_chunks,
      SUM(CASE WHEN es.status = 'pending' THEN 1 ELSE 0 END) AS pending_chunks,
      SUM(CASE WHEN es.status = 'failed' THEN 1 ELSE 0 END) AS failed_chunks,
      SUM(CASE WHEN es.status = 'stale' THEN 1 ELSE 0 END) AS stale_chunks
    FROM sources s
    LEFT JOIN chunks c
      ON c.snapshot_id = s.last_successful_snapshot_id
    LEFT JOIN embedding_state es
      ON es.chunk_id = c.id
    GROUP BY s.id, s.last_successful_snapshot_id
    ORDER BY s.id
  `).all();
  return {
    queue: {
      pendingJobs: queueCounts.pending_jobs ?? 0,
      runningJobs: queueCounts.running_jobs ?? 0,
      failedJobs: queueCounts.failed_jobs ?? 0
    },
    sources: rows.map((row) => ({
      sourceId: row.source_id,
      snapshotId: row.snapshot_id,
      totalChunks: row.total_chunks,
      indexedChunks: row.indexed_chunks,
      pendingChunks: row.pending_chunks,
      failedChunks: row.failed_chunks,
      staleChunks: row.stale_chunks,
      // Guard against divide-by-zero for sources with no chunks.
      coverageRatio: row.total_chunks === 0 ? 0 : row.indexed_chunks / row.total_chunks
    }))
  };
},
1588
+ markDaemonStarted(input) {
1589
+ db.prepare(`
1590
+ INSERT INTO daemon_state (
1591
+ singleton_id,
1592
+ last_started_at,
1593
+ interval_minutes,
1594
+ fetch_on_start
1595
+ ) VALUES (1, ?, ?, ?)
1596
+ ON CONFLICT(singleton_id) DO UPDATE SET
1597
+ last_started_at = excluded.last_started_at,
1598
+ interval_minutes = excluded.interval_minutes,
1599
+ fetch_on_start = excluded.fetch_on_start
1600
+ `).run(
1601
+ input.startedAt,
1602
+ input.intervalMinutes,
1603
+ input.fetchOnStart ? 1 : 0
1604
+ );
1605
+ },
1606
+ markDaemonCycleStarted(startedAt) {
1607
+ db.prepare(`
1608
+ INSERT INTO daemon_state (singleton_id, last_cycle_started_at)
1609
+ VALUES (1, ?)
1610
+ ON CONFLICT(singleton_id) DO UPDATE SET
1611
+ last_cycle_started_at = excluded.last_cycle_started_at
1612
+ `).run(startedAt);
1613
+ },
1614
+ markDaemonCycleCompleted(input) {
1615
+ db.prepare(`
1616
+ INSERT INTO daemon_state (
1617
+ singleton_id,
1618
+ last_cycle_completed_at,
1619
+ last_cycle_status
1620
+ ) VALUES (1, ?, ?)
1621
+ ON CONFLICT(singleton_id) DO UPDATE SET
1622
+ last_cycle_completed_at = excluded.last_cycle_completed_at,
1623
+ last_cycle_status = excluded.last_cycle_status
1624
+ `).run(
1625
+ input.completedAt,
1626
+ input.status
1627
+ );
1628
+ },
1629
+ getDaemonState() {
1630
+ const row = db.prepare(`
1631
+ SELECT
1632
+ last_started_at,
1633
+ last_cycle_started_at,
1634
+ last_cycle_completed_at,
1635
+ last_cycle_status,
1636
+ interval_minutes,
1637
+ fetch_on_start
1638
+ FROM daemon_state
1639
+ WHERE singleton_id = 1
1640
+ `).get();
1641
+ if (!row) {
1642
+ return null;
1643
+ }
1644
+ return {
1645
+ lastStartedAt: row.last_started_at,
1646
+ lastCycleStartedAt: row.last_cycle_started_at,
1647
+ lastCycleCompletedAt: row.last_cycle_completed_at,
1648
+ lastCycleStatus: row.last_cycle_status,
1649
+ intervalMinutes: row.interval_minutes,
1650
+ fetchOnStart: row.fetch_on_start === null ? null : row.fetch_on_start === 1
1651
+ };
1652
+ },
1653
+ getCoverageCorpus(input) {
1654
+ const sourceRow = db.prepare("SELECT last_successful_snapshot_id FROM sources WHERE id = ?").get(input.sourceId);
1655
+ if (!sourceRow) {
1656
+ throw new AiocsError(
1657
+ AIOCS_ERROR_CODES.sourceNotFound,
1658
+ `Unknown source '${input.sourceId}'`
1659
+ );
1660
+ }
1661
+ const snapshotId = input.snapshotId ?? sourceRow.last_successful_snapshot_id;
1662
+ if (!snapshotId) {
1663
+ throw new AiocsError(
1664
+ AIOCS_ERROR_CODES.snapshotNotFound,
1665
+ `No successful snapshot found for source '${input.sourceId}'`
1666
+ );
1667
+ }
1668
+ const snapshotRow = db.prepare("SELECT id FROM snapshots WHERE id = ? AND source_id = ?").get(snapshotId, input.sourceId);
1669
+ if (!snapshotRow) {
1670
+ throw new AiocsError(
1671
+ AIOCS_ERROR_CODES.snapshotNotFound,
1672
+ `Snapshot '${snapshotId}' not found for source '${input.sourceId}'`
1673
+ );
1674
+ }
1675
+ const rows = db.prepare(`
1676
+ SELECT page_title, section_title, markdown
1677
+ FROM chunks
1678
+ WHERE source_id = ?
1679
+ AND snapshot_id = ?
1680
+ ORDER BY page_id, chunk_order
1681
+ `).all(input.sourceId, snapshotId);
1682
+ return {
1683
+ sourceId: input.sourceId,
1684
+ snapshotId,
1685
+ entries: rows.map((row) => ({
1686
+ pageTitle: row.page_title,
1687
+ sectionTitle: row.section_title,
1688
+ markdown: row.markdown
1689
+ }))
1690
+ };
1691
+ },
1692
+ getChunkById(chunkId) {
1693
+ const row = db.prepare(`
1694
+ SELECT
1695
+ c.id AS chunk_id,
1696
+ c.source_id,
1697
+ c.snapshot_id,
1698
+ c.page_url,
1699
+ c.page_title,
1700
+ c.section_title,
1701
+ c.markdown
1702
+ FROM chunks c
1703
+ WHERE c.id = ?
1704
+ `).get(chunkId);
1705
+ if (!row) {
1706
+ return null;
1707
+ }
1708
+ return {
1709
+ chunkId: row.chunk_id,
1710
+ sourceId: row.source_id,
1711
+ snapshotId: row.snapshot_id,
1712
+ pageUrl: row.page_url,
1713
+ pageTitle: row.page_title,
1714
+ sectionTitle: row.section_title,
1715
+ markdown: row.markdown
1716
+ };
1717
+ }
1718
+ };
1719
+ }
1720
+
1721
+ // src/runtime/paths.ts
1722
+ import { homedir } from "os";
1723
+ import { join as join2 } from "path";
1724
+ import { mkdirSync as mkdirSync2 } from "fs";
1725
function expandTilde(path) {
  // Expand a leading "~" or "~/..." to the user's home directory; any
  // other path is returned unchanged.
  if (path === "~") {
    return homedir();
  }
  if (path.startsWith("~/")) {
    return join2(homedir(), path.slice(2));
  }
  return path;
}
// Shared helper for the three directory getters below: pick the override
// (tilde-expanded, computed once — previously expanded twice) or the
// default under ~/.aiocs, create it recursively, and return it.
function ensureAiocsDir(overridePath, ...fallbackSegments) {
  const target = overridePath ? expandTilde(overridePath) : join2(homedir(), ...fallbackSegments);
  mkdirSync2(target, { recursive: true });
  return target;
}
function getAiocsDataDir(env = process.env) {
  // Data directory: AIOCS_DATA_DIR override, else ~/.aiocs/data.
  return ensureAiocsDir(env.AIOCS_DATA_DIR, ".aiocs", "data");
}
function getAiocsConfigDir(env = process.env) {
  // Config directory: AIOCS_CONFIG_DIR override, else ~/.aiocs/config.
  return ensureAiocsDir(env.AIOCS_CONFIG_DIR, ".aiocs", "config");
}
function getAiocsSourcesDir(env = process.env) {
  // Source-spec directory: AIOCS_SOURCES_DIR override, else ~/.aiocs/sources.
  return ensureAiocsDir(env.AIOCS_SOURCES_DIR, ".aiocs", "sources");
}
1764
+
1765
+ // src/daemon.ts
1766
+ import { resolve as resolve4 } from "path";
1767
+ import { setTimeout as sleep2 } from "timers/promises";
1768
+
1769
+ // src/fetch/fetch-source.ts
1770
+ import { mkdirSync as mkdirSync3, writeFileSync } from "fs";
1771
+ import { join as join3 } from "path";
1772
+ import { setTimeout as sleep } from "timers/promises";
1773
+ import { chromium } from "playwright";
1774
+
1775
+ // src/fetch/extract.ts
1776
+ import { JSDOM } from "jsdom";
1777
+ import { Readability } from "@mozilla/readability";
1778
+
1779
+ // src/fetch/normalize.ts
1780
+ import TurndownService from "turndown";
1781
+ import { gfm } from "turndown-plugin-gfm";
1782
// Shared Turndown converter: ATX headings ("# ...") and fenced code
// blocks, extended with the GitHub-Flavored-Markdown plugin (gfm).
var turndown = new TurndownService({
  headingStyle: "atx",
  codeBlockStyle: "fenced"
});
turndown.use(gfm);
1787
function htmlToMarkdown(html) {
  // Convert HTML to markdown via the shared Turndown instance, stripping
  // leading/trailing whitespace from the result.
  const converted = turndown.turndown(html);
  return converted.trim();
}
1790
function ensureTitle(markdown, title) {
  // Guarantee the markdown starts with a level-1 heading, synthesizing
  // one from `title` when it is missing (or when the input is blank).
  const trimmed = markdown.trim();
  if (trimmed.startsWith("# ")) {
    return trimmed;
  }
  if (trimmed.length === 0) {
    return `# ${title}`;
  }
  return `# ${title}\n\n${trimmed}`;
}
function normalizeMarkdown(spec, page) {
  // Normalize a fetched page: ensure a title heading, and optionally
  // prepend an HTML comment recording the page's source URL.
  const titled = ensureTitle(page.markdown, page.title);
  return spec.normalize.prependSourceComment ? `<!-- source: ${page.url} -->\n\n${titled}` : titled;
}
1811
+
1812
+ // src/fetch/extract.ts
1813
// Default per-interaction timeout (ms) used when an interaction step does
// not supply its own.
var CLIPBOARD_INTERACTION_DEFAULT_TIMEOUT_MS = 1e3;
// Read the page's clipboard text via the browser Clipboard API (evaluated
// inside the page context).
async function readClipboard(page) {
  return page.evaluate(() => navigator.clipboard.readText());
}
// Write `value` to the page's clipboard. Resolves false instead of
// throwing when the browser denies clipboard access.
async function writeClipboard(page, value) {
  return page.evaluate(async (nextValue) => {
    try {
      await navigator.clipboard.writeText(nextValue);
      return true;
    } catch {
      return false;
    }
  }, value);
}
1827
async function waitForClipboardChange(page, previousText, timeoutMs) {
  // Poll the page clipboard (every 100 ms) until it holds non-empty text
  // that differs from `previousText`, or `timeoutMs` elapses.
  // Throws on timeout.
  const baseline = previousText.trim(); // hoisted: loop-invariant
  const startedAt = Date.now();
  while (Date.now() - startedAt < timeoutMs) {
    const current = (await readClipboard(page)).trim();
    if (current && current !== baseline) {
      return current;
    }
    await page.waitForTimeout(100);
  }
  throw new Error("Timed out waiting for clipboard content to change");
}
1838
async function performClipboardInteractions(page, strategy, deadlineAt) {
  // Drive the page through the strategy's interaction steps (hover /
  // click / press / timed wait) so the site's copy-to-clipboard control
  // fires. Aborts with an error once deadlineAt (epoch ms) has passed.
  for (const interaction of strategy.interactions) {
    const remainingMs = deadlineAt - Date.now();
    if (remainingMs <= 0) {
      throw new Error("Timed out before clipboard copy controls became ready");
    }
    if (interaction.action === "hover") {
      // Per-step timeout is capped by the remaining overall budget.
      const locator = page.locator(interaction.selector).first();
      const interactionTimeout = Math.min(
        interaction.timeoutMs ?? CLIPBOARD_INTERACTION_DEFAULT_TIMEOUT_MS,
        remainingMs
      );
      await locator.waitFor({
        state: "visible",
        timeout: interactionTimeout
      });
      await locator.hover({
        timeout: interactionTimeout
      });
      continue;
    }
    if (interaction.action === "click") {
      const locator = page.locator(interaction.selector).first();
      const interactionTimeout = Math.min(
        interaction.timeoutMs ?? CLIPBOARD_INTERACTION_DEFAULT_TIMEOUT_MS,
        remainingMs
      );
      await locator.waitFor({
        state: "visible",
        timeout: interactionTimeout
      });
      await locator.click({
        timeout: interactionTimeout
      });
      continue;
    }
    if (interaction.action === "press") {
      await page.keyboard.press(interaction.key);
      continue;
    }
    // Fallback: plain timed wait.
    // NOTE(review): unlike hover/click, timeoutMs has no ?? default here;
    // if it is undefined Math.min yields NaN — confirm the interaction
    // schema requires timeoutMs for wait-style steps.
    await page.waitForTimeout(Math.min(interaction.timeoutMs, remainingMs));
  }
}
1881
// Extract page content by driving the site's "copy as markdown" UI control.
// Seeds the clipboard with a unique sentinel (or, when writing is not
// permitted, records the current clipboard text), then repeatedly performs
// the configured interactions and polls for the clipboard to change until
// strategy.clipboardTimeoutMs elapses.
async function runClipboardStrategy(page, strategy) {
  // Unique marker so a later read can distinguish freshly copied content
  // from whatever was already on the clipboard.
  const sentinel = `__aiocs_clipboard_marker__${Date.now()}__${Math.random().toString(36).slice(2)}__`;
  // If the sentinel write fails, compare against the current clipboard text
  // instead ("" when even reading fails).
  const before = await writeClipboard(page, sentinel).catch(() => false) ? sentinel : await readClipboard(page).catch(() => "");
  const deadlineAt = Date.now() + strategy.clipboardTimeoutMs;
  let lastError = null;
  let markdown = null;
  while (Date.now() < deadlineAt && !markdown) {
    // Interaction failures are recorded but not fatal: the copy may already
    // have happened on an earlier pass, so we still poll the clipboard below.
    try {
      await performClipboardInteractions(page, strategy, deadlineAt);
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
    }
    const remainingMs = deadlineAt - Date.now();
    if (remainingMs <= 0) {
      break;
    }
    // Short poll window (<= 400ms) so the interactions can be retried
    // several times within the overall deadline.
    try {
      markdown = await waitForClipboardChange(page, before, Math.min(400, remainingMs));
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
    }
  }
  if (!markdown) {
    throw lastError ?? new Error("Timed out waiting for clipboard content to change");
  }
  // Prefer the markdown's own H1 as the title; fall back to the document title.
  const title = extractTitleFromMarkdown(markdown) ?? await page.title();
  return {
    title,
    markdown: markdown.trim()
  };
}
1912
// Extract content from a fixed container selector: wait for it to become
// visible, convert its inner HTML to markdown, and title the result from the
// container's first <h1> (falling back to the document title).
async function runSelectorStrategy(page, selector) {
  const container = page.locator(selector).first();
  await container.waitFor({ state: "visible", timeout: 1e4 });
  const html = await container.innerHTML();
  const heading = await container.locator("h1").first().textContent().catch(() => null);
  const title = (heading ?? await page.title()).trim();
  return {
    title,
    markdown: htmlToMarkdown(html)
  };
}
1923
// Fallback extraction: run Mozilla Readability over the fully rendered HTML
// and convert the isolated article body to markdown. Throws when Readability
// cannot find an article on the page.
async function runReadabilityStrategy(page) {
  const html = await page.content();
  const dom = new JSDOM(html, { url: page.url() });
  const article = new Readability(dom.window.document).parse();
  if (!article?.content) {
    throw new Error(`Readability could not extract content for ${page.url()}`);
  }
  const title = article.title?.trim() || await page.title();
  return {
    title,
    markdown: htmlToMarkdown(article.content)
  };
}
1936
// Return the text of the first H1 ("# ...") line in the markdown, or null
// when no H1 heading is present.
function extractTitleFromMarkdown(markdown) {
  const headingLine = markdown
    .split("\n")
    .map((line) => line.trim())
    .find((line) => line.startsWith("# "));
  return headingLine ? headingLine.slice(2).trim() : null;
}
1945
// Dispatch to the configured extraction strategy; readability is the default
// when no explicit strategy matches.
async function extractPage(page, strategy) {
  switch (strategy.strategy) {
    case "clipboardButton":
      return runClipboardStrategy(page, strategy);
    case "selector":
      return runSelectorStrategy(page, strategy.selector);
    default:
      return runReadabilityStrategy(page);
  }
}
1954
+
1955
+ // src/fetch/url-patterns.ts
1956
// Escape regex metacharacters in a literal string. "*" is intentionally not
// escaped: patternToRegex consumes it as a wildcard before literals reach here.
function escapeRegex(value) {
  return value
    .split("")
    .map((ch) => "|\\{}()[]^$+?.".includes(ch) ? "\\" + ch : ch)
    .join("");
}
1959
// Compile a glob-like URL pattern into an anchored RegExp:
// "**" matches anything (including "?" and "#"), a single "*" matches any run
// of characters up to the query/fragment, everything else matches literally.
function patternToRegex(pattern) {
  let body = "";
  let index = 0;
  while (index < pattern.length) {
    if (pattern.startsWith("**", index)) {
      body += ".*";
      index += 2;
    } else if (pattern[index] === "*") {
      body += "[^?#]*";
      index += 1;
    } else {
      body += escapeRegex(pattern[index] ?? "");
      index += 1;
    }
  }
  return new RegExp(`^${body}$`);
}
1977
// True when the URL matches at least one of the glob patterns.
function matchesPatterns(url, patterns) {
  for (const pattern of patterns) {
    if (patternToRegex(pattern).test(url)) {
      return true;
    }
  }
  return false;
}
1980
+
1981
+ // src/fetch/fetch-source.ts
1982
// Retry policy shared by fetchSource and runSourceCanary:
// up to 3 attempts with linear backoff (250ms * attempt number).
var MAX_FETCH_ATTEMPTS = 3;
var RETRY_DELAY_MS = 250;
1984
// Current wall-clock time as an ISO-8601 UTC string.
function nowIso2() {
  const timestamp = new Date();
  return timestamp.toISOString();
}
1987
// Canonical form of a URL for crawl bookkeeping: drop the fragment and strip
// a trailing slash from the path (except for the bare root path "/").
// The query string is preserved.
function canonicalizeUrl(raw) {
  const url = new URL(raw);
  url.hash = "";
  const { pathname } = url;
  if (pathname !== "/" && pathname.endsWith("/")) {
    url.pathname = pathname.slice(0, -1);
  }
  return url.toString();
}
1995
// Crawl-dedup key for a URL: the canonical URL with any trailing
// ".md"/".markdown" extension removed from the path, so a raw markdown URL
// and its rendered page share one key.
function getCrawlKey(raw) {
  const url = new URL(canonicalizeUrl(raw));
  url.pathname = url.pathname.replace(/\.(md|markdown)$/i, "");
  return url.toString();
}
2002
// Crawl-scope check: the URL must be on an allowed host, outside GitBook's
// internal "/~gitbook/" paths, match at least one include pattern, and (when
// exclude patterns exist) match none of them.
function isAllowed(url, allowedHosts, include, exclude) {
  const parsed = new URL(url);
  const inScope =
    allowedHosts.includes(parsed.hostname) &&
    !parsed.pathname.startsWith("/~gitbook/") &&
    matchesPatterns(url, include);
  if (!inScope) {
    return false;
  }
  return exclude.length === 0 || !matchesPatterns(url, exclude);
}
2018
// Filesystem-safe slug for a page title: lowercase, non-alphanumerics
// collapsed to "-", trimmed of edge dashes, capped at 80 chars; "page" when
// nothing survives.
function slugify(value) {
  const slug = value
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 80);
  return slug === "" ? "page" : slug;
}
2021
// Return the text of the first H1 ("# ...") line, or null when none exists.
// (Duplicate of the extract-module helper; kept separate by the bundler.)
function extractTitleFromMarkdown2(markdown) {
  const firstHeading = markdown
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .find((candidate) => candidate.startsWith("# "));
  return firstHeading === void 0 ? null : firstHeading.slice(2).trim();
}
2030
// Derive a human-readable title from the last path segment of a URL:
// markdown extension dropped, dashes/underscores become spaces; "page" when
// the path is empty or nothing readable remains.
function deriveTitleFromUrl(url) {
  const segments = new URL(url).pathname.split("/").filter(Boolean);
  const lastSegment = segments.length > 0 ? segments[segments.length - 1] : "page";
  const cleaned = lastSegment
    .replace(/\.(md|markdown)$/i, "")
    .replace(/[-_]+/g, " ")
    .trim();
  return cleaned === "" ? "page" : cleaned;
}
2035
// Decide whether a navigation response is raw markdown rather than HTML:
// either an explicit markdown content-type, or text/plain served from a path
// ending in .md/.markdown. A null response is never raw markdown.
function isRawMarkdownResponse(url, response) {
  if (!response) {
    return false;
  }
  const contentType = (response.headers()["content-type"] ?? "").toLowerCase();
  if (contentType.includes("text/markdown") || contentType.includes("text/x-markdown")) {
    return true;
  }
  const markdownPath = /\.(md|markdown)$/i.test(new URL(url).pathname);
  return markdownPath && contentType.includes("text/plain");
}
2045
// Build a page record from a raw markdown response body: title comes from the
// markdown's first H1 when present, otherwise from the URL's last segment.
async function extractRawMarkdownPage(url, response) {
  const body = await response.text();
  const markdown = body.trim();
  const title = extractTitleFromMarkdown2(markdown) ?? deriveTitleFromUrl(url);
  return { url, title, markdown };
}
2053
// Write each fetched page to
// <dataDir>/sources/<sourceId>/snapshots/<snapshotId>/pages/NNN-<slug>.md,
// numbered in crawl order with a zero-padded 3-digit prefix.
function persistSnapshotPages(input, snapshotId, pages) {
  const snapshotDir = join3(
    input.dataDir,
    "sources",
    input.sourceId,
    "snapshots",
    snapshotId,
    "pages"
  );
  mkdirSync3(snapshotDir, { recursive: true });
  let position = 0;
  for (const page of pages) {
    position += 1;
    const filename = `${String(position).padStart(3, "0")}-${slugify(page.title)}.md`;
    writeFileSync(join3(snapshotDir, filename), page.markdown, "utf8");
  }
}
2061
// Look up a required environment variable, throwing a typed AUTH_ENV_MISSING
// error (with the variable name in details) when it is unset or empty.
function resolveEnvValue(name, env) {
  const value = env[name];
  if (value) {
    return value;
  }
  throw new AiocsError(
    AIOCS_ERROR_CODES.authEnvMissing,
    `Missing required environment variable '${name}' for authenticated source access`,
    { envVar: name }
  );
}
2074
// Materialize the source's auth configuration: resolve each header/cookie
// value from the environment and normalize optional fields. The conditional
// spreads keep optional keys *absent* rather than present-but-undefined,
// which matters for downstream consumers (e.g. Playwright cookie validation).
function resolveSourceAuth(spec, env) {
  // Headers default to all allowed hosts unless scoped explicitly.
  const scopedHeaders = (spec.auth?.headers ?? []).map((header) => ({
    name: header.name,
    value: resolveEnvValue(header.valueFromEnv, env),
    hosts: header.hosts ?? spec.allowedHosts,
    ...header.include ? { include: header.include } : {}
  }));
  const cookies = (spec.auth?.cookies ?? []).map((cookie) => ({
    name: cookie.name,
    value: resolveEnvValue(cookie.valueFromEnv, env),
    domain: cookie.domain,
    path: cookie.path,
    ...typeof cookie.secure === "boolean" ? { secure: cookie.secure } : {},
    ...typeof cookie.httpOnly === "boolean" ? { httpOnly: cookie.httpOnly } : {},
    ...cookie.sameSite ? { sameSite: cookie.sameSite } : {}
  }));
  return {
    scopedHeaders,
    cookies
  };
}
2095
// Merge auth headers into a request's headers, applying each scoped header
// only when the request's hostname is in its host list and (if an include
// list is set) the URL matches one of its patterns. Returns the original
// headers object untouched when there are no scoped headers.
function applyScopedAuthHeaders(requestUrl, headers, scopedHeaders) {
  if (scopedHeaders.length === 0) {
    return headers;
  }
  const hostname = new URL(requestUrl).hostname;
  const merged = { ...headers };
  for (const scoped of scopedHeaders) {
    const hostMatches = scoped.hosts.includes(hostname);
    const urlMatches = !scoped.include || matchesPatterns(requestUrl, scoped.include);
    if (hostMatches && urlMatches) {
      merged[scoped.name] = scoped.value;
    }
  }
  return merged;
}
2112
// Launch a headless Chromium session configured for this source: scoped auth
// headers are injected via request routing, auth cookies are installed, and
// clipboard read/write permissions are granted for every start-URL origin so
// the clipboardButton extraction strategy can work.
async function createSourceContext(spec, env) {
  const { scopedHeaders, cookies } = resolveSourceAuth(spec, env);
  const browser = await chromium.launch({ headless: true });
  const context = await browser.newContext({
    // Large fixed viewport — presumably so docs sites render their desktop
    // layout; TODO(review): confirm the exact dimensions matter.
    viewport: {
      width: 1440,
      height: 1200
    }
  });
  if (scopedHeaders.length > 0) {
    // Intercept every request so auth headers are attached only to
    // matching hosts/URL patterns (see applyScopedAuthHeaders).
    await context.route("**/*", async (route) => {
      await route.continue({
        headers: applyScopedAuthHeaders(route.request().url(), route.request().headers(), scopedHeaders)
      });
    });
  }
  if (cookies.length > 0) {
    await context.addCookies(cookies);
  }
  // Clipboard permissions are granted per-origin.
  const uniqueOrigins = [...new Set(spec.startUrls.map((url) => new URL(url).origin))];
  for (const origin of uniqueOrigins) {
    await context.grantPermissions(["clipboard-read", "clipboard-write"], { origin });
  }
  const page = await context.newPage();
  page.setDefaultTimeout(15e3);
  return {
    page,
    // Close the context before the browser so pages/routes tear down cleanly.
    async close() {
      await context.close();
      await browser.close();
    }
  };
}
2145
// Collect every non-empty anchor href on the current page. `anchor.href` is
// resolved by the browser, so relative links come back as absolute URLs.
async function discoverLinks(page) {
  return page.locator("a[href]").evaluateAll(
    (anchors) => anchors.map((anchor) => anchor.href).filter((href) => typeof href === "string" && href.length > 0)
  );
}
2150
// Turn a navigated page (or a raw markdown response) into a fetched-page
// record { url, title, markdown, markdownLength } with the markdown
// normalized per the source spec.
async function extractFetchedPage(spec, page, url, response) {
  // Raw markdown endpoints bypass DOM extraction entirely.
  if (response && isRawMarkdownResponse(url, response)) {
    const extracted2 = await extractRawMarkdownPage(url, response);
    const markdown2 = normalizeMarkdown(spec, extracted2);
    return {
      ...extracted2,
      markdown: markdown2,
      markdownLength: markdown2.trim().length
    };
  }
  // Brief settle delay after domcontentloaded before reading the DOM.
  await page.waitForTimeout(150);
  const extracted = await extractPage(page, spec.extract);
  const markdown = normalizeMarkdown(spec, {
    title: extracted.title,
    url,
    markdown: extracted.markdown
  });
  return {
    url,
    title: extracted.title,
    markdown,
    markdownLength: markdown.trim().length
  };
}
2174
// Single crawl attempt for a source: BFS over in-scope links starting from
// spec.startUrls, capped at spec.discovery.maxPages pages. Pages are deduped
// by "crawl key" (canonical URL with any .md/.markdown extension stripped) so
// a raw markdown URL and its rendered page count as one page; the rendered
// (non-raw) version wins when both are fetched. Raw markdown URLs are kept as
// *fallbacks*: the canonical page is tried first, and the raw URL is re-queued
// only when the canonical fetch 4xx/5xxes or extraction fails. On success the
// snapshot is recorded in the catalog and (when not a reused snapshot) the
// pages are persisted to disk.
async function fetchSourceOnce(input) {
  const spec = input.catalog.getSourceSpec(input.sourceId);
  if (!spec) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.sourceNotFound,
      `Unknown source '${input.sourceId}'`
    );
  }
  const session = await createSourceContext(spec, input.env ?? process.env);
  const { page } = session;
  const queue = spec.startUrls.map((url) => canonicalizeUrl(url));
  const seen = new Set();
  // pageOrder preserves first-seen crawl order for persistence.
  const pageOrder = [];
  const pagesByCrawlKey = new Map();
  // crawlKey -> raw markdown URL to retry if the canonical page fails.
  const pendingRawFallbacks = new Map();
  try {
    while (queue.length > 0 && pagesByCrawlKey.size < spec.discovery.maxPages) {
      const next = queue.shift();
      if (!next) {
        break;
      }
      const url = canonicalizeUrl(next);
      const crawlKey = getCrawlKey(url);
      // A URL whose crawl key differs from itself carries a markdown extension.
      const isRawMarkdownUrl = crawlKey !== url;
      const existing = pagesByCrawlKey.get(crawlKey);
      if (isRawMarkdownUrl) {
        // A rendered page already covers this key — skip the raw variant.
        if (existing && !existing.isRawMarkdown) {
          continue;
        }
        // First encounter: park the raw URL as a fallback and try the
        // canonical (extension-less) URL first.
        if (!seen.has(crawlKey) && !existing) {
          pendingRawFallbacks.set(crawlKey, url);
          const canonicalQueued = queue.some((queuedUrl) => canonicalizeUrl(queuedUrl) === crawlKey);
          if (!canonicalQueued) {
            queue.unshift(crawlKey);
          }
          continue;
        }
      }
      if (seen.has(url)) {
        continue;
      }
      seen.add(url);
      if (!isAllowed(url, spec.allowedHosts, spec.discovery.include, spec.discovery.exclude)) {
        continue;
      }
      const response = await page.goto(url, { waitUntil: "domcontentloaded" });
      if (response && response.status() >= 400) {
        // Canonical page failed with an HTTP error — fall back to the raw URL.
        const pendingRawFallback = pendingRawFallbacks.get(crawlKey);
        if (!isRawMarkdownUrl && pendingRawFallback && !seen.has(pendingRawFallback)) {
          queue.unshift(pendingRawFallback);
        }
        continue;
      }
      let fetchedPage;
      try {
        fetchedPage = await extractFetchedPage(spec, page, url, response);
      } catch (error) {
        // Extraction failed — retry via the raw fallback if one is pending;
        // otherwise the crawl attempt as a whole fails.
        const pendingRawFallback = pendingRawFallbacks.get(crawlKey);
        if (!isRawMarkdownUrl && pendingRawFallback && !seen.has(pendingRawFallback)) {
          queue.unshift(pendingRawFallback);
          continue;
        }
        throw error;
      }
      const isRawMarkdown = response !== null && isRawMarkdownResponse(url, response);
      if (!existing) {
        pageOrder.push(crawlKey);
        pagesByCrawlKey.set(crawlKey, { page: fetchedPage, isRawMarkdown });
      } else if (existing.isRawMarkdown && !isRawMarkdown) {
        // Rendered version replaces a previously stored raw version.
        pagesByCrawlKey.set(crawlKey, { page: fetchedPage, isRawMarkdown });
      }
      if (!isRawMarkdown) {
        pendingRawFallbacks.delete(crawlKey);
      }
      // Only rendered HTML pages are mined for further links.
      if (!isRawMarkdown) {
        const links = await discoverLinks(page);
        for (const link of links) {
          const canonical = canonicalizeUrl(link);
          if (!seen.has(canonical) && isAllowed(canonical, spec.allowedHosts, spec.discovery.include, spec.discovery.exclude)) {
            queue.push(canonical);
          }
        }
      }
    }
    const pages = pageOrder.map((crawlKey) => pagesByCrawlKey.get(crawlKey)?.page).filter((pageEntry) => pageEntry !== void 0);
    if (pages.length === 0) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.noPagesFetched,
        `No pages fetched for source '${input.sourceId}'`
      );
    }
    const result = input.catalog.recordSuccessfulSnapshot({
      sourceId: input.sourceId,
      pages
    });
    // A reused snapshot already has its pages on disk.
    if (!result.reused) {
      persistSnapshotPages(input, result.snapshotId, pages);
    }
    return {
      snapshotId: result.snapshotId,
      pageCount: pages.length,
      reused: result.reused
    };
  } finally {
    await session.close();
  }
}
2281
// Fetch a source with retries: up to MAX_FETCH_ATTEMPTS attempts with linear
// backoff (RETRY_DELAY_MS * attempt). Only the final failure is recorded in
// the catalog before being rethrown.
async function fetchSource(input) {
  let lastError;
  for (let attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt += 1) {
    try {
      return await fetchSourceOnce(input);
    } catch (error) {
      lastError = error;
      if (attempt >= MAX_FETCH_ATTEMPTS) {
        input.catalog.recordFailedFetchRun({
          sourceId: input.sourceId,
          errorMessage: error instanceof Error ? error.message : String(error)
        });
        throw error;
      }
      await sleep(RETRY_DELAY_MS * attempt);
    }
  }
  // Unreachable in practice (the loop either returns or throws); kept as a guard.
  throw lastError instanceof Error ? lastError : new Error(String(lastError));
}
2300
// Single canary attempt for a source: navigate each configured check URL,
// extract it with the source's normal pipeline, and verify the expectations
// (title substring, markdown substring, minimum markdown length). Per-check
// failures are captured rather than aborting the run; the aggregated result
// is always recorded in the catalog, and a failing overall status is raised
// as a CANARY_FAILED error carrying the full result in `details`.
async function runSourceCanaryOnce(input) {
  const spec = input.catalog.getSourceSpec(input.sourceId);
  if (!spec) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.sourceNotFound,
      `Unknown source '${input.sourceId}'`
    );
  }
  const canary = resolveSourceCanary(spec);
  const session = await createSourceContext(spec, input.env ?? process.env);
  const { page } = session;
  const checks = [];
  try {
    for (const check of canary.checks) {
      const url = canonicalizeUrl(check.url);
      try {
        // A canary URL outside the crawl scope is a configuration error.
        if (!isAllowed(url, spec.allowedHosts, spec.discovery.include, spec.discovery.exclude)) {
          throw new AiocsError(
            AIOCS_ERROR_CODES.invalidArgument,
            `Canary URL '${url}' is outside the allowed source scope`
          );
        }
        const response = await page.goto(url, { waitUntil: "domcontentloaded" });
        if (response && response.status() >= 400) {
          throw new Error(`Canary request failed with HTTP ${response.status()}`);
        }
        const extracted = await extractFetchedPage(spec, page, url, response);
        if (check.expectedTitle && !extracted.title.includes(check.expectedTitle)) {
          throw new Error(`Expected title to include '${check.expectedTitle}'`);
        }
        if (check.expectedText && !extracted.markdown.includes(check.expectedText)) {
          throw new Error(`Expected markdown to include '${check.expectedText}'`);
        }
        if (extracted.markdownLength < check.minMarkdownLength) {
          throw new Error(
            `Expected markdown length to be at least ${check.minMarkdownLength}, received ${extracted.markdownLength}`
          );
        }
        checks.push({
          url,
          status: "pass",
          title: extracted.title,
          markdownLength: extracted.markdownLength
        });
      } catch (error) {
        // Record the failure and continue with the remaining checks.
        checks.push({
          url,
          status: "fail",
          errorMessage: error instanceof Error ? error.message : String(error)
        });
      }
    }
  } finally {
    await session.close();
  }
  const result = {
    sourceId: input.sourceId,
    status: checks.every((check) => check.status === "pass") ? "pass" : "fail",
    checkedAt: nowIso2(),
    summary: {
      checkCount: checks.length,
      passCount: checks.filter((check) => check.status === "pass").length,
      failCount: checks.filter((check) => check.status === "fail").length
    },
    checks
  };
  // The run is recorded even when it fails, before the error is raised.
  input.catalog.recordCanaryRun({
    sourceId: input.sourceId,
    status: result.status,
    checkedAt: result.checkedAt,
    details: result
  });
  if (result.status === "fail") {
    throw new AiocsError(
      AIOCS_ERROR_CODES.canaryFailed,
      `Canary failed for source '${input.sourceId}'`,
      result
    );
  }
  return result;
}
2381
// Run the canary with the same retry policy as fetchSource. A canary that
// still fails after the final attempt is *returned* as its recorded result
// (the AiocsError's `details`) instead of throwing, so callers always get
// the check report; any other error type is rethrown.
async function runSourceCanary(input) {
  let lastError;
  for (let attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt += 1) {
    try {
      return await runSourceCanaryOnce(input);
    } catch (error) {
      lastError = error;
      if (attempt >= MAX_FETCH_ATTEMPTS) {
        if (error instanceof AiocsError && error.code === AIOCS_ERROR_CODES.canaryFailed) {
          return error.details;
        }
        throw error;
      }
      await sleep(RETRY_DELAY_MS * attempt);
    }
  }
  // Unreachable in practice (the loop either returns or throws); kept as a guard.
  throw lastError instanceof Error ? lastError : new Error(String(lastError));
}
2399
+
2400
+ // src/hybrid/ollama.ts
2401
// Identity key for an embedding configuration ("provider:model"), used to
// detect vectors indexed under a different provider/model combination.
function getEmbeddingModelKey(config) {
  const { embeddingProvider, ollamaEmbeddingModel } = config;
  return [embeddingProvider, ollamaEmbeddingModel].join(":");
}
2404
// Normalize a base URL so callers can safely append "/api/..." paths.
//
// Fix: the original only stripped a single trailing slash, so a base URL like
// "http://host//" produced "http://host//api/embed" — a double-slash path.
// Strip every trailing slash instead (backward compatible for the zero- and
// one-slash cases).
function normalizeBaseUrl(baseUrl) {
  return baseUrl.replace(/\/+$/, "");
}
2407
// Parse a fetch Response body as JSON. An empty body yields {}; a non-empty,
// non-JSON body is surfaced as an EMBEDDING_PROVIDER_UNAVAILABLE error that
// includes the HTTP status.
async function parseJsonResponse(response) {
  const raw = await response.text();
  if (raw.length === 0) {
    return {};
  }
  try {
    return JSON.parse(raw);
  } catch {
    throw new AiocsError(
      AIOCS_ERROR_CODES.embeddingProviderUnavailable,
      `Ollama returned a non-JSON response with status ${response.status}`
    );
  }
}
2421
// Embed a batch of texts via Ollama's /api/embed endpoint. Returns one
// numeric vector per input text, in order. Network failures, non-2xx
// statuses, malformed payloads, and count mismatches are all mapped to
// EMBEDDING_PROVIDER_UNAVAILABLE errors.
async function embedTexts(config, texts) {
  if (texts.length === 0) {
    return [];
  }
  const response = await fetch(`${normalizeBaseUrl(config.ollamaBaseUrl)}/api/embed`, {
    method: "POST",
    headers: {
      "content-type": "application/json"
    },
    // Abort the request entirely if Ollama takes longer than the configured timeout.
    signal: AbortSignal.timeout(config.ollamaTimeoutMs),
    body: JSON.stringify({
      model: config.ollamaEmbeddingModel,
      input: texts
    })
  }).catch((error) => {
    throw new AiocsError(
      AIOCS_ERROR_CODES.embeddingProviderUnavailable,
      `Unable to reach Ollama at ${config.ollamaBaseUrl}: ${error instanceof Error ? error.message : String(error)}`
    );
  });
  if (!response.ok) {
    // Include the response body in details when available, to aid debugging.
    const body = await response.text();
    throw new AiocsError(
      AIOCS_ERROR_CODES.embeddingProviderUnavailable,
      `Ollama embed request failed with status ${response.status}`,
      body ? { body } : void 0
    );
  }
  const payload = await parseJsonResponse(response);
  if (!Array.isArray(payload.embeddings)) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.embeddingProviderUnavailable,
      "Ollama embed response did not include an embeddings array"
    );
  }
  // Validate every vector: each entry must be an array of numbers.
  const embeddings = payload.embeddings.map((entry) => {
    if (!Array.isArray(entry) || !entry.every((value) => typeof value === "number")) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.embeddingProviderUnavailable,
        "Ollama embed response contained an invalid embedding vector"
      );
    }
    return entry;
  });
  // The provider must return exactly one embedding per input.
  if (embeddings.length !== texts.length) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.embeddingProviderUnavailable,
      `Ollama returned ${embeddings.length} embeddings for ${texts.length} inputs`
    );
  }
  return embeddings;
}
2473
// Health check for the Ollama embedding provider: query /api/tags and report
// whether the configured embedding model is installed (exact name or a tagged
// variant like "model:latest"). Connectivity problems raise
// EMBEDDING_PROVIDER_UNAVAILABLE; a reachable server with a missing model
// returns ok=false instead of throwing.
async function getEmbeddingProviderStatus(config) {
  const response = await fetch(`${normalizeBaseUrl(config.ollamaBaseUrl)}/api/tags`, {
    signal: AbortSignal.timeout(config.ollamaTimeoutMs)
  }).catch((error) => {
    throw new AiocsError(
      AIOCS_ERROR_CODES.embeddingProviderUnavailable,
      `Unable to reach Ollama at ${config.ollamaBaseUrl}: ${error instanceof Error ? error.message : String(error)}`
    );
  });
  if (!response.ok) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.embeddingProviderUnavailable,
      `Ollama tags request failed with status ${response.status}`
    );
  }
  const payload = await parseJsonResponse(response);
  // Model entries may carry the name under "name" or "model" depending on version.
  const availableModels = (payload.models ?? []).map((entry) => entry.name ?? entry.model).filter((entry) => typeof entry === "string" && entry.length > 0);
  // Accept either an exact match or a tag-qualified variant ("model:tag").
  const modelPresent = availableModels.some(
    (name) => name === config.ollamaEmbeddingModel || name.startsWith(`${config.ollamaEmbeddingModel}:`)
  );
  return {
    ok: modelPresent,
    modelPresent,
    baseUrl: config.ollamaBaseUrl,
    model: config.ollamaEmbeddingModel,
    availableModels
  };
}
2501
+
2502
+ // src/hybrid/qdrant.ts
2503
+ import { QdrantClient } from "@qdrant/js-client-rest";
2504
// Thin wrapper around the Qdrant REST client for a single collection.
// Every client call is wrapped so failures surface as typed
// VECTOR_STORE_UNAVAILABLE errors with the collection name in the message.
var AiocsVectorStore = class {
  client;
  collectionName;
  constructor(config) {
    this.client = new QdrantClient({
      url: config.qdrantUrl,
      timeout: config.qdrantTimeoutMs,
      // Skip the client/server version handshake.
      checkCompatibility: false
    });
    this.collectionName = config.qdrantCollection;
  }
  // Chunk ids are used directly as Qdrant point ids (integers).
  pointIdForChunk(chunkId) {
    return chunkId;
  }
  // Ensure the collection exists with cosine distance and the given vector
  // dimension. If it exists with a *different* dimension, it is dropped and
  // recreated — existing vectors in it are lost by design (model change).
  async ensureCollection(dimension) {
    const existsResponse = await this.client.collectionExists(this.collectionName).catch((error) => {
      throw new AiocsError(
        AIOCS_ERROR_CODES.vectorStoreUnavailable,
        `Unable to reach Qdrant collection '${this.collectionName}': ${error instanceof Error ? error.message : String(error)}`
      );
    });
    // Older/newer clients return either a bare boolean or { exists }.
    const exists = typeof existsResponse === "boolean" ? existsResponse : Boolean(existsResponse.exists);
    if (!exists) {
      await this.client.createCollection(this.collectionName, {
        vectors: {
          size: dimension,
          distance: "Cosine"
        }
      }).catch((error) => {
        throw new AiocsError(
          AIOCS_ERROR_CODES.vectorStoreUnavailable,
          `Unable to create Qdrant collection '${this.collectionName}': ${error instanceof Error ? error.message : String(error)}`
        );
      });
      return;
    }
    const collection = await this.client.getCollection(this.collectionName).catch((error) => {
      throw new AiocsError(
        AIOCS_ERROR_CODES.vectorStoreUnavailable,
        `Unable to inspect Qdrant collection '${this.collectionName}': ${error instanceof Error ? error.message : String(error)}`
      );
    });
    const params = collection.config?.params?.vectors;
    const currentSize = typeof params === "object" && params && "size" in params ? Number(params.size) : null;
    if (!currentSize || currentSize !== dimension) {
      await this.client.recreateCollection(this.collectionName, {
        vectors: {
          size: dimension,
          distance: "Cosine"
        }
      }).catch((error) => {
        throw new AiocsError(
          AIOCS_ERROR_CODES.vectorStoreUnavailable,
          `Unable to recreate Qdrant collection '${this.collectionName}' for dimension ${dimension}: ${error instanceof Error ? error.message : String(error)}`
        );
      });
    }
  }
  // Upsert chunk vectors with their lookup payload. `wait: true` makes the
  // write synchronous so a subsequent search sees the new points.
  async upsertChunks(input) {
    if (input.points.length === 0) {
      return;
    }
    const points = input.points.map((point) => ({
      id: this.pointIdForChunk(point.chunkId),
      vector: point.vector,
      payload: {
        chunkId: point.chunkId,
        sourceId: point.sourceId,
        snapshotId: point.snapshotId,
        pageUrl: point.pageUrl,
        pageTitle: point.pageTitle,
        sectionTitle: point.sectionTitle,
        modelKey: input.modelKey
      }
    }));
    await this.client.upsert(this.collectionName, {
      wait: true,
      points
    }).catch((error) => {
      throw new AiocsError(
        AIOCS_ERROR_CODES.vectorStoreUnavailable,
        `Unable to upsert vectors into Qdrant collection '${this.collectionName}': ${error instanceof Error ? error.message : String(error)}`
      );
    });
  }
  // Delete points for the given chunk ids (no-op for an empty list).
  async deleteChunkIds(chunkIds) {
    if (chunkIds.length === 0) {
      return;
    }
    await this.client.delete(this.collectionName, {
      wait: true,
      points: chunkIds.map((chunkId) => this.pointIdForChunk(chunkId))
    }).catch((error) => {
      throw new AiocsError(
        AIOCS_ERROR_CODES.vectorStoreUnavailable,
        `Unable to delete vectors from Qdrant collection '${this.collectionName}': ${error instanceof Error ? error.message : String(error)}`
      );
    });
  }
  // Drop the entire collection if it exists; silently succeeds when it does not.
  async clearCollection() {
    const existsResponse = await this.client.collectionExists(this.collectionName).catch((error) => {
      throw new AiocsError(
        AIOCS_ERROR_CODES.vectorStoreUnavailable,
        `Unable to reach Qdrant collection '${this.collectionName}': ${error instanceof Error ? error.message : String(error)}`
      );
    });
    const exists = typeof existsResponse === "boolean" ? existsResponse : Boolean(existsResponse.exists);
    if (!exists) {
      return;
    }
    await this.client.deleteCollection(this.collectionName).catch((error) => {
      throw new AiocsError(
        AIOCS_ERROR_CODES.vectorStoreUnavailable,
        `Unable to delete Qdrant collection '${this.collectionName}': ${error instanceof Error ? error.message : String(error)}`
      );
    });
  }
  // Vector similarity search scoped to the given snapshot ids and embedding
  // model key (and optionally to specific source ids). Returns
  // { chunkId, score } entries; points whose payload lacks a usable integer
  // chunk id are dropped.
  async search(input) {
    if (input.snapshotIds.length === 0) {
      return [];
    }
    const results = await this.client.search(this.collectionName, {
      vector: input.vector,
      limit: input.limit,
      ...typeof input.offset === "number" ? { offset: input.offset } : {},
      with_payload: ["chunkId", "snapshotId", "sourceId", "modelKey"],
      filter: {
        must: [
          {
            key: "snapshotId",
            match: {
              any: input.snapshotIds
            }
          },
          {
            // Only vectors produced by the current provider/model are valid.
            key: "modelKey",
            match: {
              value: input.modelKey
            }
          },
          ...input.sourceIds && input.sourceIds.length > 0 ? [{
            key: "sourceId",
            match: {
              any: input.sourceIds
            }
          }] : []
        ]
      }
    }).catch((error) => {
      throw new AiocsError(
        AIOCS_ERROR_CODES.vectorStoreUnavailable,
        `Unable to search Qdrant collection '${this.collectionName}': ${error instanceof Error ? error.message : String(error)}`
      );
    });
    return results.map((result) => {
      const payload = result.payload ?? {};
      // Prefer the payload chunkId; fall back to the point id itself.
      const chunkId = typeof payload.chunkId === "number" ? payload.chunkId : typeof result.id === "number" ? result.id : Number(result.id);
      if (!Number.isInteger(chunkId)) {
        return null;
      }
      return {
        chunkId,
        score: result.score
      };
    }).filter((result) => result !== null);
  }
  // Liveness probe: list collections; never throws, reports ok=false instead.
  async getHealth() {
    try {
      const response = await this.client.getCollections();
      return {
        ok: true,
        collections: response.collections?.map((entry) => entry.name) ?? []
      };
    } catch (error) {
      return {
        ok: false,
        errorMessage: error instanceof Error ? error.message : String(error)
      };
    }
  }
};
2685
+
2686
+ // src/hybrid/worker.ts
2687
// Split `values` into consecutive slices of at most `size` elements
// (the final slice may be shorter). Assumes size > 0.
function chunkArray(values, size) {
  const chunkCount = Math.ceil(values.length / size);
  return Array.from({ length: chunkCount }, (_, chunkIndex) =>
    values.slice(chunkIndex * size, (chunkIndex + 1) * size)
  );
}
2694
/**
 * Claim up to `embeddingJobsPerCycle` pending embedding jobs from the catalog
 * and index each claimed snapshot's chunks into the vector store.
 * Per-job failures are recorded in the catalog and collected in the result;
 * they do not abort the remaining claimed jobs.
 *
 * @param input { catalog, config } — catalog handle plus hybrid runtime config.
 * @returns { processedJobs, succeededJobs, failedJobs } summary of the cycle.
 */
async function processEmbeddingJobs(input) {
  const claimedJobs = input.catalog.claimEmbeddingJobs(input.config.embeddingJobsPerCycle);
  if (claimedJobs.length === 0) {
    // Nothing was claimed — return early without touching the vector store.
    return {
      processedJobs: 0,
      succeededJobs: [],
      failedJobs: []
    };
  }
  const vectorStore = new AiocsVectorStore(input.config);
  const modelKey = getEmbeddingModelKey(input.config);
  const succeededJobs = [];
  const failedJobs = [];
  for (const job of claimedJobs) {
    try {
      const chunks = input.catalog.listSnapshotChunks({
        sourceId: job.sourceId,
        snapshotId: job.snapshotId
      });
      if (chunks.length === 0) {
        // A snapshot with no chunks cannot be embedded — fail the job explicitly
        // rather than silently marking it done.
        input.catalog.markEmbeddingJobFailed({
          sourceId: job.sourceId,
          snapshotId: job.snapshotId,
          errorMessage: "No chunks found for embedding job snapshot"
        });
        failedJobs.push({
          sourceId: job.sourceId,
          snapshotId: job.snapshotId,
          errorMessage: "No chunks found for embedding job snapshot"
        });
        continue;
      }
      const existingState = input.catalog.getSnapshotEmbeddingState({
        sourceId: job.sourceId,
        snapshotId: job.snapshotId
      });
      // Stale = chunk ids the catalog already flags as stale for this source,
      // plus chunks whose recorded embedding was produced by a different model.
      const staleChunkIds = [
        .../* @__PURE__ */ new Set([
          ...input.catalog.listStaleEmbeddingChunkIds(job.sourceId),
          ...existingState.filter((entry) => entry.modelKey && entry.modelKey !== modelKey).map((entry) => entry.chunkId)
        ])
      ];
      // NOTE(review): when existingState is empty, `some` is false and the job
      // is marked succeeded without embedding anything — this assumes the
      // catalog always seeds per-chunk state rows for a new snapshot; confirm.
      const needsReindex = existingState.some((entry) => entry.status !== "indexed" || entry.modelKey !== modelKey);
      if (!needsReindex) {
        // Everything is already indexed with the current model — record success
        // without calling the embedding provider.
        input.catalog.markEmbeddingJobSucceeded({
          sourceId: job.sourceId,
          snapshotId: job.snapshotId,
          modelKey,
          indexedChunkIds: chunks.map((chunk) => chunk.chunkId),
          staleChunkIds
        });
        succeededJobs.push({
          sourceId: job.sourceId,
          snapshotId: job.snapshotId,
          chunkCount: chunks.length
        });
        continue;
      }
      // Embed the first chunk alone to discover the model's vector dimension
      // before creating/validating the collection.
      const dimensionProbe = await embedTexts(input.config, [chunks[0].markdown]);
      const vectorDimension = dimensionProbe[0]?.length;
      if (!vectorDimension) {
        throw new AiocsError(
          AIOCS_ERROR_CODES.embeddingProviderUnavailable,
          "Embedding provider returned an empty vector for the first chunk"
        );
      }
      await vectorStore.ensureCollection(vectorDimension);
      if (staleChunkIds.length > 0) {
        // Remove obsolete vectors before upserting fresh ones.
        await vectorStore.deleteChunkIds(staleChunkIds);
      }
      const indexedChunkIds = [];
      const batchedChunks = chunkArray(chunks, input.config.embeddingBatchSize);
      let dimensionProbeConsumed = false;
      for (const batch of batchedChunks) {
        // The probe already embedded chunks[0] (the first element of the first
        // batch); reuse it so that chunk is not embedded twice.
        const embeddings = dimensionProbeConsumed ? await embedTexts(input.config, batch.map((chunk) => chunk.markdown)) : [
          dimensionProbe[0],
          ...batch.length > 1 ? await embedTexts(input.config, batch.slice(1).map((chunk) => chunk.markdown)) : []
        ];
        dimensionProbeConsumed = true;
        if (embeddings.length !== batch.length) {
          throw new AiocsError(
            AIOCS_ERROR_CODES.embeddingProviderUnavailable,
            `Embedding provider returned ${embeddings.length} embeddings for a batch of ${batch.length}`
          );
        }
        await vectorStore.upsertChunks({
          modelKey,
          points: batch.map((chunk, index) => ({
            chunkId: chunk.chunkId,
            vector: embeddings[index],
            sourceId: chunk.sourceId,
            snapshotId: chunk.snapshotId,
            pageUrl: chunk.pageUrl,
            pageTitle: chunk.pageTitle,
            sectionTitle: chunk.sectionTitle
          }))
        });
        indexedChunkIds.push(...batch.map((chunk) => chunk.chunkId));
      }
      input.catalog.markEmbeddingJobSucceeded({
        sourceId: job.sourceId,
        snapshotId: job.snapshotId,
        modelKey,
        indexedChunkIds,
        staleChunkIds
      });
      succeededJobs.push({
        sourceId: job.sourceId,
        snapshotId: job.snapshotId,
        chunkCount: indexedChunkIds.length
      });
    } catch (error) {
      // Record the failure against the job; other claimed jobs still proceed.
      const errorMessage = error instanceof Error ? error.message : String(error);
      input.catalog.markEmbeddingJobFailed({
        sourceId: job.sourceId,
        snapshotId: job.snapshotId,
        errorMessage
      });
      failedJobs.push({
        sourceId: job.sourceId,
        snapshotId: job.snapshotId,
        errorMessage
      });
    }
  }
  return {
    processedJobs: claimedJobs.length,
    succeededJobs,
    failedJobs
  };
}
2825
+
2826
+ // src/runtime/bundled-sources.ts
2827
+ import { existsSync } from "fs";
2828
+ import { dirname, join as join4 } from "path";
2829
+ import { fileURLToPath } from "url";
2830
/**
 * Walk upward from `startDir` until a directory containing both a
 * package.json file and a sources/ directory is found.
 *
 * @param startDir absolute directory to start the upward search from.
 * @returns the package root directory.
 * @throws Error when the filesystem root is reached without a match.
 */
function findPackageRoot(startDir) {
  // A candidate qualifies only when both required entries are present.
  const isPackageRoot = (dir) => existsSync(join4(dir, "package.json")) && existsSync(join4(dir, "sources"));
  for (let dir = startDir; ; dir = dirname(dir)) {
    if (isPackageRoot(dir)) {
      return dir;
    }
    // dirname() is a fixed point at the filesystem root — stop there.
    if (dirname(dir) === dir) {
      throw new Error(`Could not locate aiocs package root from ${startDir}`);
    }
  }
}
2843
// Resolve the absolute path of the "sources" directory shipped inside the
// published package, located relative to this compiled module's own file path.
function getBundledSourcesDir() {
  const currentFilePath = fileURLToPath(import.meta.url);
  const packageRoot = findPackageRoot(dirname(currentFilePath));
  return join4(packageRoot, "sources");
}
2848
+
2849
+ // src/runtime/hybrid-config.ts
2850
/**
 * Parse an environment-variable string into a positive integer.
 * Missing or blank values fall back to the provided default.
 *
 * @throws AiocsError(EMBEDDING_CONFIG_INVALID) for non-integers or values <= 0.
 */
function parsePositiveInteger(value, field, fallback) {
  if (value === void 0 || value.trim() === "") {
    return fallback;
  }
  const numeric = Number(value);
  if (Number.isInteger(numeric) && numeric > 0) {
    return numeric;
  }
  throw new AiocsError(
    AIOCS_ERROR_CODES.embeddingConfigInvalid,
    `${field} must be a positive integer`
  );
}
2863
/**
 * Validate AIOCS_SEARCH_MODE_DEFAULT. A falsy value (unset or empty string)
 * selects "auto".
 *
 * @throws AiocsError(EMBEDDING_CONFIG_INVALID) for unrecognized modes.
 */
function parseSearchMode(value) {
  if (!value) {
    return "auto";
  }
  const validModes = ["auto", "lexical", "hybrid", "semantic"];
  if (validModes.includes(value)) {
    return value;
  }
  throw new AiocsError(
    AIOCS_ERROR_CODES.embeddingConfigInvalid,
    "AIOCS_SEARCH_MODE_DEFAULT must be one of: auto, lexical, hybrid, semantic"
  );
}
2875
/**
 * Build the hybrid-search runtime configuration from environment variables,
 * applying a documented default for every unset value.
 *
 * @param env environment map; defaults to process.env.
 * @throws AiocsError(EMBEDDING_CONFIG_INVALID) when the provider is not
 *         "ollama" or a numeric variable fails positive-integer validation.
 */
function getHybridRuntimeConfig(env = process.env) {
  const provider = env.AIOCS_EMBEDDING_PROVIDER ?? "ollama";
  // Only the local ollama provider is implemented today.
  if (provider !== "ollama") {
    throw new AiocsError(
      AIOCS_ERROR_CODES.embeddingConfigInvalid,
      "AIOCS_EMBEDDING_PROVIDER currently supports only ollama"
    );
  }
  const positive = (raw, name, fallback) => parsePositiveInteger(raw, name, fallback);
  return {
    defaultSearchMode: parseSearchMode(env.AIOCS_SEARCH_MODE_DEFAULT),
    qdrantUrl: env.AIOCS_QDRANT_URL ?? "http://127.0.0.1:6333",
    qdrantCollection: env.AIOCS_QDRANT_COLLECTION ?? "aiocs_docs_chunks",
    qdrantTimeoutMs: positive(env.AIOCS_QDRANT_TIMEOUT_MS, "AIOCS_QDRANT_TIMEOUT_MS", 1e3),
    embeddingProvider: "ollama",
    ollamaBaseUrl: env.AIOCS_OLLAMA_BASE_URL ?? "http://127.0.0.1:11434",
    ollamaEmbeddingModel: env.AIOCS_OLLAMA_EMBEDDING_MODEL ?? "nomic-embed-text",
    ollamaTimeoutMs: positive(env.AIOCS_OLLAMA_TIMEOUT_MS, "AIOCS_OLLAMA_TIMEOUT_MS", 1e3),
    embeddingBatchSize: positive(env.AIOCS_EMBEDDING_BATCH_SIZE, "AIOCS_EMBEDDING_BATCH_SIZE", 32),
    embeddingJobsPerCycle: positive(env.AIOCS_EMBEDDING_JOB_LIMIT_PER_CYCLE, "AIOCS_EMBEDDING_JOB_LIMIT_PER_CYCLE", 2),
    lexicalCandidateWindow: positive(env.AIOCS_LEXICAL_CANDIDATE_WINDOW, "AIOCS_LEXICAL_CANDIDATE_WINDOW", 40),
    vectorCandidateWindow: positive(env.AIOCS_VECTOR_CANDIDATE_WINDOW, "AIOCS_VECTOR_CANDIDATE_WINDOW", 40),
    rrfK: positive(env.AIOCS_RRF_K, "AIOCS_RRF_K", 60)
  };
}
2899
+
2900
+ // src/spec/source-spec-files.ts
2901
+ import { access, readdir } from "fs/promises";
2902
+ import { constants as fsConstants } from "fs";
2903
+ import { extname as extname2, join as join5, resolve as resolve3 } from "path";
2904
// File extensions recognized as source spec documents (compared lowercase).
var SOURCE_SPEC_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".json"]);
2905
/**
 * Resolve each path to an absolute form and drop duplicates,
 * preserving first-seen order.
 */
function uniqueResolvedPaths(paths) {
  const visited = new Set();
  const result = [];
  for (const candidate of paths) {
    const absolute = resolve3(candidate);
    if (!visited.has(absolute)) {
      visited.add(absolute);
      result.push(absolute);
    }
  }
  return result;
}
2918
/** Check whether a filesystem entry exists (any type) without throwing. */
async function pathExists(targetPath) {
  return access(targetPath, fsConstants.F_OK).then(
    () => true,
    () => false
  );
}
2926
/**
 * Recursively collect source-spec file paths (.yaml/.yml/.json) under rootDir,
 * visiting entries in locale-sorted order for deterministic output.
 */
async function walkSourceSpecFiles(rootDir) {
  const found = [];
  const dirEntries = await readdir(rootDir, { withFileTypes: true });
  dirEntries.sort((a, b) => a.name.localeCompare(b.name));
  for (const dirEntry of dirEntries) {
    const fullPath = join5(rootDir, dirEntry.name);
    if (dirEntry.isDirectory()) {
      // Depth-first descent keeps the flattened list deterministic.
      const nested = await walkSourceSpecFiles(fullPath);
      found.push(...nested);
    } else if (dirEntry.isFile() && SOURCE_SPEC_EXTENSIONS.has(extname2(dirEntry.name).toLowerCase())) {
      found.push(fullPath);
    }
  }
  return found;
}
2944
+
2945
+ // src/daemon.ts
2946
// Default refresh cadence when AIOCS_DAEMON_INTERVAL_MINUTES is unset.
var DEFAULT_INTERVAL_MINUTES = 60;
// Spec directory location used by the container deployment.
var DEFAULT_CONTAINER_SOURCE_DIR = "/app/sources";
// Accepted spellings for boolean environment variables (matched lowercase).
var BOOLEAN_TRUE_VALUES = /* @__PURE__ */ new Set(["1", "true", "yes", "on"]);
var BOOLEAN_FALSE_VALUES = /* @__PURE__ */ new Set(["0", "false", "no", "off"]);
2950
/** Current wall-clock time as an ISO-8601 UTC string. */
function nowIso3() {
  return new Date().toISOString();
}
2953
/**
 * Strictly parse a decimal string into a positive integer.
 * Rejects signs, decimals, exponents, whitespace, and zero.
 *
 * @throws Error naming the offending environment variable.
 */
function parsePositiveInteger2(raw, variableName) {
  const invalid = () => new Error(`${variableName} must be a positive integer`);
  // Digits only — rejects "+5", "5.0", "1e3", and surrounding whitespace.
  if (!/^\d+$/.test(raw)) {
    throw invalid();
  }
  const parsed = Number(raw);
  if (!Number.isInteger(parsed) || parsed <= 0) {
    throw invalid();
  }
  return parsed;
}
2963
/**
 * Parse a boolean-ish environment value: accepts 1/0, true/false, yes/no,
 * on/off in any case, with surrounding whitespace ignored.
 *
 * @throws Error naming the variable for unrecognized values.
 */
function parseBoolean(raw, variableName) {
  switch (raw.trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "off":
      return false;
    default:
      throw new Error(`${variableName} must be one of: true, false, 1, 0, yes, no, on, off`);
  }
}
2973
/**
 * Derive daemon configuration from environment variables, falling back to the
 * bundled/user/container source directories when AIOCS_SOURCE_SPEC_DIRS is unset.
 *
 * @param options optional overrides for the three default directory locations.
 * @throws Error when a variable fails validation or the explicit dir list is empty.
 */
function parseDaemonConfig(env, options = {}) {
  const rawInterval = env.AIOCS_DAEMON_INTERVAL_MINUTES;
  const intervalMinutes = rawInterval ? parsePositiveInteger2(rawInterval, "AIOCS_DAEMON_INTERVAL_MINUTES") : DEFAULT_INTERVAL_MINUTES;
  const rawFetchOnStart = env.AIOCS_DAEMON_FETCH_ON_START;
  const fetchOnStart = rawFetchOnStart ? parseBoolean(rawFetchOnStart, "AIOCS_DAEMON_FETCH_ON_START") : true;
  // Built-in search locations, deduplicated after path resolution.
  const fallbackDirs = uniqueResolvedPaths([
    options.bundledSourceDir ?? getBundledSourcesDir(),
    options.userSourceDir ?? getAiocsSourcesDir(env),
    options.containerSourceDir ?? DEFAULT_CONTAINER_SOURCE_DIR
  ]);
  const explicitDirsRaw = env.AIOCS_SOURCE_SPEC_DIRS;
  const sourceSpecDirs = explicitDirsRaw ? uniqueResolvedPaths(
    explicitDirsRaw.split(",").map((part) => part.trim()).filter(Boolean)
  ) : fallbackDirs;
  if (explicitDirsRaw && sourceSpecDirs.length === 0) {
    throw new Error("AIOCS_SOURCE_SPEC_DIRS must include at least one directory");
  }
  return {
    intervalMinutes,
    fetchOnStart,
    // Strict mode: the operator explicitly listed directories, so missing ones are errors.
    strictSourceSpecDirs: Boolean(explicitDirsRaw),
    sourceSpecDirs
  };
}
2994
/**
 * Discover source spec files in the configured directories, upsert each into
 * the catalog, and prune catalog sources whose spec file has disappeared from
 * the managed roots.
 *
 * @param input { catalog, sourceSpecDirs, strictSourceSpecDirs? }
 * @throws Error in strict mode when directories are missing or no specs exist.
 */
async function bootstrapSourceSpecs(input) {
  const normalizedSourceSpecDirs = uniqueResolvedPaths(input.sourceSpecDirs);
  const missingDirs = [];
  const existingDirs = [];
  const sources = [];
  // Partition configured directories into present/absent.
  for (const sourceSpecDir of normalizedSourceSpecDirs) {
    if (!await pathExists(sourceSpecDir)) {
      missingDirs.push(sourceSpecDir);
      continue;
    }
    existingDirs.push(sourceSpecDir);
  }
  // Missing directories are only fatal when the operator explicitly listed them.
  if (input.strictSourceSpecDirs && missingDirs.length > 0) {
    throw new Error(`Missing source spec directories: ${missingDirs.join(", ")}`);
  }
  for (const sourceSpecDir of existingDirs) {
    const specPaths = await walkSourceSpecFiles(sourceSpecDir);
    for (const specPath of specPaths) {
      const spec = await loadSourceSpec(specPath);
      const upserted = input.catalog.upsertSource(spec, { specPath });
      sources.push({
        sourceId: upserted.sourceId,
        configHash: upserted.configHash,
        configChanged: upserted.configChanged,
        specPath
      });
    }
  }
  if (input.strictSourceSpecDirs && sources.length === 0) {
    throw new Error(`No source spec files found in configured directories: ${normalizedSourceSpecDirs.join(", ")}`);
  }
  // Drop catalog sources managed under these roots that no longer have a spec.
  const removedSourceIds = input.catalog.removeManagedSources({
    managedRoots: existingDirs.map((sourceSpecDir) => resolve4(sourceSpecDir)),
    activeSources: sources.map((source) => ({
      sourceId: source.sourceId,
      specPath: source.specPath
    }))
  });
  return {
    processedSpecCount: sources.length,
    removedSourceIds,
    sources
  };
}
3038
/**
 * Execute one full daemon cycle: bootstrap specs, run due canary checks,
 * refresh due sources, then process pending embedding jobs. Each phase
 * isolates per-source failures so one bad source cannot abort the cycle.
 *
 * @param input { catalog, dataDir, sourceSpecDirs, strictSourceSpecDirs?, referenceTime? }
 * @returns a timestamped report of everything attempted in this cycle.
 */
async function runDaemonCycle(input) {
  const startedAt = nowIso3();
  const bootstrapped = await bootstrapSourceSpecs({
    catalog: input.catalog,
    sourceSpecDirs: input.sourceSpecDirs,
    // Only forward strictSourceSpecDirs when the caller set it explicitly.
    ...input.strictSourceSpecDirs !== void 0 ? { strictSourceSpecDirs: input.strictSourceSpecDirs } : {}
  });
  // Due for refresh: schedule-due sources plus anything whose spec changed.
  const dueSourceIds = [
    .../* @__PURE__ */ new Set([
      ...input.catalog.listDueSourceIds(input.referenceTime ?? startedAt),
      ...bootstrapped.sources.filter((source) => source.configChanged).map((source) => source.sourceId)
    ])
  ];
  // Due for canary: schedule-due, config-changed, and never-checked sources.
  const canaryDueSourceIds = [
    .../* @__PURE__ */ new Set([
      ...input.catalog.listCanaryDueSourceIds(input.referenceTime ?? startedAt),
      ...bootstrapped.sources.filter((source) => source.configChanged).map((source) => source.sourceId),
      ...input.catalog.listSources().filter((source) => source.lastCanaryCheckedAt === null).map((source) => source.id)
    ])
  ];
  const canaried = [];
  const canaryFailed = [];
  const refreshed = [];
  const failed = [];
  const embedded = [];
  const embeddingFailed = [];
  // Phase 1: canary checks (failures recorded, never fatal for the cycle).
  for (const sourceId of canaryDueSourceIds) {
    try {
      const result = await runSourceCanary({
        catalog: input.catalog,
        sourceId,
        env: process.env
      });
      canaried.push({
        sourceId,
        status: result.status,
        checkedAt: result.checkedAt,
        summary: result.summary
      });
      if (result.status === "fail") {
        canaryFailed.push({
          sourceId,
          errorMessage: `One or more canary checks failed for ${sourceId}`
        });
      }
    } catch (error) {
      canaryFailed.push({
        sourceId,
        errorMessage: error instanceof Error ? error.message : String(error)
      });
    }
  }
  // Phase 2: refresh due sources one at a time.
  for (const sourceId of dueSourceIds) {
    try {
      const result = await fetchSource({
        catalog: input.catalog,
        dataDir: input.dataDir,
        sourceId
      });
      refreshed.push({
        sourceId,
        snapshotId: result.snapshotId,
        pageCount: result.pageCount,
        reused: result.reused
      });
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      failed.push({
        sourceId,
        errorMessage
      });
    }
  }
  // Phase 3: embedding jobs; a top-level failure (e.g. bad embedding config)
  // is reported against the synthetic "system" source.
  try {
    const embeddingResult = await processEmbeddingJobs({
      catalog: input.catalog,
      config: getHybridRuntimeConfig(process.env)
    });
    embedded.push(...embeddingResult.succeededJobs);
    embeddingFailed.push(...embeddingResult.failedJobs);
  } catch (error) {
    embeddingFailed.push({
      sourceId: "system",
      snapshotId: "system",
      errorMessage: error instanceof Error ? error.message : String(error)
    });
  }
  return {
    startedAt,
    finishedAt: nowIso3(),
    dueSourceIds,
    canaryDueSourceIds,
    bootstrapped,
    canaried,
    canaryFailed,
    refreshed,
    failed,
    embedded,
    embeddingFailed
  };
}
3139
/**
 * Run the daemon loop: an optional startup cycle, then one cycle per interval
 * until `input.signal` aborts. Cycle status is persisted to the catalog and
 * lifecycle events are emitted through `input.logger`.
 *
 * @param input { catalog, dataDir, config, logger, signal? }
 */
async function startDaemon(input) {
  const intervalMs = input.config.intervalMinutes * 6e4;
  // Jobs left "running" by a previous crashed process are re-queued.
  input.catalog.resetRunningEmbeddingJobs();
  input.catalog.markDaemonStarted({
    startedAt: nowIso3(),
    intervalMinutes: input.config.intervalMinutes,
    fetchOnStart: input.config.fetchOnStart
  });
  input.logger.emit({
    type: "daemon.started",
    intervalMinutes: input.config.intervalMinutes,
    fetchOnStart: input.config.fetchOnStart,
    sourceSpecDirs: input.config.sourceSpecDirs
  });
  const runCycle = async (reason) => {
    const startedAt = nowIso3();
    input.catalog.markDaemonCycleStarted(startedAt);
    input.logger.emit({
      type: "daemon.cycle.started",
      reason,
      startedAt
    });
    try {
      const result = await runDaemonCycle({
        catalog: input.catalog,
        dataDir: input.dataDir,
        sourceSpecDirs: input.config.sourceSpecDirs,
        strictSourceSpecDirs: input.config.strictSourceSpecDirs,
        referenceTime: startedAt
      });
      // Any per-source failure downgrades the cycle to "degraded".
      input.catalog.markDaemonCycleCompleted({
        completedAt: result.finishedAt,
        status: result.failed.length > 0 || result.canaryFailed.length > 0 || result.embeddingFailed.length > 0 ? "degraded" : "success"
      });
      input.logger.emit({
        type: "daemon.cycle.completed",
        reason,
        result
      });
    } catch (error) {
      // Record the failed cycle before propagating — the loop exits on throw.
      input.catalog.markDaemonCycleCompleted({
        completedAt: nowIso3(),
        status: "failed"
      });
      throw error;
    }
  };
  if (input.config.fetchOnStart && !input.signal?.aborted) {
    await runCycle("startup");
  }
  while (!input.signal?.aborted) {
    try {
      // timers/promises setTimeout rejects with AbortError when the signal fires.
      await sleep2(intervalMs, void 0, { signal: input.signal });
    } catch (error) {
      if (input.signal?.aborted) {
        break;
      }
      throw error;
    }
    // Re-check after the sleep resolves so a late abort skips the cycle.
    if (input.signal?.aborted) {
      break;
    }
    await runCycle("interval");
  }
  input.logger.emit({
    type: "daemon.stopped"
  });
}
3207
+
3208
+ // package.json
3209
// Inlined copy of package.json, embedded at bundle time so runtime metadata
// (name/version/description) is available without reading the file from disk.
var package_default = {
  name: "@bodhi-ventures/aiocs",
  version: "0.1.0",
  license: "MIT",
  type: "module",
  description: "Local-only documentation store, fetcher, and search CLI for AI agents.",
  keywords: [
    "ai",
    "docs",
    "search",
    "mcp",
    "cli"
  ],
  homepage: "https://github.com/Bodhi-Ventures/aiocs",
  bugs: {
    url: "https://github.com/Bodhi-Ventures/aiocs/issues"
  },
  repository: {
    type: "git",
    url: "https://github.com/Bodhi-Ventures/aiocs.git"
  },
  publishConfig: {
    access: "public",
    provenance: true
  },
  packageManager: "pnpm@9.15.9",
  files: [
    "dist",
    "sources",
    "docs",
    "README.md",
    "LICENSE",
    "skills"
  ],
  bin: {
    docs: "./dist/cli.js",
    "aiocs-mcp": "./dist/mcp-server.js"
  },
  engines: {
    node: ">=22"
  },
  scripts: {
    build: "tsup --config tsup.config.ts",
    dev: "tsx src/cli.ts",
    "dev:mcp": "tsx src/mcp-server.ts",
    lint: "tsc --noEmit",
    test: "vitest run",
    "test:watch": "vitest"
  },
  dependencies: {
    "@modelcontextprotocol/sdk": "^1.28.0",
    "@mozilla/readability": "^0.6.0",
    "@qdrant/js-client-rest": "1.17.0",
    "better-sqlite3": "^12.4.1",
    commander: "^14.0.1",
    jsdom: "^27.0.1",
    playwright: "^1.57.0",
    turndown: "^7.2.1",
    "turndown-plugin-gfm": "^1.0.2",
    yaml: "^2.8.1",
    zod: "^4.1.12"
  },
  devDependencies: {
    "@types/better-sqlite3": "^7.6.13",
    "@types/jsdom": "^21.1.7",
    "@types/node": "^24.7.2",
    "@types/turndown": "^5.0.5",
    execa: "^9.6.0",
    tsup: "^8.5.0",
    tsx: "^4.20.6",
    typescript: "^5.9.3",
    vitest: "^3.2.4"
  }
};
3283
+
3284
+ // src/runtime/package-metadata.ts
3285
// Package identity re-exported from the inlined package.json above.
var packageName = package_default.name;
var packageVersion = package_default.version;
var packageDescription = package_default.description;
3288
+
3289
+ // src/services.ts
3290
+ import { resolve as resolve7 } from "path";
3291
+
3292
+ // src/backup.ts
3293
+ import { cp, mkdir, readdir as readdir2, readFile as readFile2, rename, rm, stat, writeFile } from "fs/promises";
3294
+ import { basename, dirname as dirname2, join as join6, resolve as resolve5 } from "path";
3295
+ import { randomUUID as randomUUID2 } from "crypto";
3296
+ import Database2 from "better-sqlite3";
3297
// Filename of the SQLite catalog inside the data directory.
var CATALOG_DB_FILENAME = "catalog.sqlite";
// SQLite WAL / shared-memory side-car suffixes, excluded from plain file copies.
var SQLITE_SIDE_CAR_SUFFIXES = ["-wal", "-shm"];
3299
/** True when the path can be stat-ed; any stat error maps to false. */
async function pathExists2(path) {
  return stat(path).then(
    () => true,
    () => false
  );
}
3307
/**
 * Fail fast with BACKUP_SOURCE_MISSING when the backup source path is absent.
 */
async function assertSourceDirExists(path) {
  const present = await pathExists2(path);
  if (present) {
    return;
  }
  throw new AiocsError(
    AIOCS_ERROR_CODES.backupSourceMissing,
    `Backup source path does not exist: ${path}`
  );
}
3315
/**
 * Treat a missing path as empty; otherwise report whether the directory has
 * zero entries. Non-directory paths propagate readdir's error, as before.
 */
async function isDirectoryEmpty(path) {
  try {
    await stat(path);
  } catch {
    // Path does not exist — nothing would be overwritten.
    return true;
  }
  const names = await readdir2(path);
  return names.length === 0;
}
3321
/**
 * Produce a flat, name-sorted manifest of every file and directory under
 * `root`. Each entry records its root-relative path; the root itself is
 * omitted, and directories precede their contents with size 0.
 */
async function listEntries(root, relativePath = "") {
  const isRoot = relativePath === "";
  const absolutePath = isRoot ? root : join6(root, relativePath);
  const stats = await stat(absolutePath);
  if (!stats.isDirectory()) {
    return [{
      relativePath,
      type: "file",
      size: stats.size
    }];
  }
  const entries = [];
  if (!isRoot) {
    entries.push({
      relativePath,
      type: "directory",
      size: 0
    });
  }
  const childNames = (await readdir2(absolutePath)).sort();
  for (const childName of childNames) {
    const childRelative = isRoot ? childName : join6(relativePath, childName);
    entries.push(...await listEntries(root, childRelative));
  }
  return entries;
}
3342
/**
 * Recursively copy `from` into `to` when it exists, then append the copied
 * entries (paths rebased under `relativePrefix`) to the shared manifest list.
 * A missing source is silently skipped.
 */
async function copyIfPresent(from, to, entries, relativePrefix) {
  const sourcePresent = await pathExists2(from);
  if (!sourcePresent) {
    return;
  }
  await mkdir(to, { recursive: true });
  await cp(from, to, { recursive: true, force: true });
  for (const copied of await listEntries(to)) {
    entries.push({
      ...copied,
      relativePath: join6(relativePrefix, copied.relativePath)
    });
  }
}
3356
/**
 * Copy the data directory into a backup location. Regular files are copied
 * directly, but the live SQLite catalog (and its -wal/-shm side-cars) is
 * excluded from the plain copy and exported via better-sqlite3's online
 * backup API instead, producing a consistent database file.
 *
 * @throws AiocsError(BACKUP_SOURCE_MISSING) when the catalog db is absent.
 */
async function copyDataDirForBackup(from, to) {
  const sourceCatalogPath = join6(from, CATALOG_DB_FILENAME);
  if (!await pathExists2(sourceCatalogPath)) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.backupSourceMissing,
      `Backup source is missing the catalog database: ${sourceCatalogPath}`
    );
  }
  await mkdir(to, { recursive: true });
  await cp(from, to, {
    recursive: true,
    force: true,
    // Skip the catalog and its WAL/SHM files — a raw copy of a live SQLite
    // database can be inconsistent; it is backed up separately below.
    filter: (source) => {
      const name = basename(source);
      if (name === CATALOG_DB_FILENAME) {
        return false;
      }
      return !SQLITE_SIDE_CAR_SUFFIXES.some((suffix) => name === `${CATALOG_DB_FILENAME}${suffix}`);
    }
  });
  const targetCatalogPath = join6(to, CATALOG_DB_FILENAME);
  const sourceCatalog = new Database2(sourceCatalogPath, { readonly: true });
  try {
    // better-sqlite3's backup() performs a transactionally-consistent copy.
    await sourceCatalog.backup(targetCatalogPath);
  } finally {
    sourceCatalog.close();
  }
}
3384
/**
 * Validate a backup directory's structure and return its parsed manifest plus
 * resolved payload directories. `backupConfigDir` is included only when the
 * backup actually contains a config directory.
 *
 * @throws AiocsError(BACKUP_SOURCE_MISSING / BACKUP_INVALID) on malformed input.
 */
async function loadValidatedBackupPayload(inputDir) {
  const manifestPath = join6(inputDir, "manifest.json");
  await assertSourceDirExists(inputDir);
  const invalid = (message) => new AiocsError(AIOCS_ERROR_CODES.backupInvalid, message);
  if (!await pathExists2(manifestPath)) {
    throw invalid(`Backup manifest not found: ${manifestPath}`);
  }
  // NOTE(review): the manifest JSON is only shape-checked (formatVersion and
  // entries array) — individual entries are not validated here.
  const manifest = JSON.parse(await readFile2(manifestPath, "utf8"));
  if (manifest.formatVersion !== 1 || !Array.isArray(manifest.entries)) {
    throw invalid(`Invalid backup manifest: ${manifestPath}`);
  }
  const backupDataDir = join6(inputDir, "data");
  if (!await pathExists2(backupDataDir)) {
    throw invalid(`Backup payload is missing the data directory: ${backupDataDir}`);
  }
  const backupCatalogPath = join6(backupDataDir, CATALOG_DB_FILENAME);
  if (!await pathExists2(backupCatalogPath)) {
    throw invalid(`Backup payload is missing the catalog database: ${backupCatalogPath}`);
  }
  const backupConfigDir = join6(inputDir, "config");
  const hasConfig = await pathExists2(backupConfigDir);
  return {
    manifest,
    backupDataDir,
    ...hasConfig ? { backupConfigDir } : {}
  };
}
3421
/**
 * Copy a backup payload into a hidden, uniquely-named staging directory
 * beside `targetDir`, so the final swap can be performed via rename.
 *
 * @returns the staging directory path.
 */
async function prepareReplacementTarget(backupDir, targetDir) {
  const parentDir = dirname2(targetDir);
  const stagingName = `.${basename(targetDir)}.import-${randomUUID2()}`;
  const stagingDir = join6(parentDir, stagingName);
  // Defensive: clear any leftover staging dir from an interrupted import.
  await rm(stagingDir, { recursive: true, force: true });
  await mkdir(parentDir, { recursive: true });
  await cp(backupDir, stagingDir, { recursive: true, force: true });
  return stagingDir;
}
3429
/**
 * Export the data directory (and optionally a config directory) into
 * `outputDir` together with a manifest listing every copied entry.
 * A non-empty output directory is wiped only when `replaceExisting` is set.
 *
 * @throws AiocsError(BACKUP_CONFLICT) when output is non-empty without replace.
 */
async function exportBackup(input) {
  const dataDir = resolve5(input.dataDir);
  const outputDir = resolve5(input.outputDir);
  const configDir = input.configDir ? resolve5(input.configDir) : void 0;
  await assertSourceDirExists(dataDir);
  if (!await isDirectoryEmpty(outputDir)) {
    if (!input.replaceExisting) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.backupConflict,
        `Backup output directory is not empty: ${outputDir}`
      );
    }
    await rm(outputDir, { recursive: true, force: true });
  }
  await mkdir(outputDir, { recursive: true });
  const entries = [];
  // Data is copied with the SQLite-aware helper; entry paths are rebased
  // under "data/" for the manifest.
  await copyDataDirForBackup(dataDir, join6(outputDir, "data"));
  entries.push(...(await listEntries(join6(outputDir, "data"))).map((entry) => ({
    ...entry,
    relativePath: join6("data", entry.relativePath)
  })));
  if (configDir) {
    // Config is optional; a missing directory is skipped silently.
    await copyIfPresent(configDir, join6(outputDir, "config"), entries, "config");
  }
  const manifest = {
    formatVersion: 1,
    createdAt: (/* @__PURE__ */ new Date()).toISOString(),
    packageVersion,
    entries
  };
  const manifestPath = join6(outputDir, "manifest.json");
  await writeFile(manifestPath, JSON.stringify(manifest, null, 2), "utf8");
  return {
    outputDir,
    manifestPath,
    manifest
  };
}
3467
/**
 * Restore a backup into the data (and optionally config) directory.
 * The payload is first copied into staging directories next to the targets,
 * then swapped in via rm + rename; on failure the staging dirs are removed,
 * leaving previously-swapped targets as-is.
 *
 * @throws AiocsError(BACKUP_INVALID / BACKUP_CONFLICT) on bad payloads or
 *         non-empty targets without `replaceExisting`.
 */
async function importBackup(input) {
  const inputDir = resolve5(input.inputDir);
  const dataDir = resolve5(input.dataDir);
  const configDir = input.configDir ? resolve5(input.configDir) : void 0;
  const { manifest, backupDataDir, backupConfigDir } = await loadValidatedBackupPayload(inputDir);
  if (!await isDirectoryEmpty(dataDir)) {
    if (!input.replaceExisting) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.backupConflict,
        `Backup target data directory is not empty: ${dataDir}`
      );
    }
  }
  // Config is only swapped when both a target and a backup config dir exist.
  if (configDir && backupConfigDir && !await isDirectoryEmpty(configDir)) {
    if (!input.replaceExisting) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.backupConflict,
        `Backup target config directory is not empty: ${configDir}`
      );
    }
  }
  // Stage full copies first so the destructive swap below is short.
  const stagedDataDir = await prepareReplacementTarget(backupDataDir, dataDir);
  const stagedConfigDir = configDir && backupConfigDir ? await prepareReplacementTarget(backupConfigDir, configDir) : void 0;
  try {
    await rm(dataDir, { recursive: true, force: true });
    await rename(stagedDataDir, dataDir);
    if (configDir && stagedConfigDir) {
      await rm(configDir, { recursive: true, force: true });
      await rename(stagedConfigDir, configDir);
    }
  } catch (error) {
    // Best-effort cleanup of staging; the original targets may already be gone.
    await rm(stagedDataDir, { recursive: true, force: true });
    if (stagedConfigDir) {
      await rm(stagedConfigDir, { recursive: true, force: true });
    }
    throw error;
  }
  return {
    inputDir,
    dataDir,
    ...configDir ? { configDir } : {},
    manifest
  };
}
3511
+
3512
+ // src/coverage.ts
3513
+ import { readFile as readFile3 } from "fs/promises";
3514
+ import { resolve as resolve6 } from "path";
3515
/**
 * Canonicalize text for comparison: strip inline markdown markers,
 * collapse runs of whitespace, trim, and lowercase.
 */
function normalizeText(value) {
  const withoutMarkers = value.replace(/[`*_~]+/g, "");
  const collapsed = withoutMarkers.replace(/\s+/g, " ");
  return collapsed.trim().toLowerCase();
}
3518
/** Collect the text of every ATX markdown heading (levels 1-6), trimmed; empty results are dropped. */
function extractHeadings(markdown) {
  const headings = [];
  for (const match of markdown.matchAll(/^#{1,6}\s+(.+)$/gm)) {
    const text = (match[1] ?? "").trim();
    if (text) {
      headings.push(text);
    }
  }
  return headings;
}
3522
/**
 * Split markdown into lines stripped of leading heading/list prefixes, then
 * normalized (markers removed, whitespace collapsed, lowercased), dropping
 * lines that end up empty.
 */
function extractComparableLines(markdown) {
  const comparable = [];
  for (const rawLine of markdown.split("\n")) {
    const withoutPrefix = rawLine.replace(/^\s*(#{1,6}|\d+\.\s+|[-*+]\s+)/, "").trim();
    const normalized = normalizeText(withoutPrefix);
    if (normalized) {
      comparable.push(normalized);
    }
  }
  return comparable;
}
3525
/**
 * Classify where a reference heading is matched inside the corpus:
 * "page_title", "section_title", "body", or null when unmatched
 * (including headings that normalize to the empty string).
 */
function classifyHeading(heading, pageTitles, sectionTitles, comparableMarkdownLines) {
  const normalized = normalizeText(heading);
  if (!normalized) {
    return null;
  }
  if (pageTitles.has(normalized)) {
    return "page_title";
  }
  if (sectionTitles.has(normalized)) {
    return "section_title";
  }
  return comparableMarkdownLines.has(normalized) ? "body" : null;
}
3541
/**
 * Compare a snapshot corpus against one or more reference markdown files:
 * every heading in each reference file must be matched somewhere in the
 * corpus (as a page title, section title, or body line). Returns per-file
 * and aggregate match statistics.
 *
 * @throws AiocsError(INVALID_ARGUMENT) when no reference files are given,
 *         (REFERENCE_FILE_NOT_FOUND) for missing files, and
 *         (INVALID_REFERENCE_FILE) for files without headings.
 */
async function verifyCoverageAgainstReferences(corpus, referenceFiles) {
  if (referenceFiles.length === 0) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.invalidArgument,
      "At least one reference file is required for coverage verification."
    );
  }
  // Pre-compute normalized lookup sets once for the whole corpus.
  const pageTitles = new Set(corpus.entries.map((entry) => normalizeText(entry.pageTitle)).filter(Boolean));
  const sectionTitles = new Set(corpus.entries.map((entry) => normalizeText(entry.sectionTitle)).filter(Boolean));
  const comparableMarkdownLines = new Set(
    corpus.entries.flatMap((entry) => extractComparableLines(entry.markdown))
  );
  const files = [];
  let headingCount = 0;
  let matchedHeadingCount = 0;
  let missingHeadingCount = 0;
  // Aggregate match counts across all reference files.
  const matchCounts = {
    pageTitle: 0,
    sectionTitle: 0,
    body: 0
  };
  for (const referenceFile of referenceFiles) {
    const resolvedReferenceFile = resolve6(referenceFile);
    let raw;
    try {
      raw = await readFile3(resolvedReferenceFile, "utf8");
    } catch (error) {
      // Only ENOENT is translated into a domain error; other I/O errors bubble.
      if (error?.code === "ENOENT") {
        throw new AiocsError(
          AIOCS_ERROR_CODES.referenceFileNotFound,
          `Reference file not found: ${resolvedReferenceFile}`
        );
      }
      throw error;
    }
    const headings = extractHeadings(raw);
    if (headings.length === 0) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.invalidReferenceFile,
        `Reference file does not contain any markdown headings: ${resolvedReferenceFile}`
      );
    }
    // Per-file match counts, tracked alongside the aggregates above.
    const fileMatchCounts = {
      pageTitle: 0,
      sectionTitle: 0,
      body: 0
    };
    const missingHeadings = [];
    for (const heading of headings) {
      const matchType = classifyHeading(heading, pageTitles, sectionTitles, comparableMarkdownLines);
      if (matchType === "page_title") {
        fileMatchCounts.pageTitle += 1;
        matchCounts.pageTitle += 1;
        matchedHeadingCount += 1;
      } else if (matchType === "section_title") {
        fileMatchCounts.sectionTitle += 1;
        matchCounts.sectionTitle += 1;
        matchedHeadingCount += 1;
      } else if (matchType === "body") {
        fileMatchCounts.body += 1;
        matchCounts.body += 1;
        matchedHeadingCount += 1;
      } else {
        missingHeadings.push(heading);
        missingHeadingCount += 1;
      }
    }
    headingCount += headings.length;
    files.push({
      referenceFile: resolvedReferenceFile,
      headingCount: headings.length,
      matchedHeadingCount: headings.length - missingHeadings.length,
      missingHeadingCount: missingHeadings.length,
      missingHeadings,
      matchCounts: fileMatchCounts
    });
  }
  return {
    sourceId: corpus.sourceId,
    snapshotId: corpus.snapshotId,
    // Coverage is complete only when every heading in every file matched.
    complete: missingHeadingCount === 0,
    summary: {
      fileCount: files.length,
      headingCount,
      matchedHeadingCount,
      missingHeadingCount,
      matchCounts
    },
    files
  };
}
3632
+
3633
+ // src/doctor.ts
3634
+ import { access as access2 } from "fs/promises";
3635
+ import { execFile } from "child_process";
3636
+ import { promisify } from "util";
3637
// Promisified child_process.execFile for async external-command checks.
var execFileAsync = promisify(execFile);
3638
/**
 * Roll individual doctor checks up into an overall health summary.
 * Any fail => "unhealthy"; otherwise any warn => "degraded"; else "healthy".
 * Checks with unrecognized statuses count toward checkCount only.
 */
function summarize(checks) {
  const counts = { pass: 0, warn: 0, fail: 0 };
  for (const check of checks) {
    if (check.status in counts) {
      counts[check.status] += 1;
    }
  }
  let status = "healthy";
  if (counts.fail > 0) {
    status = "unhealthy";
  } else if (counts.warn > 0) {
    status = "degraded";
  }
  return {
    status,
    checkCount: checks.length,
    passCount: counts.pass,
    warnCount: counts.warn,
    failCount: counts.fail
  };
}
3650
function toErrorMessage(error) {
  // Extract a human-readable message from any thrown value.
  return error instanceof Error ? error.message : String(error);
}
3656
function parseTimestamp(value) {
  // Treat null/undefined/empty string as "no timestamp recorded".
  if (!value) {
    return null;
  }
  // Unparseable strings also map to null rather than NaN.
  const epochMillis = Date.parse(value);
  if (Number.isNaN(epochMillis)) {
    return null;
  }
  return epochMillis;
}
3663
async function checkCatalog(env) {
  // Verify the on-disk catalog can be opened and queried.
  const dataDir = getAiocsDataDir(env);
  const configDir = getAiocsConfigDir(env);
  let catalog = null;
  try {
    catalog = openCatalog({ dataDir });
    const details = {
      dataDir,
      configDir,
      sourceCount: catalog.listSources().length,
      projectLinkCount: catalog.listProjectLinks().length
    };
    return {
      id: "catalog",
      status: "pass",
      summary: `Catalog opened successfully at ${dataDir}`,
      details
    };
  } catch (error) {
    return {
      id: "catalog",
      status: "fail",
      summary: `Catalog unavailable: ${toErrorMessage(error)}`,
      details: { dataDir, configDir }
    };
  } finally {
    // Release the database handle on both success and failure paths.
    catalog?.close();
  }
}
3696
async function checkPlaywright() {
  // Confirm Playwright is importable and its Chromium binary exists on disk.
  try {
    const playwright = await import("playwright");
    const executablePath = playwright.chromium.executablePath();
    if (!executablePath) {
      return {
        id: "playwright",
        status: "fail",
        summary: "Playwright is installed but Chromium has no resolved executable path."
      };
    }
    // access() throws when the binary is missing or unreadable.
    await access2(executablePath);
    return {
      id: "playwright",
      status: "pass",
      summary: "Playwright Chromium executable is available.",
      details: { executablePath }
    };
  } catch (error) {
    return {
      id: "playwright",
      status: "fail",
      summary: `Playwright is not ready: ${toErrorMessage(error)}`
    };
  }
}
3724
async function checkDaemonConfig(env) {
  // Parse the daemon configuration, returning both the parsed config (reused
  // by later checks) and a doctor check describing the parse outcome.
  try {
    const daemonConfig = parseDaemonConfig(env, {
      bundledSourceDir: getBundledSourcesDir()
    });
    const daemonConfigCheck = {
      id: "daemon-config",
      status: "pass",
      summary: "Daemon configuration parsed successfully.",
      details: daemonConfig
    };
    return { daemonConfig, daemonConfigCheck };
  } catch (error) {
    return {
      daemonConfig: null,
      daemonConfigCheck: {
        id: "daemon-config",
        status: "fail",
        summary: `Daemon configuration is invalid: ${toErrorMessage(error)}`
      }
    };
  }
}
3749
async function checkSourceSpecDirs(daemonConfig) {
  // Without a parsed daemon config there is nothing to validate.
  if (!daemonConfig) {
    return {
      id: "source-spec-dirs",
      status: "fail",
      summary: "Source spec directories cannot be validated until daemon configuration is valid."
    };
  }
  // Probe each configured directory and count the spec files it holds.
  const directories = await Promise.all(
    daemonConfig.sourceSpecDirs.map(async (directory) => {
      const exists = await pathExists(directory);
      return {
        directory,
        exists,
        specCount: exists ? (await walkSourceSpecFiles(directory)).length : 0
      };
    })
  );
  const existingCount = directories.filter((entry) => entry.exists).length;
  const totalSpecCount = directories.reduce((sum, entry) => sum + entry.specCount, 0);
  const anyMissing = directories.some((entry) => !entry.exists);
  // Decide severity: missing dirs are fatal in strict mode, otherwise warnings.
  let status = "pass";
  let summary = `Validated ${directories.length} source spec director${directories.length === 1 ? "y" : "ies"}.`;
  if (directories.length === 0) {
    status = "fail";
    summary = "No source spec directories are configured.";
  } else if (daemonConfig.strictSourceSpecDirs && anyMissing) {
    status = "fail";
    summary = "One or more explicitly configured source spec directories are missing.";
  } else if (existingCount === 0) {
    status = "warn";
    summary = "No configured source spec directories currently exist.";
  } else if (totalSpecCount === 0) {
    status = "warn";
    summary = "Configured source spec directories exist but contain no source specs.";
  } else if (anyMissing) {
    status = "warn";
    summary = "Some optional source spec directories are missing.";
  }
  return {
    id: "source-spec-dirs",
    status,
    summary,
    details: {
      strict: daemonConfig.strictSourceSpecDirs,
      directories
    }
  };
}
3796
async function checkFreshness(env) {
  // Flag sources whose snapshots are overdue, and canaries that are due or
  // whose last run failed.
  const dataDir = getAiocsDataDir(env);
  let catalog = null;
  try {
    catalog = openCatalog({ dataDir });
    const sources = catalog.listSources();
    const referenceTime = Date.now();
    if (sources.length === 0) {
      return {
        id: "freshness",
        status: "pass",
        summary: "No sources are registered, so no source freshness checks are pending.",
        details: { sourceCount: 0 }
      };
    }
    const staleSources = [];
    const staleCanaries = [];
    for (const source of sources) {
      // A source is stale when it has never fetched successfully or its
      // scheduled refresh time has passed.
      if (!source.lastSuccessfulSnapshotId || Date.parse(source.nextDueAt) <= referenceTime) {
        staleSources.push({
          sourceId: source.id,
          nextDueAt: source.nextDueAt,
          lastSuccessfulSnapshotAt: source.lastSuccessfulSnapshotAt,
          lastSuccessfulSnapshotAgeMinutes: source.lastSuccessfulSnapshotAt ? Math.floor((referenceTime - Date.parse(source.lastSuccessfulSnapshotAt)) / 6e4) : null
        });
      }
      const canaryDue = source.nextCanaryDueAt && Date.parse(source.nextCanaryDueAt) <= referenceTime;
      if (canaryDue || source.lastCanaryStatus === "fail") {
        staleCanaries.push({
          sourceId: source.id,
          nextCanaryDueAt: source.nextCanaryDueAt,
          lastCanaryCheckedAt: source.lastCanaryCheckedAt,
          lastCanaryStatus: source.lastCanaryStatus
        });
      }
    }
    const healthy = staleSources.length === 0 && staleCanaries.length === 0;
    return {
      id: "freshness",
      status: healthy ? "pass" : "warn",
      summary: healthy ? "Source snapshots and canaries are fresh." : `Source freshness issues detected: ${staleSources.length} stale snapshot scope(s), ${staleCanaries.length} stale/failed canary scope(s).`,
      details: {
        sourceCount: sources.length,
        staleSources,
        staleCanaries,
        checkedAt: new Date(referenceTime).toISOString()
      }
    };
  } catch (error) {
    return {
      id: "freshness",
      status: "fail",
      summary: `Freshness checks failed: ${toErrorMessage(error)}`
    };
  } finally {
    catalog?.close();
  }
}
3850
async function checkDaemonHeartbeat(env) {
  // Inspect the daemon's recorded state to see whether its cycle is running
  // recently and finishing without failure.
  const dataDir = getAiocsDataDir(env);
  let catalog = null;
  try {
    catalog = openCatalog({ dataDir });
    const daemonState = catalog.getDaemonState();
    if (!daemonState) {
      return {
        id: "daemon-heartbeat",
        status: "warn",
        summary: "No daemon heartbeat has been recorded yet."
      };
    }
    const completedAt = parseTimestamp(daemonState.lastCycleCompletedAt);
    if (!completedAt) {
      return {
        id: "daemon-heartbeat",
        status: "warn",
        summary: "Daemon heartbeat exists but no completed cycle has been recorded yet.",
        details: daemonState
      };
    }
    // Default to an hourly cycle when no interval has been recorded.
    const intervalMinutes = daemonState.intervalMinutes ?? 60;
    const ageMinutes = Math.floor((Date.now() - completedAt) / 6e4);
    // Older than two intervals counts as stale.
    const stale = ageMinutes > intervalMinutes * 2;
    const unhealthyStatus = daemonState.lastCycleStatus === "failed" || daemonState.lastCycleStatus === "degraded";
    const problem = stale || unhealthyStatus;
    return {
      id: "daemon-heartbeat",
      status: problem ? "warn" : "pass",
      summary: problem ? `Daemon heartbeat is stale or unhealthy (age=${ageMinutes}m, status=${daemonState.lastCycleStatus ?? "unknown"}).` : `Daemon heartbeat is recent (age=${ageMinutes}m).`,
      details: {
        ...daemonState,
        ageMinutes
      }
    };
  } catch (error) {
    return {
      id: "daemon-heartbeat",
      status: "fail",
      summary: `Daemon heartbeat check failed: ${toErrorMessage(error)}`
    };
  } finally {
    catalog?.close();
  }
}
3895
async function checkEmbeddingProvider(env) {
  // Probe the embedding provider: reachability plus local model availability.
  try {
    const config = getHybridRuntimeConfig(env);
    const providerStatus = await getEmbeddingProviderStatus(config);
    const summary = providerStatus.ok ? `Embedding provider is ready with model ${providerStatus.model}.` : `Embedding provider is reachable but model ${providerStatus.model} is not available locally.`;
    return {
      id: "embedding-provider",
      status: providerStatus.ok ? "pass" : "warn",
      summary,
      details: providerStatus
    };
  } catch (error) {
    return {
      id: "embedding-provider",
      status: "fail",
      summary: `Embedding provider check failed: ${toErrorMessage(error)}`
    };
  }
}
3913
async function checkVectorStore(env) {
  // Probe Qdrant health for the configured collection.
  try {
    const config = getHybridRuntimeConfig(env);
    const health = await new AiocsVectorStore(config).getHealth();
    const details = {
      qdrantUrl: config.qdrantUrl,
      collection: config.qdrantCollection
    };
    if (health.collections) {
      details.collections = health.collections;
    }
    return {
      id: "vector-store",
      status: health.ok ? "pass" : "warn",
      summary: health.ok ? `Qdrant is reachable at ${config.qdrantUrl}.` : `Qdrant is not ready at ${config.qdrantUrl}: ${health.errorMessage ?? "unknown error"}`,
      details
    };
  } catch (error) {
    return {
      id: "vector-store",
      status: "fail",
      summary: `Vector store check failed: ${toErrorMessage(error)}`
    };
  }
}
3935
async function checkEmbeddings(env) {
  // Report embedding coverage of the latest snapshots plus queue backlog.
  const dataDir = getAiocsDataDir(env);
  let catalog = null;
  try {
    catalog = openCatalog({ dataDir });
    const overview = catalog.getEmbeddingOverview();
    const underIndexedSources = [];
    for (const source of overview.sources) {
      // Only sources with chunks that are not fully indexed count as backlog.
      if (source.totalChunks > 0 && source.indexedChunks < source.totalChunks) {
        underIndexedSources.push({
          sourceId: source.sourceId,
          snapshotId: source.snapshotId,
          coverageRatio: source.coverageRatio,
          totalChunks: source.totalChunks,
          indexedChunks: source.indexedChunks,
          pendingChunks: source.pendingChunks,
          failedChunks: source.failedChunks,
          staleChunks: source.staleChunks
        });
      }
    }
    const queue = overview.queue;
    const backlog = queue.failedJobs > 0 || underIndexedSources.length > 0 || queue.pendingJobs > 0 || queue.runningJobs > 0;
    return {
      id: "embeddings",
      status: backlog ? "warn" : "pass",
      summary: backlog ? `Embedding backlog detected: ${queue.pendingJobs} pending, ${queue.runningJobs} running, ${queue.failedJobs} failed job(s).` : "Embedding coverage is complete for latest snapshots.",
      details: {
        queue,
        underIndexedSources
      }
    };
  } catch (error) {
    return {
      id: "embeddings",
      status: "fail",
      summary: `Embedding status check failed: ${toErrorMessage(error)}`
    };
  } finally {
    catalog?.close();
  }
}
3971
async function checkDocker() {
  // Ask the Docker CLI for its server version. Failures are warnings, not
  // errors, because Docker is optional on this machine.
  try {
    const { stdout } = await execFileAsync("docker", ["info", "--format", "{{json .ServerVersion}}"]);
    const serverVersion = JSON.parse(stdout.trim());
    return {
      id: "docker",
      status: "pass",
      summary: `Docker is available (server ${serverVersion}).`,
      details: { serverVersion }
    };
  } catch (error) {
    const message = toErrorMessage(error);
    // ENOENT means the docker binary itself is absent.
    const summary = message.includes("ENOENT") ? "Docker CLI is not installed; Docker-based daemon deployment is unavailable on this machine." : `Docker is not ready: ${message}`;
    return {
      id: "docker",
      status: "warn",
      summary
    };
  }
}
3999
async function runDoctor(env = process.env) {
  // Execute every diagnostic in a fixed order. Checks are awaited one at a
  // time because several of them open the same catalog database.
  const checks = [];
  checks.push(await checkCatalog(env));
  checks.push(await checkPlaywright());
  const { daemonConfigCheck, daemonConfig } = await checkDaemonConfig(env);
  checks.push(daemonConfigCheck);
  checks.push(await checkSourceSpecDirs(daemonConfig));
  checks.push(await checkFreshness(env));
  checks.push(await checkDaemonHeartbeat(env));
  checks.push(await checkEmbeddingProvider(env));
  checks.push(await checkVectorStore(env));
  checks.push(await checkEmbeddings(env));
  checks.push(await checkDocker());
  return {
    summary: summarize(checks),
    checks
  };
}
4027
+
4028
+ // src/hybrid/rank.ts
4029
function reciprocalRankFusion(candidateLists, rrfK) {
  // Fuse several ranked candidate lists with Reciprocal Rank Fusion:
  // each appearance contributes 1 / (rrfK + rank) to the chunk's score.
  const accumulators = new Map();
  for (const list of candidateLists) {
    for (const { chunkId, rank, signal } of list) {
      let entry = accumulators.get(chunkId);
      if (!entry) {
        entry = { fusedScore: 0, signals: new Set() };
        accumulators.set(chunkId, entry);
      }
      entry.fusedScore += 1 / (rrfK + rank);
      entry.signals.add(signal);
    }
  }
  const fused = [];
  for (const [chunkId, entry] of accumulators) {
    fused.push({
      chunkId,
      fusedScore: entry.fusedScore,
      signals: [...entry.signals]
    });
  }
  // Highest fused score first; ties broken by ascending chunk id.
  fused.sort((left, right) => right.fusedScore - left.fusedScore || left.chunkId - right.chunkId);
  return fused;
}
4048
+
4049
+ // src/hybrid/search.ts
4050
function windowSize(limit, offset, minimum) {
  // Candidate window size: enough rows to serve limit+offset, but never
  // smaller than the configured floor.
  const requestedSpan = limit + offset;
  return Math.max(requestedSpan, minimum);
}
4053
function withScores(rows, scoreLookup) {
  // Attach score/signals from the lookup to each row; rows without an entry
  // get a zero score tagged as a lexical-only hit.
  return rows.map((row) => {
    const ranked = scoreLookup.get(row.chunkId);
    if (ranked) {
      return { ...row, score: ranked.score, signals: ranked.signals };
    }
    return { ...row, score: 0, signals: ["lexical"] };
  });
}
4066
// Run a catalog search in one of three modes ("lexical", "semantic", "auto"/hybrid).
// In auto mode the function degrades to lexical-only whenever the semantic path
// cannot be trusted: missing embeddings coverage, embedding/vector-store errors,
// or an empty vector candidate set. Returns a paged result object
// { query, total, limit, offset, hasMore, modeRequested, modeUsed, results }.
async function searchHybridCatalog(input) {
  // Resolve which snapshots/sources the query applies to; optional inputs are
  // only forwarded when present so catalog defaults apply.
  const scope = input.catalog.resolveSearchScope({
    query: input.query,
    ...input.searchInput.cwd ? { cwd: input.searchInput.cwd } : {},
    ...input.searchInput.sourceIds ? { sourceIds: input.searchInput.sourceIds } : {},
    ...input.searchInput.snapshotId ? { snapshotId: input.searchInput.snapshotId } : {},
    ...input.searchInput.all ? { all: true } : {},
    ...typeof input.searchInput.limit === "number" ? { limit: input.searchInput.limit } : {},
    ...typeof input.searchInput.offset === "number" ? { offset: input.searchInput.offset } : {}
  });
  // Shared fallback: plain FTS search with synthetic 1/rank scores.
  const lexicalOnly = () => {
    const lexical = input.catalog.searchLexical({
      query: input.query,
      scope
    });
    return {
      query: input.query,
      total: lexical.total,
      limit: lexical.limit,
      offset: lexical.offset,
      hasMore: lexical.hasMore,
      modeRequested: input.mode,
      modeUsed: "lexical",
      results: lexical.results.map((result, index) => ({
        ...result,
        score: 1 / (index + 1),
        signals: ["lexical"]
      }))
    };
  };
  // Nothing in scope: return an empty page without touching any backend.
  if (scope.snapshotIds.length === 0) {
    return {
      query: input.query,
      total: 0,
      limit: scope.limit,
      offset: scope.offset,
      hasMore: false,
      modeRequested: input.mode,
      modeUsed: input.mode === "semantic" ? "semantic" : "lexical",
      results: []
    };
  }
  if (input.mode === "lexical") {
    return lexicalOnly();
  }
  // Auto mode only uses vectors when every in-scope snapshot is fully indexed.
  const overview = input.catalog.getEmbeddingOverview();
  const snapshotIdSet = new Set(scope.snapshotIds);
  const scopedSources = overview.sources.filter(
    (source) => source.snapshotId ? snapshotIdSet.has(source.snapshotId) : false
  );
  const allSnapshotsIndexed = scopedSources.every(
    (source) => !source.snapshotId || source.totalChunks > 0 && source.indexedChunks === source.totalChunks
  );
  if (input.mode === "auto" && !allSnapshotsIndexed) {
    return lexicalOnly();
  }
  let queryVector;
  let vectorCandidates = [];
  const modelKey = getEmbeddingModelKey(input.config);
  try {
    // Embed the query, then retrieve nearest chunks from the vector store.
    const embedding = await embedTexts(input.config, [input.query]);
    queryVector = embedding[0];
    if (!queryVector) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.embeddingProviderUnavailable,
        "Embedding provider returned no vector for the search query"
      );
    }
    const vectorStore = new AiocsVectorStore(input.config);
    vectorCandidates = await vectorStore.search({
      vector: queryVector,
      snapshotIds: scope.snapshotIds,
      sourceIds: scope.sourceIds,
      modelKey,
      limit: windowSize(scope.limit, scope.offset, input.config.vectorCandidateWindow)
    });
  } catch (error) {
    // Auto mode degrades gracefully; explicit semantic/hybrid requests surface
    // the failure to the caller.
    if (input.mode === "auto") {
      return lexicalOnly();
    }
    throw error;
  }
  if (input.mode === "auto" && vectorCandidates.length === 0) {
    return lexicalOnly();
  }
  if (input.mode === "semantic") {
    // Pure vector ranking: hydrate rows in candidate order, drop any chunk the
    // catalog no longer has, then page.
    const orderedChunkIds2 = vectorCandidates.map((candidate) => candidate.chunkId);
    const chunkRows2 = input.catalog.getChunksByIds(orderedChunkIds2);
    const chunkMap2 = new Map(chunkRows2.map((row) => [row.chunkId, row]));
    const orderedRows2 = orderedChunkIds2.map((chunkId) => chunkMap2.get(chunkId)).filter((row) => Boolean(row));
    const pagedRows2 = orderedRows2.slice(scope.offset, scope.offset + scope.limit);
    const scoreLookup2 = new Map(vectorCandidates.map((candidate) => [
      candidate.chunkId,
      { score: candidate.score, signals: ["vector"] }
    ]));
    return {
      query: input.query,
      total: orderedRows2.length,
      limit: scope.limit,
      offset: scope.offset,
      hasMore: scope.offset + pagedRows2.length < orderedRows2.length,
      modeRequested: input.mode,
      modeUsed: "semantic",
      results: withScores(pagedRows2, scoreLookup2)
    };
  }
  // Hybrid: fetch a wide lexical candidate window (offset 0 so fusion sees the
  // full head of the ranking) and fuse with the vector candidates via RRF.
  const lexicalCandidates = input.catalog.searchLexical({
    query: input.query,
    scope,
    limit: windowSize(scope.limit, scope.offset, input.config.lexicalCandidateWindow),
    offset: 0
  });
  const fused = reciprocalRankFusion([
    lexicalCandidates.results.map((result, index) => ({
      chunkId: result.chunkId,
      rank: index + 1,
      signal: "lexical"
    })),
    vectorCandidates.map((result, index) => ({
      chunkId: result.chunkId,
      rank: index + 1,
      signal: "vector",
      score: result.score
    }))
  ], input.config.rrfK);
  // Hydrate fused ids in rank order, then page the requested slice.
  const orderedChunkIds = fused.map((result) => result.chunkId);
  const chunkRows = input.catalog.getChunksByIds(orderedChunkIds);
  const chunkMap = new Map(chunkRows.map((row) => [row.chunkId, row]));
  const orderedRows = orderedChunkIds.map((chunkId) => chunkMap.get(chunkId)).filter((row) => Boolean(row));
  const pagedRows = orderedRows.slice(scope.offset, scope.offset + scope.limit);
  const scoreLookup = new Map(fused.map((candidate) => [
    candidate.chunkId,
    {
      score: candidate.fusedScore,
      signals: candidate.signals
    }
  ]));
  return {
    query: input.query,
    total: orderedRows.length,
    limit: scope.limit,
    offset: scope.offset,
    hasMore: scope.offset + pagedRows.length < orderedRows.length,
    modeRequested: input.mode,
    modeUsed: "hybrid",
    results: withScores(pagedRows, scoreLookup)
  };
}
4214
+
4215
+ // src/services.ts
4216
// Resolve the data directory and open a catalog handle. Callers are
// responsible for closing the returned catalog (see withCatalog).
function createCatalog() {
  const dataDir = getAiocsDataDir();
  // Called for its side effect only — the return value is intentionally
  // discarded. NOTE(review): presumably this ensures the config directory
  // exists; confirm against getAiocsConfigDir's implementation.
  getAiocsConfigDir();
  return {
    dataDir,
    catalog: openCatalog({ dataDir })
  };
}
4224
// Open a catalog, run the callback, and always close the handle afterwards.
// Returns a promise for whatever `run` produces (sync value or promise).
function withCatalog(run) {
  const ctx = createCatalog();
  try {
    return Promise.resolve(run(ctx)).finally(() => ctx.catalog.close());
  } catch (error) {
    // Bug fix: if `run` throws synchronously, Promise.resolve(...) is never
    // reached and the .finally() cleanup never attaches, leaking the open
    // catalog handle. Close it here before propagating the error.
    ctx.catalog.close();
    throw error;
  }
}
4228
async function upsertSourceFromSpecFile(specFile) {
  // Load a source spec from disk and register (or update) it in the catalog.
  const specPath = resolve7(specFile);
  const spec = await loadSourceSpec(specPath);
  const { sourceId, configHash } = await withCatalog(({ catalog }) => catalog.upsertSource(spec, { specPath }));
  return { sourceId, configHash, specPath };
}
4238
async function listSources() {
  // Enumerate every registered source.
  return { sources: await withCatalog(({ catalog }) => catalog.listSources()) };
}
4242
async function fetchSources(sourceIdOrAll) {
  // Fetch one source ("all" => every registered source), then drain the
  // embedding queue so freshly fetched chunks become searchable.
  const results = await withCatalog(async ({ catalog, dataDir }) => {
    const sourceIds = sourceIdOrAll === "all" ? catalog.listSources().map((item) => item.id) : [sourceIdOrAll];
    if (sourceIds.length === 0) {
      return [];
    }
    const fetched = [];
    for (const sourceId of sourceIds) {
      const { snapshotId, pageCount, reused } = await fetchSource({ catalog, sourceId, dataDir });
      fetched.push({ sourceId, snapshotId, pageCount, reused });
    }
    await processEmbeddingJobs({
      catalog,
      config: getHybridRuntimeConfig()
    });
    return fetched;
  });
  return { results };
}
4266
async function refreshDueSources(sourceIdOrAll = "all") {
  // Refresh every source whose schedule says it is due. For a specific source
  // id, verify it exists and refresh it only when it is actually due.
  const results = await withCatalog(async ({ catalog, dataDir }) => {
    let dueIds;
    if (sourceIdOrAll === "all") {
      dueIds = catalog.listDueSourceIds();
    } else {
      const spec = catalog.getSourceSpec(sourceIdOrAll);
      if (!spec) {
        throw new AiocsError(
          AIOCS_ERROR_CODES.sourceNotFound,
          `Unknown source '${sourceIdOrAll}'`
        );
      }
      dueIds = catalog.listDueSourceIds().includes(sourceIdOrAll) ? [sourceIdOrAll] : [];
    }
    const fetched = [];
    for (const sourceId of dueIds) {
      const { snapshotId, pageCount, reused } = await fetchSource({ catalog, sourceId, dataDir });
      fetched.push({ sourceId, snapshotId, pageCount, reused });
    }
    // Index whatever the refreshes produced before returning.
    await processEmbeddingJobs({
      catalog,
      config: getHybridRuntimeConfig()
    });
    return fetched;
  });
  return { results };
}
4296
async function runSourceCanaries(sourceIdOrAll) {
  // Run the canary probe for one source, or for every registered source.
  const results = await withCatalog(async ({ catalog }) => {
    const sourceIds = sourceIdOrAll === "all" ? catalog.listSources().map((item) => item.id) : [sourceIdOrAll];
    const canaried = [];
    for (const sourceId of sourceIds) {
      const outcome = await runSourceCanary({
        catalog,
        sourceId,
        env: process.env
      });
      canaried.push(outcome);
    }
    return canaried;
  });
  return { results };
}
4314
async function listSnapshotsForSource(sourceId) {
  // List all recorded snapshots for one source.
  const snapshots = await withCatalog(({ catalog }) => catalog.listSnapshots(sourceId));
  return { sourceId, snapshots };
}
4321
async function diffSnapshotsForSource(input) {
  // Delegate snapshot diffing entirely to the catalog layer.
  const diff = await withCatalog(({ catalog }) => catalog.diffSnapshots(input));
  return diff;
}
4324
async function linkProjectSources(projectPath, sourceIds) {
  // Associate the given sources with a project directory (stored absolute).
  const resolvedProjectPath = resolve7(projectPath);
  await withCatalog(({ catalog }) => {
    catalog.linkProject(resolvedProjectPath, sourceIds);
  });
  return { projectPath: resolvedProjectPath, sourceIds };
}
4334
async function unlinkProjectSources(projectPath, sourceIds) {
  // Remove the association between the given sources and a project directory.
  const resolvedProjectPath = resolve7(projectPath);
  await withCatalog(({ catalog }) => {
    catalog.unlinkProject(resolvedProjectPath, sourceIds);
  });
  return { projectPath: resolvedProjectPath, sourceIds };
}
4344
async function searchCatalog(query, options) {
  // CLI-facing search entry point: resolve the project scope, then run a
  // hybrid (lexical + vector) search through the catalog.
  const cwd = options.project ? resolve7(options.project) : process.cwd();
  const explicitSources = options.source.length > 0;
  const results = await withCatalog(({ catalog }) => {
    const hybridConfig = getHybridRuntimeConfig();
    const scope = resolveProjectScope(cwd, catalog.listProjectLinks());
    // With no explicit sources, no --all, and no linked project there is no
    // way to decide what to search.
    if (!explicitSources && !options.all && !scope) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.noProjectScope,
        "No linked project scope found. Use --source or --all."
      );
    }
    // Build the search input, forwarding only the options that were provided.
    const searchInput = { cwd };
    if (explicitSources) {
      searchInput.sourceIds = options.source;
    }
    if (options.snapshot) {
      searchInput.snapshotId = options.snapshot;
    }
    if (options.all) {
      searchInput.all = true;
    }
    if (typeof options.limit === "number") {
      searchInput.limit = options.limit;
    }
    if (typeof options.offset === "number") {
      searchInput.offset = options.offset;
    }
    return searchHybridCatalog({
      catalog,
      config: hybridConfig,
      query,
      mode: options.mode ?? hybridConfig.defaultSearchMode,
      searchInput
    });
  });
  return {
    query,
    total: results.total,
    limit: results.limit,
    offset: results.offset,
    hasMore: results.hasMore,
    modeRequested: results.modeRequested,
    modeUsed: results.modeUsed,
    results: results.results
  };
}
4382
async function showChunk(chunkId) {
  // Load a single chunk by id; absence is surfaced as a typed error.
  const chunk = await withCatalog(({ catalog }) => catalog.getChunkById(chunkId));
  if (chunk) {
    return { chunk };
  }
  throw new AiocsError(
    AIOCS_ERROR_CODES.chunkNotFound,
    `Chunk ${chunkId} not found`
  );
}
4392
async function verifyCoverage(input) {
  // Compare the stored corpus for a source (optionally a specific snapshot)
  // against the supplied reference files.
  return withCatalog(async ({ catalog }) => {
    const corpusQuery = { sourceId: input.sourceId };
    if (input.snapshotId) {
      corpusQuery.snapshotId = input.snapshotId;
    }
    const corpus = catalog.getCoverageCorpus(corpusQuery);
    return verifyCoverageAgainstReferences(corpus, input.referenceFiles);
  });
}
4401
async function initBuiltInSources(options) {
  // Register the bundled source specs and, when requested, fetch them and
  // index the resulting chunks immediately.
  const sourceSpecDir = options?.sourceSpecDir ?? getBundledSourcesDir();
  const fetched = options?.fetch ?? false;
  const userSourceDir = getAiocsSourcesDir();
  return withCatalog(async ({ catalog, dataDir }) => {
    const bootstrap = await bootstrapSourceSpecs({
      catalog,
      sourceSpecDirs: [sourceSpecDir],
      strictSourceSpecDirs: true
    });
    const fetchResults = [];
    if (fetched) {
      for (const { sourceId } of bootstrap.sources) {
        const { snapshotId, pageCount, reused } = await fetchSource({
          catalog,
          dataDir,
          sourceId
        });
        fetchResults.push({ sourceId, snapshotId, pageCount, reused });
      }
      // Index the freshly fetched chunks before returning.
      await processEmbeddingJobs({
        catalog,
        config: getHybridRuntimeConfig()
      });
    }
    return {
      sourceSpecDir,
      userSourceDir,
      fetched,
      initializedSources: bootstrap.sources,
      removedSourceIds: bootstrap.removedSourceIds,
      fetchResults
    };
  });
}
4441
function getManagedSourceSpecDirectories() {
  // Expose both spec directories: the bundled one and the user-writable one.
  const bundledSourceDir = getBundledSourcesDir();
  const userSourceDir = getAiocsSourcesDir();
  return { bundledSourceDir, userSourceDir };
}
4447
function getDoctorReport(env = process.env) {
  // Public wrapper over the doctor diagnostics; resolves to the full report.
  const report = runDoctor(env);
  return report;
}
4450
async function exportCatalogBackup(input) {
  // Write a portable backup of the data and config directories.
  const backupInput = {
    dataDir: getAiocsDataDir(),
    configDir: getAiocsConfigDir(),
    outputDir: input.outputDir
  };
  if (typeof input.replaceExisting === "boolean") {
    backupInput.replaceExisting = input.replaceExisting;
  }
  return exportBackup(backupInput);
}
4458
async function importCatalogBackup(input) {
  // Restore a backup into the live data/config directories, then invalidate
  // derived embedding state so it is rebuilt against the imported data.
  const importInput = {
    inputDir: input.inputDir,
    dataDir: getAiocsDataDir(),
    configDir: getAiocsConfigDir()
  };
  if (typeof input.replaceExisting === "boolean") {
    importInput.replaceExisting = input.replaceExisting;
  }
  const result = await importBackup(importInput);
  try {
    await new AiocsVectorStore(getHybridRuntimeConfig()).clearCollection();
  } catch {
    // Best effort: ignore vector-store errors so an unreachable Qdrant
    // instance does not block the import itself.
  }
  await withCatalog(({ catalog }) => {
    catalog.resetEmbeddingsAfterImport();
  });
  return result;
}
4474
async function getEmbeddingStatus() {
  // Snapshot of embedding coverage and queue state.
  const overview = await withCatalog(({ catalog }) => catalog.getEmbeddingOverview());
  return overview;
}
4477
async function backfillEmbeddings(sourceIdOrAll) {
  // Requeue embedding jobs for the latest snapshots (all sources or one).
  return withCatalog(({ catalog }) => {
    if (sourceIdOrAll === "all") {
      return catalog.requeueLatestEmbeddingJobs();
    }
    return catalog.requeueLatestEmbeddingJobs([sourceIdOrAll]);
  });
}
4480
async function clearEmbeddings(sourceIdOrAll) {
  // Remove vectors from the store and reset embedding bookkeeping in the
  // catalog, either globally or for a single source.
  return withCatalog(async ({ catalog }) => {
    const vectorStore = new AiocsVectorStore(getHybridRuntimeConfig());
    if (sourceIdOrAll === "all") {
      await vectorStore.clearCollection();
      return catalog.clearEmbeddings();
    }
    const chunkIds = catalog.listEmbeddingChunkIds([sourceIdOrAll]);
    if (chunkIds.length > 0) {
      await vectorStore.deleteChunkIds(chunkIds);
    }
    return catalog.clearEmbeddings([sourceIdOrAll]);
  });
}
4495
async function runEmbeddingWorker() {
  // Drain the embedding job queue once.
  return withCatalog(({ catalog }) => {
    return processEmbeddingJobs({
      catalog,
      config: getHybridRuntimeConfig()
    });
  });
}
4501
+
4502
+ export {
4503
+ AIOCS_ERROR_CODES,
4504
+ AiocsError,
4505
+ toAiocsError,
4506
+ openCatalog,
4507
+ getAiocsDataDir,
4508
+ getAiocsConfigDir,
4509
+ parseDaemonConfig,
4510
+ startDaemon,
4511
+ packageName,
4512
+ packageVersion,
4513
+ packageDescription,
4514
+ upsertSourceFromSpecFile,
4515
+ listSources,
4516
+ fetchSources,
4517
+ refreshDueSources,
4518
+ runSourceCanaries,
4519
+ listSnapshotsForSource,
4520
+ diffSnapshotsForSource,
4521
+ linkProjectSources,
4522
+ unlinkProjectSources,
4523
+ searchCatalog,
4524
+ showChunk,
4525
+ verifyCoverage,
4526
+ initBuiltInSources,
4527
+ getManagedSourceSpecDirectories,
4528
+ getDoctorReport,
4529
+ exportCatalogBackup,
4530
+ importCatalogBackup,
4531
+ getEmbeddingStatus,
4532
+ backfillEmbeddings,
4533
+ clearEmbeddings,
4534
+ runEmbeddingWorker
4535
+ };