@bodhi-ventures/aiocs 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -60
- package/dist/{chunk-AJ5NZDK4.js → chunk-M7YEYMJL.js} +1094 -261
- package/dist/cli.js +14 -2
- package/dist/mcp-server.js +22 -4
- package/docs/README.md +2 -2
- package/docs/codex-integration.md +33 -19
- package/docs/json-contract.md +21 -3
- package/package.json +20 -20
- package/skills/aiocs/SKILL.md +25 -38
- package/skills/aiocs-curation/SKILL.md +110 -0
- package/sources/nktkas-hyperliquid.yaml +30 -0
- package/docs/2026-03-26-agent-json-and-daemon-design.md +0 -157
- package/docs/2026-03-28-hybrid-search-design.md +0 -423
- package/docs/examples/codex-agents/aiocs-docs-specialist.example.toml +0 -21
- package/docs/superpowers/specs/2026-03-29-tag-driven-release-pipeline-design.md +0 -135
|
@@ -43,50 +43,198 @@ function toAiocsError(error) {
|
|
|
43
43
|
return new AiocsError(AIOCS_ERROR_CODES.internalError, String(error));
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
-
// src/
|
|
46
|
+
// src/runtime/paths.ts
|
|
47
|
+
import { homedir } from "os";
|
|
48
|
+
import { join as join2, relative, resolve, sep } from "path";
|
|
47
49
|
import { mkdirSync } from "fs";
|
|
48
|
-
|
|
50
|
+
|
|
51
|
+
// src/runtime/bundled-sources.ts
|
|
52
|
+
import { existsSync } from "fs";
|
|
53
|
+
import { dirname, join } from "path";
|
|
54
|
+
import { fileURLToPath } from "url";
|
|
55
|
+
/**
 * Walk upwards from `startDir` until a directory containing both a
 * `package.json` entry and a `sources` entry is found.
 *
 * @param {string} startDir - Directory to start searching from.
 * @returns {string} The first ancestor (or `startDir` itself) holding both entries.
 * @throws {Error} When the filesystem root is reached without a match.
 */
function findPackageRoot(startDir) {
  const hasRootMarkers = (dir) => ["package.json", "sources"].every((entry) => existsSync(join(dir, entry)));
  let dir = startDir;
  let parent = startDir;
  do {
    dir = parent;
    if (hasRootMarkers(dir)) {
      return dir;
    }
    // dirname() of a filesystem root returns the root itself, which ends the walk.
    parent = dirname(dir);
  } while (parent !== dir);
  throw new Error(`Could not locate aiocs package root from ${startDir}`);
}
|
|
68
|
+
/**
 * Locate the `sources` directory of the package that contains this module.
 *
 * @returns {string} `<package root>/sources`, where the package root is
 *   resolved by `findPackageRoot` starting at this module's directory.
 */
function getBundledSourcesDir() {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  return join(findPackageRoot(moduleDir), "sources");
}
|
|
73
|
+
|
|
74
|
+
// src/runtime/paths.ts
|
|
75
|
+
// Portable spelling emitted by canonicalizeManagedSpecPath for spec paths that
// live under the user's sources directory.
var PORTABLE_USER_SOURCES_PREFIX = "~/.aiocs/sources";
// Portable spelling emitted for spec paths that live under the bundled sources directory.
var PORTABLE_BUNDLED_SOURCES_PREFIX = "aiocs://bundled";
// Additional fixed root treated as a user sources directory
// (presumably the location inside the project's container image — confirm).
var CONTAINER_USER_SOURCES_DIR = "/root/.aiocs/sources";
// Additional fixed root treated as a bundled sources directory
// (presumably the location inside the project's container image — confirm).
var CONTAINER_BUNDLED_SOURCES_DIR = "/app/sources";
|
|
79
|
+
/**
 * Expand a leading `~` or `~/` to the current user's home directory.
 * Any other input (including `~user/...`) is returned unchanged.
 *
 * @param {string} path - Path that may start with a tilde.
 * @returns {string} The expanded path.
 */
function expandTilde(path) {
  if (path === "~") {
    return homedir();
  }
  const startsWithHomePrefix = path.startsWith("~/");
  return startsWithHomePrefix ? join2(homedir(), path.slice(2)) : path;
}
|
|
88
|
+
/**
 * Resolve the aiocs data directory and make sure it exists.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read.
 * @returns {string} `AIOCS_DATA_DIR` (tilde-expanded) when set to a non-empty
 *   value, otherwise `<home>/.aiocs/data`. The directory is created recursively.
 */
function getAiocsDataDir(env = process.env) {
  const configured = env.AIOCS_DATA_DIR;
  const dataDir = configured ? expandTilde(configured) : join2(homedir(), ".aiocs", "data");
  mkdirSync(dataDir, { recursive: true });
  return dataDir;
}
|
|
98
|
+
/**
 * Resolve the aiocs config directory and make sure it exists.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read.
 * @returns {string} `AIOCS_CONFIG_DIR` (tilde-expanded) when set to a non-empty
 *   value, otherwise `<home>/.aiocs/config`. The directory is created recursively.
 */
function getAiocsConfigDir(env = process.env) {
  const configured = env.AIOCS_CONFIG_DIR;
  const configDir = configured ? expandTilde(configured) : join2(homedir(), ".aiocs", "config");
  mkdirSync(configDir, { recursive: true });
  return configDir;
}
|
|
108
|
+
/**
 * Resolve the user's aiocs sources directory and make sure it exists.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read.
 * @returns {string} `AIOCS_SOURCES_DIR` (tilde-expanded) when set to a non-empty
 *   value, otherwise `<home>/.aiocs/sources`. The directory is created recursively.
 */
function getAiocsSourcesDir(env = process.env) {
  const configured = env.AIOCS_SOURCES_DIR;
  const sourcesDir = configured ? expandTilde(configured) : join2(homedir(), ".aiocs", "sources");
  mkdirSync(sourcesDir, { recursive: true });
  return sourcesDir;
}
|
|
118
|
+
/**
 * Report whether `candidatePath` is `rootPath` itself or lies beneath it.
 *
 * The comparison is purely textual, so both arguments are expected to be
 * resolved paths using the platform separator.
 *
 * @param {string} candidatePath - Path to classify.
 * @param {string} rootPath - Directory that may contain the candidate.
 * @returns {boolean} True when the candidate equals or is nested under the root.
 */
function isWithinRoot(candidatePath, rootPath) {
  if (candidatePath === rootPath) {
    return true;
  }
  // A root that already ends with the separator (the filesystem root "/" or a
  // drive root such as "C:\") must not get a second one appended; otherwise the
  // prefix becomes "//" and no path is ever considered inside it.
  const rootPrefix = rootPath.endsWith(sep) ? rootPath : `${rootPath}${sep}`;
  return candidatePath.startsWith(rootPrefix);
}
|
|
121
|
+
/**
 * Re-express `candidatePath` relative to `rootPath` beneath a portable prefix,
 * always using forward slashes.
 *
 * @param {string} prefix - Portable prefix to place in front of the relative part.
 * @param {string} rootPath - Directory the candidate is measured against.
 * @param {string} candidatePath - Path to convert.
 * @returns {string} `prefix` alone when both paths are the same, otherwise
 *   `prefix/<relative path>`.
 */
function toPortablePath(prefix, rootPath, candidatePath) {
  const segments = relative(rootPath, candidatePath).split(sep);
  const portableTail = segments.join("/");
  if (!portableTail) {
    return prefix;
  }
  return `${prefix}/${portableTail}`;
}
|
|
125
|
+
/**
 * Convert a spec path into its portable form when it lives under a managed
 * sources directory.
 *
 * - Paths already written with a portable prefix are returned untouched.
 * - Paths under the user sources directory (or its fixed container
 *   counterpart) become `~/.aiocs/sources/...`.
 * - Paths under the bundled sources directory (or its fixed container
 *   counterpart) become `aiocs://bundled/...`.
 * - Anything else is returned as a resolved absolute path.
 *
 * @param {string} specPath - Path as supplied by the caller.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment
 *   passed through to `getAiocsSourcesDir`.
 * @returns {string} Portable or absolute path.
 */
function canonicalizeManagedSpecPath(specPath, env = process.env) {
  const portablePrefixes = [PORTABLE_USER_SOURCES_PREFIX, PORTABLE_BUNDLED_SOURCES_PREFIX];
  const alreadyPortable = portablePrefixes.some(
    (prefix) => specPath === prefix || specPath.startsWith(`${prefix}/`)
  );
  if (alreadyPortable) {
    return specPath;
  }
  const absolutePath = resolve(specPath);
  const containingRoot = (roots) => roots.find((root) => isWithinRoot(absolutePath, root));

  const userRoot = containingRoot([resolve(getAiocsSourcesDir(env)), CONTAINER_USER_SOURCES_DIR]);
  if (userRoot !== undefined) {
    return toPortablePath(PORTABLE_USER_SOURCES_PREFIX, userRoot, absolutePath);
  }
  // The bundled directory is only looked up once the user roots have been ruled out.
  const bundledRoot = containingRoot([resolve(getBundledSourcesDir()), CONTAINER_BUNDLED_SOURCES_DIR]);
  if (bundledRoot !== undefined) {
    return toPortablePath(PORTABLE_BUNDLED_SOURCES_PREFIX, bundledRoot, absolutePath);
  }
  return absolutePath;
}
|
|
144
|
+
|
|
145
|
+
// src/catalog/catalog.ts
|
|
146
|
+
import { mkdirSync as mkdirSync2 } from "fs";
|
|
147
|
+
import { join as join3, resolve as resolve3 } from "path";
|
|
49
148
|
import { randomUUID } from "crypto";
|
|
50
149
|
import Database from "better-sqlite3";
|
|
51
150
|
|
|
52
151
|
// src/catalog/chunking.ts
|
|
53
152
|
// Upper bound, in UTF-8 bytes, that the chunkers in this module compare
// accumulated content against before starting a new chunk.
var MAX_CHUNK_BYTES = 16384;
// Number of trailing lines chunkLineWindows rewinds by so that consecutive
// windows share some context.
var CHUNK_OVERLAP_LINES = 6;
// Matches a markdown ATX heading line: 1-6 `#` characters, whitespace, then the
// heading text (captured as group 2; the `#` run is group 1).
var HEADING_PATTERN = /^(#{1,6})\s+(.*)$/;
|
|
55
155
|
/**
 * Measure how many bytes `value` occupies when encoded as UTF-8.
 *
 * @param {string} value - Text to measure.
 * @returns {number} Encoded size in bytes.
 */
function byteLength(value) {
  const encoding = "utf8";
  return Buffer.byteLength(value, encoding);
}
|
|
158
|
+
/**
 * Determine the language label for a piece of content.
 *
 * An explicit `language` always wins and is lower-cased. Otherwise the label
 * is derived from the (case-insensitive) file extension.
 *
 * @param {string | null | undefined} filePath - Path whose extension is inspected.
 * @param {string | null | undefined} language - Explicit language, if any.
 * @returns {string | null} Language label, or null when nothing matches.
 */
function normalizeLanguage(filePath, language) {
  if (language) {
    return language.toLowerCase();
  }
  if (!filePath) {
    return null;
  }
  // Each row lists the file suffixes that map to one language label.
  const languageBySuffix = [
    [[".md", ".mdx"], "markdown"],
    [[".ts"], "typescript"],
    [[".tsx"], "tsx"],
    [[".js"], "javascript"],
    [[".jsx"], "jsx"],
    [[".json"], "json"],
    [[".yaml", ".yml"], "yaml"],
    [[".toml"], "toml"],
    [[".py"], "python"],
    [[".rs"], "rust"],
    [[".go"], "go"],
    [[".sql"], "sql"],
    [[".sh"], "shell"]
  ];
  const lowered = filePath.toLowerCase();
  for (const [suffixes, label] of languageBySuffix) {
    if (suffixes.some((suffix) => lowered.endsWith(suffix))) {
      return label;
    }
  }
  return null;
}
|
|
207
|
+
/**
 * Append the accumulated text to `chunks` as one chunk, unless it is blank.
 *
 * @param {Array<object>} chunks - Output list; mutated in place.
 * @param {string} sectionTitle - Title stored on the chunk.
 * @param {string} current - Accumulated text; it is trimmed before storing.
 * @param {number} chunkOrder - Order index to assign to the chunk.
 * @returns {number} The order index to use for the next chunk: unchanged when
 *   nothing was appended, otherwise `chunkOrder + 1`.
 */
function flushChunk(chunks, sectionTitle, current, chunkOrder) {
  const markdown = current.trim();
  if (markdown.length === 0) {
    return chunkOrder;
  }
  chunks.push({ sectionTitle, markdown, chunkOrder });
  return chunkOrder + 1;
}
|
|
58
219
|
/**
 * Split one section into chunks by accumulating whole lines until adding the
 * next line would push the chunk past MAX_CHUNK_BYTES.
 *
 * A single line is never split, so a line that is itself larger than the
 * limit ends up as an oversized chunk.
 *
 * @param {string} sectionTitle - Title stored on every produced chunk.
 * @param {string} markdown - Section text to split.
 * @param {number} startOrder - Order index of the first produced chunk.
 * @returns {Array<object>} Chunks in order, as appended by `flushChunk`.
 */
function splitLargeSection(sectionTitle, markdown, startOrder) {
  const pieces = [];
  let nextOrder = startOrder;
  let buffer = "";
  markdown.split("\n").forEach((line) => {
    const joined = buffer ? buffer + "\n" + line : line;
    // The size check only applies once something is buffered, so the first
    // line of a chunk is always accepted.
    const overflows = Boolean(buffer) && byteLength(joined) > MAX_CHUNK_BYTES;
    if (overflows) {
      nextOrder = flushChunk(pieces, sectionTitle, buffer, nextOrder);
      buffer = line;
    } else {
      buffer = joined;
    }
  });
  flushChunk(pieces, sectionTitle, buffer, nextOrder);
  return pieces;
}
|
|
89
|
-
function
|
|
237
|
+
function chunkMarkdownSectioned(pageTitle, markdown) {
|
|
90
238
|
const trimmed = markdown.trim();
|
|
91
239
|
if (!trimmed) {
|
|
92
240
|
return [];
|
|
@@ -134,6 +282,154 @@ function chunkMarkdown(pageTitle, markdown) {
|
|
|
134
282
|
}
|
|
135
283
|
return chunks;
|
|
136
284
|
}
|
|
285
|
+
/**
 * Detect whether a source line starts a named symbol and, if so, return the
 * symbol's name. The line is trimmed first, so indentation is ignored.
 *
 * Recognised declarations per language:
 * - typescript / tsx / javascript / jsx: functions, classes, interface / type /
 *   enum declarations and `const x =` bindings (with optional `export`,
 *   `default`, `async` modifiers).
 * - python: `def`, `async def`, `class`.
 * - rust: `fn`, `struct`, `enum`, `trait`, with optional `pub` or restricted
 *   visibility such as `pub(crate)`.
 * - go: `func` (including methods with a receiver) and `type`.
 * - json / yaml / toml: a key followed by `:` or `=`.
 * - anything else: generic `function` / `class` / `interface` / `type` / `enum`.
 *
 * @param {string} line - One line of source text.
 * @param {string} language - Language label as produced by normalizeLanguage.
 * @returns {string | null} The symbol name, or null when the line is blank or
 *   does not start a recognised declaration.
 */
function symbolBoundary(line, language) {
  const trimmed = line.trim();
  if (!trimmed) {
    return null;
  }
  const patterns = [];
  switch (language) {
    case "typescript":
    case "tsx":
    case "javascript":
    case "jsx":
      patterns.push(
        /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+([A-Za-z0-9_$]+)/,
        /^(?:export\s+)?(?:default\s+)?class\s+([A-Za-z0-9_$]+)/,
        /^(?:export\s+)?(?:interface|type|enum)\s+([A-Za-z0-9_$]+)/,
        /^(?:export\s+)?const\s+([A-Za-z0-9_$]+)\s*=/
      );
      break;
    case "python":
      patterns.push(/^(?:async\s+def|def|class)\s+([A-Za-z0-9_]+)/);
      break;
    case "rust":
      // `pub(crate)`, `pub(super)` and `pub(in path)` are accepted alongside plain `pub`.
      patterns.push(
        /^(?:pub(?:\([^)]*\))?\s+)?(?:async\s+)?fn\s+([A-Za-z0-9_]+)/,
        /^(?:pub(?:\([^)]*\))?\s+)?(?:struct|enum|trait)\s+([A-Za-z0-9_]+)/
      );
      break;
    case "go":
      // The optional group skips a method receiver such as `(s *Server)` so that
      // methods are titled by their own name instead of being missed.
      patterns.push(/^func\s+(?:\([^)]*\)\s*)?([A-Za-z0-9_]+)/, /^type\s+([A-Za-z0-9_]+)/);
      break;
    case "json":
    case "yaml":
    case "toml":
      patterns.push(/^["']?([A-Za-z0-9_.-]+)["']?\s*[:=]/);
      break;
    default:
      patterns.push(/^(?:export\s+)?(?:async\s+)?function\s+([A-Za-z0-9_$]+)/, /^(?:class|interface|type|enum)\s+([A-Za-z0-9_$]+)/);
      break;
  }
  for (const pattern of patterns) {
    const match = trimmed.match(pattern);
    if (match?.[1]) {
      return match[1];
    }
  }
  return null;
}
|
|
329
|
+
/**
 * List the line indices at which a new named symbol starts.
 *
 * The result always begins with a boundary at index 0: when the first line is
 * not itself a symbol, a synthetic boundary carrying `title` is put in front.
 *
 * @param {string[]} lines - Content split into lines.
 * @param {string} title - Title for the synthetic leading boundary.
 * @param {string} language - Language label forwarded to `symbolBoundary`.
 * @returns {Array<{index: number, title: string}>} Boundaries in line order.
 */
function discoverBoundaries(lines, title, language) {
  const found = [];
  for (let index = 0; index < lines.length; index += 1) {
    const symbol = symbolBoundary(lines[index], language);
    if (symbol) {
      found.push({ index, title: symbol });
    }
  }
  const startsAtFirstLine = found.length > 0 && found[0].index === 0;
  return startsAtFirstLine ? found : [{ index: 0, title }, ...found];
}
|
|
348
|
+
/**
 * Build the section title for a line window: the title followed by the
 * window's line range in parentheses.
 *
 * @param {string} title - Base title.
 * @param {number} startLine - First line of the window.
 * @param {number} endLine - Last line of the window.
 * @returns {string} For example `"utils.ts (1-40)"`.
 */
function buildWindowTitle(title, startLine, endLine) {
  const lineRange = `${startLine}-${endLine}`;
  return `${title} (${lineRange})`;
}
|
|
351
|
+
/**
 * Split content into overlapping windows of whole lines, each at most
 * MAX_CHUNK_BYTES (a single line larger than the limit becomes its own
 * oversized window, since lines are never split).
 *
 * Consecutive windows share up to CHUNK_OVERLAP_LINES trailing lines. Two
 * degenerate cases of that rewind are handled explicitly:
 * - a rewound window that adds no new line (the next line is too large to
 *   join it) is skipped instead of being emitted as a duplicate subset;
 * - a window containing only whitespace is skipped instead of ending the
 *   scan, so the content after it is still chunked.
 *
 * @param {string} title - Base title; each chunk is titled with its 1-based
 *   line range within `content`.
 * @param {string} content - Text to split.
 * @param {number} startOrder - Order index of the first produced chunk.
 * @returns {Array<{sectionTitle: string, markdown: string, chunkOrder: number}>}
 */
function chunkLineWindows(title, content, startOrder) {
  const lines = content.split("\n");
  const chunks = [];
  let start = 0;
  let order = startOrder;
  // Exclusive index of the last line already covered by an emitted or skipped window.
  let coveredEnd = 0;
  while (start < lines.length) {
    let end = start;
    let current = "";
    while (end < lines.length) {
      const candidate = current ? `${current}\n${lines[end]}` : lines[end];
      if (current && byteLength(candidate) > MAX_CHUNK_BYTES) {
        break;
      }
      current = candidate;
      end += 1;
    }
    if (end <= coveredEnd) {
      // The overlap rewind produced nothing new; restart without overlap at the
      // first uncovered line, which is always accepted as the start of a window.
      start = coveredEnd;
      continue;
    }
    const trimmed = current.trim();
    if (trimmed) {
      chunks.push({
        sectionTitle: buildWindowTitle(title, start + 1, end),
        markdown: trimmed,
        chunkOrder: order
      });
      order += 1;
    }
    coveredEnd = end;
    if (end >= lines.length) {
      break;
    }
    // Blank windows carry no context worth repeating, so only rewind after a real chunk.
    start = trimmed ? Math.max(start + 1, end - CHUNK_OVERLAP_LINES) : end;
  }
  return chunks;
}
|
|
385
|
+
/**
 * Chunk source content along symbol boundaries.
 *
 * Content that fits in MAX_CHUNK_BYTES is returned as a single chunk titled
 * with `input.title`. Otherwise each section between two boundaries becomes a
 * chunk titled with the boundary's symbol; sections that are still too large
 * are further split by `chunkLineWindows`.
 *
 * @param {{title: string, content: string}} input - Content and its title.
 * @param {string} language - Language label forwarded to `discoverBoundaries`.
 * @returns {Array<{sectionTitle: string, markdown: string, chunkOrder: number}>}
 *   Chunks numbered consecutively from 0; empty for blank content.
 */
function chunkByBoundaries(input, language) {
  const body = input.content.trim();
  if (body === "") {
    return [];
  }
  const fitsInOneChunk = byteLength(body) <= MAX_CHUNK_BYTES;
  if (fitsInOneChunk) {
    return [{ sectionTitle: input.title, markdown: body, chunkOrder: 0 }];
  }
  const lines = body.split("\n");
  const boundaries = discoverBoundaries(lines, input.title, language);
  const collected = [];
  boundaries.forEach((boundary, position) => {
    const stop = boundaries[position + 1]?.index ?? lines.length;
    const section = lines.slice(boundary.index, stop).join("\n").trim();
    if (section === "") {
      return;
    }
    // The next order index always equals the number of chunks collected so far.
    if (byteLength(section) > MAX_CHUNK_BYTES) {
      collected.push(...chunkLineWindows(boundary.title, section, collected.length));
      return;
    }
    collected.push({
      sectionTitle: boundary.title,
      markdown: section,
      chunkOrder: collected.length
    });
  });
  return collected.length > 0 ? collected : chunkLineWindows(input.title, body, 0);
}
|
|
420
|
+
/**
 * Chunk a page or file using the strategy that fits its language:
 * markdown is chunked by `chunkMarkdownSectioned`, content with no detectable
 * language by plain line windows, and everything else by symbol boundaries.
 *
 * @param {{title: string, content: string, filePath?: string, language?: string}} input
 * @returns {Array<object>} Chunks produced by the selected strategy.
 */
function chunkContent(input) {
  const resolved = normalizeLanguage(input.filePath, input.language);
  if (!resolved) {
    return chunkLineWindows(input.title, input.content.trim(), 0);
  }
  return resolved === "markdown"
    ? chunkMarkdownSectioned(input.title, input.content)
    : chunkByBoundaries(input, resolved);
}
|
|
430
|
+
/**
 * Public alias of `normalizeLanguage`: resolve the language label for a file.
 *
 * @param {string | null | undefined} filePath - Path whose extension may be inspected.
 * @param {string | null | undefined} language - Explicit language, if any.
 * @returns {string | null} Whatever `normalizeLanguage` returns for the same arguments.
 */
function detectLanguage(filePath, language) {
  const detected = normalizeLanguage(filePath, language);
  return detected;
}
|
|
137
433
|
|
|
138
434
|
// src/catalog/fingerprint.ts
|
|
139
435
|
import { createHash } from "crypto";
|
|
@@ -145,6 +441,7 @@ function buildSnapshotFingerprint(input) {
|
|
|
145
441
|
const payload = JSON.stringify({
|
|
146
442
|
sourceId: input.sourceId,
|
|
147
443
|
configHash: input.configHash,
|
|
444
|
+
revisionKey: input.revisionKey ?? null,
|
|
148
445
|
pages: normalizedPages
|
|
149
446
|
});
|
|
150
447
|
return sha256(payload);
|
|
@@ -152,12 +449,12 @@ function buildSnapshotFingerprint(input) {
|
|
|
152
449
|
|
|
153
450
|
// src/catalog/project-scope.ts
|
|
154
451
|
import { realpathSync } from "fs";
|
|
155
|
-
import { resolve } from "path";
|
|
452
|
+
import { resolve as resolve2 } from "path";
|
|
156
453
|
/**
 * Report whether `candidate` is `root` itself or lies beneath it.
 *
 * The comparison is textual. Both `/` and the platform separator are accepted
 * as the boundary after `root`, so resolved native paths work on every
 * platform while the previous `/`-only behaviour is kept.
 *
 * @param {string} candidate - Canonical path to classify.
 * @param {string} root - Canonical directory that may contain the candidate.
 * @returns {boolean} True when the candidate equals or is nested under the root.
 */
function isWithin(candidate, root) {
  if (candidate === root) {
    return true;
  }
  const separators = sep === "/" ? ["/"] : ["/", sep];
  return separators.some((separator) => {
    // A root that already ends with a separator (e.g. "/") must not get a second
    // one appended, or nothing would ever match it.
    const prefix = root.endsWith(separator) ? root : `${root}${separator}`;
    return candidate.startsWith(prefix);
  });
}
|
|
159
456
|
function canonicalizeProjectPath(path) {
|
|
160
|
-
const resolved =
|
|
457
|
+
const resolved = resolve2(path);
|
|
161
458
|
try {
|
|
162
459
|
return realpathSync.native(resolved);
|
|
163
460
|
} catch {
|
|
@@ -173,22 +470,55 @@ function resolveProjectScope(cwd, scopes) {
|
|
|
173
470
|
return normalizedScopes[0] ?? null;
|
|
174
471
|
}
|
|
175
472
|
|
|
473
|
+
// src/patterns.ts
|
|
474
|
+
/**
 * Backslash-escape the regular-expression metacharacters in `value`.
 * `*` is deliberately left alone; `patternToRegex` translates it itself.
 *
 * @param {string} value - Literal text.
 * @returns {string} Text that can be embedded in a RegExp source.
 */
function escapeRegex(value) {
  const metaCharacters = /[|\\{}()[\]^$+?.]/g;
  return value.replace(metaCharacters, (character) => `\\${character}`);
}
|
|
477
|
+
/**
 * Compile a wildcard pattern into an anchored regular expression.
 *
 * `**` matches any run of characters, a single `*` matches any run of
 * characters other than `?` and `#`, and everything else is matched literally.
 *
 * @param {string} pattern - Wildcard pattern.
 * @returns {RegExp} Expression that must match the whole value.
 */
function patternToRegex(pattern) {
  // Splitting on a capturing group keeps the wildcard tokens in the result;
  // the alternation prefers `**` over `*`, matching a left-to-right scan.
  const source = pattern
    .split(/(\*\*|\*)/)
    .map((token) => {
      if (token === "**") {
        return ".*";
      }
      if (token === "*") {
        return "[^?#]*";
      }
      return escapeRegex(token);
    })
    .join("");
  return new RegExp(`^${source}$`);
}
|
|
495
|
+
/**
 * Test whether `value` matches at least one wildcard pattern.
 *
 * @param {string} value - Text to test.
 * @param {string[]} patterns - Wildcard patterns understood by `patternToRegex`.
 * @returns {boolean} True on the first matching pattern, false if none match.
 */
function matchesPatterns(value, patterns) {
  for (const pattern of patterns) {
    if (patternToRegex(pattern).test(value)) {
      return true;
    }
  }
  return false;
}
|
|
498
|
+
/**
 * Translate a wildcard pattern into an argument for SQLite's GLOB operator.
 *
 * - Any run of `*` (`*`, `**`, `***`, ...) collapses to a single `*`, which
 *   GLOB already treats as "any sequence of characters".
 * - `?` and `[` are GLOB metacharacters but are matched literally by
 *   `patternToRegex`, so they are wrapped in a one-character set (`[?]`,
 *   `[[]`) to keep both matchers in agreement — e.g. for paths such as
 *   `app/[id]/page.tsx`.
 *
 * @param {string} pattern - Wildcard pattern.
 * @returns {string} Equivalent GLOB pattern.
 */
function toSqliteGlob(pattern) {
  return pattern.replace(/\*+|[?[]/g, (token) => (token[0] === "*" ? "*" : `[${token}]`));
}
|
|
501
|
+
|
|
176
502
|
// src/spec/source-spec.ts
|
|
177
503
|
import { readFile } from "fs/promises";
|
|
178
504
|
import { extname } from "path";
|
|
179
505
|
import YAML from "yaml";
|
|
180
506
|
import { z } from "zod";
|
|
181
507
|
// A match pattern: any non-empty string.
var patternSchema = z.string().min(1);
// A strictly positive integer; reused for the timeout, limit and interval
// fields of the schemas in this module.
var positiveIntSchema = z.number().int().positive();
// Refresh schedule of a source: `everyHours` must be a positive integer.
var scheduleSchema = z.object({
  everyHours: positiveIntSchema
});
|
|
182
512
|
var interactionSchema = z.discriminatedUnion("action", [
|
|
183
513
|
z.object({
|
|
184
514
|
action: z.literal("hover"),
|
|
185
515
|
selector: z.string().min(1),
|
|
186
|
-
timeoutMs:
|
|
516
|
+
timeoutMs: positiveIntSchema.optional()
|
|
187
517
|
}),
|
|
188
518
|
z.object({
|
|
189
519
|
action: z.literal("click"),
|
|
190
520
|
selector: z.string().min(1),
|
|
191
|
-
timeoutMs:
|
|
521
|
+
timeoutMs: positiveIntSchema.optional()
|
|
192
522
|
}),
|
|
193
523
|
z.object({
|
|
194
524
|
action: z.literal("press"),
|
|
@@ -196,13 +526,13 @@ var interactionSchema = z.discriminatedUnion("action", [
|
|
|
196
526
|
}),
|
|
197
527
|
z.object({
|
|
198
528
|
action: z.literal("wait"),
|
|
199
|
-
timeoutMs:
|
|
529
|
+
timeoutMs: positiveIntSchema
|
|
200
530
|
})
|
|
201
531
|
]);
|
|
202
532
|
// Extraction strategy "clipboardButton": requires at least one interaction
// step; `clipboardTimeoutMs` falls back to 1e4 (10000) when omitted.
var clipboardExtractSchema = z.object({
  strategy: z.literal("clipboardButton"),
  interactions: z.array(interactionSchema).min(1),
  clipboardTimeoutMs: positiveIntSchema.default(1e4)
});
|
|
207
537
|
var selectorExtractSchema = z.object({
|
|
208
538
|
strategy: z.literal("selector"),
|
|
@@ -211,13 +541,13 @@ var selectorExtractSchema = z.object({
|
|
|
211
541
|
// Extraction strategy "readability": takes no further options.
var readabilityExtractSchema = z.object({
  strategy: z.literal("readability")
});
// One auth header for a web source. `valueFromEnv` is a non-empty string
// (presumably the name of the environment variable holding the value — confirm
// against the fetcher). `hosts` and `include` are optional but, when present,
// must be non-empty lists.
var webAuthHeaderSchema = z.object({
  name: z.string().min(1),
  valueFromEnv: z.string().min(1),
  hosts: z.array(z.string().min(1)).min(1).optional(),
  include: z.array(patternSchema).min(1).optional()
});
|
|
220
|
-
var
|
|
550
|
+
var webAuthCookieSchema = z.object({
|
|
221
551
|
name: z.string().min(1),
|
|
222
552
|
valueFromEnv: z.string().min(1),
|
|
223
553
|
domain: z.string().min(1),
|
|
@@ -226,21 +556,36 @@ var authCookieSchema = z.object({
|
|
|
226
556
|
httpOnly: z.boolean().optional(),
|
|
227
557
|
sameSite: z.enum(["Strict", "Lax", "None"]).optional()
|
|
228
558
|
});
|
|
229
|
-
var
|
|
559
|
+
// One canary check for a web source: a URL plus optional expected title/text;
// `minMarkdownLength` defaults to 40.
var webCanaryCheckSchema = z.object({
  url: z.string().url(),
  expectedTitle: z.string().min(1).optional(),
  expectedText: z.string().min(1).optional(),
  minMarkdownLength: positiveIntSchema.default(40)
});
// Credentials for a git source. `username` defaults to "x-access-token" and
// `scheme` to "basic"; "bearer" is the only other accepted scheme.
var gitAuthSchema = z.object({
  tokenFromEnv: z.string().min(1),
  username: z.string().min(1).default("x-access-token"),
  scheme: z.enum(["basic", "bearer"]).default("basic")
});
// One canary check for a git source: a path plus optional expected title/text;
// `minContentLength` defaults to 40.
var gitCanaryCheckSchema = z.object({
  path: z.string().min(1),
  expectedTitle: z.string().min(1).optional(),
  expectedText: z.string().min(1).optional(),
  minContentLength: positiveIntSchema.default(40)
});
// Fields shared by every source kind. `id` is restricted to lower-case
// letters, digits and hyphens.
var baseSourceSpecSchema = z.object({
  id: z.string().min(1).regex(/^[a-z0-9-]+$/),
  label: z.string().min(1),
  schedule: scheduleSchema
});
|
|
581
|
+
var webSourceSpecSchema = baseSourceSpecSchema.extend({
|
|
582
|
+
kind: z.literal("web").default("web"),
|
|
238
583
|
startUrls: z.array(z.string().url()).min(1),
|
|
239
584
|
allowedHosts: z.array(z.string().min(1)).min(1),
|
|
240
585
|
discovery: z.object({
|
|
241
586
|
include: z.array(patternSchema).min(1),
|
|
242
|
-
exclude: z.array(patternSchema),
|
|
243
|
-
maxPages:
|
|
587
|
+
exclude: z.array(patternSchema).default([]),
|
|
588
|
+
maxPages: positiveIntSchema
|
|
244
589
|
}),
|
|
245
590
|
extract: z.discriminatedUnion("strategy", [
|
|
246
591
|
clipboardExtractSchema,
|
|
@@ -250,16 +595,13 @@ var sourceSpecSchema = z.object({
|
|
|
250
595
|
normalize: z.object({
|
|
251
596
|
prependSourceComment: z.boolean().default(true)
|
|
252
597
|
}),
|
|
253
|
-
schedule: z.object({
|
|
254
|
-
everyHours: z.number().int().positive()
|
|
255
|
-
}),
|
|
256
598
|
auth: z.object({
|
|
257
|
-
headers: z.array(
|
|
258
|
-
cookies: z.array(
|
|
599
|
+
headers: z.array(webAuthHeaderSchema).default([]),
|
|
600
|
+
cookies: z.array(webAuthCookieSchema).default([])
|
|
259
601
|
}).optional(),
|
|
260
602
|
canary: z.object({
|
|
261
|
-
everyHours:
|
|
262
|
-
checks: z.array(
|
|
603
|
+
everyHours: positiveIntSchema.optional(),
|
|
604
|
+
checks: z.array(webCanaryCheckSchema).min(1)
|
|
263
605
|
}).optional()
|
|
264
606
|
}).superRefine((spec, context) => {
|
|
265
607
|
for (const [index, header] of (spec.auth?.headers ?? []).entries()) {
|
|
@@ -277,6 +619,47 @@ var sourceSpecSchema = z.object({
|
|
|
277
619
|
}
|
|
278
620
|
}
|
|
279
621
|
});
|
|
622
|
+
// Spec for a source of kind "git".
// Defaults: ref "HEAD", exclude [], maxFiles 2e3 (2000), textFileMaxBytes 262144.
// The refinement restricts `repo.url` to the https:, http: and file: protocols.
var gitSourceSpecSchema = baseSourceSpecSchema.extend({
  kind: z.literal("git"),
  repo: z.object({
    url: z.string().url(),
    ref: z.string().min(1).default("HEAD"),
    include: z.array(patternSchema).min(1),
    exclude: z.array(patternSchema).default([]),
    maxFiles: positiveIntSchema.default(2e3),
    textFileMaxBytes: positiveIntSchema.default(262144),
    auth: gitAuthSchema.optional()
  }),
  canary: z.object({
    everyHours: positiveIntSchema.optional(),
    checks: z.array(gitCanaryCheckSchema).min(1)
  }).optional()
}).superRefine((spec, context) => {
  let protocol;
  try {
    protocol = new URL(spec.repo.url).protocol;
  } catch {
    // NOTE(review): zod v3 appears to run refinements even when an earlier check
    // such as `.url()` has already recorded an issue. That check reports the
    // malformed URL, so bail out here instead of letting `new URL` throw a raw
    // TypeError out of the parse.
    return;
  }
  if (!["https:", "http:", "file:"].includes(protocol)) {
    context.addIssue({
      code: z.ZodIssueCode.custom,
      path: ["repo", "url"],
      message: `Unsupported git source protocol '${protocol}'. Use https:// or file://.`
    });
  }
});
|
|
647
|
+
// Top-level source spec: a union of the web and git specs discriminated by
// `kind`. Plain objects that carry no `kind` key are treated as web specs,
// so specs without a `kind` are parsed as web sources.
var sourceSpecSchema = z.preprocess(
  (value) => {
    const isPlainObject = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isPlainObject || "kind" in value) {
      return value;
    }
    return { ...value, kind: "web" };
  },
  z.discriminatedUnion("kind", [webSourceSpecSchema, gitSourceSpecSchema])
);
|
|
280
663
|
function parseSourceSpec(raw, ext) {
|
|
281
664
|
if (ext === ".json") {
|
|
282
665
|
return JSON.parse(raw);
|
|
@@ -288,8 +671,27 @@ async function loadSourceSpec(path) {
|
|
|
288
671
|
const parsed = parseSourceSpec(raw, extname(path).toLowerCase());
|
|
289
672
|
return sourceSpecSchema.parse(parsed);
|
|
290
673
|
}
|
|
674
|
+
/**
 * Validate an already-deserialised source spec.
 *
 * @param {unknown} value - Candidate spec object.
 * @returns {object} The result of `sourceSpecSchema.parse(value)`.
 * @throws Whatever `sourceSpecSchema.parse` throws for an invalid value.
 */
function parseSourceSpecObject(value) {
  const spec = sourceSpecSchema.parse(value);
  return spec;
}
|
|
677
|
+
/**
 * Tell whether a parsed source spec describes a git source.
 *
 * @param {{kind: string}} spec - Parsed source spec.
 * @returns {boolean} True when `spec.kind` is exactly "git".
 */
function isGitSourceSpec(spec) {
  const { kind } = spec;
  return kind === "git";
}
|
|
291
680
|
function resolveSourceCanary(spec) {
|
|
681
|
+
if (spec.kind === "git") {
|
|
682
|
+
return {
|
|
683
|
+
kind: "git",
|
|
684
|
+
everyHours: spec.canary?.everyHours ?? Math.max(1, Math.min(spec.schedule.everyHours, 6)),
|
|
685
|
+
checks: spec.canary?.checks ?? [
|
|
686
|
+
{
|
|
687
|
+
path: "README.md",
|
|
688
|
+
minContentLength: 40
|
|
689
|
+
}
|
|
690
|
+
]
|
|
691
|
+
};
|
|
692
|
+
}
|
|
292
693
|
return {
|
|
694
|
+
kind: "web",
|
|
293
695
|
everyHours: spec.canary?.everyHours ?? Math.max(1, Math.min(spec.schedule.everyHours, 6)),
|
|
294
696
|
checks: spec.canary?.checks ?? [
|
|
295
697
|
{
|
|
@@ -341,6 +743,9 @@ function initSchema(db) {
|
|
|
341
743
|
title TEXT NOT NULL,
|
|
342
744
|
markdown TEXT NOT NULL,
|
|
343
745
|
content_hash TEXT NOT NULL,
|
|
746
|
+
page_kind TEXT NOT NULL DEFAULT 'document' CHECK(page_kind IN ('document', 'file')),
|
|
747
|
+
file_path TEXT,
|
|
748
|
+
language TEXT,
|
|
344
749
|
UNIQUE(snapshot_id, url)
|
|
345
750
|
);
|
|
346
751
|
|
|
@@ -353,7 +758,10 @@ function initSchema(db) {
|
|
|
353
758
|
page_title TEXT NOT NULL,
|
|
354
759
|
section_title TEXT NOT NULL,
|
|
355
760
|
chunk_order INTEGER NOT NULL,
|
|
356
|
-
markdown TEXT NOT NULL
|
|
761
|
+
markdown TEXT NOT NULL,
|
|
762
|
+
page_kind TEXT NOT NULL DEFAULT 'document' CHECK(page_kind IN ('document', 'file')),
|
|
763
|
+
file_path TEXT,
|
|
764
|
+
language TEXT
|
|
357
765
|
);
|
|
358
766
|
|
|
359
767
|
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
@@ -469,6 +877,26 @@ function initSchema(db) {
|
|
|
469
877
|
if (!sourceColumns.some((column) => column.name === "next_canary_due_at")) {
|
|
470
878
|
db.exec("ALTER TABLE sources ADD COLUMN next_canary_due_at TEXT");
|
|
471
879
|
}
|
|
880
|
+
const pageColumns = db.prepare("PRAGMA table_info(pages)").all();
|
|
881
|
+
if (!pageColumns.some((column) => column.name === "page_kind")) {
|
|
882
|
+
db.exec(`ALTER TABLE pages ADD COLUMN page_kind TEXT NOT NULL DEFAULT 'document'`);
|
|
883
|
+
}
|
|
884
|
+
if (!pageColumns.some((column) => column.name === "file_path")) {
|
|
885
|
+
db.exec("ALTER TABLE pages ADD COLUMN file_path TEXT");
|
|
886
|
+
}
|
|
887
|
+
if (!pageColumns.some((column) => column.name === "language")) {
|
|
888
|
+
db.exec("ALTER TABLE pages ADD COLUMN language TEXT");
|
|
889
|
+
}
|
|
890
|
+
const chunkColumns = db.prepare("PRAGMA table_info(chunks)").all();
|
|
891
|
+
if (!chunkColumns.some((column) => column.name === "page_kind")) {
|
|
892
|
+
db.exec(`ALTER TABLE chunks ADD COLUMN page_kind TEXT NOT NULL DEFAULT 'document'`);
|
|
893
|
+
}
|
|
894
|
+
if (!chunkColumns.some((column) => column.name === "file_path")) {
|
|
895
|
+
db.exec("ALTER TABLE chunks ADD COLUMN file_path TEXT");
|
|
896
|
+
}
|
|
897
|
+
if (!chunkColumns.some((column) => column.name === "language")) {
|
|
898
|
+
db.exec("ALTER TABLE chunks ADD COLUMN language TEXT");
|
|
899
|
+
}
|
|
472
900
|
}
|
|
473
901
|
function nowIso() {
|
|
474
902
|
return (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -476,6 +904,9 @@ function nowIso() {
|
|
|
476
904
|
/**
 * Produce the ISO-8601 timestamp that lies `hours` hours after the current time.
 *
 * @param {number} hours - Offset in hours.
 * @returns {string} Timestamp as returned by `Date.prototype.toISOString`.
 */
function addHoursIso(hours) {
  const offsetMs = hours * 60 * 60 * 1e3;
  const due = new Date(Date.now() + offsetMs);
  return due.toISOString();
}
|
|
907
|
+
/**
 * Determine how often, in hours, a source's canary should run.
 *
 * @param {{canary?: {everyHours?: number}, schedule: {everyHours: number}}} spec
 * @returns {number} The explicit `canary.everyHours` when present; otherwise the
 *   refresh interval clamped to the range 1-6.
 */
function resolveCanaryEveryHours(spec) {
  const explicit = spec.canary?.everyHours;
  if (explicit !== undefined && explicit !== null) {
    return explicit;
  }
  const cappedAtSix = Math.min(spec.schedule.everyHours, 6);
  return Math.max(1, cappedAtSix);
}
|
|
479
910
|
function stableStringify(value) {
|
|
480
911
|
if (Array.isArray(value)) {
|
|
481
912
|
return `[${value.map((entry) => stableStringify(entry)).join(",")}]`;
|
|
@@ -492,6 +923,20 @@ function normalizeQuery(query) {
|
|
|
492
923
|
const words = query.replace(/[^\p{L}\p{N}]+/gu, " ").split(/\s+/).map((part) => part.trim()).filter(Boolean);
|
|
493
924
|
return words.join(" ");
|
|
494
925
|
}
|
|
926
|
+
/**
 * Clean up a list of path-pattern filters: trim each entry, drop blanks and
 * remove duplicates while keeping first-seen order.
 *
 * @param {string[] | null | undefined} patterns - Raw filter list.
 * @returns {string[] | null} Cleaned list, or null when nothing usable remains.
 */
function normalizePatternFilters(patterns) {
  if (!patterns || patterns.length === 0) {
    return null;
  }
  const unique = new Set();
  for (const pattern of patterns) {
    const cleaned = pattern.trim();
    if (cleaned) {
      unique.add(cleaned);
    }
  }
  return unique.size > 0 ? [...unique] : null;
}
|
|
933
|
+
/**
 * Clean up a list of language filters: trim and lower-case each entry, drop
 * blanks and remove duplicates while keeping first-seen order.
 *
 * @param {string[] | null | undefined} languages - Raw filter list.
 * @returns {string[] | null} Cleaned list, or null when nothing usable remains.
 */
function normalizeLanguageFilters(languages) {
  if (!languages || languages.length === 0) {
    return null;
  }
  const unique = new Set();
  for (const language of languages) {
    const cleaned = language.trim().toLowerCase();
    if (cleaned) {
      unique.add(cleaned);
    }
  }
  return unique.size > 0 ? [...unique] : null;
}
|
|
495
940
|
function assertPaginationValue(value, field, fallback) {
|
|
496
941
|
if (typeof value === "undefined") {
|
|
497
942
|
return fallback;
|
|
@@ -511,9 +956,9 @@ function assertPaginationValue(value, field, fallback) {
|
|
|
511
956
|
return value;
|
|
512
957
|
}
|
|
513
958
|
function openCatalog(options) {
|
|
514
|
-
const dataDir =
|
|
515
|
-
|
|
516
|
-
const db = new Database(
|
|
959
|
+
const dataDir = resolve3(options.dataDir);
|
|
960
|
+
mkdirSync2(dataDir, { recursive: true });
|
|
961
|
+
const db = new Database(join3(dataDir, "catalog.sqlite"));
|
|
517
962
|
initSchema(db);
|
|
518
963
|
const listProjectLinks = () => {
|
|
519
964
|
const rows = db.prepare("SELECT project_path, source_id FROM project_links ORDER BY project_path, source_id").all();
|
|
@@ -548,7 +993,9 @@ function openCatalog(options) {
|
|
|
548
993
|
limit,
|
|
549
994
|
offset,
|
|
550
995
|
sourceIds: null,
|
|
551
|
-
snapshotIds: []
|
|
996
|
+
snapshotIds: [],
|
|
997
|
+
pathPatterns: normalizePatternFilters(input.pathPatterns),
|
|
998
|
+
languages: normalizeLanguageFilters(input.languages)
|
|
552
999
|
};
|
|
553
1000
|
}
|
|
554
1001
|
const filterSourceIds = sourceIds && sourceIds.length > 0 ? [...new Set(sourceIds)] : null;
|
|
@@ -562,7 +1009,9 @@ function openCatalog(options) {
|
|
|
562
1009
|
limit,
|
|
563
1010
|
offset,
|
|
564
1011
|
sourceIds: filterSourceIds,
|
|
565
|
-
snapshotIds: latestSnapshotIds
|
|
1012
|
+
snapshotIds: latestSnapshotIds,
|
|
1013
|
+
pathPatterns: normalizePatternFilters(input.pathPatterns),
|
|
1014
|
+
languages: normalizeLanguageFilters(input.languages)
|
|
566
1015
|
};
|
|
567
1016
|
};
|
|
568
1017
|
const searchLexicalByScope = (input) => {
|
|
@@ -580,10 +1029,14 @@ function openCatalog(options) {
|
|
|
580
1029
|
}
|
|
581
1030
|
const whereSnapshotPlaceholders = input.scope.snapshotIds.map(() => "?").join(",");
|
|
582
1031
|
const sourceSql = input.scope.sourceIds ? `AND c.source_id IN (${input.scope.sourceIds.map(() => "?").join(",")})` : "";
|
|
1032
|
+
const pathSql = input.scope.pathPatterns ? `AND c.file_path IS NOT NULL AND (${input.scope.pathPatterns.map(() => "c.file_path GLOB ?").join(" OR ")})` : "";
|
|
1033
|
+
const languageSql = input.scope.languages ? `AND c.language IN (${input.scope.languages.map(() => "?").join(",")})` : "";
|
|
583
1034
|
const queryArgs = [
|
|
584
1035
|
normalized,
|
|
585
1036
|
...input.scope.snapshotIds,
|
|
586
|
-
...input.scope.sourceIds ?? []
|
|
1037
|
+
...input.scope.sourceIds ?? [],
|
|
1038
|
+
...(input.scope.pathPatterns ?? []).map((pattern) => toSqliteGlob(pattern)),
|
|
1039
|
+
...input.scope.languages ?? []
|
|
587
1040
|
];
|
|
588
1041
|
const totalRow = db.prepare(`
|
|
589
1042
|
SELECT COUNT(*) AS total
|
|
@@ -592,6 +1045,8 @@ function openCatalog(options) {
|
|
|
592
1045
|
WHERE chunks_fts MATCH ?
|
|
593
1046
|
AND c.snapshot_id IN (${whereSnapshotPlaceholders})
|
|
594
1047
|
${sourceSql}
|
|
1048
|
+
${pathSql}
|
|
1049
|
+
${languageSql}
|
|
595
1050
|
`).get(...queryArgs);
|
|
596
1051
|
const rows = db.prepare(`
|
|
597
1052
|
SELECT
|
|
@@ -601,12 +1056,17 @@ function openCatalog(options) {
|
|
|
601
1056
|
c.page_url,
|
|
602
1057
|
c.page_title,
|
|
603
1058
|
c.section_title,
|
|
604
|
-
c.markdown
|
|
1059
|
+
c.markdown,
|
|
1060
|
+
c.page_kind,
|
|
1061
|
+
c.file_path,
|
|
1062
|
+
c.language
|
|
605
1063
|
FROM chunks_fts
|
|
606
1064
|
JOIN chunks c ON c.id = chunks_fts.rowid
|
|
607
1065
|
WHERE chunks_fts MATCH ?
|
|
608
1066
|
AND c.snapshot_id IN (${whereSnapshotPlaceholders})
|
|
609
1067
|
${sourceSql}
|
|
1068
|
+
${pathSql}
|
|
1069
|
+
${languageSql}
|
|
610
1070
|
ORDER BY bm25(chunks_fts), c.id
|
|
611
1071
|
LIMIT ?
|
|
612
1072
|
OFFSET ?
|
|
@@ -618,7 +1078,10 @@ function openCatalog(options) {
|
|
|
618
1078
|
pageUrl: row.page_url,
|
|
619
1079
|
pageTitle: row.page_title,
|
|
620
1080
|
sectionTitle: row.section_title,
|
|
621
|
-
markdown: row.markdown
|
|
1081
|
+
markdown: row.markdown,
|
|
1082
|
+
pageKind: row.page_kind,
|
|
1083
|
+
filePath: row.file_path,
|
|
1084
|
+
language: row.language
|
|
622
1085
|
}));
|
|
623
1086
|
return {
|
|
624
1087
|
total: totalRow.total,
|
|
@@ -788,10 +1251,10 @@ function openCatalog(options) {
|
|
|
788
1251
|
const timestamp = nowIso();
|
|
789
1252
|
const configHash = sha256(stableStringify(spec));
|
|
790
1253
|
const existing = db.prepare("SELECT id, created_at, next_due_at, next_canary_due_at, config_hash FROM sources WHERE id = ?").get(spec.id);
|
|
791
|
-
const resolvedSpecPath = options2?.specPath ?
|
|
1254
|
+
const resolvedSpecPath = options2?.specPath ? canonicalizeManagedSpecPath(options2.specPath) : null;
|
|
792
1255
|
const nextDueAt = !existing ? timestamp : existing.config_hash === configHash ? existing.next_due_at : timestamp;
|
|
793
|
-
const
|
|
794
|
-
const nextCanaryDueAt = !existing ? timestamp : existing.config_hash === configHash ? existing.next_canary_due_at ?? addHoursIso(
|
|
1256
|
+
const canaryEveryHours = resolveCanaryEveryHours(spec);
|
|
1257
|
+
const nextCanaryDueAt = !existing ? timestamp : existing.config_hash === configHash ? existing.next_canary_due_at ?? addHoursIso(canaryEveryHours) : timestamp;
|
|
795
1258
|
const configChanged = Boolean(existing && existing.config_hash !== configHash);
|
|
796
1259
|
db.prepare(`
|
|
797
1260
|
INSERT INTO sources (
|
|
@@ -836,6 +1299,7 @@ function openCatalog(options) {
|
|
|
836
1299
|
SELECT
|
|
837
1300
|
id,
|
|
838
1301
|
label,
|
|
1302
|
+
spec_json,
|
|
839
1303
|
spec_path,
|
|
840
1304
|
next_due_at,
|
|
841
1305
|
next_canary_due_at,
|
|
@@ -848,21 +1312,25 @@ function openCatalog(options) {
|
|
|
848
1312
|
FROM sources
|
|
849
1313
|
ORDER BY id
|
|
850
1314
|
`).all();
|
|
851
|
-
return rows.map((row) =>
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
1315
|
+
return rows.map((row) => {
|
|
1316
|
+
const storedSpec = parseSourceSpecObject(JSON.parse(row.spec_json));
|
|
1317
|
+
return {
|
|
1318
|
+
id: row.id,
|
|
1319
|
+
kind: storedSpec.kind,
|
|
1320
|
+
label: row.label,
|
|
1321
|
+
specPath: row.spec_path ? canonicalizeManagedSpecPath(row.spec_path) : null,
|
|
1322
|
+
nextDueAt: row.next_due_at,
|
|
1323
|
+
isDue: Date.parse(row.next_due_at) <= Date.now(),
|
|
1324
|
+
nextCanaryDueAt: row.next_canary_due_at,
|
|
1325
|
+
isCanaryDue: row.next_canary_due_at ? Date.parse(row.next_canary_due_at) <= Date.now() : false,
|
|
1326
|
+
lastCheckedAt: row.last_checked_at,
|
|
1327
|
+
lastSuccessfulSnapshotAt: row.last_successful_snapshot_at,
|
|
1328
|
+
lastSuccessfulSnapshotId: row.last_successful_snapshot_id,
|
|
1329
|
+
lastCanaryCheckedAt: row.last_canary_checked_at,
|
|
1330
|
+
lastSuccessfulCanaryAt: row.last_successful_canary_at,
|
|
1331
|
+
lastCanaryStatus: row.last_canary_status
|
|
1332
|
+
};
|
|
1333
|
+
});
|
|
866
1334
|
},
|
|
867
1335
|
listDueSourceIds(referenceTime = nowIso()) {
|
|
868
1336
|
const rows = db.prepare(`
|
|
@@ -923,11 +1391,15 @@ function openCatalog(options) {
|
|
|
923
1391
|
const pagesWithHashes = input.pages.map((page) => ({
|
|
924
1392
|
...page,
|
|
925
1393
|
markdown: page.markdown.trim(),
|
|
926
|
-
contentHash: sha256(page.markdown.trim())
|
|
1394
|
+
contentHash: sha256(page.markdown.trim()),
|
|
1395
|
+
pageKind: page.pageKind ?? "document",
|
|
1396
|
+
filePath: page.filePath ?? null,
|
|
1397
|
+
language: detectLanguage(page.filePath, page.language)
|
|
927
1398
|
}));
|
|
928
1399
|
const fingerprint = buildSnapshotFingerprint({
|
|
929
1400
|
sourceId: input.sourceId,
|
|
930
1401
|
configHash: sourceRow.config_hash,
|
|
1402
|
+
...input.revisionKey ? { revisionKey: input.revisionKey } : {},
|
|
931
1403
|
pages: pagesWithHashes.map((page) => ({
|
|
932
1404
|
url: page.url,
|
|
933
1405
|
contentHash: page.contentHash
|
|
@@ -964,13 +1436,13 @@ function openCatalog(options) {
|
|
|
964
1436
|
) VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
965
1437
|
`);
|
|
966
1438
|
const insertPage = db.prepare(`
|
|
967
|
-
INSERT INTO pages (snapshot_id, url, title, markdown, content_hash)
|
|
968
|
-
VALUES (?, ?, ?, ?, ?)
|
|
1439
|
+
INSERT INTO pages (snapshot_id, url, title, markdown, content_hash, page_kind, file_path, language)
|
|
1440
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
969
1441
|
`);
|
|
970
1442
|
const insertChunk = db.prepare(`
|
|
971
1443
|
INSERT INTO chunks (
|
|
972
|
-
source_id, snapshot_id, page_id, page_url, page_title, section_title, chunk_order, markdown
|
|
973
|
-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
1444
|
+
source_id, snapshot_id, page_id, page_url, page_title, section_title, chunk_order, markdown, page_kind, file_path, language
|
|
1445
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
974
1446
|
`);
|
|
975
1447
|
const insertRun = db.prepare(`
|
|
976
1448
|
INSERT INTO fetch_runs (id, source_id, status, snapshot_id, started_at, finished_at)
|
|
@@ -987,9 +1459,23 @@ function openCatalog(options) {
|
|
|
987
1459
|
checkedAt
|
|
988
1460
|
);
|
|
989
1461
|
for (const page of pagesWithHashes) {
|
|
990
|
-
const pageInsert = insertPage.run(
|
|
1462
|
+
const pageInsert = insertPage.run(
|
|
1463
|
+
snapshotId,
|
|
1464
|
+
page.url,
|
|
1465
|
+
page.title,
|
|
1466
|
+
page.markdown,
|
|
1467
|
+
page.contentHash,
|
|
1468
|
+
page.pageKind,
|
|
1469
|
+
page.filePath,
|
|
1470
|
+
page.language
|
|
1471
|
+
);
|
|
991
1472
|
const pageId = Number(pageInsert.lastInsertRowid);
|
|
992
|
-
const chunks =
|
|
1473
|
+
const chunks = chunkContent({
|
|
1474
|
+
title: page.title,
|
|
1475
|
+
content: page.markdown,
|
|
1476
|
+
filePath: page.filePath,
|
|
1477
|
+
language: page.language
|
|
1478
|
+
});
|
|
993
1479
|
for (const chunk of chunks) {
|
|
994
1480
|
insertChunk.run(
|
|
995
1481
|
input.sourceId,
|
|
@@ -999,7 +1485,10 @@ function openCatalog(options) {
|
|
|
999
1485
|
page.title,
|
|
1000
1486
|
chunk.sectionTitle,
|
|
1001
1487
|
chunk.chunkOrder,
|
|
1002
|
-
chunk.markdown
|
|
1488
|
+
chunk.markdown,
|
|
1489
|
+
page.pageKind,
|
|
1490
|
+
page.filePath,
|
|
1491
|
+
page.language
|
|
1003
1492
|
);
|
|
1004
1493
|
}
|
|
1005
1494
|
}
|
|
@@ -1050,7 +1539,6 @@ function openCatalog(options) {
|
|
|
1050
1539
|
);
|
|
1051
1540
|
}
|
|
1052
1541
|
const spec = JSON.parse(sourceRow.spec_json);
|
|
1053
|
-
const canary = resolveSourceCanary(spec);
|
|
1054
1542
|
db.prepare(`
|
|
1055
1543
|
INSERT INTO canary_runs (id, source_id, status, checked_at, details_json)
|
|
1056
1544
|
VALUES (?, ?, ?, ?, ?)
|
|
@@ -1075,7 +1563,7 @@ function openCatalog(options) {
|
|
|
1075
1563
|
input.status,
|
|
1076
1564
|
input.checkedAt,
|
|
1077
1565
|
input.status,
|
|
1078
|
-
addHoursIso(
|
|
1566
|
+
addHoursIso(resolveCanaryEveryHours(spec)),
|
|
1079
1567
|
input.checkedAt,
|
|
1080
1568
|
input.sourceId
|
|
1081
1569
|
);
|
|
@@ -1086,8 +1574,9 @@ function openCatalog(options) {
|
|
|
1086
1574
|
return [];
|
|
1087
1575
|
}
|
|
1088
1576
|
const activeSourceKeys = new Set(
|
|
1089
|
-
input.activeSources.map((source) => `${source.sourceId}::${
|
|
1577
|
+
input.activeSources.map((source) => `${source.sourceId}::${canonicalizeManagedSpecPath(source.specPath)}`)
|
|
1090
1578
|
);
|
|
1579
|
+
const normalizedManagedRoots = input.managedRoots.map((managedRoot) => canonicalizeManagedSpecPath(managedRoot));
|
|
1091
1580
|
const rows = db.prepare(`
|
|
1092
1581
|
SELECT id, spec_path
|
|
1093
1582
|
FROM sources
|
|
@@ -1098,8 +1587,8 @@ function openCatalog(options) {
|
|
|
1098
1587
|
if (!row.spec_path) {
|
|
1099
1588
|
return false;
|
|
1100
1589
|
}
|
|
1101
|
-
const normalizedSpecPath =
|
|
1102
|
-
return
|
|
1590
|
+
const normalizedSpecPath = canonicalizeManagedSpecPath(row.spec_path);
|
|
1591
|
+
return normalizedManagedRoots.some(
|
|
1103
1592
|
(managedRoot) => normalizedSpecPath === managedRoot || normalizedSpecPath.startsWith(`${managedRoot}/`)
|
|
1104
1593
|
) && !activeSourceKeys.has(`${row.id}::${normalizedSpecPath}`);
|
|
1105
1594
|
}).map((row) => row.id);
|
|
@@ -1154,7 +1643,7 @@ function openCatalog(options) {
|
|
|
1154
1643
|
);
|
|
1155
1644
|
}
|
|
1156
1645
|
const loadSnapshotPages = (snapshotId) => db.prepare(`
|
|
1157
|
-
SELECT url, title, markdown, content_hash
|
|
1646
|
+
SELECT url, title, markdown, content_hash, page_kind, file_path, language
|
|
1158
1647
|
FROM pages
|
|
1159
1648
|
WHERE snapshot_id = ?
|
|
1160
1649
|
ORDER BY url
|
|
@@ -1165,11 +1654,17 @@ function openCatalog(options) {
|
|
|
1165
1654
|
const afterMap = new Map(afterPages.map((page) => [page.url, page]));
|
|
1166
1655
|
const addedPages = afterPages.filter((page) => !beforeMap.has(page.url)).map((page) => ({
|
|
1167
1656
|
url: page.url,
|
|
1168
|
-
title: page.title
|
|
1657
|
+
title: page.title,
|
|
1658
|
+
pageKind: page.page_kind,
|
|
1659
|
+
filePath: page.file_path,
|
|
1660
|
+
language: page.language
|
|
1169
1661
|
}));
|
|
1170
1662
|
const removedPages = beforePages.filter((page) => !afterMap.has(page.url)).map((page) => ({
|
|
1171
1663
|
url: page.url,
|
|
1172
|
-
title: page.title
|
|
1664
|
+
title: page.title,
|
|
1665
|
+
pageKind: page.page_kind,
|
|
1666
|
+
filePath: page.file_path,
|
|
1667
|
+
language: page.language
|
|
1173
1668
|
}));
|
|
1174
1669
|
const summarizeLineDiff = (beforeMarkdown, afterMarkdown) => {
|
|
1175
1670
|
const beforeLines = beforeMarkdown.split("\n");
|
|
@@ -1194,6 +1689,9 @@ function openCatalog(options) {
|
|
|
1194
1689
|
url: before.url,
|
|
1195
1690
|
beforeTitle: before.title,
|
|
1196
1691
|
afterTitle: after.title,
|
|
1692
|
+
pageKind: after.page_kind,
|
|
1693
|
+
filePath: after.file_path,
|
|
1694
|
+
language: after.language,
|
|
1197
1695
|
lineSummary: summarizeLineDiff(before.markdown, after.markdown)
|
|
1198
1696
|
}));
|
|
1199
1697
|
const unchangedPageCount = beforePages.filter((page) => {
|
|
@@ -1239,11 +1737,14 @@ function openCatalog(options) {
|
|
|
1239
1737
|
c.page_url,
|
|
1240
1738
|
c.page_title,
|
|
1241
1739
|
c.section_title,
|
|
1242
|
-
c.markdown
|
|
1740
|
+
c.markdown,
|
|
1741
|
+
c.page_kind,
|
|
1742
|
+
c.file_path,
|
|
1743
|
+
c.language
|
|
1243
1744
|
FROM chunks c
|
|
1244
1745
|
WHERE c.source_id = ?
|
|
1245
1746
|
AND c.snapshot_id = ?
|
|
1246
|
-
|
|
1747
|
+
ORDER BY c.id
|
|
1247
1748
|
`).all(input.sourceId, input.snapshotId);
|
|
1248
1749
|
return rows.map((row) => ({
|
|
1249
1750
|
chunkId: row.chunk_id,
|
|
@@ -1253,6 +1754,9 @@ function openCatalog(options) {
|
|
|
1253
1754
|
pageTitle: row.page_title,
|
|
1254
1755
|
sectionTitle: row.section_title,
|
|
1255
1756
|
markdown: row.markdown,
|
|
1757
|
+
pageKind: row.page_kind,
|
|
1758
|
+
filePath: row.file_path,
|
|
1759
|
+
language: row.language,
|
|
1256
1760
|
contentHash: sha256(row.markdown)
|
|
1257
1761
|
}));
|
|
1258
1762
|
},
|
|
@@ -1303,7 +1807,10 @@ function openCatalog(options) {
|
|
|
1303
1807
|
c.page_url,
|
|
1304
1808
|
c.page_title,
|
|
1305
1809
|
c.section_title,
|
|
1306
|
-
c.markdown
|
|
1810
|
+
c.markdown,
|
|
1811
|
+
c.page_kind,
|
|
1812
|
+
c.file_path,
|
|
1813
|
+
c.language
|
|
1307
1814
|
FROM chunks c
|
|
1308
1815
|
WHERE c.id IN (${chunkIds.map(() => "?").join(",")})
|
|
1309
1816
|
`).all(...chunkIds);
|
|
@@ -1314,7 +1821,10 @@ function openCatalog(options) {
|
|
|
1314
1821
|
pageUrl: row.page_url,
|
|
1315
1822
|
pageTitle: row.page_title,
|
|
1316
1823
|
sectionTitle: row.section_title,
|
|
1317
|
-
markdown: row.markdown
|
|
1824
|
+
markdown: row.markdown,
|
|
1825
|
+
pageKind: row.page_kind,
|
|
1826
|
+
filePath: row.file_path,
|
|
1827
|
+
language: row.language
|
|
1318
1828
|
}));
|
|
1319
1829
|
},
|
|
1320
1830
|
queueLatestEmbeddingJobs(sourceIds) {
|
|
@@ -1698,7 +2208,10 @@ function openCatalog(options) {
|
|
|
1698
2208
|
c.page_url,
|
|
1699
2209
|
c.page_title,
|
|
1700
2210
|
c.section_title,
|
|
1701
|
-
c.markdown
|
|
2211
|
+
c.markdown,
|
|
2212
|
+
c.page_kind,
|
|
2213
|
+
c.file_path,
|
|
2214
|
+
c.language
|
|
1702
2215
|
FROM chunks c
|
|
1703
2216
|
WHERE c.id = ?
|
|
1704
2217
|
`).get(chunkId);
|
|
@@ -1712,65 +2225,335 @@ function openCatalog(options) {
|
|
|
1712
2225
|
pageUrl: row.page_url,
|
|
1713
2226
|
pageTitle: row.page_title,
|
|
1714
2227
|
sectionTitle: row.section_title,
|
|
1715
|
-
markdown: row.markdown
|
|
2228
|
+
markdown: row.markdown,
|
|
2229
|
+
pageKind: row.page_kind,
|
|
2230
|
+
filePath: row.file_path,
|
|
2231
|
+
language: row.language
|
|
1716
2232
|
};
|
|
1717
2233
|
}
|
|
1718
2234
|
};
|
|
1719
2235
|
}
|
|
1720
2236
|
|
|
1721
|
-
// src/
|
|
1722
|
-
import {
|
|
1723
|
-
import {
|
|
1724
|
-
import {
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
2237
|
+
// src/daemon.ts
|
|
2238
|
+
import { existsSync as existsSync3 } from "fs";
|
|
2239
|
+
import { resolve as resolve5 } from "path";
|
|
2240
|
+
import { setTimeout as sleep2 } from "timers/promises";
|
|
2241
|
+
|
|
2242
|
+
// src/fetch/fetch-source.ts
|
|
2243
|
+
import { mkdirSync as mkdirSync4, writeFileSync as writeFileSync2 } from "fs";
|
|
2244
|
+
import { join as join5 } from "path";
|
|
2245
|
+
import { setTimeout as sleep } from "timers/promises";
|
|
2246
|
+
import { chromium } from "playwright";
|
|
2247
|
+
|
|
2248
|
+
// src/git/git-source.ts
|
|
2249
|
+
import { existsSync as existsSync2, mkdirSync as mkdirSync3, writeFileSync } from "fs";
|
|
2250
|
+
import { dirname as dirname2, join as join4 } from "path";
|
|
2251
|
+
import { execFile } from "child_process";
|
|
2252
|
+
import { promisify } from "util";
|
|
2253
|
+
// Promise-returning wrapper around child_process.execFile, so callers can `await` command execution.
var execFileAsync = promisify(execFile);
|
|
2254
|
+
/**
 * Returns the current instant formatted by `Date.prototype.toISOString`.
 *
 * @returns {string} ISO-8601 timestamp for "now".
 */
function nowIso2() {
  const currentMoment = new Date();
  return currentMoment.toISOString();
}
|
|
2257
|
+
/**
 * Builds the path of the bare mirror directory used for a git source.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string} sourceId - Identifier of the source; used as the mirror's base name.
 * @returns {string} `<dataDir>/git-mirrors/<sourceId>.git`, joined with the platform separator.
 */
function getGitMirrorDir(dataDir, sourceId) {
  const mirrorName = `${sourceId}.git`;
  return join4(dataDir, "git-mirrors", mirrorName);
}
|
|
2260
|
+
/**
 * Looks up a required variable in the supplied environment map.
 *
 * @param {string} name - Name of the environment variable.
 * @param {Record<string, string | undefined>} env - Environment to read from.
 * @returns {string} The variable's value.
 * @throws {AiocsError} With code `authEnvMissing` when the value is missing or empty.
 */
function resolveEnvValue(name, env) {
  const resolved = env[name];
  if (resolved) {
    return resolved;
  }
  // Any falsy value (undefined or "") is treated as "not configured".
  throw new AiocsError(
    AIOCS_ERROR_CODES.authEnvMissing,
    `Missing required environment variable '${name}' for authenticated source access`,
    { envVar: name }
  );
}
|
|
2271
|
+
/**
 * Builds the HTTP authorization header line for a git source, if it declares auth.
 *
 * @param {object} spec - Git source spec; `spec.repo.auth` is read.
 * @param {Record<string, string | undefined>} env - Environment holding the token.
 * @returns {string | null} `AUTHORIZATION: Bearer …` for the "bearer" scheme, otherwise
 *   `AUTHORIZATION: Basic …` with base64 of `username:token`; `null` when no auth is configured.
 */
function buildGitAuthHeader(spec, env) {
  const auth = spec.repo.auth;
  if (!auth) {
    return null;
  }
  const token = resolveEnvValue(auth.tokenFromEnv, env);
  if (auth.scheme === "bearer") {
    return `AUTHORIZATION: Bearer ${token}`;
  }
  // Every non-bearer scheme falls through to HTTP Basic.
  const encoded = Buffer.from(`${auth.username}:${token}`, "utf8").toString("base64");
  return `AUTHORIZATION: Basic ${encoded}`;
}
|
|
2282
|
+
/**
 * Runs `git` with the given arguments and returns its stdout.
 *
 * @param {string[]} args - Arguments passed to git (after any injected `-c` option).
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child process.
 * @param {Record<string, string | undefined>} [options.env] - Extra environment, layered over `process.env`.
 * @param {string | null} [options.authHeader] - Header line injected as `http.extraHeader`.
 * @param {string} [options.encoding] - Output encoding; defaults to "utf8".
 * @returns {Promise<string | Buffer>} The command's stdout.
 * @throws {AiocsError} With code `internalError` when the command fails. The auth header
 *   is redacted from the message so credentials do not leak into errors or logs.
 */
async function runGit(args, options = {}) {
  const { authHeader } = options;
  const commandArgs = authHeader ? ["-c", `http.extraHeader=${authHeader}`, ...args] : args;
  try {
    const result = await execFileAsync("git", commandArgs, {
      cwd: options.cwd,
      env: {
        ...process.env,
        ...options.env,
        // Never block waiting for interactive credential input.
        GIT_TERMINAL_PROMPT: "0"
      },
      encoding: options.encoding ?? "utf8",
      maxBuffer: 32 * 1024 * 1024
    });
    return result.stdout;
  } catch (error) {
    const rawMessage = error instanceof Error ? error.message : String(error);
    // The failure message can echo the full command line, which includes the
    // secret header when one was supplied; strip every occurrence.
    const safeMessage = authHeader ? rawMessage.split(authHeader).join("[REDACTED]") : rawMessage;
    throw new AiocsError(
      AIOCS_ERROR_CODES.internalError,
      `Git command failed: ${safeMessage}`,
      {
        args
      }
    );
  }
}
|
|
2304
|
+
/**
 * Makes sure a mirror clone of the source's repository exists and is current.
 *
 * When the mirror directory is absent it is created with `git clone --mirror`;
 * otherwise the `origin` URL is re-pointed at the spec's URL and the mirror is
 * fetched with pruning of stale refs and tags.
 *
 * @param {object} spec - Git source spec (`id`, `repo.url`, optional `repo.auth`).
 * @param {string} dataDir - Root data directory that holds the mirrors.
 * @param {Record<string, string | undefined>} env - Environment for git and token lookup.
 * @returns {Promise<string>} Path of the mirror directory.
 */
async function ensureGitMirror(spec, dataDir, env) {
  const mirrorDir = getGitMirrorDir(dataDir, spec.id);
  mkdirSync3(dirname2(mirrorDir), { recursive: true });
  const gitOptions = { env, authHeader: buildGitAuthHeader(spec, env) };
  if (existsSync2(mirrorDir)) {
    const inMirror = ["--git-dir", mirrorDir];
    await runGit([...inMirror, "remote", "set-url", "origin", spec.repo.url], gitOptions);
    await runGit([...inMirror, "fetch", "--prune", "--prune-tags", "--tags", "origin"], gitOptions);
  } else {
    await runGit(["clone", "--mirror", spec.repo.url, mirrorDir], gitOptions);
  }
  return mirrorDir;
}
|
|
1734
|
-
function
|
|
1735
|
-
const
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
2325
|
+
/**
 * Resolves a ref in the mirror to the commit it points at.
 *
 * @param {string} mirrorDir - Path of the mirror repository.
 * @param {string} ref - Branch, tag or other revision name.
 * @param {Record<string, string | undefined>} env - Environment for the git process.
 * @returns {Promise<string>} Trimmed output of `git rev-parse <ref>^{commit}`.
 */
async function resolveGitCommit(mirrorDir, ref, env) {
  // `^{commit}` peels the ref down to a commit object.
  const revision = `${ref}^{commit}`;
  const output = await runGit(["--git-dir", mirrorDir, "rev-parse", revision], { env });
  return String(output).trim();
}
|
|
2331
|
+
/**
 * Lists every path in the tree of a commit.
 *
 * @param {string} mirrorDir - Path of the mirror repository.
 * @param {string} commitSha - Commit whose tree is listed recursively.
 * @param {Record<string, string | undefined>} env - Environment for the git process.
 * @returns {Promise<string[]>} Paths exactly as git reports them.
 */
async function listRepoFiles(mirrorDir, commitSha, env) {
  const stdout = await runGit(["--git-dir", mirrorDir, "ls-tree", "-r", "-z", "--name-only", commitSha], {
    env,
    encoding: "buffer"
  });
  const listing = Buffer.isBuffer(stdout) ? stdout.toString("utf8") : String(stdout);
  // With -z every entry is NUL-terminated and emitted verbatim, so names must not
  // be trimmed: leading or trailing whitespace is part of a valid file name.
  // Only the empty piece after the final terminator is dropped.
  return listing.split("\0").filter((entry) => entry.length > 0);
}
|
|
2339
|
+
/**
 * Decides whether a repository path falls inside the source's configured scope.
 *
 * @param {object} spec - Git source spec; reads `spec.repo.include` and `spec.repo.exclude`.
 * @param {string} filePath - Path to test.
 * @returns {boolean} `true` when the path matches an include pattern and no exclude pattern.
 */
function isIncluded(spec, filePath) {
  const { include, exclude } = spec.repo;
  if (!matchesPatterns(filePath, include)) {
    return false;
  }
  const isExcluded = exclude.length > 0 && matchesPatterns(filePath, exclude);
  return !isExcluded;
}
|
|
1744
|
-
function
|
|
1745
|
-
const
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
2348
|
+
/**
 * Asks git for the size of the object stored at a path in a commit.
 *
 * @param {string} mirrorDir - Path of the mirror repository.
 * @param {string} commitSha - Commit containing the path.
 * @param {string} filePath - Path inside the commit's tree.
 * @param {Record<string, string | undefined>} env - Environment for the git process.
 * @returns {Promise<number>} Output of `git cat-file -s <commit>:<path>` converted with `Number`
 *   (so non-numeric output yields `NaN`).
 */
async function getObjectSize(mirrorDir, commitSha, filePath, env) {
  const objectSpec = `${commitSha}:${filePath}`;
  const output = await runGit(["--git-dir", mirrorDir, "cat-file", "-s", objectSpec], { env });
  const sizeText = String(output).trim();
  return Number(sizeText);
}
|
|
2354
|
+
/**
 * Heuristic binary check: content is considered binary when it contains a zero byte.
 *
 * @param {Buffer} buffer - Raw file content.
 * @returns {boolean} `true` when a 0 byte is present.
 */
function isProbablyBinary(buffer) {
  const firstNulIndex = buffer.indexOf(0);
  return firstNulIndex !== -1;
}
|
|
2357
|
+
/**
 * Reads the content of a path at a commit via `git show <commit>:<path>`.
 *
 * @param {string} mirrorDir - Path of the mirror repository.
 * @param {string} commitSha - Commit containing the path.
 * @param {string} filePath - Path inside the commit's tree.
 * @param {Record<string, string | undefined>} env - Environment for the git process.
 * @returns {Promise<Buffer>} File content; non-Buffer output is converted from its string form as UTF-8.
 */
async function readRepoFile(mirrorDir, commitSha, filePath, env) {
  const output = await runGit(["--git-dir", mirrorDir, "show", `${commitSha}:${filePath}`], {
    env,
    encoding: "buffer"
  });
  if (output instanceof Buffer) {
    return output;
  }
  return Buffer.from(String(output), "utf8");
}
|
|
2364
|
+
/**
 * Parses a repository URL string.
 *
 * @param {string} repoUrl - Repository URL.
 * @returns {URL} Parsed URL object.
 * @throws {TypeError} When the string is not a valid absolute URL (thrown by the `URL` constructor).
 */
function normalizeRepoUrl(repoUrl) {
  const parsed = new URL(repoUrl);
  return parsed;
}
|
|
2367
|
+
/**
 * Derives a base web address for a repository from its clone URL.
 *
 * @param {string} repoUrl - Repository URL.
 * @returns {string} The URL's origin plus its path with a trailing `.git` (any case) removed.
 */
function normalizeRepoWebBase(repoUrl) {
  const { origin, pathname } = normalizeRepoUrl(repoUrl);
  const pathWithoutSuffix = pathname.replace(/\.git$/i, "");
  return `${origin}${pathWithoutSuffix}`;
}
|
|
2372
|
+
/**
 * Builds a URL that identifies a file at the source's configured ref.
 *
 * - `file:` repositories: the repo URL followed by a `#ref=…&path=…` fragment.
 * - `github.com`: `<base>/blob/<ref>/<path>`.
 * - `gitlab.com`: `<base>/-/blob/<ref>/<path>`.
 * - Any other host: the web base followed by the `#ref=…&path=…` fragment.
 *
 * @param {object} spec - Git source spec; reads `spec.repo.url` and `spec.repo.ref`.
 * @param {string} filePath - Path of the file inside the repository.
 * @returns {string} The file URL.
 */
function buildRepoFileUrl(spec, filePath) {
  const { url: repoUrl, ref } = spec.repo;
  const parsed = normalizeRepoUrl(repoUrl);
  // Encode each segment separately so "/" separators survive in blob URLs.
  const encodeSegments = (value) => value.split("/").map((segment) => encodeURIComponent(segment)).join("/");
  const encodedPath = encodeSegments(filePath);
  const encodedRef = encodeSegments(ref);
  const fragment = `#ref=${encodeURIComponent(ref)}&path=${encodeURIComponent(filePath)}`;
  if (parsed.protocol === "file:") {
    return `${repoUrl}${fragment}`;
  }
  const base = normalizeRepoWebBase(repoUrl);
  switch (parsed.hostname) {
    case "github.com":
      return `${base}/blob/${encodedRef}/${encodedPath}`;
    case "gitlab.com":
      return `${base}/-/blob/${encodedRef}/${encodedPath}`;
    default:
      return `${base}${fragment}`;
  }
}
|
|
1754
|
-
function
|
|
1755
|
-
const
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
2388
|
+
/**
 * Writes each page's content to disk under
 * `<dataDir>/sources/<sourceId>/snapshots/<snapshotId>/files/<filePath>`.
 *
 * File paths come from the repository's tree, which is untrusted input, so every
 * target is checked to stay inside the snapshot directory before anything is written.
 *
 * @param {object} input - Reads `input.dataDir` and `input.sourceId`.
 * @param {string} snapshotId - Snapshot the files belong to.
 * @param {Array<{filePath: string, markdown: string}>} pages - Files to persist.
 * @returns {void}
 * @throws {AiocsError} With code `invalidArgument` when a file path would resolve
 *   outside the snapshot directory (for example through `..` components).
 */
function persistGitSnapshotFiles(input, snapshotId, pages) {
  const snapshotDir = join4(input.dataDir, "sources", input.sourceId, "snapshots", snapshotId, "files");
  const snapshotRoot = resolve(snapshotDir);
  // Validate every target first so a bad path never leaves a half-written snapshot.
  const targets = pages.map((page) => {
    const target = join4(snapshotDir, page.filePath);
    if (!resolve(target).startsWith(`${snapshotRoot}${sep}`)) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.invalidArgument,
        `Git file path '${page.filePath}' escapes the snapshot directory`
      );
    }
    return { target, markdown: page.markdown };
  });
  for (const { target, markdown } of targets) {
    mkdirSync3(dirname2(target), { recursive: true });
    writeFileSync(target, markdown, "utf8");
  }
}
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
2396
|
+
async function materializeGitPages(spec, mirrorDir, commitSha, env) {
|
|
2397
|
+
const repoFiles = await listRepoFiles(mirrorDir, commitSha, env);
|
|
2398
|
+
const includedFiles = repoFiles.filter((filePath) => isIncluded(spec, filePath));
|
|
2399
|
+
if (includedFiles.length > spec.repo.maxFiles) {
|
|
2400
|
+
throw new AiocsError(
|
|
2401
|
+
AIOCS_ERROR_CODES.invalidArgument,
|
|
2402
|
+
`Git source '${spec.id}' matched ${includedFiles.length} files, exceeding maxFiles=${spec.repo.maxFiles}`
|
|
2403
|
+
);
|
|
2404
|
+
}
|
|
2405
|
+
const pages = [];
|
|
2406
|
+
for (const filePath of includedFiles) {
|
|
2407
|
+
const size = await getObjectSize(mirrorDir, commitSha, filePath, env);
|
|
2408
|
+
if (!Number.isFinite(size) || size > spec.repo.textFileMaxBytes) {
|
|
2409
|
+
continue;
|
|
2410
|
+
}
|
|
2411
|
+
const content = await readRepoFile(mirrorDir, commitSha, filePath, env).catch(() => null);
|
|
2412
|
+
if (!content || isProbablyBinary(content)) {
|
|
2413
|
+
continue;
|
|
2414
|
+
}
|
|
2415
|
+
const markdown = content.toString("utf8").trimEnd();
|
|
2416
|
+
if (!markdown.trim()) {
|
|
2417
|
+
continue;
|
|
2418
|
+
}
|
|
2419
|
+
pages.push({
|
|
2420
|
+
url: buildRepoFileUrl(spec, filePath),
|
|
2421
|
+
title: filePath,
|
|
2422
|
+
markdown,
|
|
2423
|
+
pageKind: "file",
|
|
2424
|
+
filePath,
|
|
2425
|
+
language: detectLanguage(filePath)
|
|
2426
|
+
});
|
|
2427
|
+
}
|
|
2428
|
+
return pages.sort((left, right) => left.filePath.localeCompare(right.filePath));
|
|
2429
|
+
}
|
|
2430
|
+
/**
 * Guards a canary check: its path must fall inside the source's include/exclude scope.
 *
 * @param {object} spec - Git source spec.
 * @param {{path: string}} check - Canary check being validated.
 * @returns {void}
 * @throws {AiocsError} With code `invalidArgument` when the path is out of scope.
 */
function assertCanaryPathInScope(spec, check) {
  const targetPath = check.path;
  if (isIncluded(spec, targetPath)) {
    return;
  }
  throw new AiocsError(
    AIOCS_ERROR_CODES.invalidArgument,
    `Git canary path '${targetPath}' is outside the configured include/exclude scope`
  );
}
|
|
2438
|
+
/**
 * Loads the file a canary check points at and shapes it like a fetched page.
 *
 * @param {object} spec - Git source spec.
 * @param {string} mirrorDir - Path of the mirror repository.
 * @param {string} commitSha - Commit to read from.
 * @param {{path: string}} check - Canary check naming the file.
 * @param {Record<string, string | undefined>} env - Environment for the git process.
 * @returns {Promise<object>} Page record (`url`, `title`, `markdown`, `pageKind`, `filePath`, `language`).
 * @throws {AiocsError} When the path is outside the configured scope.
 * @throws {Error} When the file content contains a zero byte.
 */
async function readCanaryTarget(spec, mirrorDir, commitSha, check, env) {
  assertCanaryPathInScope(spec, check);
  const raw = await readRepoFile(mirrorDir, commitSha, check.path, env);
  if (isProbablyBinary(raw)) {
    throw new Error(`Canary target '${check.path}' is binary`);
  }
  const url = buildRepoFileUrl(spec, check.path);
  const markdown = raw.toString("utf8").trimEnd();
  const language = detectLanguage(check.path);
  return {
    url,
    title: check.path,
    markdown,
    pageKind: "file",
    filePath: check.path,
    language
  };
}
|
|
2453
|
+
/**
 * Fetches a git-backed source: refreshes its mirror, resolves the configured ref,
 * materializes the in-scope files and records them as a snapshot.
 *
 * Snapshot files are written to disk only when the catalog did not reuse an
 * existing snapshot.
 *
 * @param {object} input
 * @param {object} input.catalog - Catalog exposing `getSourceSpec` and `recordSuccessfulSnapshot`.
 * @param {string} input.sourceId - Identifier of the source to fetch.
 * @param {string} input.dataDir - Root data directory.
 * @param {Record<string, string | undefined>} [input.env] - Environment; defaults to `process.env`.
 * @returns {Promise<{snapshotId: string, pageCount: number, reused: boolean, detectedVersion: string}>}
 * @throws {AiocsError} `sourceNotFound` when the source is unknown or not a git source;
 *   `noPagesFetched` when no text files were produced.
 */
async function fetchGitSource(input) {
  const { catalog, sourceId } = input;
  const spec = catalog.getSourceSpec(sourceId);
  const isKnownGitSource = Boolean(spec) && isGitSourceSpec(spec);
  if (!isKnownGitSource) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.sourceNotFound,
      `Unknown git source '${sourceId}'`
    );
  }
  const env = input.env ?? process.env;
  const mirrorDir = await ensureGitMirror(spec, input.dataDir, env);
  const commitSha = await resolveGitCommit(mirrorDir, spec.repo.ref, env);
  const pages = await materializeGitPages(spec, mirrorDir, commitSha, env);
  if (pages.length === 0) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.noPagesFetched,
      `No text files fetched for git source '${sourceId}'`
    );
  }
  // The commit hash doubles as the detected version and the revision key.
  const { snapshotId, reused } = catalog.recordSuccessfulSnapshot({
    sourceId,
    detectedVersion: commitSha,
    revisionKey: commitSha,
    pages
  });
  if (!reused) {
    persistGitSnapshotFiles(input, snapshotId, pages);
  }
  return {
    snapshotId,
    pageCount: pages.length,
    reused,
    detectedVersion: commitSha
  };
}
|
|
2487
|
+
/**
 * Runs the canary checks of a git-backed source against the current tip of its ref.
 *
 * Each check reads one file and verifies the optional expected title, the optional
 * expected text, and the minimum content length. The outcome is recorded in the
 * catalog whether it passes or fails.
 *
 * @param {object} input
 * @param {object} input.catalog - Catalog exposing `getSourceSpec` and `recordCanaryRun`.
 * @param {string} input.sourceId - Identifier of the source to check.
 * @param {string} [input.dataDir] - Root data directory; defaults to `<home>/.aiocs/data`.
 * @param {Record<string, string | undefined>} [input.env] - Environment; defaults to `process.env`.
 * @returns {Promise<object>} Result with `sourceId`, `status`, `checkedAt`, `summary` and `checks`.
 * @throws {AiocsError} `sourceNotFound` when the source is unknown or not a git source;
 *   `canaryFailed` (carrying the result as details) when any check fails.
 */
async function runGitSourceCanary(input) {
  const spec = input.catalog.getSourceSpec(input.sourceId);
  if (!spec || !isGitSourceSpec(spec)) {
    throw new AiocsError(
      AIOCS_ERROR_CODES.sourceNotFound,
      `Unknown git source '${input.sourceId}'`
    );
  }
  const env = input.env ?? process.env;
  // Fall back to the OS-reported home directory when HOME is unset or empty;
  // otherwise the default would silently become a path relative to the cwd.
  const dataDir = input.dataDir ?? join4(process.env.HOME || homedir(), ".aiocs", "data");
  const mirrorDir = await ensureGitMirror(spec, dataDir, env);
  const commitSha = await resolveGitCommit(mirrorDir, spec.repo.ref, env);
  const canary = resolveSourceCanary(spec);
  const checkedAt = nowIso2();
  const checks = [];
  for (const check of canary.checks) {
    try {
      const page = await readCanaryTarget(spec, mirrorDir, commitSha, check, env);
      if (check.expectedTitle && !page.title.includes(check.expectedTitle)) {
        throw new Error(`Expected title to include '${check.expectedTitle}'`);
      }
      if (check.expectedText && !page.markdown.includes(check.expectedText)) {
        throw new Error(`Expected markdown to include '${check.expectedText}'`);
      }
      if (page.markdown.trim().length < check.minContentLength) {
        throw new Error(`Expected content length to be at least ${check.minContentLength}`);
      }
      checks.push({
        path: check.path,
        status: "pass",
        title: page.title,
        markdownLength: page.markdown.trim().length
      });
    } catch (error) {
      // A failing check is recorded and the remaining checks still run.
      checks.push({
        path: check.path,
        status: "fail",
        errorMessage: error instanceof Error ? error.message : String(error)
      });
    }
  }
  const passCount = checks.filter((check) => check.status === "pass").length;
  const failCount = checks.length - passCount;
  const status = failCount > 0 ? "fail" : "pass";
  const result = {
    sourceId: input.sourceId,
    status,
    checkedAt,
    summary: {
      checkCount: checks.length,
      passCount,
      failCount
    },
    checks
  };
  // Persist the run before signalling failure so failed runs are recorded too.
  input.catalog.recordCanaryRun({
    sourceId: input.sourceId,
    status,
    checkedAt,
    details: result
  });
  if (status === "fail") {
    throw new AiocsError(
      AIOCS_ERROR_CODES.canaryFailed,
      `Git source canary failed for '${input.sourceId}'`,
      result
    );
  }
  return result;
}
|
|
1774
2557
|
|
|
1775
2558
|
// src/fetch/extract.ts
|
|
1776
2559
|
import { JSDOM } from "jsdom";
|
|
@@ -1952,36 +2735,10 @@ async function extractPage(page, strategy) {
|
|
|
1952
2735
|
return runReadabilityStrategy(page);
|
|
1953
2736
|
}
|
|
1954
2737
|
|
|
1955
|
-
// src/fetch/url-patterns.ts
|
|
1956
|
-
function escapeRegex(value) {
|
|
1957
|
-
return value.replace(/[|\\{}()[\]^$+?.]/g, "\\$&");
|
|
1958
|
-
}
|
|
1959
|
-
function patternToRegex(pattern) {
|
|
1960
|
-
let regex = "^";
|
|
1961
|
-
for (let index = 0; index < pattern.length; index += 1) {
|
|
1962
|
-
const current = pattern[index];
|
|
1963
|
-
const next = pattern[index + 1];
|
|
1964
|
-
if (current === "*" && next === "*") {
|
|
1965
|
-
regex += ".*";
|
|
1966
|
-
index += 1;
|
|
1967
|
-
continue;
|
|
1968
|
-
}
|
|
1969
|
-
if (current === "*") {
|
|
1970
|
-
regex += "[^?#]*";
|
|
1971
|
-
continue;
|
|
1972
|
-
}
|
|
1973
|
-
regex += escapeRegex(current ?? "");
|
|
1974
|
-
}
|
|
1975
|
-
return new RegExp(`${regex}$`);
|
|
1976
|
-
}
|
|
1977
|
-
function matchesPatterns(url, patterns) {
|
|
1978
|
-
return patterns.some((pattern) => patternToRegex(pattern).test(url));
|
|
1979
|
-
}
|
|
1980
|
-
|
|
1981
2738
|
// src/fetch/fetch-source.ts
|
|
1982
2739
|
var MAX_FETCH_ATTEMPTS = 3;
|
|
1983
2740
|
var RETRY_DELAY_MS = 250;
|
|
1984
|
-
function
|
|
2741
|
+
/**
 * Returns the current instant formatted by `Date.prototype.toISOString`.
 *
 * @returns {string} ISO-8601 timestamp for "now".
 */
function nowIso3() {
  const epochMillis = Date.now();
  return new Date(epochMillis).toISOString();
}
|
|
1987
2744
|
function canonicalizeUrl(raw) {
|
|
@@ -2051,14 +2808,14 @@ async function extractRawMarkdownPage(url, response) {
|
|
|
2051
2808
|
};
|
|
2052
2809
|
}
|
|
2053
2810
|
/**
 * Writes every page's markdown to
 * `<dataDir>/sources/<sourceId>/snapshots/<snapshotId>/pages/NNN-<slug>.md`,
 * where NNN is the page's 1-based position zero-padded to three digits.
 *
 * @param {object} input - Reads `input.dataDir` and `input.sourceId`.
 * @param {string} snapshotId - Snapshot the pages belong to.
 * @param {Array<{title: string, markdown: string}>} pages - Pages to persist, in order.
 * @returns {void}
 */
function persistSnapshotPages(input, snapshotId, pages) {
  const { dataDir, sourceId } = input;
  const snapshotDir = join5(dataDir, "sources", sourceId, "snapshots", snapshotId, "pages");
  mkdirSync4(snapshotDir, { recursive: true });
  for (const [index, page] of pages.entries()) {
    const ordinal = String(index + 1).padStart(3, "0");
    const filename = `${ordinal}-${slugify(page.title)}.md`;
    writeFileSync2(join5(snapshotDir, filename), page.markdown, "utf8");
  }
}
|
|
2061
|
-
function
|
|
2818
|
+
function resolveEnvValue2(name, env) {
|
|
2062
2819
|
const value = env[name];
|
|
2063
2820
|
if (!value) {
|
|
2064
2821
|
throw new AiocsError(
|
|
@@ -2074,13 +2831,13 @@ function resolveEnvValue(name, env) {
|
|
|
2074
2831
|
function resolveSourceAuth(spec, env) {
|
|
2075
2832
|
const scopedHeaders = (spec.auth?.headers ?? []).map((header) => ({
|
|
2076
2833
|
name: header.name,
|
|
2077
|
-
value:
|
|
2834
|
+
value: resolveEnvValue2(header.valueFromEnv, env),
|
|
2078
2835
|
hosts: header.hosts ?? spec.allowedHosts,
|
|
2079
2836
|
...header.include ? { include: header.include } : {}
|
|
2080
2837
|
}));
|
|
2081
2838
|
const cookies = (spec.auth?.cookies ?? []).map((cookie) => ({
|
|
2082
2839
|
name: cookie.name,
|
|
2083
|
-
value:
|
|
2840
|
+
value: resolveEnvValue2(cookie.valueFromEnv, env),
|
|
2084
2841
|
domain: cookie.domain,
|
|
2085
2842
|
path: cookie.path,
|
|
2086
2843
|
...typeof cookie.secure === "boolean" ? { secure: cookie.secure } : {},
|
|
@@ -2179,6 +2936,14 @@ async function fetchSourceOnce(input) {
|
|
|
2179
2936
|
`Unknown source '${input.sourceId}'`
|
|
2180
2937
|
);
|
|
2181
2938
|
}
|
|
2939
|
+
if (isGitSourceSpec(spec)) {
|
|
2940
|
+
const result = await fetchGitSource(input);
|
|
2941
|
+
return {
|
|
2942
|
+
snapshotId: result.snapshotId,
|
|
2943
|
+
pageCount: result.pageCount,
|
|
2944
|
+
reused: result.reused
|
|
2945
|
+
};
|
|
2946
|
+
}
|
|
2182
2947
|
const session = await createSourceContext(spec, input.env ?? process.env);
|
|
2183
2948
|
const { page } = session;
|
|
2184
2949
|
const queue = spec.startUrls.map((url) => canonicalizeUrl(url));
|
|
@@ -2305,6 +3070,9 @@ async function runSourceCanaryOnce(input) {
|
|
|
2305
3070
|
`Unknown source '${input.sourceId}'`
|
|
2306
3071
|
);
|
|
2307
3072
|
}
|
|
3073
|
+
if (isGitSourceSpec(spec)) {
|
|
3074
|
+
return runGitSourceCanary(input);
|
|
3075
|
+
}
|
|
2308
3076
|
const canary = resolveSourceCanary(spec);
|
|
2309
3077
|
const session = await createSourceContext(spec, input.env ?? process.env);
|
|
2310
3078
|
const { page } = session;
|
|
@@ -2355,7 +3123,7 @@ async function runSourceCanaryOnce(input) {
|
|
|
2355
3123
|
const result = {
|
|
2356
3124
|
sourceId: input.sourceId,
|
|
2357
3125
|
status: checks.every((check) => check.status === "pass") ? "pass" : "fail",
|
|
2358
|
-
checkedAt:
|
|
3126
|
+
checkedAt: nowIso3(),
|
|
2359
3127
|
summary: {
|
|
2360
3128
|
checkCount: checks.length,
|
|
2361
3129
|
passCount: checks.filter((check) => check.status === "pass").length,
|
|
@@ -2404,6 +3172,30 @@ function getEmbeddingModelKey(config) {
|
|
|
2404
3172
|
function normalizeBaseUrl(baseUrl) {
|
|
2405
3173
|
return baseUrl.endsWith("/") ? baseUrl.slice(0, -1) : baseUrl;
|
|
2406
3174
|
}
|
|
3175
|
+
function normalizeEmbeddingWhitespace(value) {
|
|
3176
|
+
return value.replace(/\s+/g, " ").trim();
|
|
3177
|
+
}
|
|
3178
|
+
function truncateEmbeddingText(value, maxChars) {
|
|
3179
|
+
if (value.length <= maxChars) {
|
|
3180
|
+
return value;
|
|
3181
|
+
}
|
|
3182
|
+
const slice = value.slice(0, maxChars);
|
|
3183
|
+
const lastWhitespace = slice.lastIndexOf(" ");
|
|
3184
|
+
if (lastWhitespace >= Math.floor(maxChars * 0.8)) {
|
|
3185
|
+
return slice.slice(0, lastWhitespace).trim();
|
|
3186
|
+
}
|
|
3187
|
+
return slice.trim();
|
|
3188
|
+
}
|
|
3189
|
+
function prepareTextForEmbedding(markdown, maxChars) {
|
|
3190
|
+
const withoutComments = markdown.replace(/<!--[\s\S]*?-->/g, " ");
|
|
3191
|
+
const withoutImages = withoutComments.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, "$1");
|
|
3192
|
+
const withoutLinks = withoutImages.replace(/\[([^\]]+)\]\(([^)]+)\)/g, "$1");
|
|
3193
|
+
const withoutHtml = withoutLinks.replace(/<[^>]+>/g, " ");
|
|
3194
|
+
const withoutCodeFenceMarkers = withoutHtml.replace(/```[^\n]*\n/g, "\n").replace(/```/g, "\n");
|
|
3195
|
+
const withoutInlineCodeTicks = withoutCodeFenceMarkers.replace(/`([^`]+)`/g, "$1");
|
|
3196
|
+
const normalized = normalizeEmbeddingWhitespace(withoutInlineCodeTicks);
|
|
3197
|
+
return truncateEmbeddingText(normalized, maxChars);
|
|
3198
|
+
}
|
|
2407
3199
|
async function parseJsonResponse(response) {
|
|
2408
3200
|
const text = await response.text();
|
|
2409
3201
|
if (!text) {
|
|
@@ -2422,6 +3214,7 @@ async function embedTexts(config, texts) {
|
|
|
2422
3214
|
if (texts.length === 0) {
|
|
2423
3215
|
return [];
|
|
2424
3216
|
}
|
|
3217
|
+
const preparedTexts = texts.map((text) => prepareTextForEmbedding(text, config.ollamaMaxInputChars));
|
|
2425
3218
|
const response = await fetch(`${normalizeBaseUrl(config.ollamaBaseUrl)}/api/embed`, {
|
|
2426
3219
|
method: "POST",
|
|
2427
3220
|
headers: {
|
|
@@ -2430,7 +3223,7 @@ async function embedTexts(config, texts) {
|
|
|
2430
3223
|
signal: AbortSignal.timeout(config.ollamaTimeoutMs),
|
|
2431
3224
|
body: JSON.stringify({
|
|
2432
3225
|
model: config.ollamaEmbeddingModel,
|
|
2433
|
-
input:
|
|
3226
|
+
input: preparedTexts
|
|
2434
3227
|
})
|
|
2435
3228
|
}).catch((error) => {
|
|
2436
3229
|
throw new AiocsError(
|
|
@@ -2573,6 +3366,9 @@ var AiocsVectorStore = class {
|
|
|
2573
3366
|
pageUrl: point.pageUrl,
|
|
2574
3367
|
pageTitle: point.pageTitle,
|
|
2575
3368
|
sectionTitle: point.sectionTitle,
|
|
3369
|
+
pageKind: point.pageKind,
|
|
3370
|
+
filePath: point.filePath,
|
|
3371
|
+
language: point.language,
|
|
2576
3372
|
modelKey: input.modelKey
|
|
2577
3373
|
}
|
|
2578
3374
|
}));
|
|
@@ -2785,7 +3581,10 @@ async function processEmbeddingJobs(input) {
|
|
|
2785
3581
|
snapshotId: chunk.snapshotId,
|
|
2786
3582
|
pageUrl: chunk.pageUrl,
|
|
2787
3583
|
pageTitle: chunk.pageTitle,
|
|
2788
|
-
sectionTitle: chunk.sectionTitle
|
|
3584
|
+
sectionTitle: chunk.sectionTitle,
|
|
3585
|
+
pageKind: chunk.pageKind,
|
|
3586
|
+
filePath: chunk.filePath,
|
|
3587
|
+
language: chunk.language
|
|
2789
3588
|
}))
|
|
2790
3589
|
});
|
|
2791
3590
|
indexedChunkIds.push(...batch.map((chunk) => chunk.chunkId));
|
|
@@ -2823,29 +3622,6 @@ async function processEmbeddingJobs(input) {
|
|
|
2823
3622
|
};
|
|
2824
3623
|
}
|
|
2825
3624
|
|
|
2826
|
-
// src/runtime/bundled-sources.ts
|
|
2827
|
-
import { existsSync } from "fs";
|
|
2828
|
-
import { dirname, join as join4 } from "path";
|
|
2829
|
-
import { fileURLToPath } from "url";
|
|
2830
|
-
function findPackageRoot(startDir) {
|
|
2831
|
-
let currentDir = startDir;
|
|
2832
|
-
while (true) {
|
|
2833
|
-
if (existsSync(join4(currentDir, "package.json")) && existsSync(join4(currentDir, "sources"))) {
|
|
2834
|
-
return currentDir;
|
|
2835
|
-
}
|
|
2836
|
-
const parentDir = dirname(currentDir);
|
|
2837
|
-
if (parentDir === currentDir) {
|
|
2838
|
-
throw new Error(`Could not locate aiocs package root from ${startDir}`);
|
|
2839
|
-
}
|
|
2840
|
-
currentDir = parentDir;
|
|
2841
|
-
}
|
|
2842
|
-
}
|
|
2843
|
-
function getBundledSourcesDir() {
|
|
2844
|
-
const currentFilePath = fileURLToPath(import.meta.url);
|
|
2845
|
-
const packageRoot = findPackageRoot(dirname(currentFilePath));
|
|
2846
|
-
return join4(packageRoot, "sources");
|
|
2847
|
-
}
|
|
2848
|
-
|
|
2849
3625
|
// src/runtime/hybrid-config.ts
|
|
2850
3626
|
function parsePositiveInteger(value, field, fallback) {
|
|
2851
3627
|
if (typeof value === "undefined" || value.trim() === "") {
|
|
@@ -2888,7 +3664,8 @@ function getHybridRuntimeConfig(env = process.env) {
|
|
|
2888
3664
|
embeddingProvider: "ollama",
|
|
2889
3665
|
ollamaBaseUrl: env.AIOCS_OLLAMA_BASE_URL ?? "http://127.0.0.1:11434",
|
|
2890
3666
|
ollamaEmbeddingModel: env.AIOCS_OLLAMA_EMBEDDING_MODEL ?? "nomic-embed-text",
|
|
2891
|
-
ollamaTimeoutMs: parsePositiveInteger(env.AIOCS_OLLAMA_TIMEOUT_MS, "AIOCS_OLLAMA_TIMEOUT_MS",
|
|
3667
|
+
ollamaTimeoutMs: parsePositiveInteger(env.AIOCS_OLLAMA_TIMEOUT_MS, "AIOCS_OLLAMA_TIMEOUT_MS", 1e4),
|
|
3668
|
+
ollamaMaxInputChars: parsePositiveInteger(env.AIOCS_OLLAMA_MAX_INPUT_CHARS, "AIOCS_OLLAMA_MAX_INPUT_CHARS", 4e3),
|
|
2892
3669
|
embeddingBatchSize: parsePositiveInteger(env.AIOCS_EMBEDDING_BATCH_SIZE, "AIOCS_EMBEDDING_BATCH_SIZE", 32),
|
|
2893
3670
|
embeddingJobsPerCycle: parsePositiveInteger(env.AIOCS_EMBEDDING_JOB_LIMIT_PER_CYCLE, "AIOCS_EMBEDDING_JOB_LIMIT_PER_CYCLE", 2),
|
|
2894
3671
|
lexicalCandidateWindow: parsePositiveInteger(env.AIOCS_LEXICAL_CANDIDATE_WINDOW, "AIOCS_LEXICAL_CANDIDATE_WINDOW", 40),
|
|
@@ -2900,13 +3677,13 @@ function getHybridRuntimeConfig(env = process.env) {
|
|
|
2900
3677
|
// src/spec/source-spec-files.ts
|
|
2901
3678
|
import { access, readdir } from "fs/promises";
|
|
2902
3679
|
import { constants as fsConstants } from "fs";
|
|
2903
|
-
import { extname as extname2, join as
|
|
3680
|
+
import { extname as extname2, join as join6, resolve as resolve4 } from "path";
|
|
2904
3681
|
var SOURCE_SPEC_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".json"]);
|
|
2905
3682
|
function uniqueResolvedPaths(paths) {
|
|
2906
3683
|
const seen = /* @__PURE__ */ new Set();
|
|
2907
3684
|
const unique = [];
|
|
2908
3685
|
for (const rawPath of paths) {
|
|
2909
|
-
const normalized =
|
|
3686
|
+
const normalized = resolve4(rawPath);
|
|
2910
3687
|
if (seen.has(normalized)) {
|
|
2911
3688
|
continue;
|
|
2912
3689
|
}
|
|
@@ -2927,7 +3704,7 @@ async function walkSourceSpecFiles(rootDir) {
|
|
|
2927
3704
|
const entries = await readdir(rootDir, { withFileTypes: true });
|
|
2928
3705
|
const discovered = [];
|
|
2929
3706
|
for (const entry of entries.sort((left, right) => left.name.localeCompare(right.name))) {
|
|
2930
|
-
const entryPath =
|
|
3707
|
+
const entryPath = join6(rootDir, entry.name);
|
|
2931
3708
|
if (entry.isDirectory()) {
|
|
2932
3709
|
discovered.push(...await walkSourceSpecFiles(entryPath));
|
|
2933
3710
|
continue;
|
|
@@ -2947,7 +3724,7 @@ var DEFAULT_INTERVAL_MINUTES = 60;
|
|
|
2947
3724
|
var DEFAULT_CONTAINER_SOURCE_DIR = "/app/sources";
|
|
2948
3725
|
var BOOLEAN_TRUE_VALUES = /* @__PURE__ */ new Set(["1", "true", "yes", "on"]);
|
|
2949
3726
|
var BOOLEAN_FALSE_VALUES = /* @__PURE__ */ new Set(["0", "false", "no", "off"]);
|
|
2950
|
-
function
|
|
3727
|
+
function nowIso4() {
|
|
2951
3728
|
return (/* @__PURE__ */ new Date()).toISOString();
|
|
2952
3729
|
}
|
|
2953
3730
|
function parsePositiveInteger2(raw, variableName) {
|
|
@@ -2973,10 +3750,11 @@ function parseBoolean(raw, variableName) {
|
|
|
2973
3750
|
function parseDaemonConfig(env, options = {}) {
|
|
2974
3751
|
const intervalMinutes = env.AIOCS_DAEMON_INTERVAL_MINUTES ? parsePositiveInteger2(env.AIOCS_DAEMON_INTERVAL_MINUTES, "AIOCS_DAEMON_INTERVAL_MINUTES") : DEFAULT_INTERVAL_MINUTES;
|
|
2975
3752
|
const fetchOnStart = env.AIOCS_DAEMON_FETCH_ON_START ? parseBoolean(env.AIOCS_DAEMON_FETCH_ON_START, "AIOCS_DAEMON_FETCH_ON_START") : true;
|
|
3753
|
+
const defaultContainerSourceDir = options.containerSourceDir ?? (existsSync3(DEFAULT_CONTAINER_SOURCE_DIR) ? DEFAULT_CONTAINER_SOURCE_DIR : void 0);
|
|
2976
3754
|
const defaultSourceDirs = uniqueResolvedPaths([
|
|
2977
3755
|
options.bundledSourceDir ?? getBundledSourcesDir(),
|
|
2978
3756
|
options.userSourceDir ?? getAiocsSourcesDir(env),
|
|
2979
|
-
|
|
3757
|
+
...defaultContainerSourceDir ? [defaultContainerSourceDir] : []
|
|
2980
3758
|
]);
|
|
2981
3759
|
const sourceSpecDirs = env.AIOCS_SOURCE_SPEC_DIRS ? uniqueResolvedPaths(
|
|
2982
3760
|
env.AIOCS_SOURCE_SPEC_DIRS.split(",").map((entry) => entry.trim()).filter(Boolean)
|
|
@@ -3023,7 +3801,7 @@ async function bootstrapSourceSpecs(input) {
|
|
|
3023
3801
|
throw new Error(`No source spec files found in configured directories: ${normalizedSourceSpecDirs.join(", ")}`);
|
|
3024
3802
|
}
|
|
3025
3803
|
const removedSourceIds = input.catalog.removeManagedSources({
|
|
3026
|
-
managedRoots: existingDirs.map((sourceSpecDir) =>
|
|
3804
|
+
managedRoots: existingDirs.map((sourceSpecDir) => resolve5(sourceSpecDir)),
|
|
3027
3805
|
activeSources: sources.map((source) => ({
|
|
3028
3806
|
sourceId: source.sourceId,
|
|
3029
3807
|
specPath: source.specPath
|
|
@@ -3036,7 +3814,7 @@ async function bootstrapSourceSpecs(input) {
|
|
|
3036
3814
|
};
|
|
3037
3815
|
}
|
|
3038
3816
|
async function runDaemonCycle(input) {
|
|
3039
|
-
const startedAt =
|
|
3817
|
+
const startedAt = nowIso4();
|
|
3040
3818
|
const bootstrapped = await bootstrapSourceSpecs({
|
|
3041
3819
|
catalog: input.catalog,
|
|
3042
3820
|
sourceSpecDirs: input.sourceSpecDirs,
|
|
@@ -3066,6 +3844,7 @@ async function runDaemonCycle(input) {
|
|
|
3066
3844
|
const result = await runSourceCanary({
|
|
3067
3845
|
catalog: input.catalog,
|
|
3068
3846
|
sourceId,
|
|
3847
|
+
dataDir: input.dataDir,
|
|
3069
3848
|
env: process.env
|
|
3070
3849
|
});
|
|
3071
3850
|
canaried.push({
|
|
@@ -3124,7 +3903,7 @@ async function runDaemonCycle(input) {
|
|
|
3124
3903
|
}
|
|
3125
3904
|
return {
|
|
3126
3905
|
startedAt,
|
|
3127
|
-
finishedAt:
|
|
3906
|
+
finishedAt: nowIso4(),
|
|
3128
3907
|
dueSourceIds,
|
|
3129
3908
|
canaryDueSourceIds,
|
|
3130
3909
|
bootstrapped,
|
|
@@ -3140,7 +3919,7 @@ async function startDaemon(input) {
|
|
|
3140
3919
|
const intervalMs = input.config.intervalMinutes * 6e4;
|
|
3141
3920
|
input.catalog.resetRunningEmbeddingJobs();
|
|
3142
3921
|
input.catalog.markDaemonStarted({
|
|
3143
|
-
startedAt:
|
|
3922
|
+
startedAt: nowIso4(),
|
|
3144
3923
|
intervalMinutes: input.config.intervalMinutes,
|
|
3145
3924
|
fetchOnStart: input.config.fetchOnStart
|
|
3146
3925
|
});
|
|
@@ -3151,7 +3930,7 @@ async function startDaemon(input) {
|
|
|
3151
3930
|
sourceSpecDirs: input.config.sourceSpecDirs
|
|
3152
3931
|
});
|
|
3153
3932
|
const runCycle = async (reason) => {
|
|
3154
|
-
const startedAt =
|
|
3933
|
+
const startedAt = nowIso4();
|
|
3155
3934
|
input.catalog.markDaemonCycleStarted(startedAt);
|
|
3156
3935
|
input.logger.emit({
|
|
3157
3936
|
type: "daemon.cycle.started",
|
|
@@ -3177,7 +3956,7 @@ async function startDaemon(input) {
|
|
|
3177
3956
|
});
|
|
3178
3957
|
} catch (error) {
|
|
3179
3958
|
input.catalog.markDaemonCycleCompleted({
|
|
3180
|
-
completedAt:
|
|
3959
|
+
completedAt: nowIso4(),
|
|
3181
3960
|
status: "failed"
|
|
3182
3961
|
});
|
|
3183
3962
|
throw error;
|
|
@@ -3208,7 +3987,7 @@ async function startDaemon(input) {
|
|
|
3208
3987
|
// package.json
|
|
3209
3988
|
var package_default = {
|
|
3210
3989
|
name: "@bodhi-ventures/aiocs",
|
|
3211
|
-
version: "0.
|
|
3990
|
+
version: "0.2.0",
|
|
3212
3991
|
license: "MIT",
|
|
3213
3992
|
type: "module",
|
|
3214
3993
|
description: "Local-only documentation store, fetcher, and search CLI for AI agents.",
|
|
@@ -3256,28 +4035,28 @@ var package_default = {
|
|
|
3256
4035
|
"test:watch": "vitest"
|
|
3257
4036
|
},
|
|
3258
4037
|
dependencies: {
|
|
3259
|
-
"@modelcontextprotocol/sdk": "
|
|
3260
|
-
"@mozilla/readability": "
|
|
4038
|
+
"@modelcontextprotocol/sdk": "1.28.0",
|
|
4039
|
+
"@mozilla/readability": "0.6.0",
|
|
3261
4040
|
"@qdrant/js-client-rest": "1.17.0",
|
|
3262
|
-
"better-sqlite3": "
|
|
3263
|
-
commander: "
|
|
3264
|
-
jsdom: "
|
|
3265
|
-
playwright: "
|
|
3266
|
-
turndown: "
|
|
3267
|
-
"turndown-plugin-gfm": "
|
|
3268
|
-
yaml: "
|
|
3269
|
-
zod: "
|
|
4041
|
+
"better-sqlite3": "12.4.1",
|
|
4042
|
+
commander: "14.0.1",
|
|
4043
|
+
jsdom: "27.0.1",
|
|
4044
|
+
playwright: "1.57.0",
|
|
4045
|
+
turndown: "7.2.1",
|
|
4046
|
+
"turndown-plugin-gfm": "1.0.2",
|
|
4047
|
+
yaml: "2.8.1",
|
|
4048
|
+
zod: "4.1.12"
|
|
3270
4049
|
},
|
|
3271
4050
|
devDependencies: {
|
|
3272
|
-
"@types/better-sqlite3": "
|
|
3273
|
-
"@types/jsdom": "
|
|
3274
|
-
"@types/node": "
|
|
3275
|
-
"@types/turndown": "
|
|
3276
|
-
execa: "
|
|
3277
|
-
tsup: "
|
|
3278
|
-
tsx: "
|
|
3279
|
-
typescript: "
|
|
3280
|
-
vitest: "
|
|
4051
|
+
"@types/better-sqlite3": "7.6.13",
|
|
4052
|
+
"@types/jsdom": "21.1.7",
|
|
4053
|
+
"@types/node": "24.7.2",
|
|
4054
|
+
"@types/turndown": "5.0.5",
|
|
4055
|
+
execa: "9.6.0",
|
|
4056
|
+
tsup: "8.5.0",
|
|
4057
|
+
tsx: "4.20.6",
|
|
4058
|
+
typescript: "5.9.3",
|
|
4059
|
+
vitest: "3.2.4"
|
|
3281
4060
|
}
|
|
3282
4061
|
};
|
|
3283
4062
|
|
|
@@ -3287,11 +4066,11 @@ var packageVersion = package_default.version;
|
|
|
3287
4066
|
var packageDescription = package_default.description;
|
|
3288
4067
|
|
|
3289
4068
|
// src/services.ts
|
|
3290
|
-
import { resolve as
|
|
4069
|
+
import { resolve as resolve8 } from "path";
|
|
3291
4070
|
|
|
3292
4071
|
// src/backup.ts
|
|
3293
4072
|
import { cp, mkdir, readdir as readdir2, readFile as readFile2, rename, rm, stat, writeFile } from "fs/promises";
|
|
3294
|
-
import { basename, dirname as
|
|
4073
|
+
import { basename, dirname as dirname3, join as join7, resolve as resolve6 } from "path";
|
|
3295
4074
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
3296
4075
|
import Database2 from "better-sqlite3";
|
|
3297
4076
|
var CATALOG_DB_FILENAME = "catalog.sqlite";
|
|
@@ -3319,7 +4098,7 @@ async function isDirectoryEmpty(path) {
|
|
|
3319
4098
|
return (await readdir2(path)).length === 0;
|
|
3320
4099
|
}
|
|
3321
4100
|
async function listEntries(root, relativePath = "") {
|
|
3322
|
-
const absolutePath = relativePath ?
|
|
4101
|
+
const absolutePath = relativePath ? join7(root, relativePath) : root;
|
|
3323
4102
|
const stats = await stat(absolutePath);
|
|
3324
4103
|
if (!stats.isDirectory()) {
|
|
3325
4104
|
return [{
|
|
@@ -3335,7 +4114,7 @@ async function listEntries(root, relativePath = "") {
|
|
|
3335
4114
|
size: 0
|
|
3336
4115
|
}] : [];
|
|
3337
4116
|
for (const childName of childNames.sort()) {
|
|
3338
|
-
entries.push(...await listEntries(root, relativePath ?
|
|
4117
|
+
entries.push(...await listEntries(root, relativePath ? join7(relativePath, childName) : childName));
|
|
3339
4118
|
}
|
|
3340
4119
|
return entries;
|
|
3341
4120
|
}
|
|
@@ -3349,12 +4128,12 @@ async function copyIfPresent(from, to, entries, relativePrefix) {
|
|
|
3349
4128
|
entries.push(
|
|
3350
4129
|
...copiedEntries.map((entry) => ({
|
|
3351
4130
|
...entry,
|
|
3352
|
-
relativePath:
|
|
4131
|
+
relativePath: join7(relativePrefix, entry.relativePath)
|
|
3353
4132
|
}))
|
|
3354
4133
|
);
|
|
3355
4134
|
}
|
|
3356
4135
|
async function copyDataDirForBackup(from, to) {
|
|
3357
|
-
const sourceCatalogPath =
|
|
4136
|
+
const sourceCatalogPath = join7(from, CATALOG_DB_FILENAME);
|
|
3358
4137
|
if (!await pathExists2(sourceCatalogPath)) {
|
|
3359
4138
|
throw new AiocsError(
|
|
3360
4139
|
AIOCS_ERROR_CODES.backupSourceMissing,
|
|
@@ -3370,10 +4149,13 @@ async function copyDataDirForBackup(from, to) {
|
|
|
3370
4149
|
if (name === CATALOG_DB_FILENAME) {
|
|
3371
4150
|
return false;
|
|
3372
4151
|
}
|
|
4152
|
+
if (name === "git-mirrors") {
|
|
4153
|
+
return false;
|
|
4154
|
+
}
|
|
3373
4155
|
return !SQLITE_SIDE_CAR_SUFFIXES.some((suffix) => name === `${CATALOG_DB_FILENAME}${suffix}`);
|
|
3374
4156
|
}
|
|
3375
4157
|
});
|
|
3376
|
-
const targetCatalogPath =
|
|
4158
|
+
const targetCatalogPath = join7(to, CATALOG_DB_FILENAME);
|
|
3377
4159
|
const sourceCatalog = new Database2(sourceCatalogPath, { readonly: true });
|
|
3378
4160
|
try {
|
|
3379
4161
|
await sourceCatalog.backup(targetCatalogPath);
|
|
@@ -3382,7 +4164,7 @@ async function copyDataDirForBackup(from, to) {
|
|
|
3382
4164
|
}
|
|
3383
4165
|
}
|
|
3384
4166
|
async function loadValidatedBackupPayload(inputDir) {
|
|
3385
|
-
const manifestPath =
|
|
4167
|
+
const manifestPath = join7(inputDir, "manifest.json");
|
|
3386
4168
|
await assertSourceDirExists(inputDir);
|
|
3387
4169
|
if (!await pathExists2(manifestPath)) {
|
|
3388
4170
|
throw new AiocsError(
|
|
@@ -3397,21 +4179,21 @@ async function loadValidatedBackupPayload(inputDir) {
|
|
|
3397
4179
|
`Invalid backup manifest: ${manifestPath}`
|
|
3398
4180
|
);
|
|
3399
4181
|
}
|
|
3400
|
-
const backupDataDir =
|
|
4182
|
+
const backupDataDir = join7(inputDir, "data");
|
|
3401
4183
|
if (!await pathExists2(backupDataDir)) {
|
|
3402
4184
|
throw new AiocsError(
|
|
3403
4185
|
AIOCS_ERROR_CODES.backupInvalid,
|
|
3404
4186
|
`Backup payload is missing the data directory: ${backupDataDir}`
|
|
3405
4187
|
);
|
|
3406
4188
|
}
|
|
3407
|
-
const backupCatalogPath =
|
|
4189
|
+
const backupCatalogPath = join7(backupDataDir, CATALOG_DB_FILENAME);
|
|
3408
4190
|
if (!await pathExists2(backupCatalogPath)) {
|
|
3409
4191
|
throw new AiocsError(
|
|
3410
4192
|
AIOCS_ERROR_CODES.backupInvalid,
|
|
3411
4193
|
`Backup payload is missing the catalog database: ${backupCatalogPath}`
|
|
3412
4194
|
);
|
|
3413
4195
|
}
|
|
3414
|
-
const backupConfigDir =
|
|
4196
|
+
const backupConfigDir = join7(inputDir, "config");
|
|
3415
4197
|
return {
|
|
3416
4198
|
manifest,
|
|
3417
4199
|
backupDataDir,
|
|
@@ -3419,17 +4201,17 @@ async function loadValidatedBackupPayload(inputDir) {
|
|
|
3419
4201
|
};
|
|
3420
4202
|
}
|
|
3421
4203
|
async function prepareReplacementTarget(backupDir, targetDir) {
|
|
3422
|
-
const parentDir =
|
|
3423
|
-
const stagingDir =
|
|
4204
|
+
const parentDir = dirname3(targetDir);
|
|
4205
|
+
const stagingDir = join7(parentDir, `.${basename(targetDir)}.import-${randomUUID2()}`);
|
|
3424
4206
|
await rm(stagingDir, { recursive: true, force: true });
|
|
3425
4207
|
await mkdir(parentDir, { recursive: true });
|
|
3426
4208
|
await cp(backupDir, stagingDir, { recursive: true, force: true });
|
|
3427
4209
|
return stagingDir;
|
|
3428
4210
|
}
|
|
3429
4211
|
async function exportBackup(input) {
|
|
3430
|
-
const dataDir =
|
|
3431
|
-
const outputDir =
|
|
3432
|
-
const configDir = input.configDir ?
|
|
4212
|
+
const dataDir = resolve6(input.dataDir);
|
|
4213
|
+
const outputDir = resolve6(input.outputDir);
|
|
4214
|
+
const configDir = input.configDir ? resolve6(input.configDir) : void 0;
|
|
3433
4215
|
await assertSourceDirExists(dataDir);
|
|
3434
4216
|
if (!await isDirectoryEmpty(outputDir)) {
|
|
3435
4217
|
if (!input.replaceExisting) {
|
|
@@ -3442,13 +4224,13 @@ async function exportBackup(input) {
|
|
|
3442
4224
|
}
|
|
3443
4225
|
await mkdir(outputDir, { recursive: true });
|
|
3444
4226
|
const entries = [];
|
|
3445
|
-
await copyDataDirForBackup(dataDir,
|
|
3446
|
-
entries.push(...(await listEntries(
|
|
4227
|
+
await copyDataDirForBackup(dataDir, join7(outputDir, "data"));
|
|
4228
|
+
entries.push(...(await listEntries(join7(outputDir, "data"))).map((entry) => ({
|
|
3447
4229
|
...entry,
|
|
3448
|
-
relativePath:
|
|
4230
|
+
relativePath: join7("data", entry.relativePath)
|
|
3449
4231
|
})));
|
|
3450
4232
|
if (configDir) {
|
|
3451
|
-
await copyIfPresent(configDir,
|
|
4233
|
+
await copyIfPresent(configDir, join7(outputDir, "config"), entries, "config");
|
|
3452
4234
|
}
|
|
3453
4235
|
const manifest = {
|
|
3454
4236
|
formatVersion: 1,
|
|
@@ -3456,7 +4238,7 @@ async function exportBackup(input) {
|
|
|
3456
4238
|
packageVersion,
|
|
3457
4239
|
entries
|
|
3458
4240
|
};
|
|
3459
|
-
const manifestPath =
|
|
4241
|
+
const manifestPath = join7(outputDir, "manifest.json");
|
|
3460
4242
|
await writeFile(manifestPath, JSON.stringify(manifest, null, 2), "utf8");
|
|
3461
4243
|
return {
|
|
3462
4244
|
outputDir,
|
|
@@ -3465,9 +4247,9 @@ async function exportBackup(input) {
|
|
|
3465
4247
|
};
|
|
3466
4248
|
}
|
|
3467
4249
|
async function importBackup(input) {
|
|
3468
|
-
const inputDir =
|
|
3469
|
-
const dataDir =
|
|
3470
|
-
const configDir = input.configDir ?
|
|
4250
|
+
const inputDir = resolve6(input.inputDir);
|
|
4251
|
+
const dataDir = resolve6(input.dataDir);
|
|
4252
|
+
const configDir = input.configDir ? resolve6(input.configDir) : void 0;
|
|
3471
4253
|
const { manifest, backupDataDir, backupConfigDir } = await loadValidatedBackupPayload(inputDir);
|
|
3472
4254
|
if (!await isDirectoryEmpty(dataDir)) {
|
|
3473
4255
|
if (!input.replaceExisting) {
|
|
@@ -3511,7 +4293,7 @@ async function importBackup(input) {
|
|
|
3511
4293
|
|
|
3512
4294
|
// src/coverage.ts
|
|
3513
4295
|
import { readFile as readFile3 } from "fs/promises";
|
|
3514
|
-
import { resolve as
|
|
4296
|
+
import { resolve as resolve7 } from "path";
|
|
3515
4297
|
function normalizeText(value) {
|
|
3516
4298
|
return value.replace(/[`*_~]+/g, "").replace(/\s+/g, " ").trim().toLowerCase();
|
|
3517
4299
|
}
|
|
@@ -3560,7 +4342,7 @@ async function verifyCoverageAgainstReferences(corpus, referenceFiles) {
|
|
|
3560
4342
|
body: 0
|
|
3561
4343
|
};
|
|
3562
4344
|
for (const referenceFile of referenceFiles) {
|
|
3563
|
-
const resolvedReferenceFile =
|
|
4345
|
+
const resolvedReferenceFile = resolve7(referenceFile);
|
|
3564
4346
|
let raw;
|
|
3565
4347
|
try {
|
|
3566
4348
|
raw = await readFile3(resolvedReferenceFile, "utf8");
|
|
@@ -3632,9 +4414,9 @@ async function verifyCoverageAgainstReferences(corpus, referenceFiles) {
|
|
|
3632
4414
|
|
|
3633
4415
|
// src/doctor.ts
|
|
3634
4416
|
import { access as access2 } from "fs/promises";
|
|
3635
|
-
import { execFile } from "child_process";
|
|
3636
|
-
import { promisify } from "util";
|
|
3637
|
-
var
|
|
4417
|
+
import { execFile as execFile2 } from "child_process";
|
|
4418
|
+
import { promisify as promisify2 } from "util";
|
|
4419
|
+
var execFileAsync2 = promisify2(execFile2);
|
|
3638
4420
|
function summarize(checks) {
|
|
3639
4421
|
const passCount = checks.filter((check) => check.status === "pass").length;
|
|
3640
4422
|
const warnCount = checks.filter((check) => check.status === "warn").length;
|
|
@@ -3721,6 +4503,25 @@ async function checkPlaywright() {
|
|
|
3721
4503
|
};
|
|
3722
4504
|
}
|
|
3723
4505
|
}
|
|
4506
|
+
async function checkGit() {
|
|
4507
|
+
try {
|
|
4508
|
+
const { stdout } = await execFileAsync2("git", ["--version"]);
|
|
4509
|
+
return {
|
|
4510
|
+
id: "git",
|
|
4511
|
+
status: "pass",
|
|
4512
|
+
summary: "Git executable is available.",
|
|
4513
|
+
details: {
|
|
4514
|
+
version: stdout.trim()
|
|
4515
|
+
}
|
|
4516
|
+
};
|
|
4517
|
+
} catch (error) {
|
|
4518
|
+
return {
|
|
4519
|
+
id: "git",
|
|
4520
|
+
status: "fail",
|
|
4521
|
+
summary: `Git is not ready: ${toErrorMessage(error)}`
|
|
4522
|
+
};
|
|
4523
|
+
}
|
|
4524
|
+
}
|
|
3724
4525
|
async function checkDaemonConfig(env) {
|
|
3725
4526
|
try {
|
|
3726
4527
|
const daemonConfig = parseDaemonConfig(env, {
|
|
@@ -3970,7 +4771,7 @@ async function checkEmbeddings(env) {
|
|
|
3970
4771
|
}
|
|
3971
4772
|
async function checkDocker() {
|
|
3972
4773
|
try {
|
|
3973
|
-
const { stdout } = await
|
|
4774
|
+
const { stdout } = await execFileAsync2("docker", ["info", "--format", "{{json .ServerVersion}}"]);
|
|
3974
4775
|
const version = JSON.parse(stdout.trim());
|
|
3975
4776
|
return {
|
|
3976
4777
|
id: "docker",
|
|
@@ -3986,7 +4787,7 @@ async function checkDocker() {
|
|
|
3986
4787
|
return {
|
|
3987
4788
|
id: "docker",
|
|
3988
4789
|
status: "warn",
|
|
3989
|
-
summary: "Docker CLI is not installed; Docker-based daemon deployment is unavailable
|
|
4790
|
+
summary: "Docker CLI is not installed; Docker-based daemon deployment is unavailable in this environment."
|
|
3990
4791
|
};
|
|
3991
4792
|
}
|
|
3992
4793
|
return {
|
|
@@ -3998,6 +4799,7 @@ async function checkDocker() {
|
|
|
3998
4799
|
}
|
|
3999
4800
|
async function runDoctor(env = process.env) {
|
|
4000
4801
|
const catalogCheck = await checkCatalog(env);
|
|
4802
|
+
const gitCheck = await checkGit();
|
|
4001
4803
|
const playwrightCheck = await checkPlaywright();
|
|
4002
4804
|
const { daemonConfigCheck, daemonConfig } = await checkDaemonConfig(env);
|
|
4003
4805
|
const sourceSpecDirsCheck = await checkSourceSpecDirs(daemonConfig);
|
|
@@ -4009,6 +4811,7 @@ async function runDoctor(env = process.env) {
|
|
|
4009
4811
|
const dockerCheck = await checkDocker();
|
|
4010
4812
|
const checks = [
|
|
4011
4813
|
catalogCheck,
|
|
4814
|
+
gitCheck,
|
|
4012
4815
|
playwrightCheck,
|
|
4013
4816
|
daemonConfigCheck,
|
|
4014
4817
|
sourceSpecDirsCheck,
|
|
@@ -4063,6 +4866,19 @@ function withScores(rows, scoreLookup) {
|
|
|
4063
4866
|
};
|
|
4064
4867
|
});
|
|
4065
4868
|
}
|
|
4869
|
+
function matchesChunkFilters(row, filters) {
|
|
4870
|
+
if (filters.pathPatterns && filters.pathPatterns.length > 0) {
|
|
4871
|
+
if (!row.filePath || !matchesPatterns(row.filePath, filters.pathPatterns)) {
|
|
4872
|
+
return false;
|
|
4873
|
+
}
|
|
4874
|
+
}
|
|
4875
|
+
if (filters.languages && filters.languages.length > 0) {
|
|
4876
|
+
if (!row.language || !filters.languages.includes(row.language.toLowerCase())) {
|
|
4877
|
+
return false;
|
|
4878
|
+
}
|
|
4879
|
+
}
|
|
4880
|
+
return true;
|
|
4881
|
+
}
|
|
4066
4882
|
async function searchHybridCatalog(input) {
|
|
4067
4883
|
const scope = input.catalog.resolveSearchScope({
|
|
4068
4884
|
query: input.query,
|
|
@@ -4070,6 +4886,8 @@ async function searchHybridCatalog(input) {
|
|
|
4070
4886
|
...input.searchInput.sourceIds ? { sourceIds: input.searchInput.sourceIds } : {},
|
|
4071
4887
|
...input.searchInput.snapshotId ? { snapshotId: input.searchInput.snapshotId } : {},
|
|
4072
4888
|
...input.searchInput.all ? { all: true } : {},
|
|
4889
|
+
...input.searchInput.pathPatterns ? { pathPatterns: input.searchInput.pathPatterns } : {},
|
|
4890
|
+
...input.searchInput.languages ? { languages: input.searchInput.languages } : {},
|
|
4073
4891
|
...typeof input.searchInput.limit === "number" ? { limit: input.searchInput.limit } : {},
|
|
4074
4892
|
...typeof input.searchInput.offset === "number" ? { offset: input.searchInput.offset } : {}
|
|
4075
4893
|
});
|
|
@@ -4132,13 +4950,25 @@ async function searchHybridCatalog(input) {
|
|
|
4132
4950
|
);
|
|
4133
4951
|
}
|
|
4134
4952
|
const vectorStore = new AiocsVectorStore(input.config);
|
|
4135
|
-
|
|
4953
|
+
const rawVectorCandidates = await vectorStore.search({
|
|
4136
4954
|
vector: queryVector,
|
|
4137
4955
|
snapshotIds: scope.snapshotIds,
|
|
4138
4956
|
sourceIds: scope.sourceIds,
|
|
4139
4957
|
modelKey,
|
|
4140
4958
|
limit: windowSize(scope.limit, scope.offset, input.config.vectorCandidateWindow)
|
|
4141
4959
|
});
|
|
4960
|
+
if (rawVectorCandidates.length > 0 && (scope.pathPatterns || scope.languages)) {
|
|
4961
|
+
const candidateRows = input.catalog.getChunksByIds(rawVectorCandidates.map((candidate) => candidate.chunkId));
|
|
4962
|
+
const allowedIds = new Set(
|
|
4963
|
+
candidateRows.filter((row) => matchesChunkFilters(row, {
|
|
4964
|
+
pathPatterns: scope.pathPatterns,
|
|
4965
|
+
languages: scope.languages
|
|
4966
|
+
})).map((row) => row.chunkId)
|
|
4967
|
+
);
|
|
4968
|
+
vectorCandidates = rawVectorCandidates.filter((candidate) => allowedIds.has(candidate.chunkId));
|
|
4969
|
+
} else {
|
|
4970
|
+
vectorCandidates = rawVectorCandidates;
|
|
4971
|
+
}
|
|
4142
4972
|
} catch (error) {
|
|
4143
4973
|
if (input.mode === "auto") {
|
|
4144
4974
|
return lexicalOnly();
|
|
@@ -4226,7 +5056,7 @@ function withCatalog(run) {
|
|
|
4226
5056
|
return Promise.resolve(run(ctx)).finally(() => ctx.catalog.close());
|
|
4227
5057
|
}
|
|
4228
5058
|
async function upsertSourceFromSpecFile(specFile) {
|
|
4229
|
-
const specPath =
|
|
5059
|
+
const specPath = resolve8(specFile);
|
|
4230
5060
|
const spec = await loadSourceSpec(specPath);
|
|
4231
5061
|
const result = await withCatalog(({ catalog }) => catalog.upsertSource(spec, { specPath }));
|
|
4232
5062
|
return {
|
|
@@ -4294,7 +5124,7 @@ async function refreshDueSources(sourceIdOrAll = "all") {
|
|
|
4294
5124
|
return { results };
|
|
4295
5125
|
}
|
|
4296
5126
|
async function runSourceCanaries(sourceIdOrAll) {
|
|
4297
|
-
const results = await withCatalog(async ({ catalog }) => {
|
|
5127
|
+
const results = await withCatalog(async ({ catalog, dataDir }) => {
|
|
4298
5128
|
const sourceIds = sourceIdOrAll === "all" ? catalog.listSources().map((item) => item.id) : [sourceIdOrAll];
|
|
4299
5129
|
if (sourceIds.length === 0) {
|
|
4300
5130
|
return [];
|
|
@@ -4304,6 +5134,7 @@ async function runSourceCanaries(sourceIdOrAll) {
|
|
|
4304
5134
|
canaried.push(await runSourceCanary({
|
|
4305
5135
|
catalog,
|
|
4306
5136
|
sourceId,
|
|
5137
|
+
dataDir,
|
|
4307
5138
|
env: process.env
|
|
4308
5139
|
}));
|
|
4309
5140
|
}
|
|
@@ -4322,7 +5153,7 @@ async function diffSnapshotsForSource(input) {
|
|
|
4322
5153
|
return withCatalog(({ catalog }) => catalog.diffSnapshots(input));
|
|
4323
5154
|
}
|
|
4324
5155
|
async function linkProjectSources(projectPath, sourceIds) {
|
|
4325
|
-
const resolvedProjectPath =
|
|
5156
|
+
const resolvedProjectPath = resolve8(projectPath);
|
|
4326
5157
|
await withCatalog(({ catalog }) => {
|
|
4327
5158
|
catalog.linkProject(resolvedProjectPath, sourceIds);
|
|
4328
5159
|
});
|
|
@@ -4332,7 +5163,7 @@ async function linkProjectSources(projectPath, sourceIds) {
|
|
|
4332
5163
|
};
|
|
4333
5164
|
}
|
|
4334
5165
|
async function unlinkProjectSources(projectPath, sourceIds) {
|
|
4335
|
-
const resolvedProjectPath =
|
|
5166
|
+
const resolvedProjectPath = resolve8(projectPath);
|
|
4336
5167
|
await withCatalog(({ catalog }) => {
|
|
4337
5168
|
catalog.unlinkProject(resolvedProjectPath, sourceIds);
|
|
4338
5169
|
});
|
|
@@ -4342,7 +5173,7 @@ async function unlinkProjectSources(projectPath, sourceIds) {
|
|
|
4342
5173
|
};
|
|
4343
5174
|
}
|
|
4344
5175
|
async function searchCatalog(query, options) {
|
|
4345
|
-
const cwd = options.project ?
|
|
5176
|
+
const cwd = options.project ? resolve8(options.project) : process.cwd();
|
|
4346
5177
|
const explicitSources = options.source.length > 0;
|
|
4347
5178
|
const results = await withCatalog(({ catalog }) => {
|
|
4348
5179
|
const hybridConfig = getHybridRuntimeConfig();
|
|
@@ -4363,6 +5194,8 @@ async function searchCatalog(query, options) {
|
|
|
4363
5194
|
...explicitSources ? { sourceIds: options.source } : {},
|
|
4364
5195
|
...options.snapshot ? { snapshotId: options.snapshot } : {},
|
|
4365
5196
|
...options.all ? { all: true } : {},
|
|
5197
|
+
...options.path && options.path.length > 0 ? { pathPatterns: options.path } : {},
|
|
5198
|
+
...options.language && options.language.length > 0 ? { languages: options.language } : {},
|
|
4366
5199
|
...typeof options.limit === "number" ? { limit: options.limit } : {},
|
|
4367
5200
|
...typeof options.offset === "number" ? { offset: options.offset } : {}
|
|
4368
5201
|
}
|
|
@@ -4508,9 +5341,9 @@ export {
|
|
|
4508
5341
|
AIOCS_ERROR_CODES,
|
|
4509
5342
|
AiocsError,
|
|
4510
5343
|
toAiocsError,
|
|
4511
|
-
openCatalog,
|
|
4512
5344
|
getAiocsDataDir,
|
|
4513
5345
|
getAiocsConfigDir,
|
|
5346
|
+
openCatalog,
|
|
4514
5347
|
parseDaemonConfig,
|
|
4515
5348
|
startDaemon,
|
|
4516
5349
|
packageName,
|