@meyverick/omnicode 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -35
- package/package.json +6 -3
- package/src/bin/omnicode-runtime.js +66 -20
- package/src/bin/omnicode.js +59 -8
- package/src/installer/AGENTS.template.md +13 -0
- package/src/installer/lib.js +917 -22
- package/src/installer/mineru-client.js +164 -0
- package/src/installer/tree-sitter.js +270 -0
package/src/installer/lib.js
CHANGED
|
@@ -1,11 +1,24 @@
|
|
|
1
|
-
import { execFileSync, execFile } from "node:child_process";
|
|
1
|
+
import { spawn, execFileSync, execFile } from "node:child_process";
|
|
2
2
|
import { promisify } from "node:util";
|
|
3
|
-
import { existsSync } from "node:fs";
|
|
4
|
-
import { join } from "node:path";
|
|
3
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, statSync, rmSync, unlinkSync, promises as fsPromises } from "node:fs";
|
|
4
|
+
import { join, basename, dirname, extname } from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
5
6
|
import os from "node:os";
|
|
7
|
+
import { randomUUID } from "node:crypto";
|
|
8
|
+
import { processComplexDocument } from "./mineru-client.js";
|
|
9
|
+
import { chunkWithTreeSitter } from "./tree-sitter.js";
|
|
6
10
|
|
|
7
11
|
// Promise-returning variant of execFile for async call sites.
const execFileAsync = promisify(execFile);
// Directory containing this module (ES modules have no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));
const isWindows = process.platform === "win32";
// Embedding model name handed to the MCP server as EMBEDDING_MODEL.
const FASTEMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5";
// Sub-directory of the FastEmbed cache that is searched for model snapshots.
const FASTEMBED_MODEL_CACHE_DIR = "models--qdrant--bge-small-en-v1.5-onnx-q";
// Model files smaller than this (40 MiB) are treated as corrupted or incomplete.
const FASTEMBED_MIN_MODEL_SIZE = 40 * 1024 * 1024;
// Default worker/thread count: a quarter of the logical CPUs, never less than 1.
const DEFAULT_INDEX_CONCURRENCY = Math.max(1, Math.floor(os.cpus().length * 0.25));
// Python one-liner that embeds a single passage with the model.
// NOTE(review): not referenced in the visible part of this file — confirm usage.
const FASTEMBED_WARMUP_SCRIPT = `from fastembed import TextEmbedding; list(TextEmbedding('${FASTEMBED_MODEL_NAME}').passage_embed(['warmup']))`;
// Markers delimiting the managed block inside AGENTS.md. The begin marker has
// no closing "-->" — presumably so begin tags carrying extra text still match;
// confirm against AGENTS.template.md.
const QDRANT_INSTRUCTIONS_BEGIN = "<!-- qdrant:instructions:begin";
const QDRANT_INSTRUCTIONS_END = "<!-- qdrant:instructions:end -->";
// Template read once at import time, trimmed and terminated by one newline.
const QDRANT_AGENTS_TEMPLATE = readFileSync(join(__dirname, "AGENTS.template.md"), "utf8").trim() + "\n";
|
|
9
22
|
|
|
10
23
|
export function commandExists(command) {
|
|
11
24
|
const tool = isWindows ? "where" : "which";
|
|
@@ -17,56 +30,270 @@ export function commandExists(command) {
|
|
|
17
30
|
}
|
|
18
31
|
}
|
|
19
32
|
|
|
20
|
-
export function
|
|
33
|
+
/**
 * Counts the running processes that match `name`.
 *
 * On Windows, `tasklist` is queried once per executable extension and the
 * rows mentioning the image name are counted. Elsewhere `pgrep -f` is used
 * and every PID it prints counts as one process.
 *
 * @param {string} name - Process name (Windows) or command-line pattern (pgrep).
 * @returns {number} Number of matches; 0 when the lookup tool fails or finds nothing.
 */
export function countProcesses(name) {
  const quietUtf8 = { stdio: ["ignore", "pipe", "ignore"], encoding: "utf8" };
  try {
    if (isWindows) {
      return [".exe", ".cmd", ".bat"].reduce((total, ext) => {
        const image = `${name}${ext}`;
        const listing = execFileSync("tasklist", ["/FI", `IMAGENAME eq ${image}`, "/NH"], quietUtf8);
        const hits = listing.trim().split("\n").filter((row) => row.includes(image));
        return total + hits.length;
      }, 0);
    }
    const pidList = execFileSync("pgrep", ["-f", name], quietUtf8);
    return pidList.trim().split("\n").filter(Boolean).length;
  } catch {
    // pgrep exits non-zero when nothing matches; any failure means "none".
    return 0;
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Tells whether at least one process matching `name` is running.
 *
 * @param {string} name - Name/pattern forwarded to countProcesses.
 * @returns {boolean} True when countProcesses reports one or more matches.
 */
export function isProcessRunning(name) {
  const matches = countProcesses(name);
  return matches > 0;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Promise-returning wrapper around isProcessRunning.
 *
 * The lookup itself is still performed synchronously; only the return value
 * is wrapped in a promise.
 *
 * @param {string} name - Name/pattern forwarded to isProcessRunning.
 * @returns {Promise<boolean>} Resolves to whether a matching process exists.
 */
export async function isProcessRunningAsync(name) {
  const running = isProcessRunning(name);
  return running;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Checks whether a process with the given PID currently exists.
 *
 * Signal 0 performs the existence/permission check without delivering a
 * signal. Only strictly positive integer PIDs are probed: 0 and negative
 * values address process groups, so they would report "alive" for a PID that
 * was never valid (e.g. from a corrupt pid file).
 *
 * @param {number|string} pid - Process id; numeric strings are accepted.
 * @returns {boolean} True when the process exists (even if owned by another
 *   user), false otherwise.
 */
export function isPidAlive(pid) {
  const numericPid = typeof pid === "string" ? Number(pid) : pid;
  if (!Number.isInteger(numericPid) || numericPid <= 0) return false;
  try {
    process.kill(numericPid, 0);
    return true;
  } catch (err) {
    // EPERM means the process exists but we may not signal it: still alive.
    return err?.code === "EPERM";
  }
}
|
|
39
74
|
|
|
40
|
-
export
|
|
75
|
+
/**
 * Counts how many Qdrant instances appear to be running.
 *
 * Two independent signals are checked and each contributes at most one:
 * the PID recorded in the qdrant pid file, and the `omnicode-qdrant` Docker
 * container (only when the `docker` command is available).
 *
 * @returns {number} 0, 1 or 2.
 */
export function getQdrantRunningCount() {
  const pidFile = getQdrantPidFile();

  let pidAlive = false;
  try {
    const recorded = readFileSync(pidFile, "utf8").trim();
    if (isPidAlive(parseInt(recorded, 10))) pidAlive = true;
  } catch {}

  // Fallback/Check: Check if the Docker container is running
  let containerUp = false;
  if (commandExists("docker")) {
    try {
      const state = execFileSync("docker", ["inspect", "-f", "{{.State.Running}}", "omnicode-qdrant"], {
        stdio: ["ignore", "pipe", "ignore"],
        encoding: "utf8"
      }).trim();
      if (state === "true") containerUp = true;
    } catch {}
  }

  return (pidAlive ? 1 : 0) + (containerUp ? 1 : 0);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Best-effort lookup of a process's working directory on POSIX systems.
 * Tries `readlink -e /proc/<pid>/cwd`, then `pwdx`, then `lsof`.
 *
 * @param {string} pid - Process id as printed by pgrep.
 * @returns {string|null} Working directory, or null when it cannot be resolved.
 */
function resolveProcessCwd(pid) {
  // stderr is ignored so failures of these helpers do not leak to the terminal.
  const quietUtf8 = { stdio: ["ignore", "pipe", "ignore"], encoding: "utf8" };

  try {
    const resolved = execFileSync("readlink", ["-e", `/proc/${pid}/cwd`], quietUtf8).trim();
    if (resolved) return resolved;
  } catch {}

  if (commandExists("pwdx")) {
    try {
      // Output looks like "<pid>: <dir>\n"; trim first because `$` without
      // the `m` flag only matches at the very end of the input.
      const match = execFileSync("pwdx", [pid], quietUtf8).trim().match(/:\s*(.+)$/);
      const dir = match?.[1].trim();
      if (dir) return dir;
    } catch {}
  }

  if (commandExists("lsof")) {
    try {
      // `-F n` output also carries `p<pid>` and `f<fd>` lines; pick the `n` field.
      const nameField = execFileSync("lsof", ["-p", pid, "-a", "-d", "cwd", "-F", "n"], quietUtf8)
        .split("\n")
        .find((line) => line.startsWith("n"));
      const dir = nameField?.slice(1).trim();
      if (dir) return dir;
    } catch {}
  }

  return null;
}

/**
 * Counts workspaces that currently hold an indexing lock.
 *
 * Candidate workspaces are the working directories of running `opencode`
 * processes (POSIX), or the current directory when an opencode process is
 * seen on Windows. If no candidate is found the current directory is checked.
 * A workspace counts when `<cwd>/.qdrant/.indexing` exists.
 *
 * @returns {number} Number of distinct workspaces with an `.indexing` marker.
 */
export function countActiveIndexers() {
  const quietUtf8 = { stdio: ["ignore", "pipe", "ignore"], encoding: "utf8" };
  const cwds = new Set();

  if (isWindows) {
    try {
      const out = execFileSync("wmic", ["process", "where", "name='node.exe' or name='bun.exe'", "get", "CommandLine"], quietUtf8);
      // Resolving another process's cwd is not attempted on Windows; fall back
      // to the current directory when an opencode process is present.
      if (out.includes("opencode") && existsSync(join(process.cwd(), ".qdrant", ".indexing"))) {
        cwds.add(process.cwd());
      }
    } catch {}
  } else {
    try {
      const pids = execFileSync("pgrep", ["-f", "opencode"], quietUtf8).trim().split("\n").filter(Boolean);
      for (const pid of pids) {
        try {
          const cwd = resolveProcessCwd(pid);
          if (cwd) cwds.add(cwd);
        } catch {}
      }
    } catch {}
  }

  // If no processes found or resolution failed, at least check the current workspace
  if (cwds.size === 0) {
    cwds.add(process.cwd());
  }

  let activeIndexers = 0;
  for (const cwd of cwds) {
    if (existsSync(join(cwd, ".qdrant", ".indexing"))) activeIndexers++;
  }
  return activeIndexers;
}
|
|
60
171
|
|
|
61
|
-
|
|
172
|
+
|
|
173
|
+
/**
 * Detects whether the Qdrant MCP server can be launched through `uvx`.
 *
 * @returns {boolean} True when `uvx` exists and `uvx mcp-server-qdrant --help`
 *   exits successfully; false otherwise.
 */
export function detectQdrantMcp() {
  let available = false;
  if (commandExists("uvx")) {
    try {
      execFileSync("uvx", ["mcp-server-qdrant", "--help"], { stdio: "ignore" });
      available = true;
    } catch {
      available = false;
    }
  }
  return available;
}
|
|
69
182
|
|
|
183
|
+
/**
 * Returns the Qdrant collection name of the current workspace, creating and
 * persisting one when none exists yet.
 *
 * The id lives in `<cwd>/.qdrant/id`. A legacy layout where `.qdrant` is a
 * plain file holding the id is migrated to the directory layout. When no
 * non-empty id can be read, a fresh `references-<uuid>` id is generated and
 * written (write failures are ignored).
 *
 * @returns {string} The stored or newly generated collection name.
 */
export function resolveCollectionName() {
  const qdrantDir = join(process.cwd(), ".qdrant");
  const idFile = join(qdrantDir, "id");

  if (!existsSync(qdrantDir)) {
    try { mkdirSync(qdrantDir, { recursive: true }); } catch {}
  } else {
    const info = statSync(qdrantDir);
    if (info.isFile()) {
      // Legacy layout: `.qdrant` is itself the id file.
      try {
        const legacyId = readFileSync(qdrantDir, "utf8").trim();
        unlinkSync(qdrantDir);
        mkdirSync(qdrantDir, { recursive: true });
        writeFileSync(idFile, legacyId, "utf8");
        if (legacyId) return legacyId;
      } catch (e) {
        console.warn("[omnicode] warning: failed to migrate .qdrant file to directory:", e.message);
      }
    } else if (info.isDirectory() && existsSync(idFile)) {
      try {
        const storedId = readFileSync(idFile, "utf8").trim();
        if (storedId) return storedId;
      } catch {}
    }
  }

  const newId = `references-${randomUUID()}`;
  try { writeFileSync(idFile, newId, "utf8"); } catch {}
  return newId;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Builds the `mcp.qdrant` entry for the opencode configuration.
 *
 * Concurrency is resolved from the INDEXING_CONCURRENCY environment variable,
 * then from an existing `opencode.jsonc`, then from DEFAULT_INDEX_CONCURRENCY.
 * Only positive integers are accepted, because the value is embedded in a
 * shell command line and used as a thread count.
 *
 * The launch command sets the environment inline (`sh -c` / `cmd.exe /c`).
 * Values are quoted only when they contain characters the shell would
 * interpret (e.g. a cache path with spaces), so typical values produce the
 * same command text as before.
 *
 * @returns {{type: string, enabled: boolean, disabled: boolean, command: string[], env: Object<string, string>}}
 */
export function generateQdrantConfig() {
  const collectionName = resolveCollectionName();
  const cacheDir = getFastEmbedCacheDir();

  const isPositiveInt = (value) => /^[1-9]\d*$/.test(String(value ?? "").trim());

  let concurrency = process.env.INDEXING_CONCURRENCY;
  if (!isPositiveInt(concurrency)) {
    concurrency = undefined;
    try {
      const configPath = join(process.cwd(), "opencode.jsonc");
      if (existsSync(configPath)) {
        const existing = JSON.parse(readFileSync(configPath, "utf8"));
        const fromConfig = existing?.mcp?.qdrant?.env?.INDEXING_CONCURRENCY;
        if (isPositiveInt(fromConfig)) concurrency = fromConfig;
      }
    } catch {}
  }
  const resolvedConcurrency = concurrency !== undefined
    ? String(concurrency).trim()
    : String(DEFAULT_INDEX_CONCURRENCY);

  // Order matters only for readability of the generated command line.
  const settings = [
    ["QDRANT_URL", "http://localhost:6333"],
    ["COLLECTION_NAME", collectionName],
    ["EMBEDDING_MODEL", FASTEMBED_MODEL_NAME],
    ["FASTEMBED_CACHE_PATH", cacheDir],
    ["OMP_NUM_THREADS", resolvedConcurrency],
    ["ONNXRUNTIME_NUM_THREADS", resolvedConcurrency],
    ["ORT_DEFAULT_NUM_THREADS", resolvedConcurrency],
    ["UV_THREADPOOL_SIZE", resolvedConcurrency],
    ["RAYON_NUM_THREADS", resolvedConcurrency],
    ["INDEXING_CONCURRENCY", resolvedConcurrency],
  ];

  // POSIX: leave shell-safe values bare, single-quote everything else.
  const shQuote = (value) => {
    const text = String(value);
    return /^[A-Za-z0-9_\/:.,@%+=-]+$/.test(text) ? text : `'${text.replace(/'/g, "'\\''")}'`;
  };
  // cmd.exe: `set "K=V"` protects &, |, <, > and ^ inside the value.
  const cmdAssign = (key, value) => {
    const text = String(value);
    return /[&|<>^]/.test(text) ? `set "${key}=${text}"` : `set ${key}=${text}`;
  };

  let command;
  if (isWindows) {
    const assignments = settings.map(([key, value]) => `${cmdAssign(key, value)}&& `).join("");
    command = ["cmd.exe", "/c", `${assignments}uvx mcp-server-qdrant`];
  } else {
    const assignments = settings.map(([key, value]) => `${key}=${shQuote(value)}`).join(" ");
    command = ["sh", "-c", `${assignments} uvx mcp-server-qdrant`];
  }

  return {
    type: "local",
    enabled: true,
    disabled: false,
    command,
    env: {
      QDRANT_URL: "http://localhost:6333",
      COLLECTION_NAME: collectionName,
      EMBEDDING_MODEL: FASTEMBED_MODEL_NAME,
      FASTEMBED_CACHE_PATH: cacheDir,
      INDEXING_CONCURRENCY: resolvedConcurrency,
    },
  };
}
|
|
262
|
+
|
|
263
|
+
/**
 * Removes `//` and block comments and trailing commas from JSONC text so it
 * can be handed to JSON.parse. String literals are copied untouched.
 *
 * @param {string} text - JSONC source.
 * @returns {string} Equivalent strict-JSON text (if the input was valid JSONC).
 */
function stripJsoncSyntax(text) {
  let out = "";
  let inString = false;
  let pendingComma = -1; // index in `out` of a comma that may turn out to be trailing

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    const next = text[i + 1];

    if (inString) {
      out += ch;
      if (ch === "\\" && next !== undefined) {
        out += next;
        i++;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === "/" && next === "/") {
      const eol = text.indexOf("\n", i);
      if (eol === -1) break;
      i = eol - 1; // keep the newline itself
      continue;
    }
    if (ch === "/" && next === "*") {
      const end = text.indexOf("*/", i + 2);
      if (end === -1) break;
      i = end + 1;
      continue;
    }
    if (ch === ",") {
      pendingComma = out.length;
      out += ch;
      continue;
    }
    if (/\s/.test(ch)) {
      out += ch;
      continue;
    }
    if ((ch === "}" || ch === "]") && pendingComma !== -1) {
      out = out.slice(0, pendingComma) + out.slice(pendingComma + 1);
    }
    pendingComma = -1;
    if (ch === '"') inString = true;
    out += ch;
  }
  return out;
}

/**
 * Writes the given Qdrant MCP entry into `<cwd>/opencode.jsonc` under
 * `mcp.qdrant` (always with `disabled: false`), creating the file when needed
 * and preserving every other setting of an existing one.
 *
 * Existing files are parsed as JSON first and as JSONC (comments, trailing
 * commas) second. If the file still cannot be parsed, or its root is not an
 * object, it is left untouched and a warning is printed — overwriting it
 * would destroy the user's configuration. Comments are not preserved when the
 * file is rewritten.
 *
 * @param {object} qdrantConfig - Entry to store, e.g. from generateQdrantConfig().
 * @returns {void}
 */
export function ensureOpencodeConfig(qdrantConfig) {
  const configPath = join(process.cwd(), "opencode.jsonc");
  const isPlainObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);

  let config = { $schema: "https://opencode.ai/config.json", mcp: {} };
  if (existsSync(configPath)) {
    const raw = readFileSync(configPath, "utf8");
    // An empty file carries nothing to preserve; treat it like a missing one.
    if (raw.trim() !== "") {
      let parsed;
      try {
        parsed = JSON.parse(raw);
      } catch {
        try {
          parsed = JSON.parse(stripJsoncSyntax(raw));
        } catch (err) {
          console.warn(`[omnicode] warning: could not parse ${configPath} (${err.message}); leaving it unchanged`);
          return;
        }
      }
      if (!isPlainObject(parsed)) {
        console.warn(`[omnicode] warning: ${configPath} does not contain a JSON object; leaving it unchanged`);
        return;
      }
      config = parsed;
    }
  }

  if (!isPlainObject(config.mcp)) config.mcp = {};
  config.mcp.qdrant = { ...qdrantConfig, disabled: false };
  writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n", "utf8");
}
|
|
275
|
+
|
|
276
|
+
/**
 * Makes sure the agents file contains the managed Qdrant instruction block.
 *
 * - Missing file: it is created with the template as its whole content.
 * - No well-formed begin/end marker pair: the template is appended, separated
 *   from the existing text by a blank line.
 * - Marker pair present: everything from the begin marker through the end
 *   marker is replaced by the template; surrounding text is kept.
 *
 * @param {string} [agentsPath] - File to update; defaults to `<cwd>/AGENTS.md`.
 * @param {string} [template] - Block to install; defaults to the bundled template.
 * @returns {void}
 */
export function ensureQdrantAgentInstructions(agentsPath = join(process.cwd(), "AGENTS.md"), template = QDRANT_AGENTS_TEMPLATE) {
  const persist = (text) => writeFileSync(agentsPath, text, "utf8");

  if (!existsSync(agentsPath)) {
    persist(template);
    return;
  }

  const existing = readFileSync(agentsPath, "utf8");
  const blockStart = existing.indexOf(QDRANT_INSTRUCTIONS_BEGIN);
  const blockEnd = existing.indexOf(QDRANT_INSTRUCTIONS_END);
  const hasManagedBlock = blockStart !== -1 && blockEnd !== -1 && blockEnd >= blockStart;

  if (!hasManagedBlock) {
    const gap = existing.endsWith("\n") ? "\n" : "\n\n";
    persist(existing + gap + template);
    return;
  }

  const before = existing.slice(0, blockStart);
  const after = existing.slice(blockEnd + QDRANT_INSTRUCTIONS_END.length);
  const merged = before + template.trimEnd() + after;
  // Always leave the file terminated by a newline.
  persist(merged.endsWith("\n") ? merged : `${merged}\n`);
}
|
|
296
|
+
|
|
70
297
|
/**
 * Returns the per-user omnicode data directory.
 *
 * @returns {string} `<home>/.local/share/omnicode` (the path is not created).
 */
export function getDataDir() {
  const home = os.homedir();
  return join(home, ".local", "share", "omnicode");
}
|
|
@@ -76,3 +303,671 @@ export function getOpencodeDbPath() {
|
|
|
76
303
|
if (!existsSync(dbPath)) return null;
|
|
77
304
|
return dbPath;
|
|
78
305
|
}
|
|
306
|
+
|
|
307
|
+
/**
 * Returns the directory used as the FastEmbed model cache.
 *
 * @returns {string} On Windows `%LOCALAPPDATA%\fastembed` (the OS temp dir is
 *   used when LOCALAPPDATA is unset); elsewhere `<home>/.cache/fastembed`.
 */
export function getFastEmbedCacheDir() {
  const base = isWindows
    ? (process.env.LOCALAPPDATA || os.tmpdir())
    : join(os.homedir(), ".cache");
  return join(base, "fastembed");
}
|
|
311
|
+
|
|
312
|
+
/**
 * Returns the path of the file that records the Qdrant process id.
 *
 * @returns {string} `qdrant.pid` inside the omnicode data directory.
 */
export function getQdrantPidFile() {
  const dataDir = getDataDir();
  return join(dataDir, "qdrant.pid");
}
|
|
315
|
+
|
|
316
|
+
/**
 * Tells whether Qdrant appears to be running.
 *
 * The PID stored in the qdrant pid file is checked first. If that does not
 * identify a live process and `docker` is available, the running state of the
 * `omnicode-qdrant` container decides.
 *
 * @returns {boolean} True when either check succeeds.
 */
export function isQdrantRunning() {
  const pidFile = getQdrantPidFile();
  try {
    const recorded = readFileSync(pidFile, "utf8").trim();
    if (isPidAlive(parseInt(recorded, 10))) return true;
  } catch {}

  // Fallback: Check if the Docker container is running
  if (!commandExists("docker")) return false;
  try {
    const state = execFileSync("docker", ["inspect", "-f", "{{.State.Running}}", "omnicode-qdrant"], {
      stdio: ["ignore", "pipe", "ignore"],
      encoding: "utf8"
    }).trim();
    return state === "true";
  } catch {
    return false;
  }
}
|
|
335
|
+
|
|
336
|
+
/**
 * Starts the `omnicode-qdrant` Docker container and waits until Qdrant
 * answers its readiness probe.
 *
 * Storage is bind-mounted from `<data dir>/qdrant-storage`. If `docker run`
 * fails (typically because a container with that name already exists),
 * `docker start` is attempted so a stopped container is revived; it is a
 * no-op for one that is already running. Readiness is polled for up to 15 s
 * and each probe is individually time-limited so a hanging connection cannot
 * exceed that budget by much.
 *
 * @returns {Promise<void>} Resolves when ready, or after logging a warning on
 *   timeout. Never rejects for Docker failures.
 */
export async function startQdrantContainer() {
  if (!commandExists("docker")) {
    console.log("[omnicode] docker not found, skipping container initialization");
    return;
  }

  const storagePath = join(getDataDir(), "qdrant-storage");
  try { mkdirSync(storagePath, { recursive: true }); } catch {}

  try {
    execFileSync("docker", ["run", "-d", "--name", "omnicode-qdrant", "-p", "6333:6333", "-v", `${storagePath}:/qdrant/storage`, "qdrant/qdrant"], { stdio: "ignore" });
    console.log("[omnicode] qdrant container started");
  } catch {
    // Name conflict: the container exists. Make sure it is actually running.
    try {
      execFileSync("docker", ["start", "omnicode-qdrant"], { stdio: "ignore" });
    } catch {}
  }

  const deadline = Date.now() + 15000;
  while (Date.now() < deadline) {
    try {
      const res = await fetch("http://localhost:6333/readyz", { signal: AbortSignal.timeout(2000) });
      if (res.ok) return;
    } catch {}
    await new Promise((resolve) => setTimeout(resolve, 500));
  }
  console.log("[omnicode] WARNING: qdrant container did not become ready in time");
}
|
|
360
|
+
|
|
361
|
+
/**
 * Force-removes the `omnicode-qdrant` Docker container.
 * Does nothing when `docker` is unavailable; Docker errors are ignored.
 *
 * @returns {void}
 */
export function stopQdrantContainer() {
  if (commandExists("docker")) {
    try {
      execFileSync("docker", ["rm", "-f", "omnicode-qdrant"], { stdio: "ignore" });
    } catch {}
  }
}
|
|
367
|
+
|
|
368
|
+
/**
 * Deletes leftover lock and write-ahead-log data of a local Qdrant store.
 *
 * Acts only when `qdrantConfig.env.QDRANT_LOCAL_PATH` is set. Removes
 * `<local path>/.lock` and `<local path>/collection/<name>/wal`, where
 * `<name>` is `env.COLLECTION_NAME` or "references". Removal errors are
 * ignored; the first error stops the remaining removals.
 *
 * @param {{env?: {QDRANT_LOCAL_PATH?: string, COLLECTION_NAME?: string}}} [qdrantConfig]
 * @returns {void}
 */
export function cleanQdrantStaleData(qdrantConfig) {
  const settings = qdrantConfig?.env;
  const localPath = settings?.QDRANT_LOCAL_PATH;
  if (!localPath) return;

  const collection = settings?.COLLECTION_NAME || "references";
  const staleEntries = [
    [join(localPath, ".lock"), { force: true }],
    [join(localPath, "collection", collection, "wal"), { recursive: true, force: true }],
  ];

  try {
    for (const [target, options] of staleEntries) {
      if (existsSync(target)) rmSync(target, options);
    }
  } catch {}
}
|
|
382
|
+
|
|
383
|
+
/**
 * Locates the ONNX model file inside the FastEmbed cache.
 *
 * Each snapshot directory is searched, in directory-listing order, for
 * `model.onnx` and then `model_optimized.onnx`; the first existing file wins.
 *
 * @param {string} [cacheDir] - FastEmbed cache root; defaults to getFastEmbedCacheDir().
 * @returns {string|null} Path of the model file, or null when none is found
 *   or the snapshots directory cannot be read.
 */
export function getFastEmbedModelPath(cacheDir = getFastEmbedCacheDir()) {
  const snapshotsDir = join(cacheDir, FASTEMBED_MODEL_CACHE_DIR, "snapshots");
  const modelFiles = ["model.onnx", "model_optimized.onnx"];
  try {
    for (const entry of readdirSync(snapshotsDir, { withFileTypes: true })) {
      if (!entry.isDirectory()) continue;
      for (const fileName of modelFiles) {
        const candidate = join(snapshotsDir, entry.name, fileName);
        if (existsSync(candidate)) return candidate;
      }
    }
    return null;
  } catch {
    return null;
  }
}
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
/**
 * Derives the environment used when storing into Qdrant from an MCP config.
 *
 * Starts from a copy of `qdrantConfig.env` and adds the thread-count
 * variables. The thread count is `env.QRANT_NUM_THREADS`, else the unified
 * concurrency (`env.INDEXING_CONCURRENCY`, then the process environment's
 * INDEXING_CONCURRENCY), else DEFAULT_INDEX_CONCURRENCY; the index
 * concurrency is resolved the same way from `env.QRANT_INDEX_CONCURRENCY`.
 *
 * NOTE(review): the `QRANT_*` names look like a misspelling of `QDRANT_*`;
 * they are kept as-is because other modules may read them — confirm.
 *
 * @param {{env: Object<string, string>}} qdrantConfig - Config holding an `env` map.
 * @returns {Object<string, string>} New environment object; the input is not modified.
 */
export function getQdrantStoreEnv(qdrantConfig) {
  const source = qdrantConfig.env;
  const unified = source.INDEXING_CONCURRENCY || process.env.INDEXING_CONCURRENCY;
  const threads = source.QRANT_NUM_THREADS || unified || String(DEFAULT_INDEX_CONCURRENCY);
  const indexConcurrency = source.QRANT_INDEX_CONCURRENCY || unified || String(DEFAULT_INDEX_CONCURRENCY);

  return {
    ...source,
    OMP_NUM_THREADS: threads,
    ONNXRUNTIME_NUM_THREADS: threads,
    UV_THREADPOOL_SIZE: threads,
    ORT_DEFAULT_NUM_THREADS: threads,
    QRANT_NUM_THREADS: threads,
    QRANT_INDEX_CONCURRENCY: indexConcurrency,
    QDRANT_URL: source.QDRANT_URL,
    COLLECTION_NAME: source.COLLECTION_NAME,
    EMBEDDING_MODEL: source.EMBEDDING_MODEL,
    FASTEMBED_CACHE_PATH: source.FASTEMBED_CACHE_PATH || getFastEmbedCacheDir(),
  };
}
|
|
414
|
+
|
|
415
|
+
/**
 * Spawns `uvx mcp-server-qdrant`, performs the MCP `initialize` handshake over
 * newline-delimited JSON-RPC on stdio, and returns a small client handle.
 *
 * Before spawning: if `options.pidFile` names a live process the function
 * logs and returns null; an embedding model file smaller than
 * FASTEMBED_MIN_MODEL_SIZE is deleted so the server re-downloads it; stale
 * local Qdrant lock/WAL data is cleaned.
 *
 * @param {Object<string, string>} env - Extra environment for the server process.
 * @param {object} [options]
 * @param {Function} [options.spawn] - Replacement for child_process.spawn (testing).
 * @param {number} [options.initTimeout=10000] - Handshake timeout in ms.
 * @param {string} [options.pidFile] - Where to record the server's PID.
 * @returns {Promise<null|{child: object, request: Function, notify: Function, pending: Map, pidFile: string|null, closed: boolean}>}
 *   `request(method, params, timeoutMs, timeoutMessage)` resolves with the
 *   JSON-RPC response (or `{ error }` if the server dies) and rejects on
 *   timeout. `closed` turns true once the child errors or exits; callers may
 *   also set it to true to mark the server unusable.
 * @throws {Error} When the handshake times out or the server reports an error.
 */
export async function startMcpServer(env, options = {}) {
  const spawnFn = options.spawn || spawn;
  const initTimeout = options.initTimeout || 10000;
  const pidFile = options.pidFile || null;

  if (pidFile && existsSync(pidFile)) {
    try {
      const existingPid = parseInt(readFileSync(pidFile, "utf8").trim(), 10);
      if (isPidAlive(existingPid)) {
        console.log(`[omnicode] qdrant MCP already running (pid: ${existingPid})`);
        return null;
      }
    } catch {}
    // The recorded process is gone (or the file is unreadable): drop the stale file.
    try { rmSync(pidFile, { force: true }); } catch {}
  }

  const cacheDir = env.FASTEMBED_CACHE_PATH || getFastEmbedCacheDir();
  const modelPath = getFastEmbedModelPath(cacheDir);
  if (modelPath) {
    try {
      if (statSync(modelPath).size < FASTEMBED_MIN_MODEL_SIZE) {
        console.log("[omnicode] index: embedding model appears corrupted or incomplete, MCP server will re-download it");
        try { rmSync(join(cacheDir, FASTEMBED_MODEL_CACHE_DIR), { recursive: true, force: true }); } catch {}
      }
    } catch {}
  } else {
    console.log("[omnicode] index: embedding model not found, MCP server will download it during initialization (this may take a while)");
  }

  cleanQdrantStaleData({ env: { QDRANT_LOCAL_PATH: env.QDRANT_LOCAL_PATH, COLLECTION_NAME: env.COLLECTION_NAME } });
  const child = spawnFn("uvx", ["mcp-server-qdrant"], {
    env: { ...process.env, ...env, FASTEMBED_CACHE_PATH: cacheDir },
    stdio: ["pipe", "pipe", "pipe"],
  });

  const pending = new Map(); // request id -> { resolve, timeout }
  let buffer = "";
  let stderr = "";
  let nextId = 1;
  let closed = false;

  const resolvePending = (id, message) => {
    if (!pending.has(id)) return;
    const entry = pending.get(id);
    pending.delete(id);
    clearTimeout(entry.timeout);
    entry.resolve(message);
  };

  // Despite the name, outstanding requests are *resolved* with an error
  // object so callers can inspect `.error` without a try/catch.
  const rejectPending = (message) => {
    closed = true;
    for (const id of pending.keys()) {
      resolvePending(id, { error: { message } });
    }
  };

  const parseMessages = (data) => {
    buffer += data.toString();
    const lines = buffer.split("\n");
    buffer = lines.pop() || ""; // keep the incomplete tail for the next chunk

    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed) continue;

      let message;
      try {
        message = JSON.parse(trimmed);
      } catch {
        continue; // non-JSON output on stdout is ignored
      }

      if (message.id !== undefined) resolvePending(message.id, message);
    }
  };

  const request = (method, params, timeoutMs, timeoutMessage) => {
    const id = nextId++;
    const response = new Promise((resolve, reject) => {
      const timeout = setTimeout(() => {
        pending.delete(id);
        reject(new Error(timeoutMessage));
      }, timeoutMs);
      pending.set(id, { resolve, timeout });
    });

    child.stdin.write(JSON.stringify({ jsonrpc: "2.0", id, method, params }) + "\n");
    return response;
  };

  const notify = (method, params) => {
    child.stdin.write(JSON.stringify({ jsonrpc: "2.0", method, ...(params ? { params } : {}) }) + "\n");
  };

  // Make sure the server does not outlive this process. The listener is
  // removed again when the child closes, so repeated calls do not pile up
  // "exit" listeners on `process`.
  const killOnExit = () => { try { child.kill("SIGKILL"); } catch {} };
  process.on("exit", killOnExit);

  child.stdout.on("data", parseMessages);
  child.stderr.on("data", (data) => {
    stderr += data.toString();
    // Only the tail is needed for error reporting.
    if (stderr.length > 5000) stderr = stderr.substring(stderr.length - 5000);
  });
  child.stdin.on("error", () => {});
  child.on("error", (err) => { rejectPending(err.message); });
  child.on("close", (code) => {
    process.removeListener("exit", killOnExit);
    rejectPending(stderr.trim().split("\n").pop() || `MCP server exited with code ${code}`);
  });

  let initialized;
  try {
    initialized = await request("initialize", {
      protocolVersion: "2025-03-26",
      capabilities: {},
      clientInfo: { name: "omnicode-indexer", version: "0.1" },
    }, initTimeout, "MCP server did not initialize in time");
  } catch (err) {
    try { child.kill(); } catch {}
    throw err;
  }
  if (initialized.error) {
    try { child.kill(); } catch {}
    throw new Error(initialized.error.message || "MCP server failed to initialize");
  }

  notify("notifications/initialized");

  if (pidFile) {
    try {
      mkdirSync(dirname(pidFile), { recursive: true });
      writeFileSync(pidFile, String(child.pid), { mode: 0o600 });
    } catch {}
  }

  return {
    child,
    request,
    notify,
    pending,
    pidFile,
    get closed() { return closed; },
    // Settable so callers can flag a hung server; it can never be re-opened.
    set closed(value) { if (value) closed = true; },
  };
}
|
|
548
|
+
|
|
549
|
+
/**
 * Shuts down an MCP server started by startMcpServer.
 *
 * Sends an `exit` notification, closes stdin and sends SIGTERM. The pid file
 * is removed as soon as the child exits. If the child is still around after
 * 5 seconds it is sent SIGKILL and the pid file is removed anyway. The timer
 * is unref'd so it never keeps the process alive.
 *
 * @param {{child: object, notify: Function, pidFile?: string|null}|null} mcpServer
 * @returns {void}
 */
export function stopMcpServer(mcpServer) {
  if (!mcpServer || !mcpServer.child) return;
  const { child, pidFile } = mcpServer;

  const removePidFile = () => {
    if (!pidFile) return;
    try { rmSync(pidFile, { force: true }); } catch {}
  };

  // Each step is best-effort: the server may already be gone.
  try { mcpServer.notify("exit", {}); } catch {}
  try { child.stdin.end(); } catch {}
  try { child.kill("SIGTERM"); } catch {}

  const timer = setTimeout(() => {
    try { child.kill("SIGKILL"); } catch {}
    removePidFile();
  }, 5000);
  timer.unref();

  const onExit = () => {
    clearTimeout(timer);
    removePidFile();
  };
  const alreadyExited = (child.exitCode !== null && child.exitCode !== undefined)
    || (child.signalCode !== null && child.signalCode !== undefined);
  if (alreadyExited) {
    onExit();
  } else if (typeof child.once === "function") {
    child.once("exit", onExit);
  }
}
|
|
560
|
+
|
|
561
|
+
/**
 * Takes a snapshot of system and process memory usage.
 *
 * @returns {{total: number, free: number, usedPercent: number, rss: number}}
 *   `total`/`free` come from os.totalmem()/os.freemem(), `usedPercent` is the
 *   share of total memory that is not free (0-100), and `rss` is this
 *   process's resident set size.
 */
export function getSystemMemoryInfo() {
  const total = os.totalmem();
  const free = os.freemem();
  const usedPercent = ((total - free) / total) * 100;
  const { rss } = process.memoryUsage();
  return { total, free, usedPercent, rss };
}
|
|
566
|
+
|
|
567
|
+
/**
 * Prints a warning when system memory usage has reached the threshold.
 *
 * @param {number} [thresholdPercent=75] - Usage percentage that counts as pressure.
 * @returns {boolean} True when usage is below the threshold (no pressure).
 */
export function warnIfMemoryPressure(thresholdPercent = 75) {
  const { usedPercent } = getSystemMemoryInfo();
  const underPressure = usedPercent >= thresholdPercent;
  if (underPressure) {
    console.warn(`[omnicode] WARNING: system memory usage at ${usedPercent.toFixed(1)}% (threshold: ${thresholdPercent}%). Indexing may be slower or unstable.`);
  }
  return usedPercent < thresholdPercent;
}
|
|
574
|
+
|
|
575
|
+
// Extensions of binary document formats: PDF, raster images and office files.
const BINARY_DOC_EXTENSIONS = new Set([
  ".pdf",
  ".png", ".jpg", ".jpeg", ".jp2", ".webp", ".gif", ".bmp",
  ".doc", ".docx",
  ".ppt", ".pptx",
  ".xls", ".xlsx"
]);

// Everything the reference walker accepts: plain-text formats plus all of the
// binary document formats above.
const TEXT_EXTENSIONS = new Set([
  ".md", ".txt", ".json", ".yaml", ".yml", ".ts", ".js", ".mjs", ".cjs", ".sh", ".bash", ".zsh", ".toml", ".cfg", ".conf", ".ini", ".env", ".gitignore", ".dockerfile", ".html", ".htm",
  ...BINARY_DOC_EXTENSIONS
]);

/**
 * Decides whether a file is a binary document (as opposed to plain text).
 *
 * @param {string} filePath - Path or file name; only its extension is used (case-insensitive).
 * @param {{length: number}} [buffer] - File contents, used only for the image size check.
 * @returns {boolean} True for binary document extensions, except images
 *   whose buffer is smaller than 50 KB.
 */
export function isComplexDocument(filePath, buffer) {
  const ext = extname(filePath).toLowerCase();
  if (!BINARY_DOC_EXTENSIONS.has(ext)) return false;

  // Skip small images (under 50KB) as they are likely UI icons/assets rather than doc pages
  const imageExtensions = [".png", ".jpg", ".jpeg", ".jp2", ".webp", ".gif", ".bmp"];
  const smallImageLimit = 50 * 1024;
  const isSmallImage = imageExtensions.includes(ext) && Boolean(buffer) && buffer.length < smallImageLimit;
  return !isSmallImage;
}
|
|
600
|
+
|
|
601
|
+
/**
 * Recursively walks `dir` and yields every indexable file.
 *
 * Entries whose name starts with "." and `node_modules` are skipped. A
 * regular file is yielded when its lower-cased extension, or its whole
 * lower-cased name, is in TEXT_EXTENSIONS. Permission errors (EACCES/EPERM)
 * end the walk of the current directory silently; other errors propagate.
 *
 * @param {string} dir - Directory to walk.
 * @yields {{path: string, mtimeMs: number}} File path and modification time.
 */
export async function* walkReferencesAsync(dir) {
  try {
    const entries = await fsPromises.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const { name } = entry;
      if (name.startsWith(".") || name === "node_modules") continue;

      const fullPath = join(dir, name);
      if (entry.isDirectory()) {
        yield* walkReferencesAsync(fullPath);
        continue;
      }
      if (!entry.isFile()) continue;

      const indexable = TEXT_EXTENSIONS.has(extname(name).toLowerCase()) || TEXT_EXTENSIONS.has(name.toLowerCase());
      if (!indexable) continue;

      const { mtimeMs } = await fsPromises.stat(fullPath);
      yield { path: fullPath, mtimeMs };
    }
  } catch (err) {
    const permissionDenied = err.code === "EACCES" || err.code === "EPERM";
    if (!permissionDenied) throw err;
  }
}
|
|
622
|
+
|
|
623
|
+
/**
 * Splits file content into chunks suitable for embedding.
 *
 * Markdown files (`.md`, matched case-insensitively) are split in front of
 * every level-2 heading (`## `); any other file is split every 50 lines.
 * Chunks are trimmed, empty ones are dropped, and chunks longer than 4000
 * UTF-16 code units are cut into consecutive pieces of at most that length.
 * A cut never lands between the two halves of a surrogate pair, so no piece
 * contains a broken character.
 *
 * @param {string} content - Full text of the file.
 * @param {string} filePath - Path of the file; only its extension is inspected.
 * @returns {string[]} Chunks in document order.
 */
export function chunkFile(content, filePath) {
  const MAX_CHUNK_LENGTH = 4000;
  const LINES_PER_CHUNK = 50;

  const lines = content.split("\n");
  const sections = [];

  if (filePath.toLowerCase().endsWith(".md")) {
    let current = [];
    for (const line of lines) {
      if (line.startsWith("## ") && current.length > 0) {
        sections.push(current.join("\n").trim());
        current = [];
      }
      current.push(line);
    }
    if (current.length > 0) sections.push(current.join("\n").trim());
  } else {
    for (let i = 0; i < lines.length; i += LINES_PER_CHUNK) {
      sections.push(lines.slice(i, i + LINES_PER_CHUNK).join("\n").trim());
    }
  }

  const finalChunks = [];
  for (const section of sections) {
    if (section.length === 0) continue;

    let start = 0;
    while (section.length - start > MAX_CHUNK_LENGTH) {
      let end = start + MAX_CHUNK_LENGTH;
      const lastUnit = section.charCodeAt(end - 1);
      // A high surrogate in the last slot means its low half would start the
      // next piece; move the cut one unit back to keep the pair together.
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
      finalChunks.push(section.slice(start, end));
      start = end;
    }
    finalChunks.push(section.slice(start));
  }
  return finalChunks;
}
|
|
662
|
+
|
|
663
|
+
/**
 * Loads the persisted index state (a map from file path to the modification
 * time at which the file was last indexed).
 *
 * @param {string} statePath - Path of the JSON state file.
 * @returns {Object<string, number>} The stored map, or an empty object when
 *   the file is missing, unreadable, not valid JSON, or does not contain a
 *   JSON object (e.g. `null` or an array), so callers can always treat the
 *   result as a dictionary.
 */
export function loadIndexState(statePath) {
  try {
    const parsed = JSON.parse(readFileSync(statePath, "utf8"));
    const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    return {};
  }
}
|
|
670
|
+
|
|
671
|
+
/**
 * Persists the index state atomically: the JSON is written to a uniquely
 * named temporary file next to the target and then renamed over it, so
 * readers never observe a half-written file.
 *
 * @param {string} statePath - Destination path of the state file.
 * @param {object} state - State to serialise (pretty-printed JSON plus newline).
 * @returns {Promise<void>}
 * @throws Rejects with the underlying filesystem error; the temporary file is
 *   removed before the error is rethrown.
 */
export async function saveIndexState(statePath, state) {
  const tmpPath = `${statePath}.${randomUUID()}.tmp`;
  try {
    await fsPromises.writeFile(tmpPath, JSON.stringify(state, null, 2) + "\n", "utf8");
    await fsPromises.rename(tmpPath, statePath);
  } catch (err) {
    // Do not leave orphaned temp files behind; ignore cleanup failures.
    await fsPromises.unlink(tmpPath).catch(() => {});
    throw err;
  }
}
|
|
676
|
+
|
|
677
|
+
/**
 * Stores text chunks in Qdrant through the MCP `qdrant-store` tool.
 *
 * Chunks whose text is shorter than 10 characters are skipped. The remaining
 * chunks are consumed from a shared queue by up to `concurrency` workers.
 * Failures are logged and skipped. After more than three consecutive
 * timeouts the batch is aborted: all workers stop and, when the handle allows
 * it, the server is flagged as closed so callers sharing it can stop too.
 *
 * @param {{text: string, path: string}[]} chunks - Chunks to store.
 * @param {Object<string, string>} env - Environment for a server started here.
 * @param {number} [concurrency=DEFAULT_INDEX_CONCURRENCY] - Maximum parallel requests.
 * @param {object|null} [mcpServer=null] - Existing handle from startMcpServer;
 *   when omitted a server is started for this call and stopped afterwards.
 * @returns {Promise<{chunk: string, stored: boolean}[]>} One entry per stored
 *   chunk (first 80 characters of its text), in completion order.
 */
export async function callQdrantStore(chunks, env, concurrency = DEFAULT_INDEX_CONCURRENCY, mcpServer = null) {
  const MAX_CONSECUTIVE_TIMEOUTS = 3;

  const storeChunks = chunks.filter((c) => c.text.length >= 10);
  // Nothing to store: avoid spawning (and waiting for) a server.
  if (storeChunks.length === 0) return [];

  const ownsServer = !mcpServer;
  const server = mcpServer || await startMcpServer(env);
  const results = [];

  const workerCount = Math.min(Math.max(1, concurrency), storeChunks.length);
  let nextChunk = 0;
  let nextId = 2; // only used to label chunks in timeout messages
  let consecutiveTimeouts = 0;
  let aborted = false;

  const storeChunk = async (chunkObj) => {
    const requestNumber = nextId++;
    const message = await server.request("tools/call", {
      name: "qdrant-store",
      arguments: { information: chunkObj.text, metadata: { source: chunkObj.path } },
    }, 30000, `chunk ${requestNumber} timed out`)
      .catch((err) => ({ error: { message: err.message } }));

    if (message.error) {
      console.warn(`[omnicode] index: warning: ${message.error.message || "chunk failed"}`);
      if (message.error.message && message.error.message.includes("timed out")) {
        consecutiveTimeouts++;
      } else {
        consecutiveTimeouts = 0;
      }
      return;
    }
    consecutiveTimeouts = 0;
    results.push({ chunk: chunkObj.text.substring(0, 80), stored: true });
  };

  const worker = async () => {
    while (!aborted && nextChunk < storeChunks.length && !server.closed) {
      if (consecutiveTimeouts > MAX_CONSECUTIVE_TIMEOUTS) {
        console.error("[omnicode] index: aborting batch due to 3+ consecutive timeouts (MCP deadlock detected)");
        aborted = true;
        // `closed` may be a getter-only property, where assignment throws in
        // strict mode; the local flag above is what stops the workers.
        try { server.closed = true; } catch {}
        break;
      }
      const chunkObj = storeChunks[nextChunk++];
      await storeChunk(chunkObj);
    }
  };

  try {
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results;
  } finally {
    if (ownsServer) stopMcpServer(server);
  }
}
|
|
729
|
+
|
|
730
|
+
/**
 * Incrementally index the files found under `refsDir` through the
 * `qdrant-store` MCP tool.
 *
 * A file is (re)indexed when its `mtimeMs` is newer than the value recorded
 * for its path in `<cwd>/.qdrant/index.json`. Entries for paths that are no
 * longer present under `refsDir` are dropped from that state file. While the
 * run is in progress a `<cwd>/.qdrant/.indexing` marker file exists.
 *
 * Chunks are accumulated into batches and flushed once a batch holds 100
 * files or more than 20,000,000 bytes of chunk text. A file's mtime is only
 * recorded after the batch containing it was stored without the server being
 * closed or the run being cancelled, so interrupted files are retried on the
 * next run.
 *
 * @param {string} refsDir - Directory to walk for reference files.
 * @param {object} qdrantConfig - Passed to `getQdrantStoreEnv` to build the store environment.
 * @param {object|null} [mcpServer=null] - Already running MCP server to reuse. When omitted,
 *   one is started here and stopped again before returning.
 * @param {boolean} [forceReindex=false] - Delete the `.qdrant` directory first so every file is re-indexed.
 * @param {AbortSignal|null} [abortSignal=null] - Optional cancellation signal; SIGINT is always honoured too.
 * @returns {Promise<void>} Resolves when indexing finished, was cancelled, or failed (failures are logged).
 */
export async function indexReferences(refsDir, qdrantConfig, mcpServer = null, forceReindex = false, abortSignal = null) {
  const MAX_BATCH_FILES = 100;
  const MAX_BATCH_BYTES = 20_000_000;

  const qdrantDir = join(process.cwd(), ".qdrant");

  if (forceReindex) {
    console.log("[omnicode] index: forcing full reindex, clearing .qdrant directory...");
    // Best effort: a failed cleanup must not prevent indexing.
    try { rmSync(qdrantDir, { recursive: true, force: true }); } catch {}
  }

  const stateFile = join(qdrantDir, "index.json");
  const lockFile = join(qdrantDir, ".indexing");
  const state = loadIndexState(stateFile);

  if (!existsSync(refsDir)) {
    console.log("[omnicode] index: no references/ folder found");
    return;
  }

  const files = [];
  const currentPaths = new Set();
  for await (const file of walkReferencesAsync(refsDir)) {
    files.push(file);
    currentPaths.add(file.path);
  }

  // Forget files that disappeared from disk since the previous run.
  let stateChanged = false;
  for (const path of Object.keys(state)) {
    if (!currentPaths.has(path)) {
      delete state[path];
      stateChanged = true;
    }
  }
  if (stateChanged) await saveIndexState(stateFile, state);

  const newFiles = files.filter((f) => (state[f.path] || 0) < f.mtimeMs);

  if (newFiles.length === 0) {
    console.log("[omnicode] index: all files up to date");
    return;
  }

  console.log(`[omnicode] index: ${newFiles.length} files to index`);

  // A falsy result from warnIfMemoryPressure() is treated as "under pressure".
  const isMemoryPressure = !warnIfMemoryPressure();
  if (isMemoryPressure) {
    console.warn("[omnicode] index: high memory pressure detected, forcing effective concurrency to 1");
  }

  // Best effort: if this fails, the later state/lock writes surface the problem.
  try { mkdirSync(qdrantDir, { recursive: true }); } catch {}

  let ownsServer = false;
  if (!mcpServer) {
    try {
      const env = getQdrantStoreEnv(qdrantConfig);
      mcpServer = await startMcpServer(env);
      ownsServer = true;
    } catch (err) {
      console.error(`[omnicode] index: failed to start MCP server — ${err.message}`);
      return;
    }
  }

  let cancelled = false;
  const onCancel = () => {
    if (cancelled) return;
    console.log("\n[omnicode] index: interrupted, saving partial state...");
    cancelled = true;
    if (mcpServer) stopMcpServer(mcpServer);
  };
  const onSigint = onCancel;
  process.on("SIGINT", onSigint);
  if (abortSignal) {
    abortSignal.addEventListener("abort", onCancel);
  }

  // True once the run was cancelled or the MCP server was flagged as closed.
  const isStopped = () => cancelled || Boolean(mcpServer && mcpServer.closed);

  try {
    // An already-aborted signal never emits "abort", so honour it explicitly.
    if (abortSignal && abortSignal.aborted) onCancel();

    try { writeFileSync(lockFile, "1"); } catch {}
    const env = getQdrantStoreEnv(qdrantConfig);
    const unifiedConcurrency = env.INDEXING_CONCURRENCY || process.env.INDEXING_CONCURRENCY;
    // "QRANT_INDEX_CONCURRENCY" looks like a misspelling of "QDRANT_..."; both
    // spellings are accepted so existing configurations keep working.
    let concurrency = Number.parseInt(
      env.QDRANT_INDEX_CONCURRENCY || env.QRANT_INDEX_CONCURRENCY || unifiedConcurrency || String(DEFAULT_INDEX_CONCURRENCY),
      10,
    );
    if (isMemoryPressure) concurrency = 1;
    const workerConcurrency = Number.isFinite(concurrency) && concurrency > 0 ? concurrency : DEFAULT_INDEX_CONCURRENCY;
    let filesProcessed = 0;

    let batchFiles = [];
    let batchChunks = [];
    let batchBytes = 0;
    let minerUDisabled = false;
    const activeMinerUTasks = [];

    // Store the pending batch and, only if it was stored uninterrupted, record its files as indexed.
    const flushBatch = async () => {
      if (batchChunks.length === 0) return;
      if (mcpServer && mcpServer.closed) {
        cancelled = true;
        return;
      }

      // Swap the accumulators out first so concurrent workers fill a fresh batch.
      const currentChunks = batchChunks;
      const currentFiles = batchFiles;
      batchChunks = [];
      batchFiles = [];
      batchBytes = 0;

      const start = Date.now();
      console.log(`[omnicode] index: storing batch of ${currentChunks.length} chunks (${filesProcessed}/${newFiles.length} files processed)`);

      await callQdrantStore(currentChunks, env, workerConcurrency, mcpServer);

      // The store loop stops early once the server is closed (timeouts or
      // cancellation). Marking these files as indexed would hide them from
      // every later run, so leave their state untouched instead.
      if (isStopped()) {
        cancelled = true;
        console.warn(`[omnicode] index: batch interrupted, ${currentFiles.length} file(s) will be retried on the next run`);
        return;
      }

      const duration = Date.now() - start;
      console.log(`[omnicode] index: batch complete in ${duration}ms`);
      if (duration > 30000) console.warn(`[omnicode] index: WARNING: batch took > 30s to process!`);

      for (const file of currentFiles) {
        state[file.path] = file.mtimeMs;
      }
      await saveIndexState(stateFile, state);
    };

    // Text used for local chunking; binary document types get a fixed placeholder string.
    const readLocalContent = (file, fileBuffer) =>
      BINARY_DOC_EXTENSIONS.has(extname(file.path).toLowerCase())
        ? "Binary Content Placeholder"
        : fileBuffer.toString("utf8");

    // Chunk `content`, preferring tree-sitter and using chunkFile when it yields nothing usable.
    const chunkContent = async (content, chunkPath) => {
      const structural = await chunkWithTreeSitter(content, chunkPath);
      if (structural) return { chunks: structural, algo: "Tree-sitter (structural)" };
      return { chunks: chunkFile(content, chunkPath), algo: "Linear (sequential)" };
    };

    // Queue a file's chunks into the current batch and flush when a batch limit is reached.
    const enqueueChunks = async (file, chunks) => {
      for (const c of chunks) {
        batchChunks.push({ path: file.path, text: c });
        batchBytes += Buffer.byteLength(c, "utf8");
      }
      batchFiles.push(file);
      filesProcessed++;
      if (batchFiles.length >= MAX_BATCH_FILES || batchBytes > MAX_BATCH_BYTES) {
        await flushBatch();
      }
    };

    // Convert a complex document through MinerU, falling back to local chunking
    // when conversion (or chunking of its output) fails. Never rejects.
    const indexViaMinerU = async (file, fileBuffer, apiKey) => {
      try {
        let prepared;
        let label;
        try {
          const markdown = await processComplexDocument(fileBuffer, basename(file.path), apiKey);
          if (isStopped()) return;
          // Append .md to ensure markdown chunking logic applies
          prepared = await chunkContent(markdown, file.path + ".md");
          label = " (via MinerU)";
        } catch (err) {
          if (err.status === 401 || err.status === 402) {
            console.warn(`[omnicode] index: MinerU API quota/auth error (${err.status}), disabling MinerU routing.`);
            minerUDisabled = true;
          } else {
            console.warn(`[omnicode] index: MinerU API failed for ${file.path}, falling back to local chunking: ${err.message}`);
          }
          prepared = await chunkContent(readLocalContent(file, fileBuffer), file.path);
          label = " (local fallback)";
        }
        console.log(`[omnicode] Indexed: ${file.path}${label} using ${prepared.algo} chunking`);
        // Queued outside the fallback handler: a failing flush must not cause
        // the same file to be queued a second time.
        await enqueueChunks(file, prepared.chunks);
      } catch (err) {
        console.warn(`[omnicode] index: skipping ${file.path} — ${err.message}`);
      }
    };

    const CONCURRENCY_LIMIT = workerConcurrency;
    const workers = [];
    let fileIndex = 0;

    // Worker loop: each worker repeatedly claims the next unprocessed file.
    const processNextFile = async () => {
      while (fileIndex < newFiles.length) {
        if (isStopped()) break;
        const file = newFiles[fileIndex++];
        if (!file) continue;

        try {
          const fileBuffer = await fsPromises.readFile(file.path);
          await new Promise((r) => setImmediate(r)); // yield event loop

          if (isStopped()) break;

          const apiKey = process.env.MINERU_API_KEY;
          const isComplex = isComplexDocument(file.path, fileBuffer);

          if (isComplex && apiKey && !minerUDisabled) {
            // Not awaited here so the worker can move on to the next file;
            // all pending tasks are awaited before the final flush.
            activeMinerUTasks.push(indexViaMinerU(file, fileBuffer, apiKey));
            continue;
          }

          // Standard processing for non-complex or if API missing/disabled
          const prepared = await chunkContent(readLocalContent(file, fileBuffer), file.path);
          console.log(`[omnicode] Indexed: ${file.path} using ${prepared.algo} chunking`);
          await enqueueChunks(file, prepared.chunks);
        } catch (err) {
          console.warn(`[omnicode] index: skipping ${file.path} — ${err.message}`);
        }
      }
    };

    for (let i = 0; i < CONCURRENCY_LIMIT; i++) {
      workers.push(processNextFile());
    }
    await Promise.all(workers);

    // Wait for any remaining asynchronous MinerU tasks to resolve before final flush
    await Promise.allSettled(activeMinerUTasks);

    if (!cancelled) await flushBatch();

    if (cancelled) {
      console.log("[omnicode] index: aborted by user");
    } else {
      console.log("[omnicode] index: complete");
    }
  } catch (err) {
    console.error(`[omnicode] index: failed — ${err.message}`);
  } finally {
    process.off("SIGINT", onSigint);
    if (abortSignal) abortSignal.removeEventListener("abort", onCancel);
    if (ownsServer) stopMcpServer(mcpServer);
    try {
      await saveIndexState(stateFile, state);
    } finally {
      // Remove the marker even when persisting the state failed.
      try { rmSync(lockFile, { force: true }); } catch {}
    }
  }
}
|