@pi-unipi/cocoindex 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bridge.ts +75 -7
- package/package.json +1 -1
package/bridge.ts
CHANGED
|
@@ -64,6 +64,7 @@ export interface CocoindexDeps {
|
|
|
64
64
|
const COCOINDEX_STATE_DIR = ".cocoindex";
|
|
65
65
|
const DEFAULT_PIPELINE_DIR = ".unipi/cocoindex";
|
|
66
66
|
const DEFAULT_LANCEDB_PATH = ".unipi/cocoindex/.lancedb";
|
|
67
|
+
// Default time budget for one `cocoindex update` run: 15 minutes, in milliseconds.
// Used when COCOINDEX_UPDATE_TIMEOUT_MS is unset or not a positive finite number.
const DEFAULT_UPDATE_TIMEOUT_MS = 15 * 60 * 1000;
|
|
67
68
|
const DEFAULT_LEXICAL_SCAN_LIMIT = 50_000;
|
|
68
69
|
|
|
69
70
|
// ─────────────────────────────────────────────────────────
|
|
@@ -268,11 +269,22 @@ export async function indexProject(projectDir: string): Promise<IndexResult> {
|
|
|
268
269
|
const proc = spawn(cocoindexBin, ["update", "main.py"], {
|
|
269
270
|
cwd: pipelineDir,
|
|
270
271
|
stdio: ["pipe", "pipe", "pipe"],
|
|
271
|
-
timeout: 300000, // 5 min timeout
|
|
272
272
|
});
|
|
273
273
|
|
|
274
274
|
let stdout = "";
|
|
275
275
|
let stderr = "";
|
|
276
|
+
let timedOut = false;
|
|
277
|
+
let settled = false;
|
|
278
|
+
const timeoutMs = getUpdateTimeoutMs();
|
|
279
|
+
|
|
280
|
+
const timer = setTimeout(() => {
|
|
281
|
+
timedOut = true;
|
|
282
|
+
proc.kill("SIGTERM");
|
|
283
|
+
setTimeout(() => {
|
|
284
|
+
if (!settled) proc.kill("SIGKILL");
|
|
285
|
+
}, 5000).unref();
|
|
286
|
+
}, timeoutMs);
|
|
287
|
+
timer.unref();
|
|
276
288
|
|
|
277
289
|
proc.stdout.on("data", (data: Buffer) => {
|
|
278
290
|
stdout += data.toString();
|
|
@@ -282,7 +294,9 @@ export async function indexProject(projectDir: string): Promise<IndexResult> {
|
|
|
282
294
|
stderr += data.toString();
|
|
283
295
|
});
|
|
284
296
|
|
|
285
|
-
proc.on("close", (code: number | null) => {
|
|
297
|
+
proc.on("close", (code: number | null, signal: NodeJS.Signals | null) => {
|
|
298
|
+
settled = true;
|
|
299
|
+
clearTimeout(timer);
|
|
286
300
|
const durationMs = Date.now() - start;
|
|
287
301
|
const chunksProcessed = parseChunksProcessed(stdout);
|
|
288
302
|
|
|
@@ -293,12 +307,14 @@ export async function indexProject(projectDir: string): Promise<IndexResult> {
|
|
|
293
307
|
success: false,
|
|
294
308
|
chunksProcessed,
|
|
295
309
|
durationMs,
|
|
296
|
-
error:
|
|
310
|
+
error: formatIndexFailure({ code, signal, timedOut, timeoutMs, stdout, stderr }),
|
|
297
311
|
});
|
|
298
312
|
}
|
|
299
313
|
});
|
|
300
314
|
|
|
301
315
|
proc.on("error", (err: Error) => {
|
|
316
|
+
settled = true;
|
|
317
|
+
clearTimeout(timer);
|
|
302
318
|
resolve({
|
|
303
319
|
success: false,
|
|
304
320
|
chunksProcessed: 0,
|
|
@@ -310,6 +326,46 @@ export async function indexProject(projectDir: string): Promise<IndexResult> {
|
|
|
310
326
|
}
|
|
311
327
|
|
|
312
328
|
/** Parse the number of files processed from cocoindex v1.0+ output. */
|
|
329
|
+
/**
 * Resolve the time budget (in milliseconds) for a `cocoindex update` run.
 *
 * Reads `COCOINDEX_UPDATE_TIMEOUT_MS` from the environment. An unset, empty,
 * non-numeric, non-finite, zero, or negative value falls back to
 * `DEFAULT_UPDATE_TIMEOUT_MS`.
 *
 * The override is capped at 2^31 - 1 ms: Node's `setTimeout` resets any larger
 * delay to 1 ms, so an oversized value would otherwise fire the timeout almost
 * immediately instead of "practically never".
 *
 * @returns A positive, finite delay that is safe to hand to `setTimeout`.
 */
function getUpdateTimeoutMs(): number {
  // Largest delay Node timers honour (signed 32-bit integer), roughly 24.8 days.
  const maxTimerDelayMs = 2_147_483_647;

  const raw = process.env.COCOINDEX_UPDATE_TIMEOUT_MS;
  if (!raw) return DEFAULT_UPDATE_TIMEOUT_MS;

  const parsed = Number(raw);
  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_UPDATE_TIMEOUT_MS;

  return Math.min(parsed, maxTimerDelayMs);
}
|
|
337
|
+
|
|
338
|
+
/**
 * Compose the error text reported when the indexing subprocess fails.
 *
 * The first paragraph names the cause, in priority order: a timeout, a
 * terminating signal, or the exit code ("unknown" when the code is null).
 * It is followed by the trimmed tail of stderr (up to 4000 characters) and
 * then stdout (up to 2000 characters), each only when non-empty. Paragraphs
 * are separated by a blank line.
 *
 * @param args Exit details and captured output of the subprocess.
 * @returns The multi-paragraph failure description.
 */
function formatIndexFailure(args: {
  code: number | null;
  signal: NodeJS.Signals | null;
  timedOut: boolean;
  timeoutMs: number;
  stdout: string;
  stderr: string;
}): string {
  const { code, signal, timedOut, timeoutMs } = args;

  // A timeout takes precedence over the signal used to enforce it.
  let headline: string;
  if (timedOut) {
    headline = `Timed out after ${(timeoutMs / 1000).toFixed(0)}s`;
  } else if (signal) {
    headline = `Process terminated by ${signal}`;
  } else {
    headline = `Process exited with code ${code ?? "unknown"}`;
  }

  const errTail = tailText(args.stderr.trim(), 4000);
  const outTail = tailText(args.stdout.trim(), 2000);

  const sections = [headline];
  if (errTail) sections.push(`stderr:\n${errTail}`);
  if (outTail) sections.push(`stdout:\n${outTail}`);

  return sections.join("\n\n");
}
|
|
363
|
+
|
|
364
|
+
/**
 * Keep at most the last `maxChars` UTF-16 code units of `text`.
 *
 * Text that already fits is returned unchanged. Otherwise the result is an
 * ellipsis ("…") followed by the retained tail, so the reader can see that
 * leading content was dropped.
 *
 * @param text Text to shorten.
 * @param maxChars Maximum number of trailing code units to keep.
 * @returns `text` itself when it fits, else "…" plus the tail.
 */
function tailText(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;

  // `slice(-0)` is `slice(0)` and would return the whole text, and a negative
  // budget would slice from the front; with no usable budget keep nothing.
  const budget = Math.trunc(maxChars);
  if (!(budget > 0)) return "…";

  let start = text.length - budget;

  // Do not begin the tail in the middle of a surrogate pair: a lone low
  // surrogate renders as a replacement character in logs and UIs.
  const first = text.charCodeAt(start);
  const previous = text.charCodeAt(start - 1);
  const startsOnLowSurrogate = first >= 0xdc00 && first <= 0xdfff;
  const followsHighSurrogate = previous >= 0xd800 && previous <= 0xdbff;
  if (startsOnLowSurrogate && followsHighSurrogate) start += 1;

  return `…${text.slice(start)}`;
}
|
|
368
|
+
|
|
313
369
|
function parseChunksProcessed(output: string): number {
|
|
314
370
|
// v1.0+ format: "✅ process_file: 604 total | 604 added"
|
|
315
371
|
// Capture the last "added" or "reprocessed" count for process_file
|
|
@@ -321,9 +377,14 @@ function parseChunksProcessed(output: string): number {
|
|
|
321
377
|
}
|
|
322
378
|
}
|
|
323
379
|
if (lastProcessLine) {
|
|
324
|
-
//
|
|
325
|
-
|
|
326
|
-
|
|
380
|
+
// Prefer completed work counts. Lines can contain multiple counters, e.g.
|
|
381
|
+
// "process_file: 615 total | 8 added, 606 reprocessed".
|
|
382
|
+
const matches = [...lastProcessLine.matchAll(/(\d+)\s+(?:added|reprocessed|skipped|deleted)/g)];
|
|
383
|
+
const completed = matches.reduce((sum, match) => sum + parseInt(match[1], 10), 0);
|
|
384
|
+
if (completed > 0) return completed;
|
|
385
|
+
|
|
386
|
+
const total = lastProcessLine.match(/process_file:\s*(\d+)\s+total/);
|
|
387
|
+
if (total) return parseInt(total[1], 10);
|
|
327
388
|
}
|
|
328
389
|
|
|
329
390
|
// Fallback: old format "Processed 42 chunks"
|
|
@@ -634,6 +695,8 @@ import os
|
|
|
634
695
|
|
|
635
696
|
# ── Configuration ────────────────────────────────────
|
|
636
697
|
PROJECT_ROOT = os.environ.get("PROJECT_ROOT", "${projectDir}")
|
|
698
|
+
# Safety limit for huge generated/lock files. Set COCO_MAX_FILE_CHARS=0 to disable.
|
|
699
|
+
MAX_FILE_CHARS = int(os.environ.get("COCO_MAX_FILE_CHARS", "200000"))
|
|
637
700
|
|
|
638
701
|
# ── LanceDB context key ──────────────────────────────
|
|
639
702
|
db_key = coco.ContextKey("lancedb/${projectBasename}")
|
|
@@ -703,6 +766,8 @@ async def process_file(
|
|
|
703
766
|
|
|
704
767
|
if not content.strip():
|
|
705
768
|
return
|
|
769
|
+
if MAX_FILE_CHARS > 0 and len(content) > MAX_FILE_CHARS:
|
|
770
|
+
return
|
|
706
771
|
|
|
707
772
|
relative = file.file_path.path.as_posix()
|
|
708
773
|
chunks = await chunk_text(content)
|
|
@@ -750,7 +815,10 @@ async def app_main() -> None:
|
|
|
750
815
|
excluded_patterns=[
|
|
751
816
|
"**/node_modules/**", "**/.git/**", "**/dist/**",
|
|
752
817
|
"**/build/**", "**/.next/**", "**/__pycache__/**",
|
|
753
|
-
"**/.
|
|
818
|
+
"**/coverage/**", "**/.turbo/**", "**/.cache/**",
|
|
819
|
+
"**/.unipi/**",
|
|
820
|
+
"**/*.min.js", "**/bundled.js", "**/bundle.js", "**/*bundle*.js",
|
|
821
|
+
"**/package-lock.json", "**/pnpm-lock.yaml", "**/yarn.lock",
|
|
754
822
|
],
|
|
755
823
|
),
|
|
756
824
|
)
|
package/package.json
CHANGED