@byte5ai/palaia 2.0.7 → 2.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/config.ts +5 -0
- package/src/hooks.ts +32 -10
- package/src/runner.ts +289 -0
package/package.json
CHANGED
package/src/config.ts
CHANGED
|
@@ -58,6 +58,10 @@ export interface PalaiaPluginConfig {
|
|
|
58
58
|
// ── Recall Quality (Issue #65) ───────────────────────────────
|
|
59
59
|
/** Minimum score for a recall result to be considered relevant (default: 0.7) */
|
|
60
60
|
recallMinScore: number;
|
|
61
|
+
|
|
62
|
+
// ── Embedding Server (v2.0.8) ──────────────────────────────
|
|
63
|
+
/** Enable long-lived embedding server subprocess for fast queries (default: true) */
|
|
64
|
+
embeddingServer: boolean;
|
|
61
65
|
}
|
|
62
66
|
|
|
63
67
|
export const DEFAULT_RECALL_TYPE_WEIGHTS: RecallTypeWeights = {
|
|
@@ -81,6 +85,7 @@ export const DEFAULT_CONFIG: PalaiaPluginConfig = {
|
|
|
81
85
|
recallMode: "query",
|
|
82
86
|
recallTypeWeight: { ...DEFAULT_RECALL_TYPE_WEIGHTS },
|
|
83
87
|
recallMinScore: 0.7,
|
|
88
|
+
embeddingServer: true,
|
|
84
89
|
};
|
|
85
90
|
|
|
86
91
|
/**
|
package/src/hooks.ts
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
import fs from "node:fs/promises";
|
|
15
15
|
import path from "node:path";
|
|
16
16
|
import os from "node:os";
|
|
17
|
-
import { run, runJson, recover, type RunnerOpts } from "./runner.js";
|
|
17
|
+
import { run, runJson, recover, type RunnerOpts, getEmbedServerManager } from "./runner.js";
|
|
18
18
|
import type { PalaiaPluginConfig, RecallTypeWeights } from "./config.js";
|
|
19
19
|
|
|
20
20
|
// ============================================================================
|
|
@@ -1521,17 +1521,39 @@ export function registerHooks(api: any, config: PalaiaPluginConfig): void {
|
|
|
1521
1521
|
: (event.prompt || null);
|
|
1522
1522
|
|
|
1523
1523
|
if (userMessage && userMessage.length >= 5) {
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1524
|
+
// Try embed server first (fast path: ~0.5s), then CLI fallback (~3-14s)
|
|
1525
|
+
let serverQueried = false;
|
|
1526
|
+
if (config.embeddingServer) {
|
|
1527
|
+
try {
|
|
1528
|
+
const mgr = getEmbedServerManager(opts);
|
|
1529
|
+
const resp = await mgr.query({
|
|
1530
|
+
text: userMessage,
|
|
1531
|
+
top_k: limit,
|
|
1532
|
+
include_cold: config.tier === "all",
|
|
1533
|
+
}, config.timeoutMs || 3000);
|
|
1534
|
+
if (resp?.result?.results && Array.isArray(resp.result.results)) {
|
|
1535
|
+
entries = resp.result.results;
|
|
1536
|
+
serverQueried = true;
|
|
1537
|
+
}
|
|
1538
|
+
} catch (serverError) {
|
|
1539
|
+
logger.warn(`[palaia] Embed server query failed, falling back to CLI: ${serverError}`);
|
|
1528
1540
|
}
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1541
|
+
}
|
|
1542
|
+
|
|
1543
|
+
// CLI fallback
|
|
1544
|
+
if (!serverQueried) {
|
|
1545
|
+
try {
|
|
1546
|
+
const queryArgs: string[] = ["query", userMessage, "--limit", String(limit)];
|
|
1547
|
+
if (config.tier === "all") {
|
|
1548
|
+
queryArgs.push("--all");
|
|
1549
|
+
}
|
|
1550
|
+
const result = await runJson<QueryResult>(queryArgs, { ...opts, timeoutMs: 15000 });
|
|
1551
|
+
if (result && Array.isArray(result.results)) {
|
|
1552
|
+
entries = result.results;
|
|
1553
|
+
}
|
|
1554
|
+
} catch (queryError) {
|
|
1555
|
+
logger.warn(`[palaia] Query recall failed, falling back to list: ${queryError}`);
|
|
1532
1556
|
}
|
|
1533
|
-
} catch (queryError) {
|
|
1534
|
-
logger.warn(`[palaia] Query recall failed, falling back to list: ${queryError}`);
|
|
1535
1557
|
}
|
|
1536
1558
|
}
|
|
1537
1559
|
}
|
package/src/runner.ts
CHANGED
|
@@ -232,3 +232,292 @@ export async function recover(opts: RunnerOpts = {}): Promise<{
|
|
|
232
232
|
/**
 * Clear the memoized binary path so the next runner call re-detects it.
 * NOTE(review): presumably intended for tests — confirm against callers.
 */
export function resetCache(): void {
  cachedBinary = null;
}
|
|
235
|
+
|
|
236
|
+
// ============================================================================
|
|
237
|
+
// EmbedServerManager (v2.0.8)
|
|
238
|
+
// ============================================================================
|
|
239
|
+
|
|
240
|
+
import { spawn, type ChildProcess } from "node:child_process";
|
|
241
|
+
import { createInterface, type Interface as ReadlineInterface } from "node:readline";
|
|
242
|
+
|
|
243
|
+
/**
 * Parameters for the embed server's `query` JSON-RPC method.
 *
 * Only `text` is required; the remaining fields are optional filters
 * forwarded verbatim to the server (their exact semantics are defined by
 * the palaia CLI — field names use snake_case to match its wire protocol).
 */
export interface EmbedServerQueryParams {
  /** Query text to embed and search with. */
  text: string;
  /** Maximum number of results to return. */
  top_k?: number;
  agent?: string;
  project?: string;
  scope?: string;
  type?: string;
  status?: string;
  priority?: string;
  assignee?: string;
  instance?: string;
  /** When true, cold-tier entries are included in the search. */
  include_cold?: boolean;
  cross_project?: boolean;
}

/**
 * Bookkeeping for the single in-flight JSON-RPC request
 * (the stdin/stdout protocol is strictly sequential).
 */
interface PendingRequest {
  resolve: (value: any) => void;
  reject: (reason: any) => void;
  /** Timer that rejects the request if no response line arrives in time. */
  timer: ReturnType<typeof setTimeout>;
}
|
|
263
|
+
|
|
264
|
+
/**
|
|
265
|
+
* Manages a long-lived `palaia embed-server` subprocess.
|
|
266
|
+
*
|
|
267
|
+
* The subprocess loads the embedding model once and serves queries over
|
|
268
|
+
* stdin/stdout JSON-RPC, reducing query time from ~14s to ~0.5s.
|
|
269
|
+
*
|
|
270
|
+
* Features:
|
|
271
|
+
* - Lazy start on first use
|
|
272
|
+
* - Auto-restart on crash (max 3 retries)
|
|
273
|
+
* - Graceful shutdown on process exit
|
|
274
|
+
* - Timeout per request with fallback to CLI
|
|
275
|
+
*/
|
|
276
|
+
export class EmbedServerManager {
|
|
277
|
+
private proc: ChildProcess | null = null;
|
|
278
|
+
private rl: ReadlineInterface | null = null;
|
|
279
|
+
private ready = false;
|
|
280
|
+
private starting = false;
|
|
281
|
+
private restartCount = 0;
|
|
282
|
+
private maxRestarts = 3;
|
|
283
|
+
private opts: RunnerOpts;
|
|
284
|
+
private pendingRequest: PendingRequest | null = null;
|
|
285
|
+
private cleanupRegistered = false;
|
|
286
|
+
|
|
287
|
+
constructor(opts: RunnerOpts = {}) {
|
|
288
|
+
this.opts = opts;
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
/**
|
|
292
|
+
* Start the embed-server subprocess. Resolves when the server signals ready.
|
|
293
|
+
*/
|
|
294
|
+
async start(): Promise<void> {
|
|
295
|
+
if (this.ready || this.starting) return;
|
|
296
|
+
this.starting = true;
|
|
297
|
+
|
|
298
|
+
try {
|
|
299
|
+
const binary = await detectBinary(this.opts.binaryPath);
|
|
300
|
+
let cmd: string;
|
|
301
|
+
let args: string[];
|
|
302
|
+
|
|
303
|
+
if (isPythonModule(binary)) {
|
|
304
|
+
cmd = binary;
|
|
305
|
+
args = ["-m", "palaia", "embed-server"];
|
|
306
|
+
} else {
|
|
307
|
+
cmd = binary;
|
|
308
|
+
args = ["embed-server"];
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
this.proc = spawn(cmd, args, {
|
|
312
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
313
|
+
cwd: this.opts.workspace,
|
|
314
|
+
env: {
|
|
315
|
+
...process.env,
|
|
316
|
+
...(this.opts.workspace ? { PALAIA_HOME: this.opts.workspace } : {}),
|
|
317
|
+
},
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
this.rl = createInterface({ input: this.proc.stdout! });
|
|
321
|
+
|
|
322
|
+
// Wait for the "ready" signal
|
|
323
|
+
await new Promise<void>((resolve, reject) => {
|
|
324
|
+
const timeout = setTimeout(() => {
|
|
325
|
+
reject(new Error("Embed server startup timed out"));
|
|
326
|
+
}, 30_000);
|
|
327
|
+
|
|
328
|
+
const onLine = (line: string) => {
|
|
329
|
+
try {
|
|
330
|
+
const msg = JSON.parse(line);
|
|
331
|
+
if (msg.result === "ready") {
|
|
332
|
+
clearTimeout(timeout);
|
|
333
|
+
this.ready = true;
|
|
334
|
+
this.starting = false;
|
|
335
|
+
this.restartCount = 0;
|
|
336
|
+
// Set up ongoing line handler
|
|
337
|
+
this.rl!.on("line", (l) => this.handleLine(l));
|
|
338
|
+
resolve();
|
|
339
|
+
}
|
|
340
|
+
} catch {
|
|
341
|
+
// Ignore non-JSON lines during startup
|
|
342
|
+
}
|
|
343
|
+
};
|
|
344
|
+
|
|
345
|
+
this.rl!.once("line", onLine);
|
|
346
|
+
|
|
347
|
+
this.proc!.on("error", (err) => {
|
|
348
|
+
clearTimeout(timeout);
|
|
349
|
+
reject(err);
|
|
350
|
+
});
|
|
351
|
+
});
|
|
352
|
+
|
|
353
|
+
// Handle crash
|
|
354
|
+
this.proc.on("exit", (code) => {
|
|
355
|
+
this.ready = false;
|
|
356
|
+
this.proc = null;
|
|
357
|
+
this.rl = null;
|
|
358
|
+
// Reject any pending request
|
|
359
|
+
if (this.pendingRequest) {
|
|
360
|
+
this.pendingRequest.reject(new Error(`Embed server exited with code ${code}`));
|
|
361
|
+
clearTimeout(this.pendingRequest.timer);
|
|
362
|
+
this.pendingRequest = null;
|
|
363
|
+
}
|
|
364
|
+
});
|
|
365
|
+
|
|
366
|
+
// Register cleanup
|
|
367
|
+
if (!this.cleanupRegistered) {
|
|
368
|
+
this.cleanupRegistered = true;
|
|
369
|
+
process.on("exit", () => this.stopSync());
|
|
370
|
+
}
|
|
371
|
+
} catch (err) {
|
|
372
|
+
this.starting = false;
|
|
373
|
+
throw err;
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* Send a query to the embed server.
|
|
379
|
+
*/
|
|
380
|
+
async query(params: EmbedServerQueryParams, timeoutMs = 3000): Promise<any> {
|
|
381
|
+
await this.ensureRunning();
|
|
382
|
+
return this.sendRequest({ method: "query", params }, timeoutMs);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
/**
|
|
386
|
+
* Trigger warmup (index missing entries).
|
|
387
|
+
*/
|
|
388
|
+
async warmup(timeoutMs = 60_000): Promise<any> {
|
|
389
|
+
await this.ensureRunning();
|
|
390
|
+
return this.sendRequest({ method: "warmup" }, timeoutMs);
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
/**
|
|
394
|
+
* Health check.
|
|
395
|
+
*/
|
|
396
|
+
async ping(timeoutMs = 3000): Promise<boolean> {
|
|
397
|
+
try {
|
|
398
|
+
await this.ensureRunning();
|
|
399
|
+
const resp = await this.sendRequest({ method: "ping" }, timeoutMs);
|
|
400
|
+
return resp?.result === "pong";
|
|
401
|
+
} catch {
|
|
402
|
+
return false;
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
/**
|
|
407
|
+
* Get server status.
|
|
408
|
+
*/
|
|
409
|
+
async status(timeoutMs = 3000): Promise<any> {
|
|
410
|
+
await this.ensureRunning();
|
|
411
|
+
return this.sendRequest({ method: "status" }, timeoutMs);
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
/**
|
|
415
|
+
* Stop the subprocess gracefully.
|
|
416
|
+
*/
|
|
417
|
+
async stop(): Promise<void> {
|
|
418
|
+
if (!this.proc) return;
|
|
419
|
+
try {
|
|
420
|
+
await this.sendRequest({ method: "shutdown" }, 3000);
|
|
421
|
+
} catch {
|
|
422
|
+
// Force kill if shutdown request fails
|
|
423
|
+
}
|
|
424
|
+
this.stopSync();
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
/**
|
|
428
|
+
* Synchronous stop (for process.on('exit')).
|
|
429
|
+
*/
|
|
430
|
+
private stopSync(): void {
|
|
431
|
+
if (this.proc) {
|
|
432
|
+
try {
|
|
433
|
+
this.proc.kill("SIGTERM");
|
|
434
|
+
} catch {
|
|
435
|
+
// Already dead
|
|
436
|
+
}
|
|
437
|
+
this.proc = null;
|
|
438
|
+
this.rl = null;
|
|
439
|
+
this.ready = false;
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
/**
|
|
444
|
+
* Whether the server is currently running and ready.
|
|
445
|
+
*/
|
|
446
|
+
get isRunning(): boolean {
|
|
447
|
+
return this.ready && this.proc !== null;
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
private async ensureRunning(): Promise<void> {
|
|
451
|
+
if (this.ready && this.proc) return;
|
|
452
|
+
if (this.restartCount >= this.maxRestarts) {
|
|
453
|
+
throw new Error("Embed server max restarts exceeded");
|
|
454
|
+
}
|
|
455
|
+
this.restartCount++;
|
|
456
|
+
await this.start();
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
private sendRequest(request: Record<string, unknown>, timeoutMs: number): Promise<any> {
|
|
460
|
+
return new Promise((resolve, reject) => {
|
|
461
|
+
if (!this.proc?.stdin?.writable) {
|
|
462
|
+
reject(new Error("Embed server not running"));
|
|
463
|
+
return;
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
// Only one request at a time (sequential protocol)
|
|
467
|
+
if (this.pendingRequest) {
|
|
468
|
+
reject(new Error("Embed server busy"));
|
|
469
|
+
return;
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
const timer = setTimeout(() => {
|
|
473
|
+
this.pendingRequest = null;
|
|
474
|
+
reject(new Error(`Embed server request timed out after ${timeoutMs}ms`));
|
|
475
|
+
}, timeoutMs);
|
|
476
|
+
|
|
477
|
+
this.pendingRequest = { resolve, reject, timer };
|
|
478
|
+
|
|
479
|
+
const line = JSON.stringify(request) + "\n";
|
|
480
|
+
this.proc.stdin!.write(line);
|
|
481
|
+
});
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
private handleLine(line: string): void {
|
|
485
|
+
if (!this.pendingRequest) return;
|
|
486
|
+
try {
|
|
487
|
+
const msg = JSON.parse(line);
|
|
488
|
+
const pending = this.pendingRequest;
|
|
489
|
+
this.pendingRequest = null;
|
|
490
|
+
clearTimeout(pending.timer);
|
|
491
|
+
if (msg.error) {
|
|
492
|
+
pending.reject(new Error(msg.error));
|
|
493
|
+
} else {
|
|
494
|
+
pending.resolve(msg);
|
|
495
|
+
}
|
|
496
|
+
} catch {
|
|
497
|
+
// Ignore non-JSON lines
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
/** Singleton embed server manager instance. */
|
|
503
|
+
let _embedServerManager: EmbedServerManager | null = null;
|
|
504
|
+
|
|
505
|
+
/**
|
|
506
|
+
* Get or create the singleton EmbedServerManager.
|
|
507
|
+
*/
|
|
508
|
+
export function getEmbedServerManager(opts?: RunnerOpts): EmbedServerManager {
|
|
509
|
+
if (!_embedServerManager) {
|
|
510
|
+
_embedServerManager = new EmbedServerManager(opts);
|
|
511
|
+
}
|
|
512
|
+
return _embedServerManager;
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
/**
|
|
516
|
+
* Reset the singleton (for testing).
|
|
517
|
+
*/
|
|
518
|
+
export async function resetEmbedServerManager(): Promise<void> {
|
|
519
|
+
if (_embedServerManager) {
|
|
520
|
+
await _embedServerManager.stop();
|
|
521
|
+
_embedServerManager = null;
|
|
522
|
+
}
|
|
523
|
+
}
|