@byte5ai/palaia 2.0.7 → 2.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/config.ts +5 -0
- package/src/hooks.ts +32 -10
- package/src/runner.ts +289 -0
package/package.json
CHANGED
package/src/config.ts
CHANGED
|
@@ -58,6 +58,10 @@ export interface PalaiaPluginConfig {
|
|
|
58
58
|
// ── Recall Quality (Issue #65) ───────────────────────────────
|
|
59
59
|
/** Minimum score for a recall result to be considered relevant (default: 0.7) */
|
|
60
60
|
recallMinScore: number;
|
|
61
|
+
|
|
62
|
+
// ── Embedding Server (v2.0.8) ──────────────────────────────
|
|
63
|
+
/** Enable long-lived embedding server subprocess for fast queries (default: true) */
|
|
64
|
+
embeddingServer: boolean;
|
|
61
65
|
}
|
|
62
66
|
|
|
63
67
|
export const DEFAULT_RECALL_TYPE_WEIGHTS: RecallTypeWeights = {
|
|
@@ -81,6 +85,7 @@ export const DEFAULT_CONFIG: PalaiaPluginConfig = {
|
|
|
81
85
|
recallMode: "query",
|
|
82
86
|
recallTypeWeight: { ...DEFAULT_RECALL_TYPE_WEIGHTS },
|
|
83
87
|
recallMinScore: 0.7,
|
|
88
|
+
embeddingServer: true,
|
|
84
89
|
};
|
|
85
90
|
|
|
86
91
|
/**
|
package/src/hooks.ts
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
import fs from "node:fs/promises";
|
|
15
15
|
import path from "node:path";
|
|
16
16
|
import os from "node:os";
|
|
17
|
-
import { run, runJson, recover, type RunnerOpts } from "./runner.js";
|
|
17
|
+
import { run, runJson, recover, type RunnerOpts, getEmbedServerManager } from "./runner.js";
|
|
18
18
|
import type { PalaiaPluginConfig, RecallTypeWeights } from "./config.js";
|
|
19
19
|
|
|
20
20
|
// ============================================================================
|
|
@@ -1521,17 +1521,39 @@ export function registerHooks(api: any, config: PalaiaPluginConfig): void {
|
|
|
1521
1521
|
: (event.prompt || null);
|
|
1522
1522
|
|
|
1523
1523
|
if (userMessage && userMessage.length >= 5) {
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1524
|
+
// Try embed server first (fast path: ~0.5s), then CLI fallback (~3-14s)
|
|
1525
|
+
let serverQueried = false;
|
|
1526
|
+
if (config.embeddingServer) {
|
|
1527
|
+
try {
|
|
1528
|
+
const mgr = getEmbedServerManager(opts);
|
|
1529
|
+
const resp = await mgr.query({
|
|
1530
|
+
text: userMessage,
|
|
1531
|
+
top_k: limit,
|
|
1532
|
+
include_cold: config.tier === "all",
|
|
1533
|
+
}, config.timeoutMs || 3000);
|
|
1534
|
+
if (resp?.result?.results && Array.isArray(resp.result.results)) {
|
|
1535
|
+
entries = resp.result.results;
|
|
1536
|
+
serverQueried = true;
|
|
1537
|
+
}
|
|
1538
|
+
} catch (serverError) {
|
|
1539
|
+
logger.warn(`[palaia] Embed server query failed, falling back to CLI: ${serverError}`);
|
|
1528
1540
|
}
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1541
|
+
}
|
|
1542
|
+
|
|
1543
|
+
// CLI fallback
|
|
1544
|
+
if (!serverQueried) {
|
|
1545
|
+
try {
|
|
1546
|
+
const queryArgs: string[] = ["query", userMessage, "--limit", String(limit)];
|
|
1547
|
+
if (config.tier === "all") {
|
|
1548
|
+
queryArgs.push("--all");
|
|
1549
|
+
}
|
|
1550
|
+
const result = await runJson<QueryResult>(queryArgs, { ...opts, timeoutMs: 15000 });
|
|
1551
|
+
if (result && Array.isArray(result.results)) {
|
|
1552
|
+
entries = result.results;
|
|
1553
|
+
}
|
|
1554
|
+
} catch (queryError) {
|
|
1555
|
+
logger.warn(`[palaia] Query recall failed, falling back to list: ${queryError}`);
|
|
1532
1556
|
}
|
|
1533
|
-
} catch (queryError) {
|
|
1534
|
-
logger.warn(`[palaia] Query recall failed, falling back to list: ${queryError}`);
|
|
1535
1557
|
}
|
|
1536
1558
|
}
|
|
1537
1559
|
}
|
package/src/runner.ts
CHANGED
|
@@ -232,3 +232,292 @@ export async function recover(opts: RunnerOpts = {}): Promise<{
|
|
|
232
232
|
/**
 * Clear the memoized binary path so the next runner call re-detects it.
 * NOTE(review): presumably intended for tests — confirm against callers.
 */
export function resetCache(): void {
  cachedBinary = null;
}
|
|
235
|
+
|
|
236
|
+
// ============================================================================
|
|
237
|
+
// EmbedServerManager (v2.0.8)
|
|
238
|
+
// ============================================================================
|
|
239
|
+
|
|
240
|
+
import { spawn, type ChildProcess } from "node:child_process";
|
|
241
|
+
import { createInterface, type Interface as ReadlineInterface } from "node:readline";
|
|
242
|
+
|
|
243
|
+
/**
 * Parameters for the embed server's `query` JSON-RPC method.
 *
 * Only `text` is required; the remaining fields are optional filters
 * forwarded verbatim to the server (their exact semantics are defined by
 * the palaia CLI — field names use snake_case to match its wire protocol).
 */
export interface EmbedServerQueryParams {
  /** Query text to embed and search with. */
  text: string;
  /** Maximum number of results to return. */
  top_k?: number;
  agent?: string;
  project?: string;
  scope?: string;
  type?: string;
  status?: string;
  priority?: string;
  assignee?: string;
  instance?: string;
  /** When true, cold-tier entries are included in the search. */
  include_cold?: boolean;
  cross_project?: boolean;
}

/**
 * Bookkeeping for the single in-flight JSON-RPC request
 * (the stdin/stdout protocol is strictly sequential).
 */
interface PendingRequest {
  resolve: (value: any) => void;
  reject: (reason: any) => void;
  /** Timer that rejects the request if no response line arrives in time. */
  timer: ReturnType<typeof setTimeout>;
}
|
|
263
|
+
|
|
264
|
+
/**
|
|
265
|
+
* Manages a long-lived `palaia embed-server` subprocess.
|
|
266
|
+
*
|
|
267
|
+
* The subprocess loads the embedding model once and serves queries over
|
|
268
|
+
* stdin/stdout JSON-RPC, reducing query time from ~14s to ~0.5s.
|
|
269
|
+
*
|
|
270
|
+
* Features:
|
|
271
|
+
* - Lazy start on first use
|
|
272
|
+
* - Auto-restart on crash (max 3 retries)
|
|
273
|
+
* - Graceful shutdown on process exit
|
|
274
|
+
* - Timeout per request with fallback to CLI
|
|
275
|
+
*/
|
|
276
|
+
export class EmbedServerManager {
|
|
277
|
+
private proc: ChildProcess | null = null;
|
|
278
|
+
private rl: ReadlineInterface | null = null;
|
|
279
|
+
private ready = false;
|
|
280
|
+
private starting = false;
|
|
281
|
+
private restartCount = 0;
|
|
282
|
+
private maxRestarts = 3;
|
|
283
|
+
private opts: RunnerOpts;
|
|
284
|
+
private pendingRequest: PendingRequest | null = null;
|
|
285
|
+
private cleanupRegistered = false;
|
|
286
|
+
|
|
287
|
+
constructor(opts: RunnerOpts = {}) {
|
|
288
|
+
this.opts = opts;
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
/**
|
|
292
|
+
* Start the embed-server subprocess. Resolves when the server signals ready.
|
|
293
|
+
*/
|
|
294
|
+
async start(): Promise<void> {
|
|
295
|
+
if (this.ready || this.starting) return;
|
|
296
|
+
this.starting = true;
|
|
297
|
+
|
|
298
|
+
try {
|
|
299
|
+
const binary = await detectBinary(this.opts.binaryPath);
|
|
300
|
+
let cmd: string;
|
|
301
|
+
let args: string[];
|
|
302
|
+
|
|
303
|
+
if (isPythonModule(binary)) {
|
|
304
|
+
cmd = binary;
|
|
305
|
+
args = ["-m", "palaia", "embed-server"];
|
|
306
|
+
} else {
|
|
307
|
+
cmd = binary;
|
|
308
|
+
args = ["embed-server"];
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
this.proc = spawn(cmd, args, {
|
|
312
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
313
|
+
cwd: this.opts.workspace,
|
|
314
|
+
env: {
|
|
315
|
+
...process.env,
|
|
316
|
+
...(this.opts.workspace ? { PALAIA_HOME: this.opts.workspace } : {}),
|
|
317
|
+
},
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
this.rl = createInterface({ input: this.proc.stdout! });
|
|
321
|
+
|
|
322
|
+
// Wait for the "ready" signal
|
|
323
|
+
await new Promise<void>((resolve, reject) => {
|
|
324
|
+
const timeout = setTimeout(() => {
|
|
325
|
+
reject(new Error("Embed server startup timed out"));
|
|
326
|
+
}, 30_000);
|
|
327
|
+
|
|
328
|
+
const onLine = (line: string) => {
|
|
329
|
+
try {
|
|
330
|
+
const msg = JSON.parse(line);
|
|
331
|
+
if (msg.result === "ready") {
|
|
332
|
+
clearTimeout(timeout);
|
|
333
|
+
this.ready = true;
|
|
334
|
+
this.starting = false;
|
|
335
|
+
this.restartCount = 0;
|
|
336
|
+
// Set up ongoing line handler
|
|
337
|
+
this.rl!.on("line", (l) => this.handleLine(l));
|
|
338
|
+
resolve();
|
|
339
|
+
}
|
|
340
|
+
} catch {
|
|
341
|
+
// Ignore non-JSON lines during startup
|
|
342
|
+
}
|
|
343
|
+
};
|
|
344
|
+
|
|
345
|
+
this.rl!.once("line", onLine);
|
|
346
|
+
|
|
347
|
+
this.proc!.on("error", (err) => {
|
|
348
|
+
clearTimeout(timeout);
|
|
349
|
+
reject(err);
|
|
350
|
+
});
|
|
351
|
+
});
|
|
352
|
+
|
|
353
|
+
// Handle crash
|
|
354
|
+
this.proc.on("exit", (code) => {
|
|
355
|
+
this.ready = false;
|
|
356
|
+
this.proc = null;
|
|
357
|
+
this.rl = null;
|
|
358
|
+
// Reject any pending request
|
|
359
|
+
if (this.pendingRequest) {
|
|
360
|
+
this.pendingRequest.reject(new Error(`Embed server exited with code ${code}`));
|
|
361
|
+
clearTimeout(this.pendingRequest.timer);
|
|
362
|
+
this.pendingRequest = null;
|
|
363
|
+
}
|
|
364
|
+
});
|
|
365
|
+
|
|
366
|
+
// Register cleanup
|
|
367
|
+
if (!this.cleanupRegistered) {
|
|
368
|
+
this.cleanupRegistered = true;
|
|
369
|
+
process.on("exit", () => this.stopSync());
|
|
370
|
+
}
|
|
371
|
+
} catch (err) {
|
|
372
|
+
this.starting = false;
|
|
373
|
+
throw err;
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* Send a query to the embed server.
|
|
379
|
+
*/
|
|
380
|
+
async query(params: EmbedServerQueryParams, timeoutMs = 3000): Promise<any> {
|
|
381
|
+
await this.ensureRunning();
|
|
382
|
+
return this.sendRequest({ method: "query", params }, timeoutMs);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
/**
|
|
386
|
+
* Trigger warmup (index missing entries).
|
|
387
|
+
*/
|
|
388
|
+
async warmup(timeoutMs = 60_000): Promise<any> {
|
|
389
|
+
await this.ensureRunning();
|
|
390
|
+
return this.sendRequest({ method: "warmup" }, timeoutMs);
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
/**
|
|
394
|
+
* Health check.
|
|
395
|
+
*/
|
|
396
|
+
async ping(timeoutMs = 3000): Promise<boolean> {
|
|
397
|
+
try {
|
|
398
|
+
await this.ensureRunning();
|
|
399
|
+
const resp = await this.sendRequest({ method: "ping" }, timeoutMs);
|
|
400
|
+
return resp?.result === "pong";
|
|
401
|
+
} catch {
|
|
402
|
+
return false;
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
/**
|
|
407
|
+
* Get server status.
|
|
408
|
+
*/
|
|
409
|
+
async status(timeoutMs = 3000): Promise<any> {
|
|
410
|
+
await this.ensureRunning();
|
|
411
|
+
return this.sendRequest({ method: "status" }, timeoutMs);
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
/**
|
|
415
|
+
* Stop the subprocess gracefully.
|
|
416
|
+
*/
|
|
417
|
+
async stop(): Promise<void> {
|
|
418
|
+
if (!this.proc) return;
|
|
419
|
+
try {
|
|
420
|
+
await this.sendRequest({ method: "shutdown" }, 3000);
|
|
421
|
+
} catch {
|
|
422
|
+
// Force kill if shutdown request fails
|
|
423
|
+
}
|
|
424
|
+
this.stopSync();
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
/**
|
|
428
|
+
* Synchronous stop (for process.on('exit')).
|
|
429
|
+
*/
|
|
430
|
+
private stopSync(): void {
|
|
431
|
+
if (this.proc) {
|
|
432
|
+
try {
|
|
433
|
+
this.proc.kill("SIGTERM");
|
|
434
|
+
} catch {
|
|
435
|
+
// Already dead
|
|
436
|
+
}
|
|
437
|
+
this.proc = null;
|
|
438
|
+
this.rl = null;
|
|
439
|
+
this.ready = false;
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
/**
|
|
444
|
+
* Whether the server is currently running and ready.
|
|
445
|
+
*/
|
|
446
|
+
get isRunning(): boolean {
|
|
447
|
+
return this.ready && this.proc !== null;
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
private async ensureRunning(): Promise<void> {
|
|
451
|
+
if (this.ready && this.proc) return;
|
|
452
|
+
if (this.restartCount >= this.maxRestarts) {
|
|
453
|
+
throw new Error("Embed server max restarts exceeded");
|
|
454
|
+
}
|
|
455
|
+
this.restartCount++;
|
|
456
|
+
await this.start();
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
private sendRequest(request: Record<string, unknown>, timeoutMs: number): Promise<any> {
|
|
460
|
+
return new Promise((resolve, reject) => {
|
|
461
|
+
if (!this.proc?.stdin?.writable) {
|
|
462
|
+
reject(new Error("Embed server not running"));
|
|
463
|
+
return;
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
// Only one request at a time (sequential protocol)
|
|
467
|
+
if (this.pendingRequest) {
|
|
468
|
+
reject(new Error("Embed server busy"));
|
|
469
|
+
return;
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
const timer = setTimeout(() => {
|
|
473
|
+
this.pendingRequest = null;
|
|
474
|
+
reject(new Error(`Embed server request timed out after ${timeoutMs}ms`));
|
|
475
|
+
}, timeoutMs);
|
|
476
|
+
|
|
477
|
+
this.pendingRequest = { resolve, reject, timer };
|
|
478
|
+
|
|
479
|
+
const line = JSON.stringify(request) + "\n";
|
|
480
|
+
this.proc.stdin!.write(line);
|
|
481
|
+
});
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
private handleLine(line: string): void {
|
|
485
|
+
if (!this.pendingRequest) return;
|
|
486
|
+
try {
|
|
487
|
+
const msg = JSON.parse(line);
|
|
488
|
+
const pending = this.pendingRequest;
|
|
489
|
+
this.pendingRequest = null;
|
|
490
|
+
clearTimeout(pending.timer);
|
|
491
|
+
if (msg.error) {
|
|
492
|
+
pending.reject(new Error(msg.error));
|
|
493
|
+
} else {
|
|
494
|
+
pending.resolve(msg);
|
|
495
|
+
}
|
|
496
|
+
} catch {
|
|
497
|
+
// Ignore non-JSON lines
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
/** Singleton embed server manager instance. */
|
|
503
|
+
let _embedServerManager: EmbedServerManager | null = null;
|
|
504
|
+
|
|
505
|
+
/**
|
|
506
|
+
* Get or create the singleton EmbedServerManager.
|
|
507
|
+
*/
|
|
508
|
+
export function getEmbedServerManager(opts?: RunnerOpts): EmbedServerManager {
|
|
509
|
+
if (!_embedServerManager) {
|
|
510
|
+
_embedServerManager = new EmbedServerManager(opts);
|
|
511
|
+
}
|
|
512
|
+
return _embedServerManager;
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
/**
|
|
516
|
+
* Reset the singleton (for testing).
|
|
517
|
+
*/
|
|
518
|
+
export async function resetEmbedServerManager(): Promise<void> {
|
|
519
|
+
if (_embedServerManager) {
|
|
520
|
+
await _embedServerManager.stop();
|
|
521
|
+
_embedServerManager = null;
|
|
522
|
+
}
|
|
523
|
+
}
|