npm - @exulu/backend - Versions diffs - 1.66.0 → 1.68.0 - Mend

@exulu/backend 1.66.0 → 1.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/chunk-VPSLTGZF.js +10267 -0
package/dist/{convert-exulu-tools-to-ai-sdk-tools-ZFIN7A5V.js → convert-exulu-tools-to-ai-sdk-tools-CHQF36XW.js} +1 -2
package/dist/index.cjs +23930 -22308
package/dist/index.d.cts +401 -100
package/dist/index.d.ts +401 -100
package/dist/index.js +15215 -4233
package/ee/agentic-retrieval/v3/agent-loop.ts +4 -4
package/ee/agentic-retrieval/v3/index.ts +20 -6
package/ee/python/documents/processing/doc_processor.ts +79 -34
package/ee/python/requirements.txt +8 -1
package/ee/python/setup.sh +0 -49
package/ee/queues/decorator.ts +36 -0
package/ee/queues/prune-job-results.ts +55 -0
package/ee/schemas.ts +19 -0
package/ee/workers.ts +59 -32
package/package.json +1 -1
package/dist/chunk-KQDNL5WU.js +0 -19399
package/ee/agentic-retrieval/v4/agent-loop.ts +0 -208
package/ee/agentic-retrieval/v4/context-sampler.ts +0 -79
package/ee/agentic-retrieval/v4/index.ts +0 -690
package/ee/agentic-retrieval/v4/types.ts +0 -58
package/ee/python/.hermes/.env.example +0 -8
package/ee/python/.hermes/README.md +0 -44
package/ee/python/.hermes/SOUL.md.example +0 -8
package/ee/python/.hermes/config.yaml.example +0 -55

package/ee/agentic-retrieval/v3/agent-loop.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import { generateText, stepCountIs, tool } from "ai";
 import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
 import { z } from "zod";
 import { withRetry } from "@SRC/utils/with-retry";
-import type { ExuluReranker } from "@SRC/exulu/reranker";
+import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
 import type { AgenticRetrievalOutput, ChunkResult, ClassificationResult } from "./types";
 import type { StrategyConfig } from "./strategies";
 import { createDynamicTools } from "./dynamic-tools";
@@ -69,7 +69,7 @@ export async function* runAgentLoop(params: {
   strategy: StrategyConfig;
   tools: Record<string, AITool>;
   model: LanguageModel;
-  reranker?: ExuluReranker;
+  reranker?: ResolvedReranker;
   contextGuidance?: string;
   customInstructions?: string;
   classification: ClassificationResult;
@@ -171,8 +171,8 @@ export async function* runAgentLoop(params: {
     // Rerank if reranker is available
     if (reranker && stepChunks.length > 0) {
-      console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.name}`);
-      stepChunks = await reranker.run(query, stepChunks as any);
+      console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.model}`);
+      stepChunks = await reranker.rerank(query, stepChunks);
     }
     // Create dynamic tools (browse adjacent pages, load specific pages)

package/ee/agentic-retrieval/v3/index.ts CHANGED Viewed

@@ -2,7 +2,8 @@ import { z } from "zod";
 import { createBashTool } from "bash-tool";
 import type { LanguageModel, Tool } from "ai";
 import type { ExuluContext } from "@SRC/exulu/context";
-import type { ExuluReranker } from "@SRC/exulu/reranker";
+import { resolveReranker } from "@SRC/exulu/resolve-reranker";
+import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
 import { ExuluTool } from "@SRC/exulu/tool";
 import type { User } from "@EXULU_TYPES/models/user";
 import { checkLicense } from "@EE/entitlements";
@@ -34,7 +35,7 @@ async function* executeV3({
 }: {
   query: string;
   contexts: ExuluContext[];
-  reranker?: ExuluReranker;
+  reranker?: ResolvedReranker;
   toolVariablesConfig?: Record<string, any>;
   model: LanguageModel;
   user?: User;
@@ -189,7 +190,6 @@ async function* executeV3({
 export function createAgenticRetrievalToolV3({
   contexts,
   instructions: adminInstructions,
-  rerankers,
   user,
   role,
   model,
@@ -197,7 +197,6 @@ export function createAgenticRetrievalToolV3({
   memoryItems
 }: {
   contexts: ExuluContext[];
-  rerankers: ExuluReranker[];
   user?: User;
   role?: string;
   model?: LanguageModel;
@@ -355,7 +354,7 @@ export function createAgenticRetrievalToolV3({
       }
       let activeContexts = contexts;
-      let configuredReranker: ExuluReranker | undefined;
+      let configuredReranker: ResolvedReranker | undefined;
       let configInstructions = "";
       let logTrajectory = false;
       let requiresPreselectedContexts = false;
@@ -382,7 +381,22 @@ export function createAgenticRetrievalToolV3({
         const rerankerId = toolVariablesConfig["reranker"];
         if (rerankerId && rerankerId !== "none") {
-          configuredReranker = rerankers.find((r) => r.id === rerankerId);
+          // rerankerId is a LiteLLM model_name from config.litellm.yaml
+          // (model_info.type: reranker). Resolution is best-effort: a
+          // misconfigured model or an unready proxy must not break retrieval —
+          // it just runs unreranked, matching the old find()→undefined path.
+          try {
+            configuredReranker = await resolveReranker({
+              model: rerankerId,
+              user,
+              roleId: role,
+            });
+          } catch (err) {
+            console.warn(
+              `[EXULU] v3 — could not resolve reranker "${rerankerId}", continuing without reranking:`,
+              err,
+            );
+          }
         }
       }

package/ee/python/documents/processing/doc_processor.ts CHANGED Viewed

@@ -14,17 +14,41 @@ import { checkLicense } from '@EE/entitlements';
 import { executePythonScript } from '@SRC/utils/python-executor';
 import { setupPythonEnvironment, validatePythonEnvironment } from '@SRC/utils/python-setup';
 import { LiteParse } from '@llamaindex/liteparse';
-import { Mistral } from '@mistralai/mistralai';
-import { ExuluVariables } from '@SRC/index';
+import { resolveOcr } from '@SRC/exulu/resolve-ocr';
+import type { ResolveOcrInput } from '@SRC/exulu/resolve-ocr';
+import { resolveModel } from '@SRC/exulu/resolve-model';
 type DocumentProcessorConfig = {
   vlm?: {
-    model: LanguageModel;
+    /**
+     * LiteLLM model_name for the VLM page-validation pass (declared in
+     * config.litellm.yaml, e.g. "vertex-gemini-2.5-flash"). Resolved via
+     * resolveModel() so the VLM pass shares the same tag-based cost controls
+     * and provider-switching as chat / embeddings / OCR, and the underlying
+     * provider can be swapped without code changes.
+     */
+    model: string;
     concurrency: number;
   },
   processor: {
     name: "docling" | "liteparse" | "mistral" | "officeparser"
+    /**
+     * LiteLLM model_name for the "mistral" OCR processor (declared in
+     * config.litellm.yaml). Defaults to "mistral-ocr". OCR is routed through
+     * the LiteLLM proxy so it shares the same tag-based cost controls as chat
+     * and embeddings, and the underlying provider (mistral / azure_ai /
+     * vertex_ai) can be switched without code changes.
+     */
+    model?: string
   }
+  /**
+   * Optional cost-attribution context, forwarded to LiteLLM for spend tagging.
+   * The OCR pass (resolveOcr) receives every field; the VLM page-validation
+   * pass (resolveModel) receives user / project / agent / routine. Not yet
+   * populated by callers; the wiring is in place so per-user/per-context
+   * budgets work the moment attribution is threaded through.
+   */
+  attribution?: Omit<ResolveOcrInput, "model">
   debugging?: {
     deleteTempFiles?: boolean;
   }
@@ -94,6 +118,38 @@ async function processWord(file: Buffer): Promise<ProcessorOutput> {
   }
 }
+/**
+ * Resolve the dev-supplied VLM `model` string (a LiteLLM model_name from
+ * config.litellm.yaml, e.g. "vertex-gemini-2.5-flash") into an `ai` SDK
+ * LanguageModel via resolveModel. This routes the VLM page-validation pass
+ * through the LiteLLM proxy — same tag-based cost controls and provider
+ * switching as chat / embeddings / OCR — and keeps the internal VLM helpers
+ * (validateWithVLM / validatePageWithVLM) working with a LanguageModel.
+ *
+ * Returns undefined when no VLM model is configured. Attribution (user /
+ * project / agent / routine) is forwarded for spend tagging when callers
+ * populate config.attribution; rbacBypass is set because this is a background
+ * package call where model-level access control is delegated to LiteLLM.
+ */
+async function resolveVlmModel(
+  config?: DocumentProcessorConfig,
+): Promise<LanguageModel | undefined> {
+  const modelId = config?.vlm?.model;
+  if (!modelId) return undefined;
+  const { languageModel } = await resolveModel({
+    modelId,
+    providers: [], // unused in LiteLLM mode; resolveModel ignores it there
+    user: config?.attribution?.user,
+    project: config?.attribution?.project,
+    agent: config?.attribution?.agent,
+    routine: config?.attribution?.routine,
+    rbacBypass: true,
+  });
+  return languageModel;
+}
 /**
  * Processes a standalone image file by optionally extracting content using VLM
  */
@@ -122,14 +178,15 @@ async function processImage(
     }];
     // If VLM is enabled, use it to extract content from the image
-    if (config?.vlm?.model) {
+    const vlmModel = await resolveVlmModel(config);
+    if (vlmModel) {
       console.log('[EXULU] Extracting content from image using VLM...');
       json = await validateWithVLM(
         json,
-        config.vlm.model,
+        vlmModel,
         verbose,
-        config.vlm.concurrency
+        config!.vlm!.concurrency
       );
       // Save the processed result
@@ -679,15 +736,6 @@ async function processDocument(
   };
 }
-const getMistralApiKey = async () => {
-  if (process.env.MISTRAL_API_KEY) {
-    return process.env.MISTRAL_API_KEY;
-  } else {
-    const variable = await ExuluVariables.get("MISTRAL_API_KEY");
-    return variable;
-  }
-}
 async function processPdf(
   buffer: Buffer,
   paths: ProcessingPaths,
@@ -759,28 +807,25 @@ async function processPdf(
     } else if (config?.processor.name === "mistral") {
-      const MISTRAL_API_KEY = await getMistralApiKey();
-      if (!MISTRAL_API_KEY) {
-        throw new Error('[EXULU] MISTRAL_API_KEY is not set, please set it in the environment variable via process.env or via an Exulu variable named "MISTRAL_API_KEY".');
-      }
+      // OCR is routed through the LiteLLM proxy's Mistral-compatible /v1/ocr
+      // endpoint (see resolveOcr) rather than the Mistral SDK directly. This
+      // gives us tag-based cost control and lets us switch the OCR provider
+      // (mistral / azure_ai / vertex_ai) from config.litellm.yaml.
+      const resolved = await resolveOcr({
+        model: config.processor.model ?? "mistral-ocr",
+        ...config.attribution,
+      });
       // Wait a random time between 1 and 5 seconds to prevent rate limiting
       await new Promise(resolve => setTimeout(resolve, Math.floor(Math.random() * 4000) + 1000));
       const base64Pdf = buffer.toString('base64');
-      const client = new Mistral({ apiKey: MISTRAL_API_KEY });
       const ocrResponse = await withRetry(async () => {
-        type MistralOCRResponse = Awaited<ReturnType<typeof client.ocr.process>>;
-        const ocrResponse: MistralOCRResponse = await client.ocr.process({
-          document: {
-            type: "document_url",
-            documentUrl: "data:application/pdf;base64," + base64Pdf
-          },
-          model: "mistral-ocr-latest",
-          includeImageBase64: false
-        });
-        return ocrResponse;
+        return await resolved.ocr({
+          type: "document_url",
+          document_url: "data:application/pdf;base64," + base64Pdf,
+        }, { includeImageBase64: false });
       }, 10);
       const parser = new LiteParse();
@@ -838,13 +883,14 @@ async function processPdf(
     }
     // Apply VLM validation if enabled
-    if (config?.vlm?.model && json.length > 0) {
+    const vlmModel = config?.vlm?.model ? await resolveVlmModel(config) : undefined;
+    if (vlmModel && json.length > 0) {
       json = await validateWithVLM(
         json,
-        config.vlm.model,
+        vlmModel,
         verbose,
-        config.vlm.concurrency
+        config!.vlm!.concurrency
       );
       console.log('[EXULU] \n📊 Processing Summary:');
@@ -1046,7 +1092,6 @@ export async function documentProcessor({
   } catch (error) {
     console.error('Error during chunking:', error);
     throw error;
   } finally {
     if (config?.debugging?.deleteTempFiles !== false) {
       // Delete the temp directory using the local array to avoid race conditions

package/ee/python/requirements.txt CHANGED Viewed

@@ -1,5 +1,9 @@
 docling
-transformers
+# transformers <5: the 5.x line requires huggingface_hub>=1.0, which removed the
+# `use_auth_token` kwarg that pyannote.audio 3.x still passes to hf_hub_download()
+# (→ "unexpected keyword argument 'use_auth_token'", diarization silently
+# disabled). whisperx only needs transformers>=4.48, so the 4.x line is fine.
+transformers>=4.48,<5
 pyinstaller
 docling-hierarchical-pdf
 defusedxml
@@ -17,6 +21,9 @@ torchaudio==2.5.1
 torchvision==0.20.1
 whisperx>=3.4.0
 pyannote.audio>=3.3.0
+# Belt-and-suspenders: keep huggingface_hub on the 0.x line so pyannote 3.x's
+# `use_auth_token=` calls keep working (1.x removed that kwarg → diarization off).
+huggingface_hub<1.0
 fastapi
 uvicorn
 python-multipart

package/ee/python/setup.sh CHANGED Viewed

@@ -253,46 +253,6 @@ if [ -n "$LITELLM_PROXY_DIR" ] && [ -f "$LITELLM_PROXY_DIR/schema.prisma" ]; the
         || print_warning "Prisma generate failed; LiteLLM database mode (database_url in config.litellm.yaml) may not work until you run 'cd $LITELLM_PROXY_DIR && PATH=$VENV_DIR/bin:\$PATH $VENV_DIR/bin/prisma generate'"
 fi
-# Step 6.6: Install the Hermes Agent harness (advanced agent mode).
-# Opt-in via ENABLE_HERMES_AGENT=true. Hermes is NOT a pip package — it ships
-# as a standalone binary via Nous Research's official installer (lands in
-# ~/.local/bin/hermes). We only install if it's not already present so re-runs
-# are fast, and we never fail the whole setup if the install fails (advanced
-# mode is optional; the operator can install it manually and retry).
-if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
-    echo ""
-    echo "Step 6.6: Installing Hermes Agent harness (ENABLE_HERMES_AGENT=true)..."
-    if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
-        HERMES_VERSION=$( (command -v hermes &> /dev/null && hermes --version 2>/dev/null) || "$HOME/.local/bin/hermes" --version 2>/dev/null || echo "unknown")
-        print_success "Hermes already installed ($HERMES_VERSION) — skipping installer"
-    else
-        print_info "Running Hermes official installer..."
-        if curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash; then
-            print_success "Hermes Agent installed (binary at ~/.local/bin/hermes)"
-        else
-            print_warning "Hermes installer failed. Advanced agent mode will be unavailable until 'hermes' is on PATH. Install manually: https://hermes-agent.nousresearch.com/docs/getting-started/installation"
-        fi
-    fi
-    # Pre-pull the docker terminal-backend image so the first agent request
-    # isn't blocked on a cold image pull (~minute). Only when the backend is
-    # docker (the default) and docker is available; non-fatal otherwise.
-    HERMES_BACKEND="${HERMES_TERMINAL_BACKEND:-docker}"
-    if [ "${HERMES_BACKEND}" = "docker" ]; then
-        HERMES_IMG="${HERMES_DOCKER_IMAGE:-nikolaik/python-nodejs:python3.11-nodejs20}"
-        if command -v docker &> /dev/null; then
-            print_info "Pre-pulling Hermes docker backend image: ${HERMES_IMG}..."
-            if docker pull "${HERMES_IMG}" > /dev/null 2>&1; then
-                print_success "Docker backend image ready (${HERMES_IMG})"
-            else
-                print_warning "Could not pre-pull ${HERMES_IMG}; the first advanced-mode request will pull it (slower)."
-            fi
-        else
-            print_warning "Docker not found, but HERMES_TERMINAL_BACKEND=docker. Install Docker, or set HERMES_TERMINAL_BACKEND=local (unsandboxed)."
-        fi
-    fi
-fi
 # Step 7: Validate installation
 echo ""
 echo "Step 7: Validating installation..."
@@ -309,15 +269,6 @@ $PYTHON_CMD -c "import whisperx" 2>/dev/null && print_success "whisperx imported
 $PYTHON_CMD -c "import pyannote.audio" 2>/dev/null && print_success "pyannote.audio imported successfully" || print_warning "pyannote.audio not importable (diarization will be disabled even with HF_AUTH_TOKEN)"
 $PYTHON_CMD -c "import fastapi, uvicorn" 2>/dev/null && print_success "fastapi/uvicorn imported successfully" || print_warning "fastapi/uvicorn not importable (transcription server will not start)"
-# Hermes Agent binary check (advanced agent mode) — only when opted in.
-if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
-    if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
-        print_success "hermes binary available (advanced agent mode ready)"
-    else
-        print_warning "hermes binary not found (advanced agent mode will be unavailable)"
-    fi
-fi
 # Step 8: Display summary
 echo ""
 echo -e "${GREEN}========================================${NC}"

package/ee/queues/decorator.ts CHANGED Viewed

@@ -2,6 +2,8 @@ import { Queue } from "bullmq";
 import { v4 as uuidv4 } from "uuid";
 import type { UIMessage } from "ai";
 import type { STATISTICS_LABELS } from "@EXULU_TYPES/statistics";
+import { postgresClient } from "@SRC/postgres/client";
+import { maybePruneJobResults } from "./prune-job-results";
 type ExuluJobType = "embedder" | "workflow" | "eval" | "processor";
@@ -120,6 +122,40 @@ export const bullmqDecorator = async ({
   };
   const redisId = uuidv4();
+  // Knowledge V2 (KB-7): record the job in job_results at ENQUEUE time (state
+  // "waiting") for processor/embedder jobs, so the item detail page can detect
+  // jobs that are queued-but-not-yet-started (which it couldn't if the row was
+  // only written at worker pickup). Inserted BEFORE queue.add so the row is
+  // guaranteed present before any worker can grab the job (no insert/update
+  // race). The worker-start update + completed/failed handlers drive the row
+  // through active → completed/failed, all keyed by this job_id.
+  if ((type === "processor" || type === "embedder") && context) {
+    try {
+      const { db } = await postgresClient();
+      const itemId =
+        item == null
+          ? null
+          : typeof item === "object"
+            ? ((item as { id?: unknown }).id ?? null)
+            : item;
+      await db.from("job_results").insert({
+        job_id: redisId,
+        label,
+        state: "waiting",
+        type,
+        item: itemId == null ? null : String(itemId),
+        context: String(context),
+        result: null,
+        metadata: {},
+      });
+      // Bound the table: every Nth added row, prune the oldest terminal rows.
+      void maybePruneJobResults(db);
+    } catch (err) {
+      console.error("[EXULU] enqueue job_results insert failed", err);
+    }
+  }
   const job = await queue.add(`${embedder || workflow || processor || evaluation}`, jobData, {
     jobId: redisId,
     // Setting it to 3 as a sensible default, as

package/ee/queues/prune-job-results.ts ADDED Viewed

@@ -0,0 +1,55 @@
+/**
+ * Periodic job_results cap (knowledge V2 KB-7 follow-up).
+ *
+ * We now write a job_results row at enqueue time, so the table grows faster.
+ * To bound it, every PRUNE_EVERY-th call we delete the oldest terminal rows
+ * (state failed/completed) beyond the newest MAX_TERMINAL — keeping a rolling
+ * window of recent finished jobs. Waiting/active/delayed rows are never
+ * pruned (they're still live).
+ *
+ * The counter is per-process (the API process counts enqueues; the worker
+ * process counts completions) — that's fine: the prune is idempotent, so it
+ * doesn't matter which process triggers it. A `pruning` guard avoids
+ * overlapping runs.
+ */
+const MAX_TERMINAL = 10_000;
+const PRUNE_EVERY = 100;
+const TERMINAL_STATES = ["failed", "completed"];
+let sinceLastPrune = 0;
+let pruning = false;
+export async function maybePruneJobResults(db: any): Promise<void> {
+  sinceLastPrune += 1;
+  if (sinceLastPrune < PRUNE_EVERY || pruning) return;
+  sinceLastPrune = 0;
+  pruning = true;
+  try {
+    // The (MAX_TERMINAL+1)-th newest terminal row marks the boundary; delete it
+    // and everything older. Dialect-agnostic (knex offset/limit) so it works on
+    // both Postgres and MySQL.
+    const boundary = await db("job_results")
+      .whereIn("state", TERMINAL_STATES)
+      .orderBy("createdAt", "desc")
+      .offset(MAX_TERMINAL)
+      .limit(1)
+      .first();
+    if (boundary?.createdAt) {
+      const deleted = await db("job_results")
+        .whereIn("state", TERMINAL_STATES)
+        .where("createdAt", "<=", boundary.createdAt)
+        .del();
+      if (deleted) {
+        console.log(
+          `[EXULU] pruned ${deleted} terminal job_results rows (cap ${MAX_TERMINAL}).`,
+        );
+      }
+    }
+  } catch (err) {
+    console.error("[EXULU] job_results prune failed", err);
+  } finally {
+    pruning = false;
+  }
+}

package/ee/schemas.ts CHANGED Viewed

@@ -241,6 +241,25 @@ export const jobResultsSchema: ExuluTableDefinition = {
             name: "metadata",
             type: "json",
         },
+        // Knowledge V2 (KB-7): per-item pipeline tracking. Written at ENQUEUE
+        // time (state "waiting") by the queue decorator so the item page can
+        // detect waiting jobs — not only worker-started ones. `type` is the
+        // job kind (processor/embedder/...); item + context indexed for the
+        // item-page query.
+        {
+            name: "item",
+            type: "text",
+            index: true,
+        },
+        {
+            name: "context",
+            type: "text",
+            index: true,
+        },
+        {
+            name: "type",
+            type: "text",
+        },
     ],
 };