@exulu/backend 1.66.0 → 1.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import { generateText, stepCountIs, tool } from "ai";
2
2
  import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
3
3
  import { z } from "zod";
4
4
  import { withRetry } from "@SRC/utils/with-retry";
5
- import type { ExuluReranker } from "@SRC/exulu/reranker";
5
+ import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
6
6
  import type { AgenticRetrievalOutput, ChunkResult, ClassificationResult } from "./types";
7
7
  import type { StrategyConfig } from "./strategies";
8
8
  import { createDynamicTools } from "./dynamic-tools";
@@ -69,7 +69,7 @@ export async function* runAgentLoop(params: {
69
69
  strategy: StrategyConfig;
70
70
  tools: Record<string, AITool>;
71
71
  model: LanguageModel;
72
- reranker?: ExuluReranker;
72
+ reranker?: ResolvedReranker;
73
73
  contextGuidance?: string;
74
74
  customInstructions?: string;
75
75
  classification: ClassificationResult;
@@ -171,8 +171,8 @@ export async function* runAgentLoop(params: {
171
171
 
172
172
  // Rerank if reranker is available
173
173
  if (reranker && stepChunks.length > 0) {
174
- console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.name}`);
175
- stepChunks = await reranker.run(query, stepChunks as any);
174
+ console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.model}`);
175
+ stepChunks = await reranker.rerank(query, stepChunks);
176
176
  }
177
177
 
178
178
  // Create dynamic tools (browse adjacent pages, load specific pages)
@@ -2,7 +2,8 @@ import { z } from "zod";
2
2
  import { createBashTool } from "bash-tool";
3
3
  import type { LanguageModel, Tool } from "ai";
4
4
  import type { ExuluContext } from "@SRC/exulu/context";
5
- import type { ExuluReranker } from "@SRC/exulu/reranker";
5
+ import { resolveReranker } from "@SRC/exulu/resolve-reranker";
6
+ import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
6
7
  import { ExuluTool } from "@SRC/exulu/tool";
7
8
  import type { User } from "@EXULU_TYPES/models/user";
8
9
  import { checkLicense } from "@EE/entitlements";
@@ -34,7 +35,7 @@ async function* executeV3({
34
35
  }: {
35
36
  query: string;
36
37
  contexts: ExuluContext[];
37
- reranker?: ExuluReranker;
38
+ reranker?: ResolvedReranker;
38
39
  toolVariablesConfig?: Record<string, any>;
39
40
  model: LanguageModel;
40
41
  user?: User;
@@ -189,7 +190,6 @@ async function* executeV3({
189
190
  export function createAgenticRetrievalToolV3({
190
191
  contexts,
191
192
  instructions: adminInstructions,
192
- rerankers,
193
193
  user,
194
194
  role,
195
195
  model,
@@ -197,7 +197,6 @@ export function createAgenticRetrievalToolV3({
197
197
  memoryItems
198
198
  }: {
199
199
  contexts: ExuluContext[];
200
- rerankers: ExuluReranker[];
201
200
  user?: User;
202
201
  role?: string;
203
202
  model?: LanguageModel;
@@ -355,7 +354,7 @@ export function createAgenticRetrievalToolV3({
355
354
  }
356
355
 
357
356
  let activeContexts = contexts;
358
- let configuredReranker: ExuluReranker | undefined;
357
+ let configuredReranker: ResolvedReranker | undefined;
359
358
  let configInstructions = "";
360
359
  let logTrajectory = false;
361
360
  let requiresPreselectedContexts = false;
@@ -382,7 +381,22 @@ export function createAgenticRetrievalToolV3({
382
381
  const rerankerId = toolVariablesConfig["reranker"];
383
382
 
384
383
  if (rerankerId && rerankerId !== "none") {
385
- configuredReranker = rerankers.find((r) => r.id === rerankerId);
384
+ // rerankerId is a LiteLLM model_name from config.litellm.yaml
385
+ // (model_info.type: reranker). Resolution is best-effort: a
386
+ // misconfigured model or an unready proxy must not break retrieval —
387
+ // it just runs unreranked, matching the old find()→undefined path.
388
+ try {
389
+ configuredReranker = await resolveReranker({
390
+ model: rerankerId,
391
+ user,
392
+ roleId: role,
393
+ });
394
+ } catch (err) {
395
+ console.warn(
396
+ `[EXULU] v3 — could not resolve reranker "${rerankerId}", continuing without reranking:`,
397
+ err,
398
+ );
399
+ }
386
400
  }
387
401
  }
388
402
 
@@ -14,17 +14,41 @@ import { checkLicense } from '@EE/entitlements';
14
14
  import { executePythonScript } from '@SRC/utils/python-executor';
15
15
  import { setupPythonEnvironment, validatePythonEnvironment } from '@SRC/utils/python-setup';
16
16
  import { LiteParse } from '@llamaindex/liteparse';
17
- import { Mistral } from '@mistralai/mistralai';
18
- import { ExuluVariables } from '@SRC/index';
17
+ import { resolveOcr } from '@SRC/exulu/resolve-ocr';
18
+ import type { ResolveOcrInput } from '@SRC/exulu/resolve-ocr';
19
+ import { resolveModel } from '@SRC/exulu/resolve-model';
19
20
 
20
21
  type DocumentProcessorConfig = {
21
22
  vlm?: {
22
- model: LanguageModel;
23
+ /**
24
+ * LiteLLM model_name for the VLM page-validation pass (declared in
25
+ * config.litellm.yaml, e.g. "vertex-gemini-2.5-flash"). Resolved via
26
+ * resolveModel() so the VLM pass shares the same tag-based cost controls
27
+ * and provider-switching as chat / embeddings / OCR, and the underlying
28
+ * provider can be swapped without code changes.
29
+ */
30
+ model: string;
23
31
  concurrency: number;
24
32
  },
25
33
  processor: {
26
34
  name: "docling" | "liteparse" | "mistral" | "officeparser"
35
+ /**
36
+ * LiteLLM model_name for the "mistral" OCR processor (declared in
37
+ * config.litellm.yaml). Defaults to "mistral-ocr". OCR is routed through
38
+ * the LiteLLM proxy so it shares the same tag-based cost controls as chat
39
+ * and embeddings, and the underlying provider (mistral / azure_ai /
40
+ * vertex_ai) can be switched without code changes.
41
+ */
42
+ model?: string
27
43
  }
44
+ /**
45
+ * Optional cost-attribution context, forwarded to LiteLLM as spend tags
46
+ * (user / role / project / context) for both the OCR pass (resolveOcr) and
47
+ * the VLM page-validation pass (resolveModel). Not yet populated by callers;
48
+ * the wiring is in place so per-user/per-context budgets work the moment
49
+ * attribution is threaded through.
50
+ */
51
+ attribution?: Omit<ResolveOcrInput, "model">
28
52
  debugging?: {
29
53
  deleteTempFiles?: boolean;
30
54
  }
@@ -94,6 +118,38 @@ async function processWord(file: Buffer): Promise<ProcessorOutput> {
94
118
  }
95
119
  }
96
120
 
121
+ /**
122
+ * Resolve the dev-supplied VLM `model` string (a LiteLLM model_name from
123
+ * config.litellm.yaml, e.g. "vertex-gemini-2.5-flash") into an `ai` SDK
124
+ * LanguageModel via resolveModel. This routes the VLM page-validation pass
125
+ * through the LiteLLM proxy — same tag-based cost controls and provider
126
+ * switching as chat / embeddings / OCR — and keeps the internal VLM helpers
127
+ * (validateWithVLM / validatePageWithVLM) working with a LanguageModel.
128
+ *
129
+ * Returns undefined when no VLM model is configured. Attribution (user /
130
+ * project / agent / routine) is forwarded for spend tagging when callers
131
+ * populate config.attribution; rbacBypass is set because this is a background
132
+ * package call where model-level access control is delegated to LiteLLM.
133
+ */
134
+ async function resolveVlmModel(
135
+ config?: DocumentProcessorConfig,
136
+ ): Promise<LanguageModel | undefined> {
137
+ const modelId = config?.vlm?.model;
138
+ if (!modelId) return undefined;
139
+
140
+ const { languageModel } = await resolveModel({
141
+ modelId,
142
+ providers: [], // unused in LiteLLM mode; resolveModel ignores it there
143
+ user: config?.attribution?.user,
144
+ project: config?.attribution?.project,
145
+ agent: config?.attribution?.agent,
146
+ routine: config?.attribution?.routine,
147
+ rbacBypass: true,
148
+ });
149
+
150
+ return languageModel;
151
+ }
152
+
97
153
  /**
98
154
  * Processes a standalone image file by optionally extracting content using VLM
99
155
  */
@@ -122,14 +178,15 @@ async function processImage(
122
178
  }];
123
179
 
124
180
  // If VLM is enabled, use it to extract content from the image
125
- if (config?.vlm?.model) {
181
+ const vlmModel = await resolveVlmModel(config);
182
+ if (vlmModel) {
126
183
  console.log('[EXULU] Extracting content from image using VLM...');
127
184
 
128
185
  json = await validateWithVLM(
129
186
  json,
130
- config.vlm.model,
187
+ vlmModel,
131
188
  verbose,
132
- config.vlm.concurrency
189
+ config!.vlm!.concurrency
133
190
  );
134
191
 
135
192
  // Save the processed result
@@ -679,15 +736,6 @@ async function processDocument(
679
736
  };
680
737
  }
681
738
 
682
- const getMistralApiKey = async () => {
683
- if (process.env.MISTRAL_API_KEY) {
684
- return process.env.MISTRAL_API_KEY;
685
- } else {
686
- const variable = await ExuluVariables.get("MISTRAL_API_KEY");
687
- return variable;
688
- }
689
- }
690
-
691
739
  async function processPdf(
692
740
  buffer: Buffer,
693
741
  paths: ProcessingPaths,
@@ -759,28 +807,25 @@ async function processPdf(
759
807
 
760
808
  } else if (config?.processor.name === "mistral") {
761
809
 
762
- const MISTRAL_API_KEY = await getMistralApiKey();
763
- if (!MISTRAL_API_KEY) {
764
- throw new Error('[EXULU] MISTRAL_API_KEY is not set, please set it in the environment variable via process.env or via an Exulu variable named "MISTRAL_API_KEY".');
765
- }
810
+ // OCR is routed through the LiteLLM proxy's Mistral-compatible /v1/ocr
811
+ // endpoint (see resolveOcr) rather than the Mistral SDK directly. This
812
+ // gives us tag-based cost control and lets us switch the OCR provider
813
+ // (mistral / azure_ai / vertex_ai) from config.litellm.yaml.
814
+ const resolved = await resolveOcr({
815
+ model: config.processor.model ?? "mistral-ocr",
816
+ ...config.attribution,
817
+ });
766
818
 
767
819
  // Wait a random time between 1 and 5 seconds to prevent rate limiting
768
820
  await new Promise(resolve => setTimeout(resolve, Math.floor(Math.random() * 4000) + 1000));
769
821
 
770
822
  const base64Pdf = buffer.toString('base64');
771
- const client = new Mistral({ apiKey: MISTRAL_API_KEY });
772
823
 
773
824
  const ocrResponse = await withRetry(async () => {
774
- type MistralOCRResponse = Awaited<ReturnType<typeof client.ocr.process>>;
775
- const ocrResponse: MistralOCRResponse = await client.ocr.process({
776
- document: {
777
- type: "document_url",
778
- documentUrl: "data:application/pdf;base64," + base64Pdf
779
- },
780
- model: "mistral-ocr-latest",
781
- includeImageBase64: false
782
- });
783
- return ocrResponse;
825
+ return await resolved.ocr({
826
+ type: "document_url",
827
+ document_url: "data:application/pdf;base64," + base64Pdf,
828
+ }, { includeImageBase64: false });
784
829
  }, 10);
785
830
 
786
831
  const parser = new LiteParse();
@@ -838,13 +883,14 @@ async function processPdf(
838
883
  }
839
884
 
840
885
  // Apply VLM validation if enabled
841
- if (config?.vlm?.model && json.length > 0) {
886
+ const vlmModel = config?.vlm?.model ? await resolveVlmModel(config) : undefined;
887
+ if (vlmModel && json.length > 0) {
842
888
 
843
889
  json = await validateWithVLM(
844
890
  json,
845
- config.vlm.model,
891
+ vlmModel,
846
892
  verbose,
847
- config.vlm.concurrency
893
+ config!.vlm!.concurrency
848
894
  );
849
895
 
850
896
  console.log('[EXULU] \n📊 Processing Summary:');
@@ -1046,7 +1092,6 @@ export async function documentProcessor({
1046
1092
  } catch (error) {
1047
1093
  console.error('Error during chunking:', error);
1048
1094
  throw error;
1049
-
1050
1095
  } finally {
1051
1096
  if (config?.debugging?.deleteTempFiles !== false) {
1052
1097
  // Delete the temp directory using the local array to avoid race conditions
@@ -1,5 +1,9 @@
1
1
  docling
2
- transformers
2
+ # transformers <5: the 5.x line requires huggingface_hub>=1.0, which removed the
3
+ # `use_auth_token` kwarg that pyannote.audio 3.x still passes to hf_hub_download()
4
+ # (→ "unexpected keyword argument 'use_auth_token'", diarization silently
5
+ # disabled). whisperx only needs transformers>=4.48, so the 4.x line is fine.
6
+ transformers>=4.48,<5
3
7
  pyinstaller
4
8
  docling-hierarchical-pdf
5
9
  defusedxml
@@ -17,6 +21,9 @@ torchaudio==2.5.1
17
21
  torchvision==0.20.1
18
22
  whisperx>=3.4.0
19
23
  pyannote.audio>=3.3.0
24
+ # Belt-and-suspenders: keep huggingface_hub on the 0.x line so pyannote 3.x's
25
+ # `use_auth_token=` calls keep working (1.x removed that kwarg → diarization off).
26
+ huggingface_hub<1.0
20
27
  fastapi
21
28
  uvicorn
22
29
  python-multipart
@@ -253,46 +253,6 @@ if [ -n "$LITELLM_PROXY_DIR" ] && [ -f "$LITELLM_PROXY_DIR/schema.prisma" ]; the
253
253
  || print_warning "Prisma generate failed; LiteLLM database mode (database_url in config.litellm.yaml) may not work until you run 'cd $LITELLM_PROXY_DIR && PATH=$VENV_DIR/bin:\$PATH $VENV_DIR/bin/prisma generate'"
254
254
  fi
255
255
 
256
- # Step 6.6: Install the Hermes Agent harness (advanced agent mode).
257
- # Opt-in via ENABLE_HERMES_AGENT=true. Hermes is NOT a pip package — it ships
258
- # as a standalone binary via Nous Research's official installer (lands in
259
- # ~/.local/bin/hermes). We only install if it's not already present so re-runs
260
- # are fast, and we never fail the whole setup if the install fails (advanced
261
- # mode is optional; the operator can install it manually and retry).
262
- if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
263
- echo ""
264
- echo "Step 6.6: Installing Hermes Agent harness (ENABLE_HERMES_AGENT=true)..."
265
- if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
266
- HERMES_VERSION=$( (command -v hermes &> /dev/null && hermes --version 2>/dev/null) || "$HOME/.local/bin/hermes" --version 2>/dev/null || echo "unknown")
267
- print_success "Hermes already installed ($HERMES_VERSION) — skipping installer"
268
- else
269
- print_info "Running Hermes official installer..."
270
- if curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash; then
271
- print_success "Hermes Agent installed (binary at ~/.local/bin/hermes)"
272
- else
273
- print_warning "Hermes installer failed. Advanced agent mode will be unavailable until 'hermes' is on PATH. Install manually: https://hermes-agent.nousresearch.com/docs/getting-started/installation"
274
- fi
275
- fi
276
-
277
- # Pre-pull the docker terminal-backend image so the first agent request
278
- # isn't blocked on a cold image pull (~minute). Only when the backend is
279
- # docker (the default) and docker is available; non-fatal otherwise.
280
- HERMES_BACKEND="${HERMES_TERMINAL_BACKEND:-docker}"
281
- if [ "${HERMES_BACKEND}" = "docker" ]; then
282
- HERMES_IMG="${HERMES_DOCKER_IMAGE:-nikolaik/python-nodejs:python3.11-nodejs20}"
283
- if command -v docker &> /dev/null; then
284
- print_info "Pre-pulling Hermes docker backend image: ${HERMES_IMG}..."
285
- if docker pull "${HERMES_IMG}" > /dev/null 2>&1; then
286
- print_success "Docker backend image ready (${HERMES_IMG})"
287
- else
288
- print_warning "Could not pre-pull ${HERMES_IMG}; the first advanced-mode request will pull it (slower)."
289
- fi
290
- else
291
- print_warning "Docker not found, but HERMES_TERMINAL_BACKEND=docker. Install Docker, or set HERMES_TERMINAL_BACKEND=local (unsandboxed)."
292
- fi
293
- fi
294
- fi
295
-
296
256
  # Step 7: Validate installation
297
257
  echo ""
298
258
  echo "Step 7: Validating installation..."
@@ -309,15 +269,6 @@ $PYTHON_CMD -c "import whisperx" 2>/dev/null && print_success "whisperx imported
309
269
  $PYTHON_CMD -c "import pyannote.audio" 2>/dev/null && print_success "pyannote.audio imported successfully" || print_warning "pyannote.audio not importable (diarization will be disabled even with HF_AUTH_TOKEN)"
310
270
  $PYTHON_CMD -c "import fastapi, uvicorn" 2>/dev/null && print_success "fastapi/uvicorn imported successfully" || print_warning "fastapi/uvicorn not importable (transcription server will not start)"
311
271
 
312
- # Hermes Agent binary check (advanced agent mode) — only when opted in.
313
- if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
314
- if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
315
- print_success "hermes binary available (advanced agent mode ready)"
316
- else
317
- print_warning "hermes binary not found (advanced agent mode will be unavailable)"
318
- fi
319
- fi
320
-
321
272
  # Step 8: Display summary
322
273
  echo ""
323
274
  echo -e "${GREEN}========================================${NC}"
@@ -2,6 +2,8 @@ import { Queue } from "bullmq";
2
2
  import { v4 as uuidv4 } from "uuid";
3
3
  import type { UIMessage } from "ai";
4
4
  import type { STATISTICS_LABELS } from "@EXULU_TYPES/statistics";
5
+ import { postgresClient } from "@SRC/postgres/client";
6
+ import { maybePruneJobResults } from "./prune-job-results";
5
7
 
6
8
  type ExuluJobType = "embedder" | "workflow" | "eval" | "processor";
7
9
 
@@ -120,6 +122,40 @@ export const bullmqDecorator = async ({
120
122
  };
121
123
 
122
124
  const redisId = uuidv4();
125
+
126
+ // Knowledge V2 (KB-7): record the job in job_results at ENQUEUE time (state
127
+ // "waiting") for processor/embedder jobs, so the item detail page can detect
128
+ // jobs that are queued-but-not-yet-started (which it couldn't if the row was
129
+ // only written at worker pickup). Inserted BEFORE queue.add so the row is
130
+ // guaranteed present before any worker can grab the job (no insert/update
131
+ // race). The worker-start update + completed/failed handlers drive the row
132
+ // through active → completed/failed, all keyed by this job_id.
133
+ if ((type === "processor" || type === "embedder") && context) {
134
+ try {
135
+ const { db } = await postgresClient();
136
+ const itemId =
137
+ item == null
138
+ ? null
139
+ : typeof item === "object"
140
+ ? ((item as { id?: unknown }).id ?? null)
141
+ : item;
142
+ await db.from("job_results").insert({
143
+ job_id: redisId,
144
+ label,
145
+ state: "waiting",
146
+ type,
147
+ item: itemId == null ? null : String(itemId),
148
+ context: String(context),
149
+ result: null,
150
+ metadata: {},
151
+ });
152
+ // Bound the table: every Nth added row, prune the oldest terminal rows.
153
+ void maybePruneJobResults(db);
154
+ } catch (err) {
155
+ console.error("[EXULU] enqueue job_results insert failed", err);
156
+ }
157
+ }
158
+
123
159
  const job = await queue.add(`${embedder || workflow || processor || evaluation}`, jobData, {
124
160
  jobId: redisId,
125
161
  // Setting it to 3 as a sensible default, as
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Periodic job_results cap (knowledge V2 KB-7 follow-up).
3
+ *
4
+ * We now write a job_results row at enqueue time, so the table grows faster.
5
+ * To bound it, every PRUNE_EVERY-th call we delete the oldest terminal rows
6
+ * (state failed/completed) beyond the newest MAX_TERMINAL — keeping a rolling
7
+ * window of recent finished jobs. Waiting/active/delayed rows are never
8
+ * pruned (they're still live).
9
+ *
10
+ * The counter is per-process (the API process counts enqueues; the worker
11
+ * process counts completions) — that's fine: the prune is idempotent, so it
12
+ * doesn't matter which process triggers it. A `pruning` guard avoids
13
+ * overlapping runs.
14
+ */
15
+
16
+ const MAX_TERMINAL = 10_000;
17
+ const PRUNE_EVERY = 100;
18
+ const TERMINAL_STATES = ["failed", "completed"];
19
+
20
+ let sinceLastPrune = 0;
21
+ let pruning = false;
22
+
23
+ export async function maybePruneJobResults(db: any): Promise<void> {
24
+ sinceLastPrune += 1;
25
+ if (sinceLastPrune < PRUNE_EVERY || pruning) return;
26
+ sinceLastPrune = 0;
27
+ pruning = true;
28
+ try {
29
+ // The (MAX_TERMINAL+1)-th newest terminal row marks the boundary; delete it
30
+ // and everything older. Dialect-agnostic (knex offset/limit) so it works on
31
+ // both Postgres and MySQL.
32
+ const boundary = await db("job_results")
33
+ .whereIn("state", TERMINAL_STATES)
34
+ .orderBy("createdAt", "desc")
35
+ .offset(MAX_TERMINAL)
36
+ .limit(1)
37
+ .first();
38
+
39
+ if (boundary?.createdAt) {
40
+ const deleted = await db("job_results")
41
+ .whereIn("state", TERMINAL_STATES)
42
+ .where("createdAt", "<=", boundary.createdAt)
43
+ .del();
44
+ if (deleted) {
45
+ console.log(
46
+ `[EXULU] pruned ${deleted} terminal job_results rows (cap ${MAX_TERMINAL}).`,
47
+ );
48
+ }
49
+ }
50
+ } catch (err) {
51
+ console.error("[EXULU] job_results prune failed", err);
52
+ } finally {
53
+ pruning = false;
54
+ }
55
+ }
package/ee/schemas.ts CHANGED
@@ -241,6 +241,25 @@ export const jobResultsSchema: ExuluTableDefinition = {
241
241
  name: "metadata",
242
242
  type: "json",
243
243
  },
244
+ // Knowledge V2 (KB-7): per-item pipeline tracking. Written at ENQUEUE
245
+ // time (state "waiting") by the queue decorator so the item page can
246
+ // detect waiting jobs — not only worker-started ones. `type` is the
247
+ // job kind (processor/embedder/...); item + context indexed for the
248
+ // item-page query.
249
+ {
250
+ name: "item",
251
+ type: "text",
252
+ index: true,
253
+ },
254
+ {
255
+ name: "context",
256
+ type: "text",
257
+ index: true,
258
+ },
259
+ {
260
+ name: "type",
261
+ type: "text",
262
+ },
244
263
  ],
245
264
  };
246
265