@exulu/backend 1.66.0 → 1.67.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,9 @@
1
1
  docling
2
- transformers
2
+ # transformers <5: the 5.x line requires huggingface_hub>=1.0, which removed the
3
+ # `use_auth_token` kwarg that pyannote.audio 3.x still passes to hf_hub_download()
4
+ # (→ "unexpected keyword argument 'use_auth_token'", diarization silently
5
+ # disabled). whisperx only needs transformers>=4.48, so the 4.x line is fine.
6
+ transformers>=4.48,<5
3
7
  pyinstaller
4
8
  docling-hierarchical-pdf
5
9
  defusedxml
@@ -17,6 +21,9 @@ torchaudio==2.5.1
17
21
  torchvision==0.20.1
18
22
  whisperx>=3.4.0
19
23
  pyannote.audio>=3.3.0
24
+ # Belt-and-suspenders: keep huggingface_hub on the 0.x line so pyannote 3.x's
25
+ # `use_auth_token=` calls keep working (1.x removed that kwarg → diarization off).
26
+ huggingface_hub<1.0
20
27
  fastapi
21
28
  uvicorn
22
29
  python-multipart
@@ -253,46 +253,6 @@ if [ -n "$LITELLM_PROXY_DIR" ] && [ -f "$LITELLM_PROXY_DIR/schema.prisma" ]; the
253
253
  || print_warning "Prisma generate failed; LiteLLM database mode (database_url in config.litellm.yaml) may not work until you run 'cd $LITELLM_PROXY_DIR && PATH=$VENV_DIR/bin:\$PATH $VENV_DIR/bin/prisma generate'"
254
254
  fi
255
255
 
256
- # Step 6.6: Install the Hermes Agent harness (advanced agent mode).
257
- # Opt-in via ENABLE_HERMES_AGENT=true. Hermes is NOT a pip package — it ships
258
- # as a standalone binary via Nous Research's official installer (lands in
259
- # ~/.local/bin/hermes). We only install if it's not already present so re-runs
260
- # are fast, and we never fail the whole setup if the install fails (advanced
261
- # mode is optional; the operator can install it manually and retry).
262
- if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
263
- echo ""
264
- echo "Step 6.6: Installing Hermes Agent harness (ENABLE_HERMES_AGENT=true)..."
265
- if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
266
- HERMES_VERSION=$( (command -v hermes &> /dev/null && hermes --version 2>/dev/null) || "$HOME/.local/bin/hermes" --version 2>/dev/null || echo "unknown")
267
- print_success "Hermes already installed ($HERMES_VERSION) — skipping installer"
268
- else
269
- print_info "Running Hermes official installer..."
270
- if curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash; then
271
- print_success "Hermes Agent installed (binary at ~/.local/bin/hermes)"
272
- else
273
- print_warning "Hermes installer failed. Advanced agent mode will be unavailable until 'hermes' is on PATH. Install manually: https://hermes-agent.nousresearch.com/docs/getting-started/installation"
274
- fi
275
- fi
276
-
277
- # Pre-pull the docker terminal-backend image so the first agent request
278
- # isn't blocked on a cold image pull (~minute). Only when the backend is
279
- # docker (the default) and docker is available; non-fatal otherwise.
280
- HERMES_BACKEND="${HERMES_TERMINAL_BACKEND:-docker}"
281
- if [ "${HERMES_BACKEND}" = "docker" ]; then
282
- HERMES_IMG="${HERMES_DOCKER_IMAGE:-nikolaik/python-nodejs:python3.11-nodejs20}"
283
- if command -v docker &> /dev/null; then
284
- print_info "Pre-pulling Hermes docker backend image: ${HERMES_IMG}..."
285
- if docker pull "${HERMES_IMG}" > /dev/null 2>&1; then
286
- print_success "Docker backend image ready (${HERMES_IMG})"
287
- else
288
- print_warning "Could not pre-pull ${HERMES_IMG}; the first advanced-mode request will pull it (slower)."
289
- fi
290
- else
291
- print_warning "Docker not found, but HERMES_TERMINAL_BACKEND=docker. Install Docker, or set HERMES_TERMINAL_BACKEND=local (unsandboxed)."
292
- fi
293
- fi
294
- fi
295
-
296
256
  # Step 7: Validate installation
297
257
  echo ""
298
258
  echo "Step 7: Validating installation..."
@@ -309,15 +269,6 @@ $PYTHON_CMD -c "import whisperx" 2>/dev/null && print_success "whisperx imported
309
269
  $PYTHON_CMD -c "import pyannote.audio" 2>/dev/null && print_success "pyannote.audio imported successfully" || print_warning "pyannote.audio not importable (diarization will be disabled even with HF_AUTH_TOKEN)"
310
270
  $PYTHON_CMD -c "import fastapi, uvicorn" 2>/dev/null && print_success "fastapi/uvicorn imported successfully" || print_warning "fastapi/uvicorn not importable (transcription server will not start)"
311
271
 
312
- # Hermes Agent binary check (advanced agent mode) — only when opted in.
313
- if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
314
- if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
315
- print_success "hermes binary available (advanced agent mode ready)"
316
- else
317
- print_warning "hermes binary not found (advanced agent mode will be unavailable)"
318
- fi
319
- fi
320
-
321
272
  # Step 8: Display summary
322
273
  echo ""
323
274
  echo -e "${GREEN}========================================${NC}"
@@ -2,6 +2,8 @@ import { Queue } from "bullmq";
2
2
  import { v4 as uuidv4 } from "uuid";
3
3
  import type { UIMessage } from "ai";
4
4
  import type { STATISTICS_LABELS } from "@EXULU_TYPES/statistics";
5
+ import { postgresClient } from "@SRC/postgres/client";
6
+ import { maybePruneJobResults } from "./prune-job-results";
5
7
 
6
8
  type ExuluJobType = "embedder" | "workflow" | "eval" | "processor";
7
9
 
@@ -120,6 +122,40 @@ export const bullmqDecorator = async ({
120
122
  };
121
123
 
122
124
  const redisId = uuidv4();
125
+
126
+ // Knowledge V2 (KB-7): record the job in job_results at ENQUEUE time (state
127
+ // "waiting") for processor/embedder jobs, so the item detail page can detect
128
+ // jobs that are queued-but-not-yet-started (which it couldn't if the row was
129
+ // only written at worker pickup). Inserted BEFORE queue.add so the row is
130
+ // guaranteed present before any worker can grab the job (no insert/update
131
+ // race). The worker-start update + completed/failed handlers drive the row
132
+ // through active → completed/failed, all keyed by this job_id.
133
+ if ((type === "processor" || type === "embedder") && context) {
134
+ try {
135
+ const { db } = await postgresClient();
136
+ const itemId =
137
+ item == null
138
+ ? null
139
+ : typeof item === "object"
140
+ ? ((item as { id?: unknown }).id ?? null)
141
+ : item;
142
+ await db.from("job_results").insert({
143
+ job_id: redisId,
144
+ label,
145
+ state: "waiting",
146
+ type,
147
+ item: itemId == null ? null : String(itemId),
148
+ context: String(context),
149
+ result: null,
150
+ metadata: {},
151
+ });
152
+ // Bound the table: every Nth added row, prune the oldest terminal rows.
153
+ void maybePruneJobResults(db);
154
+ } catch (err) {
155
+ console.error("[EXULU] enqueue job_results insert failed", err);
156
+ }
157
+ }
158
+
123
159
  const job = await queue.add(`${embedder || workflow || processor || evaluation}`, jobData, {
124
160
  jobId: redisId,
125
161
  // Setting it to 3 as a sensible default, as
@@ -0,0 +1,55 @@
/**
 * Periodic job_results cap (knowledge V2 KB-7 follow-up).
 *
 * A job_results row is written at enqueue time, so the table grows faster.
 * To bound it, every PRUNE_EVERY-th call deletes the oldest terminal rows
 * (state failed/completed) beyond the newest MAX_TERMINAL, keeping a rolling
 * window of recently finished jobs. Rows in any other state are never matched
 * by the delete, so live jobs are left alone.
 *
 * The call counter and the `pruning` guard are module-level variables, i.e.
 * they are tracked per process. The guard prevents overlapping prune runs
 * within one process.
 */

/** Default number of newest terminal rows that survive a prune. */
const MAX_TERMINAL = 10_000;
/** Default number of calls between two prune attempts. */
const PRUNE_EVERY = 100;
/** States considered finished; only rows in these states are ever deleted. */
const TERMINAL_STATES = ["failed", "completed"];

let sinceLastPrune = 0;
let pruning = false;

/** Returns `value` when it is an integer >= `min`, otherwise `fallback`. */
function integerAtLeast(value: number | undefined, min: number, fallback: number): number {
  return typeof value === "number" && Number.isInteger(value) && value >= min ? value : fallback;
}

/**
 * Counts one call and, on every `pruneEvery`-th call, trims terminal
 * job_results rows down to the newest `maxTerminal`.
 *
 * Never rejects: query errors are logged and swallowed so callers can fire
 * and forget (`void maybePruneJobResults(db)`).
 *
 * @param db Knex-style query builder factory (`db("table")`).
 * @param options Optional overrides for the defaults. `maxTerminal` must be an
 *   integer >= 0 and `pruneEvery` an integer >= 1; anything else falls back to
 *   MAX_TERMINAL / PRUNE_EVERY.
 */
export async function maybePruneJobResults(
  db: any,
  options: { maxTerminal?: number; pruneEvery?: number } = {},
): Promise<void> {
  const maxTerminal = integerAtLeast(options.maxTerminal, 0, MAX_TERMINAL);
  const pruneEvery = integerAtLeast(options.pruneEvery, 1, PRUNE_EVERY);

  sinceLastPrune += 1;
  if (sinceLastPrune < pruneEvery || pruning) return;
  sinceLastPrune = 0;
  pruning = true;
  try {
    // The (maxTerminal+1)-th newest terminal row marks the boundary; delete it
    // and everything older. Uses knex offset/limit rather than dialect-specific
    // SQL.
    const boundary = await db("job_results")
      .whereIn("state", TERMINAL_STATES)
      .orderBy("createdAt", "desc")
      .offset(maxTerminal)
      .limit(1)
      .first();

    // No boundary row means there are at most `maxTerminal` terminal rows.
    if (boundary?.createdAt) {
      const deleted = await db("job_results")
        .whereIn("state", TERMINAL_STATES)
        .where("createdAt", "<=", boundary.createdAt)
        .del();
      if (deleted) {
        console.log(`[EXULU] pruned ${deleted} terminal job_results rows (cap ${maxTerminal}).`);
      }
    }
  } catch (err) {
    console.error("[EXULU] job_results prune failed", err);
  } finally {
    // Always release the guard, even when a query failed.
    pruning = false;
  }
}
package/ee/schemas.ts CHANGED
@@ -241,6 +241,25 @@ export const jobResultsSchema: ExuluTableDefinition = {
241
241
  name: "metadata",
242
242
  type: "json",
243
243
  },
244
+ // Knowledge V2 (KB-7): per-item pipeline tracking. Written at ENQUEUE
245
+ // time (state "waiting") by the queue decorator so the item page can
246
+ // detect waiting jobs — not only worker-started ones. `type` is the
247
+ // job kind (processor/embedder/...); item + context indexed for the
248
+ // item-page query.
249
+ {
250
+ name: "item",
251
+ type: "text",
252
+ index: true,
253
+ },
254
+ {
255
+ name: "context",
256
+ type: "text",
257
+ index: true,
258
+ },
259
+ {
260
+ name: "type",
261
+ type: "text",
262
+ },
244
263
  ],
245
264
  };
246
265
 
package/ee/workers.ts CHANGED
@@ -13,6 +13,7 @@ import type { ExuluTool } from "@SRC/exulu/tool.ts";
13
13
  import { resolveModel } from "@SRC/exulu/resolve-model.ts";
14
14
  import { postgresClient } from "@SRC/postgres/client";
15
15
  import type { BullMqJobData } from "@EE/queues/decorator.ts";
16
+ import { maybePruneJobResults } from "@EE/queues/prune-job-results.ts";
16
17
  import { type Tracer } from "@opentelemetry/api";
17
18
  import { v4 as uuidv4 } from "uuid";
18
19
  import { type UIMessage } from "ai";
@@ -275,13 +276,7 @@ export const createWorkers = async (
275
276
 
276
277
  const label = `embedder-${bullmqJob.name}`;
277
278
 
278
- await db.from("job_results").insert({
279
- job_id: bullmqJob.id,
280
- label: label,
281
- state: await bullmqJob.getState(),
282
- result: null,
283
- metadata: {},
284
- });
279
+ await upsertJobStart(db, bullmqJob, label, "embedder");
285
280
 
286
281
  const context = contexts.find((context) => context.id === data.context);
287
282
 
@@ -331,13 +326,7 @@ export const createWorkers = async (
331
326
 
332
327
  const label = `processor-${bullmqJob.name}`;
333
328
 
334
- await db.from("job_results").insert({
335
- job_id: bullmqJob.id,
336
- label: label,
337
- state: await bullmqJob.getState(),
338
- result: null,
339
- metadata: {},
340
- });
329
+ await upsertJobStart(db, bullmqJob, label, "processor");
341
330
 
342
331
  const context = contexts.find((context) => context.id === data.context);
343
332
 
@@ -502,6 +491,7 @@ export const createWorkers = async (
502
491
  agent,
503
492
  provider,
504
493
  user,
494
+ workflow,
505
495
  messages: inputMessages,
506
496
  } = await validateWorkflowPayload(data, providers);
507
497
 
@@ -535,6 +525,8 @@ export const createWorkers = async (
535
525
  tools,
536
526
  config,
537
527
  variables: data.inputs,
528
+ // Tag LLM spend to this routine (cron + ad-hoc share this path).
529
+ routine: { id: workflow.id, name: workflow.name },
538
530
  });
539
531
  resolve(messages);
540
532
  break;
@@ -1021,6 +1013,9 @@ export const createWorkers = async (
1021
1013
  result: returnvalue.result != null ? JSON.stringify(returnvalue.result) : null,
1022
1014
  metadata: returnvalue.metadata != null ? JSON.stringify(returnvalue.metadata) : null,
1023
1015
  });
1016
+
1017
+ // Cap the table as rows become terminal (every Nth, idempotent).
1018
+ void maybePruneJobResults(db);
1024
1019
  },
1025
1020
  );
1026
1021
 
@@ -1034,6 +1029,9 @@ export const createWorkers = async (
1034
1029
  state: JOB_STATUS_ENUM.failed,
1035
1030
  error,
1036
1031
  });
1032
+
1033
+ // Cap the table as rows become terminal (every Nth, idempotent).
1034
+ void maybePruneJobResults(db);
1037
1035
  return;
1038
1036
  }
1039
1037
  console.error(
@@ -1331,6 +1329,7 @@ export const processUiMessagesFlow = async ({
1331
1329
  tools,
1332
1330
  config,
1333
1331
  variables,
1332
+ routine,
1334
1333
  }: {
1335
1334
  providers: ExuluProvider[];
1336
1335
  agent: ExuluAgent;
@@ -1342,6 +1341,14 @@ export const processUiMessagesFlow = async ({
1342
1341
  tools: ExuluTool[];
1343
1342
  config: ExuluConfig;
1344
1343
  variables?: Record<string, any>;
1344
+ /**
1345
+ * Set when this flow is invoked from a workflow_template run (one-shot via
1346
+ * runWorkflow or cron via upsertWorkflowSchedule). Forwarded to resolveModel
1347
+ * so buildTags() emits routine_id_/routine_name_ alongside user/agent tags
1348
+ * for /analytics + /admin/budgets attribution. /chat and /openai-gateway
1349
+ * callers leave this undefined — they have no routine context.
1350
+ */
1351
+ routine?: { id: string; name: string };
1345
1352
  }): Promise<{
1346
1353
  messages: UIMessage[];
1347
1354
  metadata: {
@@ -1390,7 +1397,8 @@ export const processUiMessagesFlow = async ({
1390
1397
  modelId: agent.model,
1391
1398
  user,
1392
1399
  providers,
1393
- agent: agent
1400
+ agent: agent,
1401
+ routine,
1394
1402
  });
1395
1403
  const providerapikey = resolved.apiKey;
1396
1404
  const resolvedLanguageModel = resolved.languageModel;
@@ -1632,3 +1640,36 @@ function getAverage(arr: number[]): number {
1632
1640
  if (arr.length === 0) return 0; // Handle empty array
1633
1641
  return arr.reduce((a, b) => a + b, 0) / arr.length;
1634
1642
  }
1643
+
/**
 * KB-7: at worker pickup, advance the enqueue-time job_results row (state
 * "waiting", written by the queue decorator) to the job's live state instead
 * of inserting a duplicate. Falls back to an insert when no row was updated,
 * e.g. for jobs enqueued before the enqueue-time insert existed. Used by the
 * processor + embedder handlers.
 *
 * @param db Knex-style client exposing `from(table)`.
 * @param bullmqJob Job being started; `getState()` supplies the state to store.
 * @param label Label to store on the row.
 * @param fallbackType Value for the `type` column when the job data carries no
 *   `type` (only used on the insert path).
 */
async function upsertJobStart(
  db: any,
  bullmqJob: { id?: string; data?: any; getState: () => Promise<string> },
  label: string,
  fallbackType: string,
): Promise<void> {
  const state = await bullmqJob.getState();
  const jobId = bullmqJob.id;

  // `id` is optional on the job type. Knex rejects undefined bindings in a
  // where clause, so only attempt the update when there is an id to match on;
  // otherwise go straight to the insert.
  if (jobId != null) {
    const updated = await db.from("job_results").where({ job_id: jobId }).update({ label, state });
    if (updated) return;
  }

  // `item` may be a full object (use its id) or already a primitive id.
  const rawItem = bullmqJob.data?.item;
  const itemId =
    rawItem == null ? null : typeof rawItem === "object" ? (rawItem.id ?? null) : rawItem;

  await db.from("job_results").insert({
    job_id: jobId,
    label,
    state,
    result: null,
    metadata: {},
    type: bullmqJob.data?.type ?? fallbackType,
    item: itemId == null ? null : String(itemId),
    context: bullmqJob.data?.context ? String(bullmqJob.data.context) : null,
  });
}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@exulu/backend",
3
3
  "author": "Qventu Bv.",
4
- "version": "1.66.0",
4
+ "version": "1.67.0",
5
5
  "main": "./dist/index.js",
6
6
  "private": false,
7
7
  "publishConfig": {