@exulu/backend 1.66.0 → 1.67.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-ZPZKOT6I.js +8978 -0
- package/dist/{convert-exulu-tools-to-ai-sdk-tools-ZFIN7A5V.js → convert-exulu-tools-to-ai-sdk-tools-4B7BQ5G2.js} +1 -2
- package/dist/index.cjs +18521 -18458
- package/dist/index.d.cts +149 -4
- package/dist/index.d.ts +149 -4
- package/dist/index.js +11028 -238
- package/ee/python/requirements.txt +8 -1
- package/ee/python/setup.sh +0 -49
- package/ee/queues/decorator.ts +36 -0
- package/ee/queues/prune-job-results.ts +55 -0
- package/ee/schemas.ts +19 -0
- package/ee/workers.ts +56 -15
- package/package.json +1 -1
- package/dist/chunk-KQDNL5WU.js +0 -19399
- package/ee/python/.hermes/.env.example +0 -8
- package/ee/python/.hermes/README.md +0 -44
- package/ee/python/.hermes/SOUL.md.example +0 -8
- package/ee/python/.hermes/config.yaml.example +0 -55
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
docling
|
|
2
|
-
transformers
|
|
2
|
+
# transformers <5: the 5.x line requires huggingface_hub>=1.0, which removed the
|
|
3
|
+
# `use_auth_token` kwarg that pyannote.audio 3.x still passes to hf_hub_download()
|
|
4
|
+
# (→ "unexpected keyword argument 'use_auth_token'", diarization silently
|
|
5
|
+
# disabled). whisperx only needs transformers>=4.48, so the 4.x line is fine.
|
|
6
|
+
transformers>=4.48,<5
|
|
3
7
|
pyinstaller
|
|
4
8
|
docling-hierarchical-pdf
|
|
5
9
|
defusedxml
|
|
@@ -17,6 +21,9 @@ torchaudio==2.5.1
|
|
|
17
21
|
torchvision==0.20.1
|
|
18
22
|
whisperx>=3.4.0
|
|
19
23
|
pyannote.audio>=3.3.0
|
|
24
|
+
# Belt-and-suspenders: keep huggingface_hub on the 0.x line so pyannote 3.x's
|
|
25
|
+
# `use_auth_token=` calls keep working (1.x removed that kwarg → diarization off).
|
|
26
|
+
huggingface_hub<1.0
|
|
20
27
|
fastapi
|
|
21
28
|
uvicorn
|
|
22
29
|
python-multipart
|
package/ee/python/setup.sh
CHANGED
|
@@ -253,46 +253,6 @@ if [ -n "$LITELLM_PROXY_DIR" ] && [ -f "$LITELLM_PROXY_DIR/schema.prisma" ]; the
|
|
|
253
253
|
|| print_warning "Prisma generate failed; LiteLLM database mode (database_url in config.litellm.yaml) may not work until you run 'cd $LITELLM_PROXY_DIR && PATH=$VENV_DIR/bin:\$PATH $VENV_DIR/bin/prisma generate'"
|
|
254
254
|
fi
|
|
255
255
|
|
|
256
|
-
# Step 6.6: Install the Hermes Agent harness (advanced agent mode).
|
|
257
|
-
# Opt-in via ENABLE_HERMES_AGENT=true. Hermes is NOT a pip package — it ships
|
|
258
|
-
# as a standalone binary via Nous Research's official installer (lands in
|
|
259
|
-
# ~/.local/bin/hermes). We only install if it's not already present so re-runs
|
|
260
|
-
# are fast, and we never fail the whole setup if the install fails (advanced
|
|
261
|
-
# mode is optional; the operator can install it manually and retry).
|
|
262
|
-
if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
|
|
263
|
-
echo ""
|
|
264
|
-
echo "Step 6.6: Installing Hermes Agent harness (ENABLE_HERMES_AGENT=true)..."
|
|
265
|
-
if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
|
|
266
|
-
HERMES_VERSION=$( (command -v hermes &> /dev/null && hermes --version 2>/dev/null) || "$HOME/.local/bin/hermes" --version 2>/dev/null || echo "unknown")
|
|
267
|
-
print_success "Hermes already installed ($HERMES_VERSION) — skipping installer"
|
|
268
|
-
else
|
|
269
|
-
print_info "Running Hermes official installer..."
|
|
270
|
-
if curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash; then
|
|
271
|
-
print_success "Hermes Agent installed (binary at ~/.local/bin/hermes)"
|
|
272
|
-
else
|
|
273
|
-
print_warning "Hermes installer failed. Advanced agent mode will be unavailable until 'hermes' is on PATH. Install manually: https://hermes-agent.nousresearch.com/docs/getting-started/installation"
|
|
274
|
-
fi
|
|
275
|
-
fi
|
|
276
|
-
|
|
277
|
-
# Pre-pull the docker terminal-backend image so the first agent request
|
|
278
|
-
# isn't blocked on a cold image pull (~minute). Only when the backend is
|
|
279
|
-
# docker (the default) and docker is available; non-fatal otherwise.
|
|
280
|
-
HERMES_BACKEND="${HERMES_TERMINAL_BACKEND:-docker}"
|
|
281
|
-
if [ "${HERMES_BACKEND}" = "docker" ]; then
|
|
282
|
-
HERMES_IMG="${HERMES_DOCKER_IMAGE:-nikolaik/python-nodejs:python3.11-nodejs20}"
|
|
283
|
-
if command -v docker &> /dev/null; then
|
|
284
|
-
print_info "Pre-pulling Hermes docker backend image: ${HERMES_IMG}..."
|
|
285
|
-
if docker pull "${HERMES_IMG}" > /dev/null 2>&1; then
|
|
286
|
-
print_success "Docker backend image ready (${HERMES_IMG})"
|
|
287
|
-
else
|
|
288
|
-
print_warning "Could not pre-pull ${HERMES_IMG}; the first advanced-mode request will pull it (slower)."
|
|
289
|
-
fi
|
|
290
|
-
else
|
|
291
|
-
print_warning "Docker not found, but HERMES_TERMINAL_BACKEND=docker. Install Docker, or set HERMES_TERMINAL_BACKEND=local (unsandboxed)."
|
|
292
|
-
fi
|
|
293
|
-
fi
|
|
294
|
-
fi
|
|
295
|
-
|
|
296
256
|
# Step 7: Validate installation
|
|
297
257
|
echo ""
|
|
298
258
|
echo "Step 7: Validating installation..."
|
|
@@ -309,15 +269,6 @@ $PYTHON_CMD -c "import whisperx" 2>/dev/null && print_success "whisperx imported
|
|
|
309
269
|
$PYTHON_CMD -c "import pyannote.audio" 2>/dev/null && print_success "pyannote.audio imported successfully" || print_warning "pyannote.audio not importable (diarization will be disabled even with HF_AUTH_TOKEN)"
|
|
310
270
|
$PYTHON_CMD -c "import fastapi, uvicorn" 2>/dev/null && print_success "fastapi/uvicorn imported successfully" || print_warning "fastapi/uvicorn not importable (transcription server will not start)"
|
|
311
271
|
|
|
312
|
-
# Hermes Agent binary check (advanced agent mode) — only when opted in.
|
|
313
|
-
if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
|
|
314
|
-
if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
|
|
315
|
-
print_success "hermes binary available (advanced agent mode ready)"
|
|
316
|
-
else
|
|
317
|
-
print_warning "hermes binary not found (advanced agent mode will be unavailable)"
|
|
318
|
-
fi
|
|
319
|
-
fi
|
|
320
|
-
|
|
321
272
|
# Step 8: Display summary
|
|
322
273
|
echo ""
|
|
323
274
|
echo -e "${GREEN}========================================${NC}"
|
package/ee/queues/decorator.ts
CHANGED
|
@@ -2,6 +2,8 @@ import { Queue } from "bullmq";
|
|
|
2
2
|
import { v4 as uuidv4 } from "uuid";
|
|
3
3
|
import type { UIMessage } from "ai";
|
|
4
4
|
import type { STATISTICS_LABELS } from "@EXULU_TYPES/statistics";
|
|
5
|
+
import { postgresClient } from "@SRC/postgres/client";
|
|
6
|
+
import { maybePruneJobResults } from "./prune-job-results";
|
|
5
7
|
|
|
6
8
|
type ExuluJobType = "embedder" | "workflow" | "eval" | "processor";
|
|
7
9
|
|
|
@@ -120,6 +122,40 @@ export const bullmqDecorator = async ({
|
|
|
120
122
|
};
|
|
121
123
|
|
|
122
124
|
const redisId = uuidv4();
|
|
125
|
+
|
|
126
|
+
// Knowledge V2 (KB-7): record the job in job_results at ENQUEUE time (state
|
|
127
|
+
// "waiting") for processor/embedder jobs, so the item detail page can detect
|
|
128
|
+
// jobs that are queued-but-not-yet-started (which it couldn't if the row was
|
|
129
|
+
// only written at worker pickup). Inserted BEFORE queue.add so the row is
|
|
130
|
+
// guaranteed present before any worker can grab the job (no insert/update
|
|
131
|
+
// race). The worker-start update + completed/failed handlers drive the row
|
|
132
|
+
// through active → completed/failed, all keyed by this job_id.
|
|
133
|
+
if ((type === "processor" || type === "embedder") && context) {
|
|
134
|
+
try {
|
|
135
|
+
const { db } = await postgresClient();
|
|
136
|
+
const itemId =
|
|
137
|
+
item == null
|
|
138
|
+
? null
|
|
139
|
+
: typeof item === "object"
|
|
140
|
+
? ((item as { id?: unknown }).id ?? null)
|
|
141
|
+
: item;
|
|
142
|
+
await db.from("job_results").insert({
|
|
143
|
+
job_id: redisId,
|
|
144
|
+
label,
|
|
145
|
+
state: "waiting",
|
|
146
|
+
type,
|
|
147
|
+
item: itemId == null ? null : String(itemId),
|
|
148
|
+
context: String(context),
|
|
149
|
+
result: null,
|
|
150
|
+
metadata: {},
|
|
151
|
+
});
|
|
152
|
+
// Bound the table: every Nth added row, prune the oldest terminal rows.
|
|
153
|
+
void maybePruneJobResults(db);
|
|
154
|
+
} catch (err) {
|
|
155
|
+
console.error("[EXULU] enqueue job_results insert failed", err);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
123
159
|
const job = await queue.add(`${embedder || workflow || processor || evaluation}`, jobData, {
|
|
124
160
|
jobId: redisId,
|
|
125
161
|
// Setting it to 3 as a sensible default, as
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
 * Periodic cap on the `job_results` table (knowledge V2 KB-7 follow-up).
 *
 * A job_results row is now written at enqueue time, so the table grows faster.
 * To bound it, every PRUNE_EVERY-th call deletes the oldest terminal rows
 * (state failed/completed) beyond the newest MAX_TERMINAL, keeping a rolling
 * window of recently finished jobs. Rows in any other state (waiting, active,
 * delayed, ...) are never pruned because they are still live.
 *
 * The counter and the `pruning` guard are module-level, i.e. per process (the
 * API process counts enqueues, the worker process counts completions). That is
 * fine: the prune only removes rows beyond the cap, so it does not matter
 * which process triggers it. The `pruning` guard avoids overlapping runs
 * within one process.
 */

/** Number of newest terminal rows that survive a prune. */
const MAX_TERMINAL = 10_000;
/** A prune is attempted once per this many calls. */
const PRUNE_EVERY = 100;
/** States that mark a job as finished and therefore eligible for deletion. */
const TERMINAL_STATES = ["failed", "completed"];

/** Calls seen since the last prune attempt (per process). */
let sinceLastPrune = 0;
/** True while a prune is in flight, so a second one is not started. */
let pruning = false;

/**
 * Counts one call and, on every PRUNE_EVERY-th call, trims terminal
 * `job_results` rows down to the newest MAX_TERMINAL.
 *
 * Never rejects: every database error is caught and logged, so callers can
 * fire-and-forget it (`void maybePruneJobResults(db)`).
 *
 * @param db Knex-style client; it is invoked as `db("job_results")` to start
 *   a query builder.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the knex client type is not imported in this module
export async function maybePruneJobResults(db: any): Promise<void> {
  sinceLastPrune += 1;
  // Below the threshold, or a prune is already running: nothing to do. The
  // counter keeps its value in the latter case, so the next call after the
  // running prune finishes triggers a new one.
  if (sinceLastPrune < PRUNE_EVERY || pruning) return;

  sinceLastPrune = 0;
  pruning = true;
  try {
    // The (MAX_TERMINAL+1)-th newest terminal row marks the boundary; delete it
    // and everything older. Only `createdAt` is fetched: the row's JSON
    // `result`/`metadata` columns are not needed here. `first()` already limits
    // the query to a single row, and offset/limit keep it dialect-agnostic
    // (Postgres and MySQL).
    const boundary = await db("job_results")
      .whereIn("state", TERMINAL_STATES)
      .orderBy("createdAt", "desc")
      .offset(MAX_TERMINAL)
      .first("createdAt");

    // No boundary row means there are at most MAX_TERMINAL terminal rows.
    if (boundary?.createdAt) {
      // NOTE(review): terminal rows sharing the boundary's exact createdAt are
      // deleted as well, so timestamp ties can shrink the window slightly
      // below MAX_TERMINAL.
      const deleted = await db("job_results")
        .whereIn("state", TERMINAL_STATES)
        .where("createdAt", "<=", boundary.createdAt)
        .del();
      if (deleted) {
        console.log(
          `[EXULU] pruned ${deleted} terminal job_results rows (cap ${MAX_TERMINAL}).`,
        );
      }
    }
  } catch (err) {
    // Best-effort housekeeping: log and carry on; the next window retries.
    console.error("[EXULU] job_results prune failed", err);
  } finally {
    pruning = false;
  }
}
|
package/ee/schemas.ts
CHANGED
|
@@ -241,6 +241,25 @@ export const jobResultsSchema: ExuluTableDefinition = {
|
|
|
241
241
|
name: "metadata",
|
|
242
242
|
type: "json",
|
|
243
243
|
},
|
|
244
|
+
// Knowledge V2 (KB-7): per-item pipeline tracking. Written at ENQUEUE
|
|
245
|
+
// time (state "waiting") by the queue decorator so the item page can
|
|
246
|
+
// detect waiting jobs — not only worker-started ones. `type` is the
|
|
247
|
+
// job kind (processor/embedder/...); item + context indexed for the
|
|
248
|
+
// item-page query.
|
|
249
|
+
{
|
|
250
|
+
name: "item",
|
|
251
|
+
type: "text",
|
|
252
|
+
index: true,
|
|
253
|
+
},
|
|
254
|
+
{
|
|
255
|
+
name: "context",
|
|
256
|
+
type: "text",
|
|
257
|
+
index: true,
|
|
258
|
+
},
|
|
259
|
+
{
|
|
260
|
+
name: "type",
|
|
261
|
+
type: "text",
|
|
262
|
+
},
|
|
244
263
|
],
|
|
245
264
|
};
|
|
246
265
|
|
package/ee/workers.ts
CHANGED
|
@@ -13,6 +13,7 @@ import type { ExuluTool } from "@SRC/exulu/tool.ts";
|
|
|
13
13
|
import { resolveModel } from "@SRC/exulu/resolve-model.ts";
|
|
14
14
|
import { postgresClient } from "@SRC/postgres/client";
|
|
15
15
|
import type { BullMqJobData } from "@EE/queues/decorator.ts";
|
|
16
|
+
import { maybePruneJobResults } from "@EE/queues/prune-job-results.ts";
|
|
16
17
|
import { type Tracer } from "@opentelemetry/api";
|
|
17
18
|
import { v4 as uuidv4 } from "uuid";
|
|
18
19
|
import { type UIMessage } from "ai";
|
|
@@ -275,13 +276,7 @@ export const createWorkers = async (
|
|
|
275
276
|
|
|
276
277
|
const label = `embedder-${bullmqJob.name}`;
|
|
277
278
|
|
|
278
|
-
await db
|
|
279
|
-
job_id: bullmqJob.id,
|
|
280
|
-
label: label,
|
|
281
|
-
state: await bullmqJob.getState(),
|
|
282
|
-
result: null,
|
|
283
|
-
metadata: {},
|
|
284
|
-
});
|
|
279
|
+
await upsertJobStart(db, bullmqJob, label, "embedder");
|
|
285
280
|
|
|
286
281
|
const context = contexts.find((context) => context.id === data.context);
|
|
287
282
|
|
|
@@ -331,13 +326,7 @@ export const createWorkers = async (
|
|
|
331
326
|
|
|
332
327
|
const label = `processor-${bullmqJob.name}`;
|
|
333
328
|
|
|
334
|
-
await db
|
|
335
|
-
job_id: bullmqJob.id,
|
|
336
|
-
label: label,
|
|
337
|
-
state: await bullmqJob.getState(),
|
|
338
|
-
result: null,
|
|
339
|
-
metadata: {},
|
|
340
|
-
});
|
|
329
|
+
await upsertJobStart(db, bullmqJob, label, "processor");
|
|
341
330
|
|
|
342
331
|
const context = contexts.find((context) => context.id === data.context);
|
|
343
332
|
|
|
@@ -502,6 +491,7 @@ export const createWorkers = async (
|
|
|
502
491
|
agent,
|
|
503
492
|
provider,
|
|
504
493
|
user,
|
|
494
|
+
workflow,
|
|
505
495
|
messages: inputMessages,
|
|
506
496
|
} = await validateWorkflowPayload(data, providers);
|
|
507
497
|
|
|
@@ -535,6 +525,8 @@ export const createWorkers = async (
|
|
|
535
525
|
tools,
|
|
536
526
|
config,
|
|
537
527
|
variables: data.inputs,
|
|
528
|
+
// Tag LLM spend to this routine (cron + ad-hoc share this path).
|
|
529
|
+
routine: { id: workflow.id, name: workflow.name },
|
|
538
530
|
});
|
|
539
531
|
resolve(messages);
|
|
540
532
|
break;
|
|
@@ -1021,6 +1013,9 @@ export const createWorkers = async (
|
|
|
1021
1013
|
result: returnvalue.result != null ? JSON.stringify(returnvalue.result) : null,
|
|
1022
1014
|
metadata: returnvalue.metadata != null ? JSON.stringify(returnvalue.metadata) : null,
|
|
1023
1015
|
});
|
|
1016
|
+
|
|
1017
|
+
// Cap the table as rows become terminal (every Nth, idempotent).
|
|
1018
|
+
void maybePruneJobResults(db);
|
|
1024
1019
|
},
|
|
1025
1020
|
);
|
|
1026
1021
|
|
|
@@ -1034,6 +1029,9 @@ export const createWorkers = async (
|
|
|
1034
1029
|
state: JOB_STATUS_ENUM.failed,
|
|
1035
1030
|
error,
|
|
1036
1031
|
});
|
|
1032
|
+
|
|
1033
|
+
// Cap the table as rows become terminal (every Nth, idempotent).
|
|
1034
|
+
void maybePruneJobResults(db);
|
|
1037
1035
|
return;
|
|
1038
1036
|
}
|
|
1039
1037
|
console.error(
|
|
@@ -1331,6 +1329,7 @@ export const processUiMessagesFlow = async ({
|
|
|
1331
1329
|
tools,
|
|
1332
1330
|
config,
|
|
1333
1331
|
variables,
|
|
1332
|
+
routine,
|
|
1334
1333
|
}: {
|
|
1335
1334
|
providers: ExuluProvider[];
|
|
1336
1335
|
agent: ExuluAgent;
|
|
@@ -1342,6 +1341,14 @@ export const processUiMessagesFlow = async ({
|
|
|
1342
1341
|
tools: ExuluTool[];
|
|
1343
1342
|
config: ExuluConfig;
|
|
1344
1343
|
variables?: Record<string, any>;
|
|
1344
|
+
/**
|
|
1345
|
+
* Set when this flow is invoked from a workflow_template run (one-shot via
|
|
1346
|
+
* runWorkflow or cron via upsertWorkflowSchedule). Forwarded to resolveModel
|
|
1347
|
+
* so buildTags() emits routine_id_/routine_name_ alongside user/agent tags
|
|
1348
|
+
* for /analytics + /admin/budgets attribution. /chat and /openai-gateway
|
|
1349
|
+
* callers leave this undefined — they have no routine context.
|
|
1350
|
+
*/
|
|
1351
|
+
routine?: { id: string; name: string };
|
|
1345
1352
|
}): Promise<{
|
|
1346
1353
|
messages: UIMessage[];
|
|
1347
1354
|
metadata: {
|
|
@@ -1390,7 +1397,8 @@ export const processUiMessagesFlow = async ({
|
|
|
1390
1397
|
modelId: agent.model,
|
|
1391
1398
|
user,
|
|
1392
1399
|
providers,
|
|
1393
|
-
agent: agent
|
|
1400
|
+
agent: agent,
|
|
1401
|
+
routine,
|
|
1394
1402
|
});
|
|
1395
1403
|
const providerapikey = resolved.apiKey;
|
|
1396
1404
|
const resolvedLanguageModel = resolved.languageModel;
|
|
@@ -1632,3 +1640,36 @@ function getAverage(arr: number[]): number {
|
|
|
1632
1640
|
if (arr.length === 0) return 0; // Handle empty array
|
|
1633
1641
|
return arr.reduce((a, b) => a + b, 0) / arr.length;
|
|
1634
1642
|
}
|
|
1643
|
+
|
|
1644
|
+
/**
 * KB-7: at worker pickup, advance the enqueue-time `job_results` row (state
 * "waiting", written by the queue decorator) to the job's live state instead
 * of inserting a duplicate. Falls back to an insert when no row exists yet
 * (jobs enqueued before the enqueue-time write was introduced). Used by the
 * processor + embedder handlers; the completed/failed worker events then
 * drive the same row to its terminal state.
 *
 * @param db Knex-style client; used as `db.from("job_results")`.
 * @param bullmqJob The job being started; `id` keys the row, `getState()`
 *   supplies the state to store, `data` feeds the fallback insert.
 * @param label Label to store on the row.
 * @param fallbackType Value for the `type` column when `data.type` is absent
 *   (fallback insert only).
 * @throws Error when the job has no id. An undefined id would otherwise reach
 *   the `where` clause, which knex rejects with an opaque "Undefined
 *   binding(s)" error.
 */
async function upsertJobStart(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the knex client type is not narrowed here
  db: any,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- job payload shape varies by job type
  bullmqJob: { id?: string; data?: any; getState: () => Promise<string> },
  label: string,
  fallbackType: string,
): Promise<void> {
  // Fail fast, before any I/O, with a message that names the actual problem.
  if (bullmqJob.id == null) {
    throw new Error("[EXULU] upsertJobStart: job has no id; cannot track it in job_results");
  }
  const jobId = bullmqJob.id;
  const state = await bullmqJob.getState();

  // Common path: the enqueue-time row exists, so just advance it.
  const updated = await db.from("job_results").where({ job_id: jobId }).update({ label, state });
  if (updated) return;

  // Legacy path: no row yet, so create it with the same columns the
  // enqueue-time insert writes. The item id is only needed here, so it is
  // derived here rather than on every call.
  const data = bullmqJob.data;
  const rawItem = data?.item;
  let itemId: unknown = null;
  if (rawItem != null) {
    // Items arrive either as an object carrying `id` or as the bare id itself.
    itemId = typeof rawItem === "object" ? (rawItem.id ?? null) : rawItem;
  }

  await db.from("job_results").insert({
    job_id: jobId,
    label,
    state,
    result: null,
    metadata: {},
    type: data?.type ?? fallbackType,
    item: itemId == null ? null : String(itemId),
    context: data?.context ? String(data.context) : null,
  });
}
|