@exulu/backend 1.66.0 → 1.68.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-VPSLTGZF.js +10267 -0
- package/dist/{convert-exulu-tools-to-ai-sdk-tools-ZFIN7A5V.js → convert-exulu-tools-to-ai-sdk-tools-CHQF36XW.js} +1 -2
- package/dist/index.cjs +23930 -22308
- package/dist/index.d.cts +401 -100
- package/dist/index.d.ts +401 -100
- package/dist/index.js +15215 -4233
- package/ee/agentic-retrieval/v3/agent-loop.ts +4 -4
- package/ee/agentic-retrieval/v3/index.ts +20 -6
- package/ee/python/documents/processing/doc_processor.ts +79 -34
- package/ee/python/requirements.txt +8 -1
- package/ee/python/setup.sh +0 -49
- package/ee/queues/decorator.ts +36 -0
- package/ee/queues/prune-job-results.ts +55 -0
- package/ee/schemas.ts +19 -0
- package/ee/workers.ts +59 -32
- package/package.json +1 -1
- package/dist/chunk-KQDNL5WU.js +0 -19399
- package/ee/agentic-retrieval/v4/agent-loop.ts +0 -208
- package/ee/agentic-retrieval/v4/context-sampler.ts +0 -79
- package/ee/agentic-retrieval/v4/index.ts +0 -690
- package/ee/agentic-retrieval/v4/types.ts +0 -58
- package/ee/python/.hermes/.env.example +0 -8
- package/ee/python/.hermes/README.md +0 -44
- package/ee/python/.hermes/SOUL.md.example +0 -8
- package/ee/python/.hermes/config.yaml.example +0 -55
|
@@ -2,7 +2,7 @@ import { generateText, stepCountIs, tool } from "ai";
|
|
|
2
2
|
import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { withRetry } from "@SRC/utils/with-retry";
|
|
5
|
-
import type {
|
|
5
|
+
import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
|
|
6
6
|
import type { AgenticRetrievalOutput, ChunkResult, ClassificationResult } from "./types";
|
|
7
7
|
import type { StrategyConfig } from "./strategies";
|
|
8
8
|
import { createDynamicTools } from "./dynamic-tools";
|
|
@@ -69,7 +69,7 @@ export async function* runAgentLoop(params: {
|
|
|
69
69
|
strategy: StrategyConfig;
|
|
70
70
|
tools: Record<string, AITool>;
|
|
71
71
|
model: LanguageModel;
|
|
72
|
-
reranker?:
|
|
72
|
+
reranker?: ResolvedReranker;
|
|
73
73
|
contextGuidance?: string;
|
|
74
74
|
customInstructions?: string;
|
|
75
75
|
classification: ClassificationResult;
|
|
@@ -171,8 +171,8 @@ export async function* runAgentLoop(params: {
|
|
|
171
171
|
|
|
172
172
|
// Rerank if reranker is available
|
|
173
173
|
if (reranker && stepChunks.length > 0) {
|
|
174
|
-
console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.
|
|
175
|
-
stepChunks = await reranker.
|
|
174
|
+
console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.model}`);
|
|
175
|
+
stepChunks = await reranker.rerank(query, stepChunks);
|
|
176
176
|
}
|
|
177
177
|
|
|
178
178
|
// Create dynamic tools (browse adjacent pages, load specific pages)
|
|
@@ -2,7 +2,8 @@ import { z } from "zod";
|
|
|
2
2
|
import { createBashTool } from "bash-tool";
|
|
3
3
|
import type { LanguageModel, Tool } from "ai";
|
|
4
4
|
import type { ExuluContext } from "@SRC/exulu/context";
|
|
5
|
-
import
|
|
5
|
+
import { resolveReranker } from "@SRC/exulu/resolve-reranker";
|
|
6
|
+
import type { ResolvedReranker } from "@SRC/exulu/resolve-reranker";
|
|
6
7
|
import { ExuluTool } from "@SRC/exulu/tool";
|
|
7
8
|
import type { User } from "@EXULU_TYPES/models/user";
|
|
8
9
|
import { checkLicense } from "@EE/entitlements";
|
|
@@ -34,7 +35,7 @@ async function* executeV3({
|
|
|
34
35
|
}: {
|
|
35
36
|
query: string;
|
|
36
37
|
contexts: ExuluContext[];
|
|
37
|
-
reranker?:
|
|
38
|
+
reranker?: ResolvedReranker;
|
|
38
39
|
toolVariablesConfig?: Record<string, any>;
|
|
39
40
|
model: LanguageModel;
|
|
40
41
|
user?: User;
|
|
@@ -189,7 +190,6 @@ async function* executeV3({
|
|
|
189
190
|
export function createAgenticRetrievalToolV3({
|
|
190
191
|
contexts,
|
|
191
192
|
instructions: adminInstructions,
|
|
192
|
-
rerankers,
|
|
193
193
|
user,
|
|
194
194
|
role,
|
|
195
195
|
model,
|
|
@@ -197,7 +197,6 @@ export function createAgenticRetrievalToolV3({
|
|
|
197
197
|
memoryItems
|
|
198
198
|
}: {
|
|
199
199
|
contexts: ExuluContext[];
|
|
200
|
-
rerankers: ExuluReranker[];
|
|
201
200
|
user?: User;
|
|
202
201
|
role?: string;
|
|
203
202
|
model?: LanguageModel;
|
|
@@ -355,7 +354,7 @@ export function createAgenticRetrievalToolV3({
|
|
|
355
354
|
}
|
|
356
355
|
|
|
357
356
|
let activeContexts = contexts;
|
|
358
|
-
let configuredReranker:
|
|
357
|
+
let configuredReranker: ResolvedReranker | undefined;
|
|
359
358
|
let configInstructions = "";
|
|
360
359
|
let logTrajectory = false;
|
|
361
360
|
let requiresPreselectedContexts = false;
|
|
@@ -382,7 +381,22 @@ export function createAgenticRetrievalToolV3({
|
|
|
382
381
|
const rerankerId = toolVariablesConfig["reranker"];
|
|
383
382
|
|
|
384
383
|
if (rerankerId && rerankerId !== "none") {
|
|
385
|
-
|
|
384
|
+
// rerankerId is a LiteLLM model_name from config.litellm.yaml
|
|
385
|
+
// (model_info.type: reranker). Resolution is best-effort: a
|
|
386
|
+
// misconfigured model or an unready proxy must not break retrieval —
|
|
387
|
+
// it just runs unreranked, matching the old find()→undefined path.
|
|
388
|
+
try {
|
|
389
|
+
configuredReranker = await resolveReranker({
|
|
390
|
+
model: rerankerId,
|
|
391
|
+
user,
|
|
392
|
+
roleId: role,
|
|
393
|
+
});
|
|
394
|
+
} catch (err) {
|
|
395
|
+
console.warn(
|
|
396
|
+
`[EXULU] v3 — could not resolve reranker "${rerankerId}", continuing without reranking:`,
|
|
397
|
+
err,
|
|
398
|
+
);
|
|
399
|
+
}
|
|
386
400
|
}
|
|
387
401
|
}
|
|
388
402
|
|
|
@@ -14,17 +14,41 @@ import { checkLicense } from '@EE/entitlements';
|
|
|
14
14
|
import { executePythonScript } from '@SRC/utils/python-executor';
|
|
15
15
|
import { setupPythonEnvironment, validatePythonEnvironment } from '@SRC/utils/python-setup';
|
|
16
16
|
import { LiteParse } from '@llamaindex/liteparse';
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
17
|
+
import { resolveOcr } from '@SRC/exulu/resolve-ocr';
|
|
18
|
+
import type { ResolveOcrInput } from '@SRC/exulu/resolve-ocr';
|
|
19
|
+
import { resolveModel } from '@SRC/exulu/resolve-model';
|
|
19
20
|
|
|
20
21
|
type DocumentProcessorConfig = {
|
|
21
22
|
vlm?: {
|
|
22
|
-
|
|
23
|
+
/**
|
|
24
|
+
* LiteLLM model_name for the VLM page-validation pass (declared in
|
|
25
|
+
* config.litellm.yaml, e.g. "vertex-gemini-2.5-flash"). Resolved via
|
|
26
|
+
* resolveModel() so the VLM pass shares the same tag-based cost controls
|
|
27
|
+
* and provider-switching as chat / embeddings / OCR, and the underlying
|
|
28
|
+
* provider can be swapped without code changes.
|
|
29
|
+
*/
|
|
30
|
+
model: string;
|
|
23
31
|
concurrency: number;
|
|
24
32
|
},
|
|
25
33
|
processor: {
|
|
26
34
|
name: "docling" | "liteparse" | "mistral" | "officeparser"
|
|
35
|
+
/**
|
|
36
|
+
* LiteLLM model_name for the "mistral" OCR processor (declared in
|
|
37
|
+
* config.litellm.yaml). Defaults to "mistral-ocr". OCR is routed through
|
|
38
|
+
* the LiteLLM proxy so it shares the same tag-based cost controls as chat
|
|
39
|
+
* and embeddings, and the underlying provider (mistral / azure_ai /
|
|
40
|
+
* vertex_ai) can be switched without code changes.
|
|
41
|
+
*/
|
|
42
|
+
model?: string
|
|
27
43
|
}
|
|
44
|
+
/**
|
|
45
|
+
* Optional cost-attribution context, forwarded to LiteLLM as spend tags
|
|
46
|
+
* (user / role / project / context) for both the OCR pass (resolveOcr) and
|
|
47
|
+
* the VLM page-validation pass (resolveModel). Not yet populated by callers;
|
|
48
|
+
* the wiring is in place so per-user/per-context budgets work the moment
|
|
49
|
+
* attribution is threaded through.
|
|
50
|
+
*/
|
|
51
|
+
attribution?: Omit<ResolveOcrInput, "model">
|
|
28
52
|
debugging?: {
|
|
29
53
|
deleteTempFiles?: boolean;
|
|
30
54
|
}
|
|
@@ -94,6 +118,38 @@ async function processWord(file: Buffer): Promise<ProcessorOutput> {
|
|
|
94
118
|
}
|
|
95
119
|
}
|
|
96
120
|
|
|
121
|
+
/**
|
|
122
|
+
* Resolve the dev-supplied VLM `model` string (a LiteLLM model_name from
|
|
123
|
+
* config.litellm.yaml, e.g. "vertex-gemini-2.5-flash") into an `ai` SDK
|
|
124
|
+
* LanguageModel via resolveModel. This routes the VLM page-validation pass
|
|
125
|
+
* through the LiteLLM proxy — same tag-based cost controls and provider
|
|
126
|
+
* switching as chat / embeddings / OCR — and keeps the internal VLM helpers
|
|
127
|
+
* (validateWithVLM / validatePageWithVLM) working with a LanguageModel.
|
|
128
|
+
*
|
|
129
|
+
* Returns undefined when no VLM model is configured. Attribution (user /
|
|
130
|
+
* project / agent / routine) is forwarded for spend tagging when callers
|
|
131
|
+
* populate config.attribution; rbacBypass is set because this is a background
|
|
132
|
+
* package call where model-level access control is delegated to LiteLLM.
|
|
133
|
+
*/
|
|
134
|
+
async function resolveVlmModel(
|
|
135
|
+
config?: DocumentProcessorConfig,
|
|
136
|
+
): Promise<LanguageModel | undefined> {
|
|
137
|
+
const modelId = config?.vlm?.model;
|
|
138
|
+
if (!modelId) return undefined;
|
|
139
|
+
|
|
140
|
+
const { languageModel } = await resolveModel({
|
|
141
|
+
modelId,
|
|
142
|
+
providers: [], // unused in LiteLLM mode; resolveModel ignores it there
|
|
143
|
+
user: config?.attribution?.user,
|
|
144
|
+
project: config?.attribution?.project,
|
|
145
|
+
agent: config?.attribution?.agent,
|
|
146
|
+
routine: config?.attribution?.routine,
|
|
147
|
+
rbacBypass: true,
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
return languageModel;
|
|
151
|
+
}
|
|
152
|
+
|
|
97
153
|
/**
|
|
98
154
|
* Processes a standalone image file by optionally extracting content using VLM
|
|
99
155
|
*/
|
|
@@ -122,14 +178,15 @@ async function processImage(
|
|
|
122
178
|
}];
|
|
123
179
|
|
|
124
180
|
// If VLM is enabled, use it to extract content from the image
|
|
125
|
-
|
|
181
|
+
const vlmModel = await resolveVlmModel(config);
|
|
182
|
+
if (vlmModel) {
|
|
126
183
|
console.log('[EXULU] Extracting content from image using VLM...');
|
|
127
184
|
|
|
128
185
|
json = await validateWithVLM(
|
|
129
186
|
json,
|
|
130
|
-
|
|
187
|
+
vlmModel,
|
|
131
188
|
verbose,
|
|
132
|
-
config
|
|
189
|
+
config!.vlm!.concurrency
|
|
133
190
|
);
|
|
134
191
|
|
|
135
192
|
// Save the processed result
|
|
@@ -679,15 +736,6 @@ async function processDocument(
|
|
|
679
736
|
};
|
|
680
737
|
}
|
|
681
738
|
|
|
682
|
-
const getMistralApiKey = async () => {
|
|
683
|
-
if (process.env.MISTRAL_API_KEY) {
|
|
684
|
-
return process.env.MISTRAL_API_KEY;
|
|
685
|
-
} else {
|
|
686
|
-
const variable = await ExuluVariables.get("MISTRAL_API_KEY");
|
|
687
|
-
return variable;
|
|
688
|
-
}
|
|
689
|
-
}
|
|
690
|
-
|
|
691
739
|
async function processPdf(
|
|
692
740
|
buffer: Buffer,
|
|
693
741
|
paths: ProcessingPaths,
|
|
@@ -759,28 +807,25 @@ async function processPdf(
|
|
|
759
807
|
|
|
760
808
|
} else if (config?.processor.name === "mistral") {
|
|
761
809
|
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
810
|
+
// OCR is routed through the LiteLLM proxy's Mistral-compatible /v1/ocr
|
|
811
|
+
// endpoint (see resolveOcr) rather than the Mistral SDK directly. This
|
|
812
|
+
// gives us tag-based cost control and lets us switch the OCR provider
|
|
813
|
+
// (mistral / azure_ai / vertex_ai) from config.litellm.yaml.
|
|
814
|
+
const resolved = await resolveOcr({
|
|
815
|
+
model: config.processor.model ?? "mistral-ocr",
|
|
816
|
+
...config.attribution,
|
|
817
|
+
});
|
|
766
818
|
|
|
767
819
|
// Wait a random time between 1 and 5 seconds to prevent rate limiting
|
|
768
820
|
await new Promise(resolve => setTimeout(resolve, Math.floor(Math.random() * 4000) + 1000));
|
|
769
821
|
|
|
770
822
|
const base64Pdf = buffer.toString('base64');
|
|
771
|
-
const client = new Mistral({ apiKey: MISTRAL_API_KEY });
|
|
772
823
|
|
|
773
824
|
const ocrResponse = await withRetry(async () => {
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
documentUrl: "data:application/pdf;base64," + base64Pdf
|
|
779
|
-
},
|
|
780
|
-
model: "mistral-ocr-latest",
|
|
781
|
-
includeImageBase64: false
|
|
782
|
-
});
|
|
783
|
-
return ocrResponse;
|
|
825
|
+
return await resolved.ocr({
|
|
826
|
+
type: "document_url",
|
|
827
|
+
document_url: "data:application/pdf;base64," + base64Pdf,
|
|
828
|
+
}, { includeImageBase64: false });
|
|
784
829
|
}, 10);
|
|
785
830
|
|
|
786
831
|
const parser = new LiteParse();
|
|
@@ -838,13 +883,14 @@ async function processPdf(
|
|
|
838
883
|
}
|
|
839
884
|
|
|
840
885
|
// Apply VLM validation if enabled
|
|
841
|
-
|
|
886
|
+
const vlmModel = config?.vlm?.model ? await resolveVlmModel(config) : undefined;
|
|
887
|
+
if (vlmModel && json.length > 0) {
|
|
842
888
|
|
|
843
889
|
json = await validateWithVLM(
|
|
844
890
|
json,
|
|
845
|
-
|
|
891
|
+
vlmModel,
|
|
846
892
|
verbose,
|
|
847
|
-
config
|
|
893
|
+
config!.vlm!.concurrency
|
|
848
894
|
);
|
|
849
895
|
|
|
850
896
|
console.log('[EXULU] \n📊 Processing Summary:');
|
|
@@ -1046,7 +1092,6 @@ export async function documentProcessor({
|
|
|
1046
1092
|
} catch (error) {
|
|
1047
1093
|
console.error('Error during chunking:', error);
|
|
1048
1094
|
throw error;
|
|
1049
|
-
|
|
1050
1095
|
} finally {
|
|
1051
1096
|
if (config?.debugging?.deleteTempFiles !== false) {
|
|
1052
1097
|
// Delete the temp directory using the local array to avoid race conditions
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
docling
|
|
2
|
-
transformers
|
|
2
|
+
# transformers <5: the 5.x line requires huggingface_hub>=1.0, which removed the
|
|
3
|
+
# `use_auth_token` kwarg that pyannote.audio 3.x still passes to hf_hub_download()
|
|
4
|
+
# (→ "unexpected keyword argument 'use_auth_token'", diarization silently
|
|
5
|
+
# disabled). whisperx only needs transformers>=4.48, so the 4.x line is fine.
|
|
6
|
+
transformers>=4.48,<5
|
|
3
7
|
pyinstaller
|
|
4
8
|
docling-hierarchical-pdf
|
|
5
9
|
defusedxml
|
|
@@ -17,6 +21,9 @@ torchaudio==2.5.1
|
|
|
17
21
|
torchvision==0.20.1
|
|
18
22
|
whisperx>=3.4.0
|
|
19
23
|
pyannote.audio>=3.3.0
|
|
24
|
+
# Belt-and-suspenders: keep huggingface_hub on the 0.x line so pyannote 3.x's
|
|
25
|
+
# `use_auth_token=` calls keep working (1.x removed that kwarg → diarization off).
|
|
26
|
+
huggingface_hub<1.0
|
|
20
27
|
fastapi
|
|
21
28
|
uvicorn
|
|
22
29
|
python-multipart
|
package/ee/python/setup.sh
CHANGED
|
@@ -253,46 +253,6 @@ if [ -n "$LITELLM_PROXY_DIR" ] && [ -f "$LITELLM_PROXY_DIR/schema.prisma" ]; the
|
|
|
253
253
|
|| print_warning "Prisma generate failed; LiteLLM database mode (database_url in config.litellm.yaml) may not work until you run 'cd $LITELLM_PROXY_DIR && PATH=$VENV_DIR/bin:\$PATH $VENV_DIR/bin/prisma generate'"
|
|
254
254
|
fi
|
|
255
255
|
|
|
256
|
-
# Step 6.6: Install the Hermes Agent harness (advanced agent mode).
|
|
257
|
-
# Opt-in via ENABLE_HERMES_AGENT=true. Hermes is NOT a pip package — it ships
|
|
258
|
-
# as a standalone binary via Nous Research's official installer (lands in
|
|
259
|
-
# ~/.local/bin/hermes). We only install if it's not already present so re-runs
|
|
260
|
-
# are fast, and we never fail the whole setup if the install fails (advanced
|
|
261
|
-
# mode is optional; the operator can install it manually and retry).
|
|
262
|
-
if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
|
|
263
|
-
echo ""
|
|
264
|
-
echo "Step 6.6: Installing Hermes Agent harness (ENABLE_HERMES_AGENT=true)..."
|
|
265
|
-
if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
|
|
266
|
-
HERMES_VERSION=$( (command -v hermes &> /dev/null && hermes --version 2>/dev/null) || "$HOME/.local/bin/hermes" --version 2>/dev/null || echo "unknown")
|
|
267
|
-
print_success "Hermes already installed ($HERMES_VERSION) — skipping installer"
|
|
268
|
-
else
|
|
269
|
-
print_info "Running Hermes official installer..."
|
|
270
|
-
if curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash; then
|
|
271
|
-
print_success "Hermes Agent installed (binary at ~/.local/bin/hermes)"
|
|
272
|
-
else
|
|
273
|
-
print_warning "Hermes installer failed. Advanced agent mode will be unavailable until 'hermes' is on PATH. Install manually: https://hermes-agent.nousresearch.com/docs/getting-started/installation"
|
|
274
|
-
fi
|
|
275
|
-
fi
|
|
276
|
-
|
|
277
|
-
# Pre-pull the docker terminal-backend image so the first agent request
|
|
278
|
-
# isn't blocked on a cold image pull (~minute). Only when the backend is
|
|
279
|
-
# docker (the default) and docker is available; non-fatal otherwise.
|
|
280
|
-
HERMES_BACKEND="${HERMES_TERMINAL_BACKEND:-docker}"
|
|
281
|
-
if [ "${HERMES_BACKEND}" = "docker" ]; then
|
|
282
|
-
HERMES_IMG="${HERMES_DOCKER_IMAGE:-nikolaik/python-nodejs:python3.11-nodejs20}"
|
|
283
|
-
if command -v docker &> /dev/null; then
|
|
284
|
-
print_info "Pre-pulling Hermes docker backend image: ${HERMES_IMG}..."
|
|
285
|
-
if docker pull "${HERMES_IMG}" > /dev/null 2>&1; then
|
|
286
|
-
print_success "Docker backend image ready (${HERMES_IMG})"
|
|
287
|
-
else
|
|
288
|
-
print_warning "Could not pre-pull ${HERMES_IMG}; the first advanced-mode request will pull it (slower)."
|
|
289
|
-
fi
|
|
290
|
-
else
|
|
291
|
-
print_warning "Docker not found, but HERMES_TERMINAL_BACKEND=docker. Install Docker, or set HERMES_TERMINAL_BACKEND=local (unsandboxed)."
|
|
292
|
-
fi
|
|
293
|
-
fi
|
|
294
|
-
fi
|
|
295
|
-
|
|
296
256
|
# Step 7: Validate installation
|
|
297
257
|
echo ""
|
|
298
258
|
echo "Step 7: Validating installation..."
|
|
@@ -309,15 +269,6 @@ $PYTHON_CMD -c "import whisperx" 2>/dev/null && print_success "whisperx imported
|
|
|
309
269
|
$PYTHON_CMD -c "import pyannote.audio" 2>/dev/null && print_success "pyannote.audio imported successfully" || print_warning "pyannote.audio not importable (diarization will be disabled even with HF_AUTH_TOKEN)"
|
|
310
270
|
$PYTHON_CMD -c "import fastapi, uvicorn" 2>/dev/null && print_success "fastapi/uvicorn imported successfully" || print_warning "fastapi/uvicorn not importable (transcription server will not start)"
|
|
311
271
|
|
|
312
|
-
# Hermes Agent binary check (advanced agent mode) — only when opted in.
|
|
313
|
-
if [ "${ENABLE_HERMES_AGENT}" = "true" ]; then
|
|
314
|
-
if command -v hermes &> /dev/null || [ -x "$HOME/.local/bin/hermes" ]; then
|
|
315
|
-
print_success "hermes binary available (advanced agent mode ready)"
|
|
316
|
-
else
|
|
317
|
-
print_warning "hermes binary not found (advanced agent mode will be unavailable)"
|
|
318
|
-
fi
|
|
319
|
-
fi
|
|
320
|
-
|
|
321
272
|
# Step 8: Display summary
|
|
322
273
|
echo ""
|
|
323
274
|
echo -e "${GREEN}========================================${NC}"
|
package/ee/queues/decorator.ts
CHANGED
|
@@ -2,6 +2,8 @@ import { Queue } from "bullmq";
|
|
|
2
2
|
import { v4 as uuidv4 } from "uuid";
|
|
3
3
|
import type { UIMessage } from "ai";
|
|
4
4
|
import type { STATISTICS_LABELS } from "@EXULU_TYPES/statistics";
|
|
5
|
+
import { postgresClient } from "@SRC/postgres/client";
|
|
6
|
+
import { maybePruneJobResults } from "./prune-job-results";
|
|
5
7
|
|
|
6
8
|
type ExuluJobType = "embedder" | "workflow" | "eval" | "processor";
|
|
7
9
|
|
|
@@ -120,6 +122,40 @@ export const bullmqDecorator = async ({
|
|
|
120
122
|
};
|
|
121
123
|
|
|
122
124
|
const redisId = uuidv4();
|
|
125
|
+
|
|
126
|
+
// Knowledge V2 (KB-7): record the job in job_results at ENQUEUE time (state
|
|
127
|
+
// "waiting") for processor/embedder jobs, so the item detail page can detect
|
|
128
|
+
// jobs that are queued-but-not-yet-started (which it couldn't if the row was
|
|
129
|
+
// only written at worker pickup). Inserted BEFORE queue.add so the row is
|
|
130
|
+
// guaranteed present before any worker can grab the job (no insert/update
|
|
131
|
+
// race). The worker-start update + completed/failed handlers drive the row
|
|
132
|
+
// through active → completed/failed, all keyed by this job_id.
|
|
133
|
+
if ((type === "processor" || type === "embedder") && context) {
|
|
134
|
+
try {
|
|
135
|
+
const { db } = await postgresClient();
|
|
136
|
+
const itemId =
|
|
137
|
+
item == null
|
|
138
|
+
? null
|
|
139
|
+
: typeof item === "object"
|
|
140
|
+
? ((item as { id?: unknown }).id ?? null)
|
|
141
|
+
: item;
|
|
142
|
+
await db.from("job_results").insert({
|
|
143
|
+
job_id: redisId,
|
|
144
|
+
label,
|
|
145
|
+
state: "waiting",
|
|
146
|
+
type,
|
|
147
|
+
item: itemId == null ? null : String(itemId),
|
|
148
|
+
context: String(context),
|
|
149
|
+
result: null,
|
|
150
|
+
metadata: {},
|
|
151
|
+
});
|
|
152
|
+
// Bound the table: every Nth added row, prune the oldest terminal rows.
|
|
153
|
+
void maybePruneJobResults(db);
|
|
154
|
+
} catch (err) {
|
|
155
|
+
console.error("[EXULU] enqueue job_results insert failed", err);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
123
159
|
const job = await queue.add(`${embedder || workflow || processor || evaluation}`, jobData, {
|
|
124
160
|
jobId: redisId,
|
|
125
161
|
// Setting it to 3 as a sensible default, as
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Periodic job_results cap (knowledge V2 KB-7 follow-up).
|
|
3
|
+
*
|
|
4
|
+
* We now write a job_results row at enqueue time, so the table grows faster.
|
|
5
|
+
* To bound it, every PRUNE_EVERY-th call we delete the oldest terminal rows
|
|
6
|
+
* (state failed/completed) beyond the newest MAX_TERMINAL — keeping a rolling
|
|
7
|
+
* window of recent finished jobs. Waiting/active/delayed rows are never
|
|
8
|
+
* pruned (they're still live).
|
|
9
|
+
*
|
|
10
|
+
* The counter is per-process (the API process counts enqueues; the worker
|
|
11
|
+
* process counts completions) — that's fine: the prune is idempotent, so it
|
|
12
|
+
* doesn't matter which process triggers it. A `pruning` guard avoids
|
|
13
|
+
* overlapping runs.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
const MAX_TERMINAL = 10_000;
|
|
17
|
+
const PRUNE_EVERY = 100;
|
|
18
|
+
const TERMINAL_STATES = ["failed", "completed"];
|
|
19
|
+
|
|
20
|
+
let sinceLastPrune = 0;
|
|
21
|
+
let pruning = false;
|
|
22
|
+
|
|
23
|
+
export async function maybePruneJobResults(db: any): Promise<void> {
|
|
24
|
+
sinceLastPrune += 1;
|
|
25
|
+
if (sinceLastPrune < PRUNE_EVERY || pruning) return;
|
|
26
|
+
sinceLastPrune = 0;
|
|
27
|
+
pruning = true;
|
|
28
|
+
try {
|
|
29
|
+
// The (MAX_TERMINAL+1)-th newest terminal row marks the boundary; delete it
|
|
30
|
+
// and everything older. Dialect-agnostic (knex offset/limit) so it works on
|
|
31
|
+
// both Postgres and MySQL.
|
|
32
|
+
const boundary = await db("job_results")
|
|
33
|
+
.whereIn("state", TERMINAL_STATES)
|
|
34
|
+
.orderBy("createdAt", "desc")
|
|
35
|
+
.offset(MAX_TERMINAL)
|
|
36
|
+
.limit(1)
|
|
37
|
+
.first();
|
|
38
|
+
|
|
39
|
+
if (boundary?.createdAt) {
|
|
40
|
+
const deleted = await db("job_results")
|
|
41
|
+
.whereIn("state", TERMINAL_STATES)
|
|
42
|
+
.where("createdAt", "<=", boundary.createdAt)
|
|
43
|
+
.del();
|
|
44
|
+
if (deleted) {
|
|
45
|
+
console.log(
|
|
46
|
+
`[EXULU] pruned ${deleted} terminal job_results rows (cap ${MAX_TERMINAL}).`,
|
|
47
|
+
);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
} catch (err) {
|
|
51
|
+
console.error("[EXULU] job_results prune failed", err);
|
|
52
|
+
} finally {
|
|
53
|
+
pruning = false;
|
|
54
|
+
}
|
|
55
|
+
}
|
package/ee/schemas.ts
CHANGED
|
@@ -241,6 +241,25 @@ export const jobResultsSchema: ExuluTableDefinition = {
|
|
|
241
241
|
name: "metadata",
|
|
242
242
|
type: "json",
|
|
243
243
|
},
|
|
244
|
+
// Knowledge V2 (KB-7): per-item pipeline tracking. Written at ENQUEUE
|
|
245
|
+
// time (state "waiting") by the queue decorator so the item page can
|
|
246
|
+
// detect waiting jobs — not only worker-started ones. `type` is the
|
|
247
|
+
// job kind (processor/embedder/...); item + context indexed for the
|
|
248
|
+
// item-page query.
|
|
249
|
+
{
|
|
250
|
+
name: "item",
|
|
251
|
+
type: "text",
|
|
252
|
+
index: true,
|
|
253
|
+
},
|
|
254
|
+
{
|
|
255
|
+
name: "context",
|
|
256
|
+
type: "text",
|
|
257
|
+
index: true,
|
|
258
|
+
},
|
|
259
|
+
{
|
|
260
|
+
name: "type",
|
|
261
|
+
type: "text",
|
|
262
|
+
},
|
|
244
263
|
],
|
|
245
264
|
};
|
|
246
265
|
|