@exulu/backend 1.53.1 → 1.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,309 @@
1
+ import * as fs from "fs/promises";
2
+ import * as path from "path";
3
+ import type { AgenticRetrievalOutput, ClassificationResult, ChunkResult } from "./types";
4
+
5
+ export const trajectoryRegistry = {
6
+ lastFile: undefined as string | undefined,
7
+ };
8
+
9
/**
 * Full per-step capture of an agentic retrieval step, consumed by
 * TrajectoryLogger.toMarkdown() to render the detailed markdown report.
 */
export interface TrajectoryStepData {
  /** 1-based index of the step within the run. */
  stepNumber: number;
  /** System prompt in effect for this step (rendered collapsed for step 1). */
  systemPrompt: string;
  /** Free-form reasoning text the model emitted during this step. */
  text: string;
  /** Tool invocations made in this step; `output` is present once the tool returned. */
  toolCalls: Array<{
    name: string;
    id: string;
    input: any;
    output?: any;
  }>;
  /** Chunks retrieved by this step's tool calls (full content included when available). */
  chunks: ChunkResult[];
  /** Names of dynamic tools created during this step. */
  dynamicToolsCreated: string[];
  /** Token usage attributed to this step. */
  tokens: number;
}
23
+
24
/**
 * Slim trajectory record serialized to the JSON log file by
 * TrajectoryLogger.finalize(). Uses snake_case keys in the persisted shape.
 */
interface TrajectoryData {
  /** ISO-8601 timestamp of when the logger was constructed. */
  timestamp: string;
  /** The user query being retrieved for. */
  query: string;
  /** Pre-computed classification of the query. */
  classification: ClassificationResult;
  /** Item IDs pinned by the caller; omitted entirely when empty. */
  preselectedItemIds?: string[];
  /** One slim entry per retrieval step (see recordStep). */
  steps: {
    step_number: number;
    text: string;
    tool_calls: { name: string; id: string; input: any }[];
    chunks_retrieved: number;
    dynamic_tools_created: string[];
    tokens: number;
  }[];
  /** Run totals, populated by finalize(). */
  final: {
    total_chunks: number;
    total_steps: number;
    total_tokens: number;
    duration_ms: number;
    success: boolean;
    error?: string;
  };
}
46
+
47
/**
 * Collects the trace of an agentic retrieval run and writes it to disk as a
 * pair of files: `trajectory_<ts>.json` (slim machine-readable record) and
 * `trajectory_<ts>.md` (human-readable report).
 *
 * Two parallel records are kept:
 * - `data.steps` — slim entries (recordStep) serialized to the JSON file.
 * - `richSteps` — full step data (recordRichStep) used only for the markdown.
 */
export class TrajectoryLogger {
  // Slim trajectory record serialized to JSON in finalize().
  private data: TrajectoryData;
  // Full per-step capture, consumed only by toMarkdown().
  private richSteps: TrajectoryStepData[] = [];
  // Wall-clock start; finalize() derives duration_ms from this.
  private startTime = Date.now();
  // Directory where trajectory files are written (created on demand).
  private logDir: string;

  /**
   * @param query - The user query being retrieved for.
   * @param classification - Pre-computed classification of the query.
   * @param logDir - Target directory for log files; defaults to
   *   `<cwd>/ee/agentic-retrieval/logs`.
   * @param preselectedItemIds - Optional pinned item IDs; an empty array is
   *   normalized to `undefined` so the key is omitted from the JSON output.
   */
  constructor(
    query: string,
    classification: ClassificationResult,
    logDir = path.join(process.cwd(), "ee/agentic-retrieval/logs"),
    preselectedItemIds?: string[],
  ) {
    this.logDir = logDir;
    this.data = {
      timestamp: new Date().toISOString(),
      query,
      classification,
      preselectedItemIds: preselectedItemIds?.length ? preselectedItemIds : undefined,
      steps: [],
      final: {
        total_chunks: 0,
        total_steps: 0,
        total_tokens: 0,
        duration_ms: 0,
        success: false,
      },
    };
  }

  /**
   * Records a slim snapshot of one retrieval step into the JSON record.
   * Chunk contents are reduced to a count; tool outputs are not stored here.
   */
  recordStep(step: AgenticRetrievalOutput["steps"][0]): void {
    this.data.steps.push({
      step_number: step.stepNumber,
      text: step.text,
      tool_calls: step.toolCalls,
      chunks_retrieved: step.chunks.length,
      dynamic_tools_created: step.dynamicToolsCreated,
      tokens: step.tokens,
    });
  }

  /** Records the full step capture used to render the markdown report. */
  recordRichStep(data: TrajectoryStepData): void {
    this.richSteps.push(data);
  }

  /**
   * Renders the full markdown report from `richSteps`.
   *
   * NOTE(review): totals here are computed from `richSteps`, while
   * `data.final` (the JSON record) takes totals from the caller-supplied
   * output in finalize() — the two can disagree if callers record steps
   * inconsistently; confirm both recording paths are always used together.
   *
   * NOTE(review): values interpolated into markdown tables (item names,
   * error messages) are not escaped — a `|` or newline in them would break
   * the table layout.
   */
  private toMarkdown(durationMs: number, success: boolean, error?: Error): string {
    const totalTokens = this.richSteps.reduce((sum, s) => sum + s.tokens, 0);
    const totalChunks = this.richSteps.reduce((sum, s) => sum + s.chunks.length, 0);
    const status = success ? "✓ Success" : `✗ Failed${error ? `: ${error.message}` : ""}`;
    const lines: string[] = [];

    // ── Header ──────────────────────────────────────────────────────────────
    lines.push(`# Agentic Retrieval — ${this.data.timestamp}`);
    lines.push("");
    // NOTE(review): single trailing space — presumably intended as a markdown
    // hard line break, which requires two trailing spaces; confirm.
    lines.push(`**Query:** ${this.data.query} `);
    lines.push(
      `**Duration:** ${(durationMs / 1000).toFixed(1)}s | **Tokens:** ${totalTokens} | **Status:** ${status}`,
    );
    lines.push("");

    // ── Classification ───────────────────────────────────────────────────────
    lines.push("## Classification");
    lines.push("");
    lines.push(`- **Type:** \`${this.data.classification.queryType}\``);
    lines.push(`- **Language:** \`${this.data.classification.language}\``);
    const suggested = this.data.classification.suggestedContextIds;
    lines.push(
      `- **Suggested contexts:** ${suggested.length > 0 ? suggested.map((id) => `\`${id}\``).join(", ") : "*(all)*"}`,
    );
    if (this.data.preselectedItemIds?.length) {
      lines.push(
        `- **Preselected item IDs:** ${this.data.preselectedItemIds.map((id) => `\`${id}\``).join(", ")}`,
      );
    }
    lines.push("");
    lines.push("---");
    lines.push("");

    // ── System prompt (from step 1, collapsed) ───────────────────────────────
    const firstStep = this.richSteps[0];
    if (firstStep) {
      lines.push("## System Prompt");
      lines.push("");
      lines.push("<details>");
      lines.push("<summary>View system prompt</summary>");
      lines.push("");
      lines.push("```");
      lines.push(firstStep.systemPrompt);
      lines.push("```");
      lines.push("");
      lines.push("</details>");
      lines.push("");
      lines.push("---");
      lines.push("");
    }

    // ── Steps ────────────────────────────────────────────────────────────────
    for (const step of this.richSteps) {
      // Heading lists the tools called this step, or a placeholder when none.
      const toolLabel =
        step.toolCalls.map((tc) => `\`${tc.name}\``).join(", ") || "*(no tool calls)*";
      lines.push(`## Step ${step.stepNumber} — ${toolLabel}`);
      lines.push("");
      const dynLabel =
        step.dynamicToolsCreated.length > 0
          ? step.dynamicToolsCreated.map((t) => `\`${t}\``).join(", ")
          : "none";
      lines.push(
        `**Tokens:** ${step.tokens} | **Chunks retrieved:** ${step.chunks.length} | **Dynamic tools created:** ${dynLabel}`,
      );
      lines.push("");

      // Reasoning
      if (step.text) {
        lines.push("### Reasoning");
        lines.push("");
        lines.push(step.text);
        lines.push("");
      }

      // Tool calls
      if (step.toolCalls.length > 0) {
        lines.push("### Tool Calls");
        lines.push("");
        for (const [i, tc] of step.toolCalls.entries()) {
          lines.push(`#### ${i + 1}. \`${tc.name}\``);
          lines.push("");
          lines.push("**Input:**");
          lines.push("```json");
          lines.push(JSON.stringify(tc.input, null, 2));
          lines.push("```");
          lines.push("");

          if (tc.output !== undefined) {
            // Pretty-print string outputs that are valid JSON; fall back to
            // the raw value when parsing fails or it's already an object.
            let parsedOutput: any;
            try {
              parsedOutput =
                typeof tc.output === "string" ? JSON.parse(tc.output) : tc.output;
            } catch {
              parsedOutput = tc.output;
            }
            const outputStr = JSON.stringify(parsedOutput, null, 2);
            // Cap tool output at 2000 chars to keep the report readable.
            const truncated = outputStr.length > 2000;
            lines.push("**Output:**");
            lines.push("```json");
            lines.push(truncated ? `${outputStr.slice(0, 2000)}\n… (truncated)` : outputStr);
            lines.push("```");
            lines.push("");
          }
        }
      }

      // Chunks table
      if (step.chunks.length > 0) {
        lines.push("### Chunks Retrieved");
        lines.push("");
        lines.push("| # | Item | Context | Chunk | Score |");
        lines.push("|---|------|---------|-------|-------|");
        for (const [i, c] of step.chunks.entries()) {
          // Prefer hybrid score, then cosine distance, then FTS rank.
          const score =
            c.metadata?.hybrid_score ??
            c.metadata?.cosine_distance ??
            c.metadata?.fts_rank ??
            "—";
          const scoreStr = typeof score === "number" ? score.toFixed(4) : String(score);
          lines.push(
            `| ${i + 1} | ${c.item_name ?? "—"} | \`${c.context}\` | ${c.chunk_index ?? "—"} | ${scoreStr} |`,
          );
        }
        lines.push("");

        // Full chunk text goes into a collapsed blockquote section.
        const withContent = step.chunks.filter((c) => c.chunk_content);
        if (withContent.length > 0) {
          lines.push("<details>");
          lines.push("<summary>View chunk content</summary>");
          lines.push("");
          for (const c of withContent) {
            lines.push(`**${c.item_name} (chunk ${c.chunk_index}):**`);
            lines.push("");
            const content = (c.chunk_content ?? "").trim();
            // Prefix every line with "> " so multi-line chunks stay quoted.
            lines.push(`> ${content.split("\n").join("\n> ")}`);
            lines.push("");
          }
          lines.push("</details>");
          lines.push("");
        }
      }

      // Per-step system prompt addendum (only when it differs from step 1)
      // NOTE(review): slicing at firstStep.systemPrompt.length assumes later
      // prompts extend step 1's prompt as a strict prefix — if a later prompt
      // diverges earlier, this yields garbage; confirm the upstream contract.
      if (firstStep && step.stepNumber > 1 && step.systemPrompt !== firstStep.systemPrompt) {
        const addendum = step.systemPrompt.slice(firstStep.systemPrompt.length).trim();
        if (addendum) {
          lines.push("<details>");
          lines.push("<summary>System prompt addendum (this step only)</summary>");
          lines.push("");
          lines.push("```");
          lines.push(addendum);
          lines.push("```");
          lines.push("");
          lines.push("</details>");
          lines.push("");
        }
      }

      lines.push("---");
      lines.push("");
    }

    // ── Summary ──────────────────────────────────────────────────────────────
    lines.push("## Summary");
    lines.push("");
    lines.push("| Metric | Value |");
    lines.push("|--------|-------|");
    lines.push(`| Steps | ${this.richSteps.length} |`);
    lines.push(`| Total chunks | ${totalChunks} |`);
    lines.push(`| Total tokens | ${totalTokens} |`);
    lines.push(`| Duration | ${(durationMs / 1000).toFixed(1)}s |`);
    lines.push(`| Status | ${status} |`);
    if (error) {
      lines.push(`| Error | ${error.message} |`);
    }
    lines.push("");

    return lines.join("\n");
  }

  /**
   * Finalizes the run: fills `data.final` from the supplied output, and — when
   * `writeFiles` is true — writes the JSON and markdown trajectory files.
   *
   * Write failures are logged and swallowed (never thrown), so logging can't
   * break the retrieval pipeline.
   *
   * @param output - The completed retrieval output (provides run totals).
   * @param success - Whether the run succeeded.
   * @param error - Optional error for failed runs; its message is recorded.
   * @param writeFiles - When false (default), nothing is written to disk.
   * @returns The JSON file path on a successful write, otherwise undefined.
   */
  async finalize(
    output: AgenticRetrievalOutput,
    success: boolean,
    error?: Error,
    writeFiles = false,
  ): Promise<string | undefined> {
    const durationMs = Date.now() - this.startTime;

    this.data.final = {
      total_chunks: output.chunks.length,
      total_steps: output.steps.length,
      total_tokens: output.totalTokens,
      duration_ms: durationMs,
      success,
      error: error?.message,
    };

    if (!writeFiles) return undefined;

    try {
      await fs.mkdir(this.logDir, { recursive: true });
      const ts = Date.now();
      const jsonPath = path.join(this.logDir, `trajectory_${ts}.json`);
      const mdPath = path.join(this.logDir, `trajectory_${ts}.md`);

      await Promise.all([
        fs.writeFile(jsonPath, JSON.stringify(this.data, null, 2), "utf-8"),
        fs.writeFile(mdPath, this.toMarkdown(durationMs, success, error), "utf-8"),
      ]);

      console.log(`[EXULU] v3 trajectory saved: trajectory_${ts}.json + trajectory_${ts}.md`);
      // Publish the JSON path so other components can find the latest log.
      trajectoryRegistry.lastFile = jsonPath;
      return jsonPath;
    } catch (e) {
      console.error("[EXULU] v3 failed to write trajectory:", e);
      return undefined;
    }
  }
}
@@ -0,0 +1,59 @@
1
/** Coarse category of a user query, used to steer the retrieval strategy. */
export type QueryType = "aggregate" | "list" | "targeted" | "exploratory";

/** Result of classifying an incoming query before retrieval begins. */
export interface ClassificationResult {
  queryType: QueryType;
  /** Language of the query — presumably a language name or code; confirm format. */
  language: string;
  /** IDs of contexts most likely relevant. Empty means search all. */
  suggestedContextIds: string[];
}

/** Snapshot of a context's item schema plus a couple of example records. */
export interface ContextSample {
  contextId: string;
  contextName: string;
  /** All field names available on items (standard + custom) */
  fields: string[];
  /** Up to 2 example item records */
  exampleItems: Array<Record<string, any>>;
  /** When the sample was taken — presumably an epoch-ms timestamp; confirm. */
  sampledAt: number;
}

/** A single retrieved chunk with its source item/context and scoring metadata. */
export interface ChunkResult {
  item_name: string;
  item_id: string;
  context: string;
  chunk_id?: string;
  /** Position of the chunk within its item, when chunked. */
  chunk_index?: number;
  /** Full chunk text, when the retrieval path returns content. */
  chunk_content?: string;
  /** Scoring/debug metadata (e.g. hybrid_score, cosine_distance, fts_rank). */
  metadata?: Record<string, any>;
}

/** Slim record of one agentic retrieval step (tool outputs not included). */
export interface RetrievalStep {
  stepNumber: number;
  /** Text the model output during this step (reasoning) */
  text: string;
  toolCalls: Array<{ name: string; id: string; input: any }>;
  chunks: ChunkResult[];
  dynamicToolsCreated: string[];
  tokens: number;
}

/** Reasoning text plus the tool round-trips (including outputs) behind it. */
interface Reasoning {
  text: string;
  tools: {
    name: string;
    id: string;
    input: any;
    output: any;
  }[]
}

/** Aggregate result of a complete agentic retrieval run. */
export interface AgenticRetrievalOutput {
  steps: RetrievalStep[];
  reasoning: Reasoning[];
  /** All chunks collected across all steps */
  chunks: ChunkResult[];
  /** Raw provider usage records — shape provider-specific; confirm before relying on it. */
  usage: any[];
  totalTokens: number;
  /** Path to the trajectory JSON file written to disk, if any */
  trajectoryFile?: string;
}
@@ -516,7 +516,9 @@ export class MarkdownChunker {
516
516
  return newHeaders;
517
517
  }
518
518
 
519
- public async chunk(text: string, chunkSize: number, prefix?: string): Promise<{
519
+ public async chunk(text: string, chunkSize: number, prefix?: string, config?: {
520
+ pageBreakTags?: boolean;
521
+ }): Promise<{
520
522
  text: string;
521
523
  page: number;
522
524
  }[]> {
@@ -684,7 +686,7 @@ export class MarkdownChunker {
684
686
  finalText = headerPrefixText + '\n\n' + currentSlice;
685
687
  }
686
688
 
687
- if (currentPage) {
689
+ if (currentPage && config?.pageBreakTags) {
688
690
  finalText = `<!-- Current page: ${currentPage} -->\n\n` + finalText;
689
691
  }
690
692
 
@@ -0,0 +1,119 @@
1
+ import {
2
+ SandboxManager,
3
+ type SandboxRuntimeConfig,
4
+ } from '@anthropic-ai/sandbox-runtime'
5
+ import { mkdir, rm, writeFile } from 'node:fs/promises'
6
+ import { join, dirname } from 'node:path'
7
+ import { listS3ObjectsByPrefix, getS3ObjectContent } from '@SRC/uppy/index.ts'
8
+ import type { ExuluConfig } from '@SRC/exulu/app/index.ts'
9
+
10
+ import { tool } from 'ai'
11
+ import { z } from 'zod'
12
+
13
+ // This is called on every session where a skill is enabled
14
+ // each sandbox setup includes the skill files from the enabled
15
+ // skills, and uses the Anthropic Sandbox Runtime (SRT) to
16
+ // limit read and write scopes.
17
+
18
/** Minimal reference to a stored skill and the version whose files to load. */
export interface SkillRef {
  id: string
  /** Display name; also used as the on-disk folder name inside the sandbox. */
  name: string
  // NOTE(review): s3folder is not read by createSkillSandbox (the S3 prefix is
  // built from id + current_version) — confirm it is still needed.
  s3folder: string
  /** Version number to download (S3 prefix `skills/<id>/v<current_version>/`). */
  current_version: number
}

/** Handle to a per-session skill sandbox created by createSkillSandbox(). */
export interface SkillSandboxHandle {
  /** Absolute path to the session's temporary directory, containing all downloaded skill files. */
  sessionDir: string
  /** Wraps a shell command string so it runs inside the sandbox. */
  wrapCommand: (command: string) => Promise<string>
  /** Tears down the sandbox and deletes the session directory. */
  cleanup: () => Promise<void>
}
33
+
34
+ /**
35
+ * Creates a sandboxed environment for a session:
36
+ * 1. Creates a temp directory at /tmp/exulu-sessions/<sessionId>
37
+ * 2. Downloads all files for each enabled skill into <sessionDir>/skills/<skillName>/
38
+ * 3. Initialises the SRT SandboxManager with filesystem access scoped to sessionDir only
39
+ * and no network access
40
+ */
41
+ export async function createSkillSandbox(
42
+ sessionId: string,
43
+ skills: SkillRef[],
44
+ config: ExuluConfig,
45
+ ): Promise<SkillSandboxHandle> {
46
+ const sessionDir = join('/tmp', 'exulu-sessions', sessionId)
47
+
48
+ await mkdir(sessionDir, { recursive: true })
49
+
50
+ const skillsDirectory = join(sessionDir, 'skills')
51
+
52
+ // Download each skill's files from S3 into the session directory
53
+ for (const skill of skills) {
54
+ const versionPrefix = `skills/${skill.id}/v${skill.current_version}/`
55
+ const files = await listS3ObjectsByPrefix(versionPrefix, config)
56
+
57
+ for (const file of files) {
58
+ // Extract the path relative to the version prefix, accounting for any S3 general prefix
59
+ const prefixIndex = file.key.indexOf(versionPrefix)
60
+ const relativePath = prefixIndex >= 0
61
+ ? file.key.slice(prefixIndex + versionPrefix.length)
62
+ : file.key
63
+
64
+ if (!relativePath) continue // skip directory markers
65
+
66
+ const localPath = join(skillsDirectory, skill.name, relativePath)
67
+ await mkdir(dirname(localPath), { recursive: true })
68
+
69
+ const content = await getS3ObjectContent(file.key, config)
70
+ await writeFile(localPath, content, 'utf-8')
71
+ }
72
+ }
73
+
74
+ const sandboxConfig: SandboxRuntimeConfig = {
75
+ network: {
76
+ allowedDomains: [], // todo
77
+ deniedDomains: [], // todo
78
+ },
79
+ filesystem: {
80
+ // Deny reads to home directory but re-allow only the session folder.
81
+ // System paths (/usr, /lib, etc.) remain readable for process execution.
82
+ denyRead: ['~'],
83
+ allowRead: [sessionDir],
84
+ // Write access is scoped exclusively to the session folder.
85
+ allowWrite: [sessionDir],
86
+ denyWrite: [],
87
+ },
88
+ }
89
+
90
+ await SandboxManager.initialize(sandboxConfig)
91
+
92
+ // Todo proper instructions to use skills
93
+
94
+ /* const bashTool = function createBashTool() {
95
+ return tool({
96
+ description: `
97
+ Execute bash commands inside the sandbox.
98
+ Examples (not exhaustive): ls, cat, less, head, tail, grep
99
+ `,
100
+ inputSchema: z.object({
101
+ command: z.string().describe('The bash command to execute'),
102
+ args: z.array(z.string()).describe('Arguments to pass to the command')
103
+ }),
104
+ execute: async ({ command, args }) => {
105
+ // code that executes when the tool is called
106
+ return await SandboxManager.wrapWithSandbox(command)
107
+ }
108
+ });
109
+ } */
110
+
111
+ return {
112
+ sessionDir,
113
+ wrapCommand: (command: string) => SandboxManager.wrapWithSandbox(command),
114
+ cleanup: async () => {
115
+ await SandboxManager.reset()
116
+ await rm(sessionDir, { recursive: true, force: true })
117
+ },
118
+ }
119
+ }
@@ -94,6 +94,84 @@ async function processWord(file: Buffer): Promise<ProcessorOutput> {
94
94
  }
95
95
  }
96
96
 
97
+ /**
98
+ * Processes a standalone image file by optionally extracting content using VLM
99
+ */
100
+ async function processImage(
101
+ buffer: Buffer,
102
+ paths: ProcessingPaths,
103
+ config?: DocumentProcessorConfig,
104
+ verbose: boolean = false,
105
+ ): Promise<ProcessorOutput> {
106
+ try {
107
+ // Create images directory
108
+ await fs.promises.mkdir(paths.images, { recursive: true });
109
+
110
+ // Save the image
111
+ const imagePath = path.join(paths.images, '1.png');
112
+ await fs.promises.writeFile(imagePath, buffer);
113
+
114
+ console.log(`[EXULU] Image saved to: ${imagePath}`);
115
+
116
+ // Create initial ProcessedDocument with minimal content
117
+ let json: ProcessedDocument = [{
118
+ page: 1,
119
+ content: '', // Empty initially, will be populated by VLM if enabled
120
+ image: imagePath,
121
+ headings: [],
122
+ }];
123
+
124
+ // If VLM is enabled, use it to extract content from the image
125
+ if (config?.vlm?.model) {
126
+ console.log('[EXULU] Extracting content from image using VLM...');
127
+
128
+ json = await validateWithVLM(
129
+ json,
130
+ config.vlm.model,
131
+ verbose,
132
+ config.vlm.concurrency
133
+ );
134
+
135
+ // Save the processed result
136
+ await fs.promises.writeFile(
137
+ paths.json,
138
+ JSON.stringify(json, null, 2),
139
+ 'utf-8'
140
+ );
141
+
142
+ console.log('[EXULU] VLM content extraction complete');
143
+
144
+ const correctedCount = json.filter(p => p.vlm_corrected_text).length;
145
+ console.log(`[EXULU] Content extracted: ${correctedCount > 0 ? 'Yes' : 'No'}`);
146
+ } else {
147
+ console.log('[EXULU] No VLM configured, image saved without content extraction');
148
+ console.log('[EXULU] Note: Enable VLM in config to extract text/content from images');
149
+
150
+ // Save empty result
151
+ await fs.promises.writeFile(
152
+ paths.json,
153
+ JSON.stringify(json, null, 2),
154
+ 'utf-8'
155
+ );
156
+ }
157
+
158
+ // Build markdown from content
159
+ const markdown = json.map(p => p.vlm_corrected_text ?? p.content).join('\n\n\n<!-- END_OF_PAGE -->\n\n\n');
160
+
161
+ // Save markdown
162
+ await fs.promises.writeFile(paths.markdown, markdown, 'utf-8');
163
+
164
+ return {
165
+ markdown: markdown,
166
+ json: json,
167
+ };
168
+
169
+ } catch (error) {
170
+ console.error('[EXULU] Error processing image:', error);
171
+ throw error;
172
+ }
173
+ }
174
+
97
175
  /**
98
176
  * Normalizes markdown content by removing excessive whitespace,
99
177
  * especially in table formatting.
@@ -307,6 +385,8 @@ If the page contains a flow-chart, schematic, technical drawing or control board
307
385
  reasoning: parsedOutput.reasoning,
308
386
  };
309
387
 
388
+ console.log(`[EXULU] VLM validation result: ${JSON.stringify(validation)}`);
389
+
310
390
  return validation;
311
391
  }
312
392
 
@@ -413,31 +493,36 @@ async function validateWithVLM(
413
493
 
414
494
  const imagePath = page.image;
415
495
 
416
- if (!page.content) {
417
- console.warn(`[EXULU] Page ${page.page}: No content found, skipping validation`);
418
- return;
419
- }
420
-
421
496
  if (!imagePath) {
422
497
  console.warn(`[EXULU] Page ${page.page}: No image found, skipping validation`);
423
498
  return;
424
499
  }
425
500
 
426
- // Check if page.content has a .jpeg, .jpg, .png, .gif, .webp image
427
- const hasImage = page.content.match(/\.(jpeg|jpg|png|gif|webp)/i);
428
- // Check if the content has multiple occurences of |
429
- const hasTable = (page.content.match(/\|/g)?.length || 0) > 1;
501
+ // For standalone images, page.content will be empty initially
502
+ // For PDFs/documents, we check if VLM validation is needed based on content
503
+ if (page.content) {
504
+ // Check if page.content has a .jpeg, .jpg, .png, .gif, .webp image
505
+ const hasImage = page.content.match(/\.(jpeg|jpg|png|gif|webp)/i);
506
+ // Check if the content has multiple occurrences of |
507
+ const hasTable = (page.content.match(/\|/g)?.length || 0) > 1;
430
508
 
431
- if (!hasImage && !hasTable) {
509
+ if (!hasImage && !hasTable) {
510
+ if (verbose) {
511
+ console.log(`[EXULU] Page ${page.page}: No image or table found, SKIPPING VLM validation`);
512
+ }
513
+ return;
514
+ }
515
+ } else {
516
+ // Empty content means this is likely a standalone image that needs content extraction
432
517
  if (verbose) {
433
- console.log(`[EXULU] Page ${page.page}: No image or table found, SKIPPING VLM validation`);
518
+ console.log(`[EXULU] Page ${page.page}: Standalone image, proceeding with VLM content extraction`);
434
519
  }
435
- return;
436
520
  }
437
521
 
438
522
  // Validate the page
439
523
  let validation: VLMValidationResult;
440
524
  try {
525
+ console.log(`[EXULU] Validating page ${page.page} with VLM`);
441
526
  validation = await withRetry(async () => {
442
527
  return await validatePageWithVLM(page, imagePath, model);
443
528
  }, 3);
@@ -573,6 +658,13 @@ async function processDocument(
573
658
  case 'doc':
574
659
  result = await processWord(buffer);
575
660
  break;
661
+ case 'jpg':
662
+ case 'jpeg':
663
+ case 'png':
664
+ case 'gif':
665
+ case 'webp':
666
+ result = await processImage(buffer, paths, config, verbose);
667
+ break;
576
668
 
577
669
  // Todo other file types with docx and officeparser
578
670
  default:
@@ -922,7 +1014,7 @@ export async function documentProcessor({
922
1014
  let supportedTypes: string[] = [];
923
1015
  switch (config?.processor.name) {
924
1016
  case "docling":
925
- supportedTypes = ['pdf', 'docx', 'doc', 'txt', 'md'];
1017
+ supportedTypes = ['pdf', 'docx', 'doc', 'txt', 'md', 'jpg', 'jpeg', 'png', 'gif', 'webp'];
926
1018
  break;
927
1019
  case "officeparser":
928
1020
  supportedTypes = [];
@@ -931,7 +1023,7 @@ export async function documentProcessor({
931
1023
  supportedTypes = ['pdf', 'doc', 'docx', 'docm', 'odt', 'rtf', 'ppt', 'pptx', 'pptm', 'odp', 'xls', 'xlsx', 'xlsm', 'ods', 'csv', 'tsv'];
932
1024
  break;
933
1025
  case "mistral":
934
- supportedTypes = ['pdf', 'docx', 'doc', 'txt', 'md'];
1026
+ supportedTypes = ['pdf', 'docx', 'doc', 'txt', 'md', 'jpg', 'jpeg', 'png', 'gif', 'webp'];
935
1027
  break;
936
1028
  }
937
1029
 
package/ee/workers.ts CHANGED
@@ -1311,7 +1311,7 @@ const pollJobResult = async ({
1311
1311
  break;
1312
1312
  }
1313
1313
  // Wait for 2 seconds before polling again
1314
- await new Promise((resolve) => setTimeout((resolve) => resolve(true), 2000));
1314
+ await new Promise((resolve) => setTimeout(() => resolve(true), 2000));
1315
1315
  }
1316
1316
  return result;
1317
1317
  };