@roj-ai/sdk 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bootstrap.d.ts +13 -0
- package/dist/bootstrap.d.ts.map +1 -1
- package/dist/bootstrap.js +3 -1
- package/dist/bootstrap.js.map +1 -1
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +3 -0
- package/dist/config.js.map +1 -1
- package/dist/core/agents/agent.d.ts +25 -1
- package/dist/core/agents/agent.d.ts.map +1 -1
- package/dist/core/agents/agent.js +117 -21
- package/dist/core/agents/agent.js.map +1 -1
- package/dist/core/agents/config.d.ts +7 -0
- package/dist/core/agents/config.d.ts.map +1 -1
- package/dist/core/agents/context.d.ts +10 -0
- package/dist/core/agents/context.d.ts.map +1 -1
- package/dist/core/agents/state.d.ts +11 -3
- package/dist/core/agents/state.d.ts.map +1 -1
- package/dist/core/agents/state.js.map +1 -1
- package/dist/core/file-store/file-store.d.ts +5 -1
- package/dist/core/file-store/file-store.d.ts.map +1 -1
- package/dist/core/file-store/file-store.js +31 -21
- package/dist/core/file-store/file-store.js.map +1 -1
- package/dist/core/image/vips-resizer.test.js +26 -14
- package/dist/core/image/vips-resizer.test.js.map +1 -1
- package/dist/core/llm/anthropic.d.ts.map +1 -1
- package/dist/core/llm/anthropic.js +11 -8
- package/dist/core/llm/anthropic.js.map +1 -1
- package/dist/core/llm/cache-breakpoints.d.ts +5 -1
- package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
- package/dist/core/llm/cache-breakpoints.js +10 -5
- package/dist/core/llm/cache-breakpoints.js.map +1 -1
- package/dist/core/sessions/session.d.ts.map +1 -1
- package/dist/core/sessions/session.js +10 -0
- package/dist/core/sessions/session.js.map +1 -1
- package/dist/core/sessions/session.test.js +5 -0
- package/dist/core/sessions/session.test.js.map +1 -1
- package/dist/core/sessions/state.d.ts.map +1 -1
- package/dist/core/sessions/state.js +5 -1
- package/dist/core/sessions/state.js.map +1 -1
- package/dist/core/tools/executor.test.js +1 -0
- package/dist/core/tools/executor.test.js.map +1 -1
- package/dist/lib/utils/concurrency.d.ts +25 -0
- package/dist/lib/utils/concurrency.d.ts.map +1 -0
- package/dist/lib/utils/concurrency.js +69 -0
- package/dist/lib/utils/concurrency.js.map +1 -0
- package/dist/lib/utils/concurrency.test.d.ts +2 -0
- package/dist/lib/utils/concurrency.test.d.ts.map +1 -0
- package/dist/lib/utils/concurrency.test.js +135 -0
- package/dist/lib/utils/concurrency.test.js.map +1 -0
- package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
- package/dist/plugins/agent-status/plugin.js +18 -26
- package/dist/plugins/agent-status/plugin.js.map +1 -1
- package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
- package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
- package/dist/plugins/context-compact/compaction-live.test.js +177 -0
- package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
- package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
- package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
- package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
- package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
- package/dist/plugins/context-compact/context-compactor.js +60 -36
- package/dist/plugins/context-compact/context-compactor.js.map +1 -1
- package/dist/plugins/context-compact/context-compactor.test.js +69 -103
- package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
- package/dist/plugins/context-compact/plugin.d.ts +9 -2
- package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
- package/dist/plugins/context-compact/plugin.js +8 -4
- package/dist/plugins/context-compact/plugin.js.map +1 -1
- package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
- package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
- package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
- package/dist/plugins/filesystem/plugin.js +8 -6
- package/dist/plugins/filesystem/plugin.js.map +1 -1
- package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
- package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
- package/dist/plugins/resources/plugin.d.ts.map +1 -1
- package/dist/plugins/resources/plugin.js +4 -1
- package/dist/plugins/resources/plugin.js.map +1 -1
- package/dist/plugins/uploads/plugin.d.ts +12 -0
- package/dist/plugins/uploads/plugin.d.ts.map +1 -1
- package/dist/plugins/uploads/plugin.js +188 -44
- package/dist/plugins/uploads/plugin.js.map +1 -1
- package/dist/plugins/uploads/preprocessors/image-classifier.d.ts +9 -0
- package/dist/plugins/uploads/preprocessors/image-classifier.d.ts.map +1 -1
- package/dist/plugins/uploads/preprocessors/image-classifier.js +4 -1
- package/dist/plugins/uploads/preprocessors/image-classifier.js.map +1 -1
- package/dist/plugins/uploads/preprocessors/image-classifier.test.d.ts +2 -0
- package/dist/plugins/uploads/preprocessors/image-classifier.test.d.ts.map +1 -0
- package/dist/plugins/uploads/preprocessors/image-classifier.test.js +113 -0
- package/dist/plugins/uploads/preprocessors/image-classifier.test.js.map +1 -0
- package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.d.ts.map +1 -1
- package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js +8 -7
- package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js.map +1 -1
- package/dist/plugins/uploads/preprocessors/zip-preprocessor.d.ts.map +1 -1
- package/dist/plugins/uploads/preprocessors/zip-preprocessor.js +35 -15
- package/dist/plugins/uploads/preprocessors/zip-preprocessor.js.map +1 -1
- package/dist/plugins/uploads/state.d.ts +1 -0
- package/dist/plugins/uploads/state.d.ts.map +1 -1
- package/dist/plugins/uploads/state.js +1 -1
- package/dist/plugins/uploads/state.js.map +1 -1
- package/dist/plugins/uploads/uploads.integration.test.js +97 -0
- package/dist/plugins/uploads/uploads.integration.test.js.map +1 -1
- package/dist/plugins/user-chat/plugin.d.ts +2 -0
- package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
- package/dist/plugins/user-chat/plugin.js +47 -3
- package/dist/plugins/user-chat/plugin.js.map +1 -1
- package/dist/plugins/user-chat/schema.d.ts +10 -0
- package/dist/plugins/user-chat/schema.d.ts.map +1 -1
- package/dist/plugins/user-chat/schema.js +1 -0
- package/dist/plugins/user-chat/schema.js.map +1 -1
- package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
- package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
- package/dist/transport/http/routes/upload.d.ts.map +1 -1
- package/dist/transport/http/routes/upload.js +60 -0
- package/dist/transport/http/routes/upload.js.map +1 -1
- package/package.json +2 -2
- package/src/bootstrap.ts +3 -1
- package/src/config.ts +6 -0
- package/src/core/agents/agent.ts +134 -20
- package/src/core/agents/config.ts +7 -0
- package/src/core/agents/context.ts +11 -0
- package/src/core/agents/state.ts +11 -4
- package/src/core/file-store/file-store.ts +38 -18
- package/src/core/image/vips-resizer.test.ts +26 -15
- package/src/core/llm/anthropic.ts +19 -12
- package/src/core/llm/cache-breakpoints.ts +15 -6
- package/src/core/sessions/session.test.ts +6 -0
- package/src/core/sessions/session.ts +12 -0
- package/src/core/sessions/state.ts +5 -1
- package/src/core/tools/executor.test.ts +1 -0
- package/src/lib/utils/concurrency.test.ts +169 -0
- package/src/lib/utils/concurrency.ts +72 -0
- package/src/plugins/agent-status/plugin.ts +18 -25
- package/src/plugins/context-compact/compaction-live.test.ts +221 -0
- package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
- package/src/plugins/context-compact/context-compactor.test.ts +71 -110
- package/src/plugins/context-compact/context-compactor.ts +88 -43
- package/src/plugins/context-compact/plugin.ts +19 -10
- package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
- package/src/plugins/filesystem/plugin.ts +8 -6
- package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
- package/src/plugins/resources/plugin.ts +4 -1
- package/src/plugins/uploads/plugin.ts +212 -47
- package/src/plugins/uploads/preprocessors/image-classifier.test.ts +142 -0
- package/src/plugins/uploads/preprocessors/image-classifier.ts +13 -1
- package/src/plugins/uploads/preprocessors/markitdown-preprocessor.ts +8 -8
- package/src/plugins/uploads/preprocessors/zip-preprocessor.ts +37 -17
- package/src/plugins/uploads/state.ts +1 -1
- package/src/plugins/uploads/uploads.integration.test.ts +123 -0
- package/src/plugins/user-chat/plugin.ts +60 -3
- package/src/plugins/user-chat/schema.ts +10 -1
- package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
- package/src/transport/http/routes/upload.ts +87 -0
|
@@ -2,11 +2,24 @@ import z from 'zod/v4'
|
|
|
2
2
|
import { ValidationErrors } from '~/core/errors.js'
|
|
3
3
|
import type { FileStore } from '~/core/file-store/types.js'
|
|
4
4
|
import { definePlugin } from '~/core/plugins/plugin-builder.js'
|
|
5
|
+
import { SessionId } from '~/core/sessions/schema.js'
|
|
5
6
|
import { Err, Ok } from '~/lib/utils/result.js'
|
|
6
7
|
import type { PreprocessorRegistry } from './preprocessor.js'
|
|
7
8
|
import { generateUploadId, type MessageAttachment, UploadId, type UploadMetadata } from './schema.js'
|
|
8
9
|
import { type PendingUpload, uploadEvents, type UploadsState } from './state.js'
|
|
9
10
|
|
|
11
|
+
// ============================================================================
|
|
12
|
+
// Notification schemas
|
|
13
|
+
// ============================================================================
|
|
14
|
+
|
|
15
|
+
const statusChangedSchema = z.object({
|
|
16
|
+
sessionId: z.string(),
|
|
17
|
+
uploadId: z.string(),
|
|
18
|
+
status: z.enum(['processing', 'ready', 'failed']),
|
|
19
|
+
extractedContent: z.string().optional(),
|
|
20
|
+
error: z.string().optional(),
|
|
21
|
+
})
|
|
22
|
+
|
|
10
23
|
// ============================================================================
|
|
11
24
|
// Constants
|
|
12
25
|
// ============================================================================
|
|
@@ -66,6 +79,69 @@ function formatUploadsForLLM(uploads: PendingUpload[], sessionRoot: string): str
|
|
|
66
79
|
return blocks.join('\n')
|
|
67
80
|
}
|
|
68
81
|
|
|
82
|
+
/**
|
|
83
|
+
* Run preprocessor (with timeout) and persist final upload metadata to disk.
|
|
84
|
+
* Returns the resolved status + extracted/derived data for the caller to emit.
|
|
85
|
+
*/
|
|
86
|
+
async function runPreprocessAndPersist(args: {
|
|
87
|
+
uploadId: string
|
|
88
|
+
sessionId: SessionId
|
|
89
|
+
uploadStore: FileStore
|
|
90
|
+
filePath: string
|
|
91
|
+
filename: string
|
|
92
|
+
mimeType: string
|
|
93
|
+
size: number
|
|
94
|
+
createdAt: number
|
|
95
|
+
preprocessorRegistry?: PreprocessorRegistry
|
|
96
|
+
}): Promise<{
|
|
97
|
+
status: 'ready' | 'failed'
|
|
98
|
+
extractedContent?: string
|
|
99
|
+
derivedPaths?: string[]
|
|
100
|
+
error?: string
|
|
101
|
+
}> {
|
|
102
|
+
const preprocessor = args.preprocessorRegistry?.getForMimeType(args.mimeType)
|
|
103
|
+
|
|
104
|
+
let status: 'ready' | 'failed' = 'ready'
|
|
105
|
+
let extractedContent: string | undefined
|
|
106
|
+
let derivedPaths: string[] | undefined
|
|
107
|
+
let errorMessage: string | undefined
|
|
108
|
+
|
|
109
|
+
if (preprocessor) {
|
|
110
|
+
const processPromise = preprocessor.process(args.filePath, args.mimeType, {
|
|
111
|
+
files: args.uploadStore,
|
|
112
|
+
})
|
|
113
|
+
const timeoutPromise = sleep(PROCESSING_TIMEOUT_MS).then(() => ({
|
|
114
|
+
ok: false as const,
|
|
115
|
+
error: new Error('Processing timeout'),
|
|
116
|
+
}))
|
|
117
|
+
const result = await Promise.race([processPromise, timeoutPromise])
|
|
118
|
+
if (result.ok) {
|
|
119
|
+
extractedContent = result.value.extractedContent
|
|
120
|
+
derivedPaths = result.value.derivedPaths
|
|
121
|
+
} else {
|
|
122
|
+
status = 'failed'
|
|
123
|
+
errorMessage = result.error.message
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
const metadata: UploadMetadata = {
|
|
128
|
+
uploadId: UploadId(args.uploadId),
|
|
129
|
+
sessionId: args.sessionId,
|
|
130
|
+
filename: args.filename,
|
|
131
|
+
mimeType: args.mimeType,
|
|
132
|
+
size: args.size,
|
|
133
|
+
path: args.filePath,
|
|
134
|
+
status,
|
|
135
|
+
extractedContent,
|
|
136
|
+
derivedPaths,
|
|
137
|
+
createdAt: args.createdAt,
|
|
138
|
+
completedAt: Date.now(),
|
|
139
|
+
}
|
|
140
|
+
await args.uploadStore.write('meta.json', JSON.stringify(metadata, null, 2))
|
|
141
|
+
|
|
142
|
+
return { status, extractedContent, derivedPaths, error: errorMessage }
|
|
143
|
+
}
|
|
144
|
+
|
|
69
145
|
// ============================================================================
|
|
70
146
|
// Plugin
|
|
71
147
|
// ============================================================================
|
|
@@ -73,6 +149,7 @@ function formatUploadsForLLM(uploads: PendingUpload[], sessionRoot: string): str
|
|
|
73
149
|
export const uploadsPlugin = definePlugin('uploads')
|
|
74
150
|
.pluginConfig<UploadsPluginConfig>()
|
|
75
151
|
.events([uploadEvents])
|
|
152
|
+
.notification('uploadStatusChanged', { schema: statusChangedSchema })
|
|
76
153
|
.state<UploadsState>({
|
|
77
154
|
key: 'uploads',
|
|
78
155
|
initial: (): UploadsState => ({ pending: [] }),
|
|
@@ -315,91 +392,179 @@ export const uploadsPlugin = definePlugin('uploads')
|
|
|
315
392
|
handler: async (ctx, input) => {
|
|
316
393
|
const { dataFileStore, preprocessorRegistry } = ctx.pluginConfig
|
|
317
394
|
|
|
318
|
-
// Validate
|
|
319
395
|
if (input.size > MAX_FILE_SIZE) {
|
|
320
396
|
return Err(ValidationErrors.invalid(`File too large: max ${MAX_FILE_SIZE / (1024 * 1024)}MB`))
|
|
321
397
|
}
|
|
322
|
-
|
|
323
398
|
if (!isAllowedMimeType(input.mimeType)) {
|
|
324
399
|
return Err(ValidationErrors.invalid(`Unsupported file type: ${input.mimeType}`))
|
|
325
400
|
}
|
|
326
401
|
|
|
327
|
-
// Generate upload ID and scoped store
|
|
328
402
|
const uploadId = generateUploadId()
|
|
329
403
|
const uploadStore = dataFileStore.scoped(`sessions/${input.sessionId}/uploads/${uploadId}`)
|
|
330
404
|
|
|
331
|
-
// Write file to disk
|
|
332
405
|
const writeResult = await uploadStore.write(input.filename, input.fileBuffer)
|
|
333
|
-
|
|
334
406
|
if (!writeResult.ok) {
|
|
335
407
|
return Err(ValidationErrors.invalid('Failed to write file'))
|
|
336
408
|
}
|
|
337
409
|
|
|
338
|
-
const
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
410
|
+
const result = await runPreprocessAndPersist({
|
|
411
|
+
uploadId: String(uploadId),
|
|
412
|
+
sessionId: ctx.sessionId,
|
|
413
|
+
uploadStore,
|
|
414
|
+
filePath: writeResult.value.path,
|
|
415
|
+
filename: input.filename,
|
|
416
|
+
mimeType: input.mimeType,
|
|
417
|
+
size: input.size,
|
|
418
|
+
createdAt: Date.now(),
|
|
419
|
+
preprocessorRegistry,
|
|
420
|
+
})
|
|
344
421
|
|
|
345
|
-
|
|
422
|
+
await ctx.emitEvent(uploadEvents.create('attachment_uploaded', {
|
|
423
|
+
uploadId,
|
|
424
|
+
filename: input.filename,
|
|
425
|
+
mimeType: input.mimeType,
|
|
426
|
+
size: input.size,
|
|
427
|
+
status: result.status,
|
|
428
|
+
extractedContent: result.extractedContent,
|
|
429
|
+
derivedPaths: result.derivedPaths,
|
|
430
|
+
error: result.error,
|
|
431
|
+
}))
|
|
346
432
|
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
433
|
+
return Ok({
|
|
434
|
+
uploadId: String(uploadId),
|
|
435
|
+
status: result.status,
|
|
436
|
+
extractedContent: result.extractedContent,
|
|
437
|
+
})
|
|
438
|
+
},
|
|
439
|
+
})
|
|
440
|
+
.method('uploadAsync', {
|
|
441
|
+
input: z.object({
|
|
442
|
+
sessionId: z.string(),
|
|
443
|
+
filename: z.string(),
|
|
444
|
+
mimeType: z.string(),
|
|
445
|
+
size: z.number(),
|
|
446
|
+
fileBuffer: z.custom<Buffer>(),
|
|
447
|
+
}),
|
|
448
|
+
output: z.object({
|
|
449
|
+
uploadId: z.string(),
|
|
450
|
+
status: z.enum(['processing']),
|
|
451
|
+
}),
|
|
452
|
+
handler: async (ctx, input) => {
|
|
453
|
+
const { dataFileStore, preprocessorRegistry } = ctx.pluginConfig
|
|
351
454
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
455
|
+
if (input.size > MAX_FILE_SIZE) {
|
|
456
|
+
return Err(ValidationErrors.invalid(`File too large: max ${MAX_FILE_SIZE / (1024 * 1024)}MB`))
|
|
457
|
+
}
|
|
458
|
+
if (!isAllowedMimeType(input.mimeType)) {
|
|
459
|
+
return Err(ValidationErrors.invalid(`Unsupported file type: ${input.mimeType}`))
|
|
460
|
+
}
|
|
356
461
|
|
|
357
|
-
|
|
462
|
+
const uploadId = generateUploadId()
|
|
463
|
+
const uploadIdStr = String(uploadId)
|
|
464
|
+
const uploadStore = dataFileStore.scoped(`sessions/${input.sessionId}/uploads/${uploadId}`)
|
|
358
465
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
derivedPaths = result.value.derivedPaths
|
|
363
|
-
} else {
|
|
364
|
-
processingResult = 'failed'
|
|
365
|
-
}
|
|
466
|
+
const writeResult = await uploadStore.write(input.filename, input.fileBuffer)
|
|
467
|
+
if (!writeResult.ok) {
|
|
468
|
+
return Err(ValidationErrors.invalid('Failed to write file'))
|
|
366
469
|
}
|
|
367
470
|
|
|
368
|
-
|
|
369
|
-
const
|
|
370
|
-
|
|
371
|
-
|
|
471
|
+
const filePath = writeResult.value.path
|
|
472
|
+
const createdAt = Date.now()
|
|
473
|
+
|
|
474
|
+
// Persist initial 'processing' metadata so listPending sees it before preprocessor finishes.
|
|
475
|
+
const processingMeta: UploadMetadata = {
|
|
372
476
|
uploadId,
|
|
373
477
|
sessionId: ctx.sessionId,
|
|
374
478
|
filename: input.filename,
|
|
375
479
|
mimeType: input.mimeType,
|
|
376
480
|
size: input.size,
|
|
377
481
|
path: filePath,
|
|
378
|
-
status:
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
createdAt: now,
|
|
382
|
-
completedAt: now,
|
|
482
|
+
status: 'processing',
|
|
483
|
+
createdAt,
|
|
484
|
+
completedAt: createdAt,
|
|
383
485
|
}
|
|
486
|
+
await uploadStore.write('meta.json', JSON.stringify(processingMeta, null, 2))
|
|
384
487
|
|
|
385
|
-
// Save metadata
|
|
386
|
-
await uploadStore.write('meta.json', JSON.stringify(metadata, null, 2))
|
|
387
|
-
|
|
388
|
-
// Emit event
|
|
389
488
|
await ctx.emitEvent(uploadEvents.create('attachment_uploaded', {
|
|
390
489
|
uploadId,
|
|
391
490
|
filename: input.filename,
|
|
392
491
|
mimeType: input.mimeType,
|
|
393
492
|
size: input.size,
|
|
394
|
-
status:
|
|
395
|
-
extractedContent,
|
|
396
|
-
derivedPaths,
|
|
493
|
+
status: 'processing',
|
|
397
494
|
}))
|
|
495
|
+
ctx.notify('uploadStatusChanged', {
|
|
496
|
+
sessionId: input.sessionId,
|
|
497
|
+
uploadId: uploadIdStr,
|
|
498
|
+
status: 'processing',
|
|
499
|
+
})
|
|
500
|
+
|
|
501
|
+
// Capture refs from ctx before the handler returns — `notify`/`emitEvent`
|
|
502
|
+
// closures stay valid for the lifetime of the session, which in roj
|
|
503
|
+
// outlives any single handler call.
|
|
504
|
+
const { emitEvent, notify, logger } = ctx
|
|
505
|
+
const sessionId = ctx.sessionId
|
|
506
|
+
|
|
507
|
+
void (async () => {
|
|
508
|
+
try {
|
|
509
|
+
const result = await runPreprocessAndPersist({
|
|
510
|
+
uploadId: uploadIdStr,
|
|
511
|
+
sessionId,
|
|
512
|
+
uploadStore,
|
|
513
|
+
filePath,
|
|
514
|
+
filename: input.filename,
|
|
515
|
+
mimeType: input.mimeType,
|
|
516
|
+
size: input.size,
|
|
517
|
+
createdAt,
|
|
518
|
+
preprocessorRegistry,
|
|
519
|
+
})
|
|
520
|
+
|
|
521
|
+
await emitEvent(uploadEvents.create('attachment_uploaded', {
|
|
522
|
+
uploadId,
|
|
523
|
+
filename: input.filename,
|
|
524
|
+
mimeType: input.mimeType,
|
|
525
|
+
size: input.size,
|
|
526
|
+
status: result.status,
|
|
527
|
+
extractedContent: result.extractedContent,
|
|
528
|
+
derivedPaths: result.derivedPaths,
|
|
529
|
+
error: result.error,
|
|
530
|
+
}))
|
|
531
|
+
notify('uploadStatusChanged', {
|
|
532
|
+
sessionId: input.sessionId,
|
|
533
|
+
uploadId: uploadIdStr,
|
|
534
|
+
status: result.status,
|
|
535
|
+
extractedContent: result.extractedContent,
|
|
536
|
+
error: result.error,
|
|
537
|
+
})
|
|
538
|
+
} catch (err) {
|
|
539
|
+
const message = err instanceof Error ? err.message : String(err)
|
|
540
|
+
logger.error('Async upload processing crashed', err instanceof Error ? err : undefined, {
|
|
541
|
+
uploadId: uploadIdStr,
|
|
542
|
+
filename: input.filename,
|
|
543
|
+
})
|
|
544
|
+
try {
|
|
545
|
+
await emitEvent(uploadEvents.create('attachment_uploaded', {
|
|
546
|
+
uploadId,
|
|
547
|
+
filename: input.filename,
|
|
548
|
+
mimeType: input.mimeType,
|
|
549
|
+
size: input.size,
|
|
550
|
+
status: 'failed',
|
|
551
|
+
error: message,
|
|
552
|
+
}))
|
|
553
|
+
} catch {
|
|
554
|
+
// Even event emission failed — best-effort; nothing useful left to do.
|
|
555
|
+
}
|
|
556
|
+
notify('uploadStatusChanged', {
|
|
557
|
+
sessionId: input.sessionId,
|
|
558
|
+
uploadId: uploadIdStr,
|
|
559
|
+
status: 'failed',
|
|
560
|
+
error: message,
|
|
561
|
+
})
|
|
562
|
+
}
|
|
563
|
+
})()
|
|
398
564
|
|
|
399
565
|
return Ok({
|
|
400
|
-
uploadId:
|
|
401
|
-
status:
|
|
402
|
-
extractedContent,
|
|
566
|
+
uploadId: uploadIdStr,
|
|
567
|
+
status: 'processing' as const,
|
|
403
568
|
})
|
|
404
569
|
},
|
|
405
570
|
})
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import { afterAll, beforeAll, describe, expect, it } from 'bun:test'
|
|
2
|
+
import { mkdtemp, rm, writeFile } from 'node:fs/promises'
|
|
3
|
+
import { tmpdir } from 'node:os'
|
|
4
|
+
import { join } from 'node:path'
|
|
5
|
+
import { SessionFileStore } from '~/core/file-store/file-store.js'
|
|
6
|
+
import { MockLLMProvider } from '~/core/llm/mock.js'
|
|
7
|
+
import { Semaphore } from '~/lib/utils/concurrency.js'
|
|
8
|
+
import { silentLogger } from '../../../lib/logger/logger.js'
|
|
9
|
+
import { createNodePlatform } from '../../../testing/node-platform.js'
|
|
10
|
+
import { ImageClassifierPreprocessor } from './image-classifier.js'
|
|
11
|
+
|
|
12
|
+
function defer<T = void>(): { promise: Promise<T>; resolve: (v: T) => void } {
|
|
13
|
+
let resolve!: (v: T) => void
|
|
14
|
+
const promise = new Promise<T>((res) => {
|
|
15
|
+
resolve = res
|
|
16
|
+
})
|
|
17
|
+
return { promise, resolve }
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
describe('ImageClassifierPreprocessor.gate', () => {
|
|
21
|
+
const platform = createNodePlatform()
|
|
22
|
+
let workDir: string
|
|
23
|
+
|
|
24
|
+
beforeAll(async () => {
|
|
25
|
+
workDir = await mkdtemp(join(tmpdir(), 'roj-classifier-gate-'))
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
afterAll(async () => {
|
|
29
|
+
await rm(workDir, { recursive: true, force: true }).catch(() => {})
|
|
30
|
+
})
|
|
31
|
+
|
|
32
|
+
it('caps concurrent vision LLM calls when a gate is provided', async () => {
|
|
33
|
+
const N = 8
|
|
34
|
+
const LIMIT = 3
|
|
35
|
+
|
|
36
|
+
// Track concurrency inside the mock LLM handler — each call increments
|
|
37
|
+
// `active` on entry, awaits a release barrier, then decrements on exit.
|
|
38
|
+
let active = 0
|
|
39
|
+
let peak = 0
|
|
40
|
+
const release = defer()
|
|
41
|
+
|
|
42
|
+
const llmProvider = new MockLLMProvider(async () => {
|
|
43
|
+
active++
|
|
44
|
+
peak = Math.max(peak, active)
|
|
45
|
+
await release.promise
|
|
46
|
+
active--
|
|
47
|
+
return {
|
|
48
|
+
content: 'desc',
|
|
49
|
+
toolCalls: [],
|
|
50
|
+
finishReason: 'stop',
|
|
51
|
+
metrics: MockLLMProvider.defaultMetrics(),
|
|
52
|
+
}
|
|
53
|
+
})
|
|
54
|
+
|
|
55
|
+
const gate = new Semaphore(LIMIT)
|
|
56
|
+
const classifier = new ImageClassifierPreprocessor({
|
|
57
|
+
llmProvider,
|
|
58
|
+
logger: silentLogger,
|
|
59
|
+
fs: platform.fs,
|
|
60
|
+
gate,
|
|
61
|
+
})
|
|
62
|
+
|
|
63
|
+
// Create N tiny dummy image files — content doesn't matter, classifier
|
|
64
|
+
// only stats them and hands the path to the (mocked) LLM.
|
|
65
|
+
const imagePaths = await Promise.all(
|
|
66
|
+
Array.from({ length: N }, async (_, i) => {
|
|
67
|
+
const p = join(workDir, `img-${i}.png`)
|
|
68
|
+
await writeFile(p, Buffer.from([0x89, 0x50, 0x4e, 0x47]))
|
|
69
|
+
return p
|
|
70
|
+
}),
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
const fileStore = new SessionFileStore(workDir, undefined, false, platform.fs, 'session')
|
|
74
|
+
|
|
75
|
+
const tasks = imagePaths.map((p, i) =>
|
|
76
|
+
classifier.process(p, 'image/png', { files: fileStore.scoped(`img-${i}-meta`) }),
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
// Let the workers queue up; the first LIMIT should be in-flight.
|
|
80
|
+
await new Promise(r => setTimeout(r, 20))
|
|
81
|
+
expect(active).toBeLessThanOrEqual(LIMIT)
|
|
82
|
+
expect(active).toBe(LIMIT)
|
|
83
|
+
|
|
84
|
+
// Release everyone; wait for completion and check peak.
|
|
85
|
+
release.resolve()
|
|
86
|
+
const results = await Promise.all(tasks)
|
|
87
|
+
|
|
88
|
+
expect(peak).toBe(LIMIT)
|
|
89
|
+
expect(active).toBe(0)
|
|
90
|
+
expect(llmProvider.getCallCount()).toBe(N)
|
|
91
|
+
for (const r of results) {
|
|
92
|
+
expect(r.ok).toBe(true)
|
|
93
|
+
}
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
it('does not gate when no semaphore is provided (all run concurrently)', async () => {
|
|
97
|
+
const N = 6
|
|
98
|
+
let active = 0
|
|
99
|
+
let peak = 0
|
|
100
|
+
const release = defer()
|
|
101
|
+
|
|
102
|
+
const llmProvider = new MockLLMProvider(async () => {
|
|
103
|
+
active++
|
|
104
|
+
peak = Math.max(peak, active)
|
|
105
|
+
await release.promise
|
|
106
|
+
active--
|
|
107
|
+
return {
|
|
108
|
+
content: 'desc',
|
|
109
|
+
toolCalls: [],
|
|
110
|
+
finishReason: 'stop',
|
|
111
|
+
metrics: MockLLMProvider.defaultMetrics(),
|
|
112
|
+
}
|
|
113
|
+
})
|
|
114
|
+
|
|
115
|
+
const classifier = new ImageClassifierPreprocessor({
|
|
116
|
+
llmProvider,
|
|
117
|
+
logger: silentLogger,
|
|
118
|
+
fs: platform.fs,
|
|
119
|
+
})
|
|
120
|
+
|
|
121
|
+
const imagePaths = await Promise.all(
|
|
122
|
+
Array.from({ length: N }, async (_, i) => {
|
|
123
|
+
const p = join(workDir, `nogate-${i}.png`)
|
|
124
|
+
await writeFile(p, Buffer.from([0x89, 0x50, 0x4e, 0x47]))
|
|
125
|
+
return p
|
|
126
|
+
}),
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
const fileStore = new SessionFileStore(workDir, undefined, false, platform.fs, 'session')
|
|
130
|
+
|
|
131
|
+
const tasks = imagePaths.map((p, i) =>
|
|
132
|
+
classifier.process(p, 'image/png', { files: fileStore.scoped(`nogate-${i}-meta`) }),
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
await new Promise(r => setTimeout(r, 20))
|
|
136
|
+
expect(active).toBe(N)
|
|
137
|
+
|
|
138
|
+
release.resolve()
|
|
139
|
+
await Promise.all(tasks)
|
|
140
|
+
expect(peak).toBe(N)
|
|
141
|
+
})
|
|
142
|
+
})
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
import type { LLMProvider } from '~/core/llm/provider.js'
|
|
9
9
|
import { ModelId } from '~/core/llm/schema.js'
|
|
10
|
+
import type { Semaphore } from '~/lib/utils/concurrency.js'
|
|
10
11
|
import type { Result } from '~/lib/utils/result.js'
|
|
11
12
|
import { Err, Ok } from '~/lib/utils/result.js'
|
|
12
13
|
import type { FileSystem } from '~/platform/fs.js'
|
|
@@ -28,6 +29,13 @@ export interface ImageClassifierConfig {
|
|
|
28
29
|
fs: FileSystem
|
|
29
30
|
/** Whether to skip vision and just return metadata */
|
|
30
31
|
skipVision?: boolean
|
|
32
|
+
/**
|
|
33
|
+
* Optional semaphore to bound concurrent vision LLM calls. All callers
|
|
34
|
+
* sharing one instance compete for the same set of permits — useful when
|
|
35
|
+
* recursive preprocessors (ZIP → docs → images) would otherwise fan out
|
|
36
|
+
* into many simultaneous inferences.
|
|
37
|
+
*/
|
|
38
|
+
gate?: Semaphore
|
|
31
39
|
}
|
|
32
40
|
|
|
33
41
|
// ============================================================================
|
|
@@ -47,6 +55,7 @@ export class ImageClassifierPreprocessor implements Preprocessor {
|
|
|
47
55
|
private readonly logger: Logger
|
|
48
56
|
private readonly fs: FileSystem
|
|
49
57
|
private readonly skipVision: boolean
|
|
58
|
+
private readonly gate: Semaphore | undefined
|
|
50
59
|
|
|
51
60
|
constructor(config: ImageClassifierConfig) {
|
|
52
61
|
this.llmProvider = config.llmProvider
|
|
@@ -54,6 +63,7 @@ export class ImageClassifierPreprocessor implements Preprocessor {
|
|
|
54
63
|
this.logger = config.logger
|
|
55
64
|
this.fs = config.fs
|
|
56
65
|
this.skipVision = config.skipVision ?? false
|
|
66
|
+
this.gate = config.gate
|
|
57
67
|
}
|
|
58
68
|
|
|
59
69
|
async process(
|
|
@@ -124,7 +134,7 @@ export class ImageClassifierPreprocessor implements Preprocessor {
|
|
|
124
134
|
): Promise<string | null> {
|
|
125
135
|
try {
|
|
126
136
|
// Use file:// URL - resolved to base64 lazily in LLM provider
|
|
127
|
-
const
|
|
137
|
+
const inferenceCall = () => this.llmProvider.inference({
|
|
128
138
|
model: this.visionModel,
|
|
129
139
|
systemPrompt: 'You are an image description assistant. Describe images concisely in 1-2 sentences.',
|
|
130
140
|
messages: [
|
|
@@ -146,6 +156,8 @@ export class ImageClassifierPreprocessor implements Preprocessor {
|
|
|
146
156
|
temperature: 0.3,
|
|
147
157
|
})
|
|
148
158
|
|
|
159
|
+
const result = await (this.gate ? this.gate.run(inferenceCall) : inferenceCall())
|
|
160
|
+
|
|
149
161
|
if (result.ok && result.value.content) {
|
|
150
162
|
return result.value.content.trim()
|
|
151
163
|
}
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
15
|
import { dirname } from 'node:path'
|
|
16
|
+
import { mapWithConcurrency } from '~/lib/utils/concurrency.js'
|
|
16
17
|
import type { Result } from '~/lib/utils/result.js'
|
|
17
18
|
import { Err, Ok } from '~/lib/utils/result.js'
|
|
18
19
|
import type { FileSystem } from '~/platform/fs.js'
|
|
@@ -22,6 +23,7 @@ import type { Logger } from '../../../lib/logger/logger.js'
|
|
|
22
23
|
import type { Preprocessor, PreprocessorContext, PreprocessorRegistry, PreprocessorResult } from '../preprocessor.js'
|
|
23
24
|
|
|
24
25
|
const MAX_IMAGES = 50
|
|
26
|
+
const IMAGE_CLASSIFY_CONCURRENCY = 10
|
|
25
27
|
|
|
26
28
|
function makeExec(processRunner: ProcessRunner) {
|
|
27
29
|
return (cmd: string, args: string[]) => processRunner.execFile(cmd, args, { timeout: 60_000, maxBuffer: 50 * 1024 * 1024 })
|
|
@@ -228,19 +230,17 @@ export async function classifyExtractedImages(
|
|
|
228
230
|
registry: PreprocessorRegistry,
|
|
229
231
|
logger: Logger,
|
|
230
232
|
): Promise<Array<{ relativePath: string; description: string }>> {
|
|
231
|
-
const results: Array<{ relativePath: string; description: string }> = []
|
|
232
|
-
|
|
233
233
|
const listResult = await imageStore.list('', { maxDepth: 3 })
|
|
234
|
-
if (!listResult.ok) return
|
|
234
|
+
if (!listResult.ok) return []
|
|
235
235
|
|
|
236
236
|
const imageFiles = listResult.value
|
|
237
237
|
.filter(e => e.type === 'file' && IMAGE_EXT_RE.test(e.name))
|
|
238
238
|
.sort((a, b) => a.name.localeCompare(b.name))
|
|
239
239
|
.slice(0, MAX_IMAGES)
|
|
240
240
|
|
|
241
|
-
|
|
241
|
+
const settled = await mapWithConcurrency(imageFiles, IMAGE_CLASSIFY_CONCURRENCY, async (imgFile) => {
|
|
242
242
|
const imgPathResult = imageStore.realPath(imgFile.name)
|
|
243
|
-
if (!imgPathResult.ok)
|
|
243
|
+
if (!imgPathResult.ok) return null
|
|
244
244
|
|
|
245
245
|
const imgMime = guessImageMime(imgFile.name)
|
|
246
246
|
let description = imgMime
|
|
@@ -255,8 +255,8 @@ export async function classifyExtractedImages(
|
|
|
255
255
|
}
|
|
256
256
|
}
|
|
257
257
|
|
|
258
|
-
|
|
259
|
-
}
|
|
258
|
+
return { relativePath: `${relativePrefix}/${imgFile.name}`, description }
|
|
259
|
+
})
|
|
260
260
|
|
|
261
|
-
return
|
|
261
|
+
return settled.filter((r): r is { relativePath: string; description: string } => r !== null)
|
|
262
262
|
}
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import { extname } from 'node:path'
|
|
11
|
+
import { mapWithConcurrency } from '~/lib/utils/concurrency.js'
|
|
11
12
|
import type { Result } from '~/lib/utils/result.js'
|
|
12
13
|
import { Err, Ok } from '~/lib/utils/result.js'
|
|
13
14
|
import type { ProcessRunner } from '~/platform/process.js'
|
|
@@ -17,6 +18,7 @@ import type { Preprocessor, PreprocessorContext, PreprocessorRegistry, Preproces
|
|
|
17
18
|
const MAX_DEPTH = 3
|
|
18
19
|
const MAX_FILES = 500
|
|
19
20
|
const MAX_TOTAL_SIZE = 100 * 1024 * 1024 // 100MB
|
|
21
|
+
const ZIP_FILE_CONCURRENCY = 10
|
|
20
22
|
|
|
21
23
|
const MIME_MAP: Record<string, string> = {
|
|
22
24
|
'.pdf': 'application/pdf',
|
|
@@ -116,38 +118,45 @@ export class ZipPreprocessor implements Preprocessor {
|
|
|
116
118
|
return Err(new Error('Failed to list extracted files'))
|
|
117
119
|
}
|
|
118
120
|
|
|
119
|
-
const derivedPaths: string[] = []
|
|
120
|
-
const manifest: string[] = []
|
|
121
|
-
let fileCount = 0
|
|
122
|
-
let totalSize = 0
|
|
123
|
-
|
|
124
121
|
const files = listResult.value
|
|
125
122
|
.filter(e => e.type === 'file')
|
|
126
123
|
.sort((a, b) => a.name.localeCompare(b.name))
|
|
127
124
|
|
|
125
|
+
// Pick eligible files first (limits depend on cumulative iteration order, so this stays sequential)
|
|
126
|
+
const eligible: typeof files = []
|
|
127
|
+
let totalSize = 0
|
|
128
|
+
let truncationNotice: string | null = null
|
|
129
|
+
|
|
128
130
|
for (const file of files) {
|
|
129
|
-
if (
|
|
130
|
-
|
|
131
|
+
if (eligible.length >= MAX_FILES) {
|
|
132
|
+
truncationNotice = `... (truncated, ${files.length - eligible.length} more files)`
|
|
131
133
|
break
|
|
132
134
|
}
|
|
133
|
-
|
|
134
135
|
const fileSize = file.size ?? 0
|
|
135
|
-
totalSize
|
|
136
|
-
|
|
137
|
-
manifest.push('... (total size limit reached)')
|
|
136
|
+
if (totalSize + fileSize > MAX_TOTAL_SIZE) {
|
|
137
|
+
truncationNotice = '... (total size limit reached)'
|
|
138
138
|
break
|
|
139
139
|
}
|
|
140
|
+
totalSize += fileSize
|
|
141
|
+
eligible.push(file)
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
const fileCount = eligible.length
|
|
140
145
|
|
|
141
|
-
|
|
146
|
+
// Process eligible files in parallel with bounded concurrency
|
|
147
|
+
const processed = await mapWithConcurrency(eligible, ZIP_FILE_CONCURRENCY, async (file) => {
|
|
148
|
+
const collectedPaths: string[] = []
|
|
142
149
|
|
|
143
150
|
const fileRealPath = extractStore.realPath(file.name)
|
|
144
151
|
if (!fileRealPath.ok) {
|
|
145
|
-
|
|
146
|
-
|
|
152
|
+
return {
|
|
153
|
+
manifestEntry: `- ${file.name} (path resolution failed)`,
|
|
154
|
+
derivedPaths: collectedPaths,
|
|
155
|
+
}
|
|
147
156
|
}
|
|
148
157
|
|
|
149
158
|
const relativePath = `extracted/${file.name}`
|
|
150
|
-
|
|
159
|
+
collectedPaths.push(relativePath)
|
|
151
160
|
|
|
152
161
|
const mime = getMimeType(file.name)
|
|
153
162
|
let contentSummary = ''
|
|
@@ -171,7 +180,7 @@ export class ZipPreprocessor implements Preprocessor {
|
|
|
171
180
|
if (subResult.ok) {
|
|
172
181
|
if (subResult.value.derivedPaths) {
|
|
173
182
|
for (const dp of subResult.value.derivedPaths) {
|
|
174
|
-
|
|
183
|
+
collectedPaths.push(`extracted/${file.name}-content/${dp}`)
|
|
175
184
|
}
|
|
176
185
|
}
|
|
177
186
|
if (subResult.value.extractedContent) {
|
|
@@ -186,8 +195,19 @@ export class ZipPreprocessor implements Preprocessor {
|
|
|
186
195
|
}
|
|
187
196
|
}
|
|
188
197
|
|
|
189
|
-
|
|
198
|
+
return {
|
|
199
|
+
manifestEntry: `- ${file.name} (${formatSize(file.size ?? 0)})${contentSummary}`,
|
|
200
|
+
derivedPaths: collectedPaths,
|
|
201
|
+
}
|
|
202
|
+
})
|
|
203
|
+
|
|
204
|
+
const derivedPaths: string[] = []
|
|
205
|
+
const manifest: string[] = []
|
|
206
|
+
for (const item of processed) {
|
|
207
|
+
derivedPaths.push(...item.derivedPaths)
|
|
208
|
+
manifest.push(item.manifestEntry)
|
|
190
209
|
}
|
|
210
|
+
if (truncationNotice) manifest.push(truncationNotice)
|
|
191
211
|
|
|
192
212
|
const fullManifest = `## ZIP Contents (${fileCount} files)\n\n${manifest.join('\n')}`
|
|
193
213
|
|
|
@@ -9,7 +9,7 @@ export const uploadEvents = createEventsFactory({
|
|
|
9
9
|
filename: z4.string(),
|
|
10
10
|
mimeType: z4.string(),
|
|
11
11
|
size: z4.number(),
|
|
12
|
-
status: z4.enum(['ready', 'failed']),
|
|
12
|
+
status: z4.enum(['processing', 'ready', 'failed']),
|
|
13
13
|
extractedContent: z4.string().optional(),
|
|
14
14
|
derivedPaths: z4.array(z4.string()).optional(),
|
|
15
15
|
error: z4.string().optional(),
|