@roj-ai/sdk 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. package/dist/bootstrap.d.ts +13 -0
  2. package/dist/bootstrap.d.ts.map +1 -1
  3. package/dist/bootstrap.js +3 -1
  4. package/dist/bootstrap.js.map +1 -1
  5. package/dist/config.d.ts +2 -0
  6. package/dist/config.d.ts.map +1 -1
  7. package/dist/config.js +3 -0
  8. package/dist/config.js.map +1 -1
  9. package/dist/core/agents/agent.d.ts +25 -1
  10. package/dist/core/agents/agent.d.ts.map +1 -1
  11. package/dist/core/agents/agent.js +117 -21
  12. package/dist/core/agents/agent.js.map +1 -1
  13. package/dist/core/agents/config.d.ts +7 -0
  14. package/dist/core/agents/config.d.ts.map +1 -1
  15. package/dist/core/agents/context.d.ts +10 -0
  16. package/dist/core/agents/context.d.ts.map +1 -1
  17. package/dist/core/agents/state.d.ts +11 -3
  18. package/dist/core/agents/state.d.ts.map +1 -1
  19. package/dist/core/agents/state.js.map +1 -1
  20. package/dist/core/file-store/file-store.d.ts +5 -1
  21. package/dist/core/file-store/file-store.d.ts.map +1 -1
  22. package/dist/core/file-store/file-store.js +31 -21
  23. package/dist/core/file-store/file-store.js.map +1 -1
  24. package/dist/core/image/vips-resizer.test.js +26 -14
  25. package/dist/core/image/vips-resizer.test.js.map +1 -1
  26. package/dist/core/llm/anthropic.d.ts.map +1 -1
  27. package/dist/core/llm/anthropic.js +11 -8
  28. package/dist/core/llm/anthropic.js.map +1 -1
  29. package/dist/core/llm/cache-breakpoints.d.ts +5 -1
  30. package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
  31. package/dist/core/llm/cache-breakpoints.js +10 -5
  32. package/dist/core/llm/cache-breakpoints.js.map +1 -1
  33. package/dist/core/sessions/session.d.ts.map +1 -1
  34. package/dist/core/sessions/session.js +10 -0
  35. package/dist/core/sessions/session.js.map +1 -1
  36. package/dist/core/sessions/session.test.js +5 -0
  37. package/dist/core/sessions/session.test.js.map +1 -1
  38. package/dist/core/sessions/state.d.ts.map +1 -1
  39. package/dist/core/sessions/state.js +5 -1
  40. package/dist/core/sessions/state.js.map +1 -1
  41. package/dist/core/tools/executor.test.js +1 -0
  42. package/dist/core/tools/executor.test.js.map +1 -1
  43. package/dist/lib/utils/concurrency.d.ts +25 -0
  44. package/dist/lib/utils/concurrency.d.ts.map +1 -0
  45. package/dist/lib/utils/concurrency.js +69 -0
  46. package/dist/lib/utils/concurrency.js.map +1 -0
  47. package/dist/lib/utils/concurrency.test.d.ts +2 -0
  48. package/dist/lib/utils/concurrency.test.d.ts.map +1 -0
  49. package/dist/lib/utils/concurrency.test.js +135 -0
  50. package/dist/lib/utils/concurrency.test.js.map +1 -0
  51. package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
  52. package/dist/plugins/agent-status/plugin.js +18 -26
  53. package/dist/plugins/agent-status/plugin.js.map +1 -1
  54. package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
  55. package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
  56. package/dist/plugins/context-compact/compaction-live.test.js +177 -0
  57. package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
  58. package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
  59. package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
  60. package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
  61. package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
  62. package/dist/plugins/context-compact/context-compactor.js +60 -36
  63. package/dist/plugins/context-compact/context-compactor.js.map +1 -1
  64. package/dist/plugins/context-compact/context-compactor.test.js +69 -103
  65. package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
  66. package/dist/plugins/context-compact/plugin.d.ts +9 -2
  67. package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
  68. package/dist/plugins/context-compact/plugin.js +8 -4
  69. package/dist/plugins/context-compact/plugin.js.map +1 -1
  70. package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
  71. package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
  72. package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
  73. package/dist/plugins/filesystem/plugin.js +8 -6
  74. package/dist/plugins/filesystem/plugin.js.map +1 -1
  75. package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
  76. package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
  77. package/dist/plugins/resources/plugin.d.ts.map +1 -1
  78. package/dist/plugins/resources/plugin.js +4 -1
  79. package/dist/plugins/resources/plugin.js.map +1 -1
  80. package/dist/plugins/uploads/plugin.d.ts +12 -0
  81. package/dist/plugins/uploads/plugin.d.ts.map +1 -1
  82. package/dist/plugins/uploads/plugin.js +188 -44
  83. package/dist/plugins/uploads/plugin.js.map +1 -1
  84. package/dist/plugins/uploads/preprocessors/image-classifier.d.ts +9 -0
  85. package/dist/plugins/uploads/preprocessors/image-classifier.d.ts.map +1 -1
  86. package/dist/plugins/uploads/preprocessors/image-classifier.js +4 -1
  87. package/dist/plugins/uploads/preprocessors/image-classifier.js.map +1 -1
  88. package/dist/plugins/uploads/preprocessors/image-classifier.test.d.ts +2 -0
  89. package/dist/plugins/uploads/preprocessors/image-classifier.test.d.ts.map +1 -0
  90. package/dist/plugins/uploads/preprocessors/image-classifier.test.js +113 -0
  91. package/dist/plugins/uploads/preprocessors/image-classifier.test.js.map +1 -0
  92. package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.d.ts.map +1 -1
  93. package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js +8 -7
  94. package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js.map +1 -1
  95. package/dist/plugins/uploads/preprocessors/zip-preprocessor.d.ts.map +1 -1
  96. package/dist/plugins/uploads/preprocessors/zip-preprocessor.js +35 -15
  97. package/dist/plugins/uploads/preprocessors/zip-preprocessor.js.map +1 -1
  98. package/dist/plugins/uploads/state.d.ts +1 -0
  99. package/dist/plugins/uploads/state.d.ts.map +1 -1
  100. package/dist/plugins/uploads/state.js +1 -1
  101. package/dist/plugins/uploads/state.js.map +1 -1
  102. package/dist/plugins/uploads/uploads.integration.test.js +97 -0
  103. package/dist/plugins/uploads/uploads.integration.test.js.map +1 -1
  104. package/dist/plugins/user-chat/plugin.d.ts +2 -0
  105. package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
  106. package/dist/plugins/user-chat/plugin.js +47 -3
  107. package/dist/plugins/user-chat/plugin.js.map +1 -1
  108. package/dist/plugins/user-chat/schema.d.ts +10 -0
  109. package/dist/plugins/user-chat/schema.d.ts.map +1 -1
  110. package/dist/plugins/user-chat/schema.js +1 -0
  111. package/dist/plugins/user-chat/schema.js.map +1 -1
  112. package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
  113. package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
  114. package/dist/transport/http/routes/upload.d.ts.map +1 -1
  115. package/dist/transport/http/routes/upload.js +60 -0
  116. package/dist/transport/http/routes/upload.js.map +1 -1
  117. package/package.json +2 -2
  118. package/src/bootstrap.ts +3 -1
  119. package/src/config.ts +6 -0
  120. package/src/core/agents/agent.ts +134 -20
  121. package/src/core/agents/config.ts +7 -0
  122. package/src/core/agents/context.ts +11 -0
  123. package/src/core/agents/state.ts +11 -4
  124. package/src/core/file-store/file-store.ts +38 -18
  125. package/src/core/image/vips-resizer.test.ts +26 -15
  126. package/src/core/llm/anthropic.ts +19 -12
  127. package/src/core/llm/cache-breakpoints.ts +15 -6
  128. package/src/core/sessions/session.test.ts +6 -0
  129. package/src/core/sessions/session.ts +12 -0
  130. package/src/core/sessions/state.ts +5 -1
  131. package/src/core/tools/executor.test.ts +1 -0
  132. package/src/lib/utils/concurrency.test.ts +169 -0
  133. package/src/lib/utils/concurrency.ts +72 -0
  134. package/src/plugins/agent-status/plugin.ts +18 -25
  135. package/src/plugins/context-compact/compaction-live.test.ts +221 -0
  136. package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
  137. package/src/plugins/context-compact/context-compactor.test.ts +71 -110
  138. package/src/plugins/context-compact/context-compactor.ts +88 -43
  139. package/src/plugins/context-compact/plugin.ts +19 -10
  140. package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
  141. package/src/plugins/filesystem/plugin.ts +8 -6
  142. package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
  143. package/src/plugins/resources/plugin.ts +4 -1
  144. package/src/plugins/uploads/plugin.ts +212 -47
  145. package/src/plugins/uploads/preprocessors/image-classifier.test.ts +142 -0
  146. package/src/plugins/uploads/preprocessors/image-classifier.ts +13 -1
  147. package/src/plugins/uploads/preprocessors/markitdown-preprocessor.ts +8 -8
  148. package/src/plugins/uploads/preprocessors/zip-preprocessor.ts +37 -17
  149. package/src/plugins/uploads/state.ts +1 -1
  150. package/src/plugins/uploads/uploads.integration.test.ts +123 -0
  151. package/src/plugins/user-chat/plugin.ts +60 -3
  152. package/src/plugins/user-chat/schema.ts +10 -1
  153. package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
  154. package/src/transport/http/routes/upload.ts +87 -0
@@ -2,11 +2,24 @@ import z from 'zod/v4'
2
2
  import { ValidationErrors } from '~/core/errors.js'
3
3
  import type { FileStore } from '~/core/file-store/types.js'
4
4
  import { definePlugin } from '~/core/plugins/plugin-builder.js'
5
+ import { SessionId } from '~/core/sessions/schema.js'
5
6
  import { Err, Ok } from '~/lib/utils/result.js'
6
7
  import type { PreprocessorRegistry } from './preprocessor.js'
7
8
  import { generateUploadId, type MessageAttachment, UploadId, type UploadMetadata } from './schema.js'
8
9
  import { type PendingUpload, uploadEvents, type UploadsState } from './state.js'
9
10
 
11
+ // ============================================================================
12
+ // Notification schemas
13
+ // ============================================================================
14
+
15
+ const statusChangedSchema = z.object({
16
+ sessionId: z.string(),
17
+ uploadId: z.string(),
18
+ status: z.enum(['processing', 'ready', 'failed']),
19
+ extractedContent: z.string().optional(),
20
+ error: z.string().optional(),
21
+ })
22
+
10
23
  // ============================================================================
11
24
  // Constants
12
25
  // ============================================================================
@@ -66,6 +79,69 @@ function formatUploadsForLLM(uploads: PendingUpload[], sessionRoot: string): str
66
79
  return blocks.join('\n')
67
80
  }
68
81
 
82
+ /**
83
+ * Run preprocessor (with timeout) and persist final upload metadata to disk.
84
+ * Returns the resolved status + extracted/derived data for the caller to emit.
85
+ */
86
+ async function runPreprocessAndPersist(args: {
87
+ uploadId: string
88
+ sessionId: SessionId
89
+ uploadStore: FileStore
90
+ filePath: string
91
+ filename: string
92
+ mimeType: string
93
+ size: number
94
+ createdAt: number
95
+ preprocessorRegistry?: PreprocessorRegistry
96
+ }): Promise<{
97
+ status: 'ready' | 'failed'
98
+ extractedContent?: string
99
+ derivedPaths?: string[]
100
+ error?: string
101
+ }> {
102
+ const preprocessor = args.preprocessorRegistry?.getForMimeType(args.mimeType)
103
+
104
+ let status: 'ready' | 'failed' = 'ready'
105
+ let extractedContent: string | undefined
106
+ let derivedPaths: string[] | undefined
107
+ let errorMessage: string | undefined
108
+
109
+ if (preprocessor) {
110
+ const processPromise = preprocessor.process(args.filePath, args.mimeType, {
111
+ files: args.uploadStore,
112
+ })
113
+ const timeoutPromise = sleep(PROCESSING_TIMEOUT_MS).then(() => ({
114
+ ok: false as const,
115
+ error: new Error('Processing timeout'),
116
+ }))
117
+ const result = await Promise.race([processPromise, timeoutPromise])
118
+ if (result.ok) {
119
+ extractedContent = result.value.extractedContent
120
+ derivedPaths = result.value.derivedPaths
121
+ } else {
122
+ status = 'failed'
123
+ errorMessage = result.error.message
124
+ }
125
+ }
126
+
127
+ const metadata: UploadMetadata = {
128
+ uploadId: UploadId(args.uploadId),
129
+ sessionId: args.sessionId,
130
+ filename: args.filename,
131
+ mimeType: args.mimeType,
132
+ size: args.size,
133
+ path: args.filePath,
134
+ status,
135
+ extractedContent,
136
+ derivedPaths,
137
+ createdAt: args.createdAt,
138
+ completedAt: Date.now(),
139
+ }
140
+ await args.uploadStore.write('meta.json', JSON.stringify(metadata, null, 2))
141
+
142
+ return { status, extractedContent, derivedPaths, error: errorMessage }
143
+ }
144
+
69
145
  // ============================================================================
70
146
  // Plugin
71
147
  // ============================================================================
@@ -73,6 +149,7 @@ function formatUploadsForLLM(uploads: PendingUpload[], sessionRoot: string): str
73
149
  export const uploadsPlugin = definePlugin('uploads')
74
150
  .pluginConfig<UploadsPluginConfig>()
75
151
  .events([uploadEvents])
152
+ .notification('uploadStatusChanged', { schema: statusChangedSchema })
76
153
  .state<UploadsState>({
77
154
  key: 'uploads',
78
155
  initial: (): UploadsState => ({ pending: [] }),
@@ -315,91 +392,179 @@ export const uploadsPlugin = definePlugin('uploads')
315
392
  handler: async (ctx, input) => {
316
393
  const { dataFileStore, preprocessorRegistry } = ctx.pluginConfig
317
394
 
318
- // Validate
319
395
  if (input.size > MAX_FILE_SIZE) {
320
396
  return Err(ValidationErrors.invalid(`File too large: max ${MAX_FILE_SIZE / (1024 * 1024)}MB`))
321
397
  }
322
-
323
398
  if (!isAllowedMimeType(input.mimeType)) {
324
399
  return Err(ValidationErrors.invalid(`Unsupported file type: ${input.mimeType}`))
325
400
  }
326
401
 
327
- // Generate upload ID and scoped store
328
402
  const uploadId = generateUploadId()
329
403
  const uploadStore = dataFileStore.scoped(`sessions/${input.sessionId}/uploads/${uploadId}`)
330
404
 
331
- // Write file to disk
332
405
  const writeResult = await uploadStore.write(input.filename, input.fileBuffer)
333
-
334
406
  if (!writeResult.ok) {
335
407
  return Err(ValidationErrors.invalid('Failed to write file'))
336
408
  }
337
409
 
338
- const filePath = writeResult.value.path
339
-
340
- // Run preprocessor (with timeout)
341
- let processingResult: 'success' | 'failed' | 'skipped' = 'skipped'
342
- let extractedContent: string | undefined
343
- let derivedPaths: string[] | undefined
410
+ const result = await runPreprocessAndPersist({
411
+ uploadId: String(uploadId),
412
+ sessionId: ctx.sessionId,
413
+ uploadStore,
414
+ filePath: writeResult.value.path,
415
+ filename: input.filename,
416
+ mimeType: input.mimeType,
417
+ size: input.size,
418
+ createdAt: Date.now(),
419
+ preprocessorRegistry,
420
+ })
344
421
 
345
- const preprocessor = preprocessorRegistry?.getForMimeType(input.mimeType)
422
+ await ctx.emitEvent(uploadEvents.create('attachment_uploaded', {
423
+ uploadId,
424
+ filename: input.filename,
425
+ mimeType: input.mimeType,
426
+ size: input.size,
427
+ status: result.status,
428
+ extractedContent: result.extractedContent,
429
+ derivedPaths: result.derivedPaths,
430
+ error: result.error,
431
+ }))
346
432
 
347
- if (preprocessor) {
348
- const processPromise = preprocessor.process(filePath, input.mimeType, {
349
- files: uploadStore,
350
- })
433
+ return Ok({
434
+ uploadId: String(uploadId),
435
+ status: result.status,
436
+ extractedContent: result.extractedContent,
437
+ })
438
+ },
439
+ })
440
+ .method('uploadAsync', {
441
+ input: z.object({
442
+ sessionId: z.string(),
443
+ filename: z.string(),
444
+ mimeType: z.string(),
445
+ size: z.number(),
446
+ fileBuffer: z.custom<Buffer>(),
447
+ }),
448
+ output: z.object({
449
+ uploadId: z.string(),
450
+ status: z.enum(['processing']),
451
+ }),
452
+ handler: async (ctx, input) => {
453
+ const { dataFileStore, preprocessorRegistry } = ctx.pluginConfig
351
454
 
352
- const timeoutPromise = sleep(PROCESSING_TIMEOUT_MS).then(() => ({
353
- ok: false as const,
354
- error: new Error('Processing timeout'),
355
- }))
455
+ if (input.size > MAX_FILE_SIZE) {
456
+ return Err(ValidationErrors.invalid(`File too large: max ${MAX_FILE_SIZE / (1024 * 1024)}MB`))
457
+ }
458
+ if (!isAllowedMimeType(input.mimeType)) {
459
+ return Err(ValidationErrors.invalid(`Unsupported file type: ${input.mimeType}`))
460
+ }
356
461
 
357
- const result = await Promise.race([processPromise, timeoutPromise])
462
+ const uploadId = generateUploadId()
463
+ const uploadIdStr = String(uploadId)
464
+ const uploadStore = dataFileStore.scoped(`sessions/${input.sessionId}/uploads/${uploadId}`)
358
465
 
359
- if (result.ok) {
360
- processingResult = 'success'
361
- extractedContent = result.value.extractedContent
362
- derivedPaths = result.value.derivedPaths
363
- } else {
364
- processingResult = 'failed'
365
- }
466
+ const writeResult = await uploadStore.write(input.filename, input.fileBuffer)
467
+ if (!writeResult.ok) {
468
+ return Err(ValidationErrors.invalid('Failed to write file'))
366
469
  }
367
470
 
368
- // Create upload metadata
369
- const now = Date.now()
370
- const uploadStatus = processingResult === 'failed' ? 'failed' as const : 'ready' as const
371
- const metadata: UploadMetadata = {
471
+ const filePath = writeResult.value.path
472
+ const createdAt = Date.now()
473
+
474
+ // Persist initial 'processing' metadata so listPending sees it before preprocessor finishes.
475
+ const processingMeta: UploadMetadata = {
372
476
  uploadId,
373
477
  sessionId: ctx.sessionId,
374
478
  filename: input.filename,
375
479
  mimeType: input.mimeType,
376
480
  size: input.size,
377
481
  path: filePath,
378
- status: uploadStatus,
379
- extractedContent,
380
- derivedPaths,
381
- createdAt: now,
382
- completedAt: now,
482
+ status: 'processing',
483
+ createdAt,
484
+ completedAt: createdAt,
383
485
  }
486
+ await uploadStore.write('meta.json', JSON.stringify(processingMeta, null, 2))
384
487
 
385
- // Save metadata
386
- await uploadStore.write('meta.json', JSON.stringify(metadata, null, 2))
387
-
388
- // Emit event
389
488
  await ctx.emitEvent(uploadEvents.create('attachment_uploaded', {
390
489
  uploadId,
391
490
  filename: input.filename,
392
491
  mimeType: input.mimeType,
393
492
  size: input.size,
394
- status: uploadStatus,
395
- extractedContent,
396
- derivedPaths,
493
+ status: 'processing',
397
494
  }))
495
+ ctx.notify('uploadStatusChanged', {
496
+ sessionId: input.sessionId,
497
+ uploadId: uploadIdStr,
498
+ status: 'processing',
499
+ })
500
+
501
+ // Capture refs from ctx before the handler returns — `notify`/`emitEvent`
502
+ // closures stay valid for the lifetime of the session, which in roj
503
+ // outlives any single handler call.
504
+ const { emitEvent, notify, logger } = ctx
505
+ const sessionId = ctx.sessionId
506
+
507
+ void (async () => {
508
+ try {
509
+ const result = await runPreprocessAndPersist({
510
+ uploadId: uploadIdStr,
511
+ sessionId,
512
+ uploadStore,
513
+ filePath,
514
+ filename: input.filename,
515
+ mimeType: input.mimeType,
516
+ size: input.size,
517
+ createdAt,
518
+ preprocessorRegistry,
519
+ })
520
+
521
+ await emitEvent(uploadEvents.create('attachment_uploaded', {
522
+ uploadId,
523
+ filename: input.filename,
524
+ mimeType: input.mimeType,
525
+ size: input.size,
526
+ status: result.status,
527
+ extractedContent: result.extractedContent,
528
+ derivedPaths: result.derivedPaths,
529
+ error: result.error,
530
+ }))
531
+ notify('uploadStatusChanged', {
532
+ sessionId: input.sessionId,
533
+ uploadId: uploadIdStr,
534
+ status: result.status,
535
+ extractedContent: result.extractedContent,
536
+ error: result.error,
537
+ })
538
+ } catch (err) {
539
+ const message = err instanceof Error ? err.message : String(err)
540
+ logger.error('Async upload processing crashed', err instanceof Error ? err : undefined, {
541
+ uploadId: uploadIdStr,
542
+ filename: input.filename,
543
+ })
544
+ try {
545
+ await emitEvent(uploadEvents.create('attachment_uploaded', {
546
+ uploadId,
547
+ filename: input.filename,
548
+ mimeType: input.mimeType,
549
+ size: input.size,
550
+ status: 'failed',
551
+ error: message,
552
+ }))
553
+ } catch {
554
+ // Even event emission failed — best-effort; nothing useful left to do.
555
+ }
556
+ notify('uploadStatusChanged', {
557
+ sessionId: input.sessionId,
558
+ uploadId: uploadIdStr,
559
+ status: 'failed',
560
+ error: message,
561
+ })
562
+ }
563
+ })()
398
564
 
399
565
  return Ok({
400
- uploadId: String(uploadId),
401
- status: uploadStatus,
402
- extractedContent,
566
+ uploadId: uploadIdStr,
567
+ status: 'processing' as const,
403
568
  })
404
569
  },
405
570
  })
@@ -0,0 +1,142 @@
1
+ import { afterAll, beforeAll, describe, expect, it } from 'bun:test'
2
+ import { mkdtemp, rm, writeFile } from 'node:fs/promises'
3
+ import { tmpdir } from 'node:os'
4
+ import { join } from 'node:path'
5
+ import { SessionFileStore } from '~/core/file-store/file-store.js'
6
+ import { MockLLMProvider } from '~/core/llm/mock.js'
7
+ import { Semaphore } from '~/lib/utils/concurrency.js'
8
+ import { silentLogger } from '../../../lib/logger/logger.js'
9
+ import { createNodePlatform } from '../../../testing/node-platform.js'
10
+ import { ImageClassifierPreprocessor } from './image-classifier.js'
11
+
12
+ function defer<T = void>(): { promise: Promise<T>; resolve: (v: T) => void } {
13
+ let resolve!: (v: T) => void
14
+ const promise = new Promise<T>((res) => {
15
+ resolve = res
16
+ })
17
+ return { promise, resolve }
18
+ }
19
+
20
+ describe('ImageClassifierPreprocessor.gate', () => {
21
+ const platform = createNodePlatform()
22
+ let workDir: string
23
+
24
+ beforeAll(async () => {
25
+ workDir = await mkdtemp(join(tmpdir(), 'roj-classifier-gate-'))
26
+ })
27
+
28
+ afterAll(async () => {
29
+ await rm(workDir, { recursive: true, force: true }).catch(() => {})
30
+ })
31
+
32
+ it('caps concurrent vision LLM calls when a gate is provided', async () => {
33
+ const N = 8
34
+ const LIMIT = 3
35
+
36
+ // Track concurrency inside the mock LLM handler — each call increments
37
+ // `active` on entry, awaits a release barrier, then decrements on exit.
38
+ let active = 0
39
+ let peak = 0
40
+ const release = defer()
41
+
42
+ const llmProvider = new MockLLMProvider(async () => {
43
+ active++
44
+ peak = Math.max(peak, active)
45
+ await release.promise
46
+ active--
47
+ return {
48
+ content: 'desc',
49
+ toolCalls: [],
50
+ finishReason: 'stop',
51
+ metrics: MockLLMProvider.defaultMetrics(),
52
+ }
53
+ })
54
+
55
+ const gate = new Semaphore(LIMIT)
56
+ const classifier = new ImageClassifierPreprocessor({
57
+ llmProvider,
58
+ logger: silentLogger,
59
+ fs: platform.fs,
60
+ gate,
61
+ })
62
+
63
+ // Create N tiny dummy image files — content doesn't matter, classifier
64
+ // only stats them and hands the path to the (mocked) LLM.
65
+ const imagePaths = await Promise.all(
66
+ Array.from({ length: N }, async (_, i) => {
67
+ const p = join(workDir, `img-${i}.png`)
68
+ await writeFile(p, Buffer.from([0x89, 0x50, 0x4e, 0x47]))
69
+ return p
70
+ }),
71
+ )
72
+
73
+ const fileStore = new SessionFileStore(workDir, undefined, false, platform.fs, 'session')
74
+
75
+ const tasks = imagePaths.map((p, i) =>
76
+ classifier.process(p, 'image/png', { files: fileStore.scoped(`img-${i}-meta`) }),
77
+ )
78
+
79
+ // Let the workers queue up; the first LIMIT should be in-flight.
80
+ await new Promise(r => setTimeout(r, 20))
81
+ expect(active).toBeLessThanOrEqual(LIMIT)
82
+ expect(active).toBe(LIMIT)
83
+
84
+ // Release everyone; wait for completion and check peak.
85
+ release.resolve()
86
+ const results = await Promise.all(tasks)
87
+
88
+ expect(peak).toBe(LIMIT)
89
+ expect(active).toBe(0)
90
+ expect(llmProvider.getCallCount()).toBe(N)
91
+ for (const r of results) {
92
+ expect(r.ok).toBe(true)
93
+ }
94
+ })
95
+
96
+ it('does not gate when no semaphore is provided (all run concurrently)', async () => {
97
+ const N = 6
98
+ let active = 0
99
+ let peak = 0
100
+ const release = defer()
101
+
102
+ const llmProvider = new MockLLMProvider(async () => {
103
+ active++
104
+ peak = Math.max(peak, active)
105
+ await release.promise
106
+ active--
107
+ return {
108
+ content: 'desc',
109
+ toolCalls: [],
110
+ finishReason: 'stop',
111
+ metrics: MockLLMProvider.defaultMetrics(),
112
+ }
113
+ })
114
+
115
+ const classifier = new ImageClassifierPreprocessor({
116
+ llmProvider,
117
+ logger: silentLogger,
118
+ fs: platform.fs,
119
+ })
120
+
121
+ const imagePaths = await Promise.all(
122
+ Array.from({ length: N }, async (_, i) => {
123
+ const p = join(workDir, `nogate-${i}.png`)
124
+ await writeFile(p, Buffer.from([0x89, 0x50, 0x4e, 0x47]))
125
+ return p
126
+ }),
127
+ )
128
+
129
+ const fileStore = new SessionFileStore(workDir, undefined, false, platform.fs, 'session')
130
+
131
+ const tasks = imagePaths.map((p, i) =>
132
+ classifier.process(p, 'image/png', { files: fileStore.scoped(`nogate-${i}-meta`) }),
133
+ )
134
+
135
+ await new Promise(r => setTimeout(r, 20))
136
+ expect(active).toBe(N)
137
+
138
+ release.resolve()
139
+ await Promise.all(tasks)
140
+ expect(peak).toBe(N)
141
+ })
142
+ })
@@ -7,6 +7,7 @@
7
7
 
8
8
  import type { LLMProvider } from '~/core/llm/provider.js'
9
9
  import { ModelId } from '~/core/llm/schema.js'
10
+ import type { Semaphore } from '~/lib/utils/concurrency.js'
10
11
  import type { Result } from '~/lib/utils/result.js'
11
12
  import { Err, Ok } from '~/lib/utils/result.js'
12
13
  import type { FileSystem } from '~/platform/fs.js'
@@ -28,6 +29,13 @@ export interface ImageClassifierConfig {
28
29
  fs: FileSystem
29
30
  /** Whether to skip vision and just return metadata */
30
31
  skipVision?: boolean
32
+ /**
33
+ * Optional semaphore to bound concurrent vision LLM calls. All callers
34
+ * sharing one instance compete for the same set of permits — useful when
35
+ * recursive preprocessors (ZIP → docs → images) would otherwise fan out
36
+ * into many simultaneous inferences.
37
+ */
38
+ gate?: Semaphore
31
39
  }
32
40
 
33
41
  // ============================================================================
@@ -47,6 +55,7 @@ export class ImageClassifierPreprocessor implements Preprocessor {
47
55
  private readonly logger: Logger
48
56
  private readonly fs: FileSystem
49
57
  private readonly skipVision: boolean
58
+ private readonly gate: Semaphore | undefined
50
59
 
51
60
  constructor(config: ImageClassifierConfig) {
52
61
  this.llmProvider = config.llmProvider
@@ -54,6 +63,7 @@ export class ImageClassifierPreprocessor implements Preprocessor {
54
63
  this.logger = config.logger
55
64
  this.fs = config.fs
56
65
  this.skipVision = config.skipVision ?? false
66
+ this.gate = config.gate
57
67
  }
58
68
 
59
69
  async process(
@@ -124,7 +134,7 @@ export class ImageClassifierPreprocessor implements Preprocessor {
124
134
  ): Promise<string | null> {
125
135
  try {
126
136
  // Use file:// URL - resolved to base64 lazily in LLM provider
127
- const result = await this.llmProvider.inference({
137
+ const inferenceCall = () => this.llmProvider.inference({
128
138
  model: this.visionModel,
129
139
  systemPrompt: 'You are an image description assistant. Describe images concisely in 1-2 sentences.',
130
140
  messages: [
@@ -146,6 +156,8 @@ export class ImageClassifierPreprocessor implements Preprocessor {
146
156
  temperature: 0.3,
147
157
  })
148
158
 
159
+ const result = await (this.gate ? this.gate.run(inferenceCall) : inferenceCall())
160
+
149
161
  if (result.ok && result.value.content) {
150
162
  return result.value.content.trim()
151
163
  }
@@ -13,6 +13,7 @@
13
13
  */
14
14
 
15
15
  import { dirname } from 'node:path'
16
+ import { mapWithConcurrency } from '~/lib/utils/concurrency.js'
16
17
  import type { Result } from '~/lib/utils/result.js'
17
18
  import { Err, Ok } from '~/lib/utils/result.js'
18
19
  import type { FileSystem } from '~/platform/fs.js'
@@ -22,6 +23,7 @@ import type { Logger } from '../../../lib/logger/logger.js'
22
23
  import type { Preprocessor, PreprocessorContext, PreprocessorRegistry, PreprocessorResult } from '../preprocessor.js'
23
24
 
24
25
  const MAX_IMAGES = 50
26
+ const IMAGE_CLASSIFY_CONCURRENCY = 10
25
27
 
26
28
  function makeExec(processRunner: ProcessRunner) {
27
29
  return (cmd: string, args: string[]) => processRunner.execFile(cmd, args, { timeout: 60_000, maxBuffer: 50 * 1024 * 1024 })
@@ -228,19 +230,17 @@ export async function classifyExtractedImages(
228
230
  registry: PreprocessorRegistry,
229
231
  logger: Logger,
230
232
  ): Promise<Array<{ relativePath: string; description: string }>> {
231
- const results: Array<{ relativePath: string; description: string }> = []
232
-
233
233
  const listResult = await imageStore.list('', { maxDepth: 3 })
234
- if (!listResult.ok) return results
234
+ if (!listResult.ok) return []
235
235
 
236
236
  const imageFiles = listResult.value
237
237
  .filter(e => e.type === 'file' && IMAGE_EXT_RE.test(e.name))
238
238
  .sort((a, b) => a.name.localeCompare(b.name))
239
239
  .slice(0, MAX_IMAGES)
240
240
 
241
- for (const imgFile of imageFiles) {
241
+ const settled = await mapWithConcurrency(imageFiles, IMAGE_CLASSIFY_CONCURRENCY, async (imgFile) => {
242
242
  const imgPathResult = imageStore.realPath(imgFile.name)
243
- if (!imgPathResult.ok) continue
243
+ if (!imgPathResult.ok) return null
244
244
 
245
245
  const imgMime = guessImageMime(imgFile.name)
246
246
  let description = imgMime
@@ -255,8 +255,8 @@ export async function classifyExtractedImages(
255
255
  }
256
256
  }
257
257
 
258
- results.push({ relativePath: `${relativePrefix}/${imgFile.name}`, description })
259
- }
258
+ return { relativePath: `${relativePrefix}/${imgFile.name}`, description }
259
+ })
260
260
 
261
- return results
261
+ return settled.filter((r): r is { relativePath: string; description: string } => r !== null)
262
262
  }
@@ -8,6 +8,7 @@
8
8
  */
9
9
 
10
10
  import { extname } from 'node:path'
11
+ import { mapWithConcurrency } from '~/lib/utils/concurrency.js'
11
12
  import type { Result } from '~/lib/utils/result.js'
12
13
  import { Err, Ok } from '~/lib/utils/result.js'
13
14
  import type { ProcessRunner } from '~/platform/process.js'
@@ -17,6 +18,7 @@ import type { Preprocessor, PreprocessorContext, PreprocessorRegistry, Preproces
17
18
  const MAX_DEPTH = 3
18
19
  const MAX_FILES = 500
19
20
  const MAX_TOTAL_SIZE = 100 * 1024 * 1024 // 100MB
21
+ const ZIP_FILE_CONCURRENCY = 10
20
22
 
21
23
  const MIME_MAP: Record<string, string> = {
22
24
  '.pdf': 'application/pdf',
@@ -116,38 +118,45 @@ export class ZipPreprocessor implements Preprocessor {
116
118
  return Err(new Error('Failed to list extracted files'))
117
119
  }
118
120
 
119
- const derivedPaths: string[] = []
120
- const manifest: string[] = []
121
- let fileCount = 0
122
- let totalSize = 0
123
-
124
121
  const files = listResult.value
125
122
  .filter(e => e.type === 'file')
126
123
  .sort((a, b) => a.name.localeCompare(b.name))
127
124
 
125
+ // Pick eligible files first (limits depend on cumulative iteration order, so this stays sequential)
126
+ const eligible: typeof files = []
127
+ let totalSize = 0
128
+ let truncationNotice: string | null = null
129
+
128
130
  for (const file of files) {
129
- if (fileCount >= MAX_FILES) {
130
- manifest.push(`... (truncated, ${files.length - fileCount} more files)`)
131
+ if (eligible.length >= MAX_FILES) {
132
+ truncationNotice = `... (truncated, ${files.length - eligible.length} more files)`
131
133
  break
132
134
  }
133
-
134
135
  const fileSize = file.size ?? 0
135
- totalSize += fileSize
136
- if (totalSize > MAX_TOTAL_SIZE) {
137
- manifest.push('... (total size limit reached)')
136
+ if (totalSize + fileSize > MAX_TOTAL_SIZE) {
137
+ truncationNotice = '... (total size limit reached)'
138
138
  break
139
139
  }
140
+ totalSize += fileSize
141
+ eligible.push(file)
142
+ }
143
+
144
+ const fileCount = eligible.length
140
145
 
141
- fileCount++
146
+ // Process eligible files in parallel with bounded concurrency
147
+ const processed = await mapWithConcurrency(eligible, ZIP_FILE_CONCURRENCY, async (file) => {
148
+ const collectedPaths: string[] = []
142
149
 
143
150
  const fileRealPath = extractStore.realPath(file.name)
144
151
  if (!fileRealPath.ok) {
145
- manifest.push(`- ${file.name} (path resolution failed)`)
146
- continue
152
+ return {
153
+ manifestEntry: `- ${file.name} (path resolution failed)`,
154
+ derivedPaths: collectedPaths,
155
+ }
147
156
  }
148
157
 
149
158
  const relativePath = `extracted/${file.name}`
150
- derivedPaths.push(relativePath)
159
+ collectedPaths.push(relativePath)
151
160
 
152
161
  const mime = getMimeType(file.name)
153
162
  let contentSummary = ''
@@ -171,7 +180,7 @@ export class ZipPreprocessor implements Preprocessor {
171
180
  if (subResult.ok) {
172
181
  if (subResult.value.derivedPaths) {
173
182
  for (const dp of subResult.value.derivedPaths) {
174
- derivedPaths.push(`extracted/${file.name}-content/${dp}`)
183
+ collectedPaths.push(`extracted/${file.name}-content/${dp}`)
175
184
  }
176
185
  }
177
186
  if (subResult.value.extractedContent) {
@@ -186,8 +195,19 @@ export class ZipPreprocessor implements Preprocessor {
186
195
  }
187
196
  }
188
197
 
189
- manifest.push(`- ${file.name} (${formatSize(fileSize)})${contentSummary}`)
198
+ return {
199
+ manifestEntry: `- ${file.name} (${formatSize(file.size ?? 0)})${contentSummary}`,
200
+ derivedPaths: collectedPaths,
201
+ }
202
+ })
203
+
204
+ const derivedPaths: string[] = []
205
+ const manifest: string[] = []
206
+ for (const item of processed) {
207
+ derivedPaths.push(...item.derivedPaths)
208
+ manifest.push(item.manifestEntry)
190
209
  }
210
+ if (truncationNotice) manifest.push(truncationNotice)
191
211
 
192
212
  const fullManifest = `## ZIP Contents (${fileCount} files)\n\n${manifest.join('\n')}`
193
213
 
@@ -9,7 +9,7 @@ export const uploadEvents = createEventsFactory({
9
9
  filename: z4.string(),
10
10
  mimeType: z4.string(),
11
11
  size: z4.number(),
12
- status: z4.enum(['ready', 'failed']),
12
+ status: z4.enum(['processing', 'ready', 'failed']),
13
13
  extractedContent: z4.string().optional(),
14
14
  derivedPaths: z4.array(z4.string()).optional(),
15
15
  error: z4.string().optional(),