@juspay/neurolink 9.56.0 → 9.56.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -936,12 +936,16 @@ export class RedisConversationMemoryManager {
936
936
  const titleGenerator = new NeuroLink({
937
937
  conversationMemory: { enabled: false },
938
938
  });
939
- const titlePrompt = `Generate a clear, concise, and descriptive title (5–8 words maximum) for a conversation based on the following user message.
939
+ const defaultTitlePrompt = `Generate a clear, concise, and descriptive title (20-25 letters maximum) for a conversation based on the following user message.
940
940
  The title must meaningfully reflect the topic or intent of the message.
941
941
  Do not output anything unrelated, vague, or generic.
942
942
  Do not say you cannot create a title. Always return a valid title.
943
943
 
944
944
  User message: "${userMessage}"`;
945
+ const customPrompt = process.env.NEUROLINK_TITLE_PROMPT;
946
+ const titlePrompt = customPrompt
947
+ ? customPrompt.replace(/\$\{userMessage\}/g, userMessage)
948
+ : defaultTitlePrompt;
945
949
  const result = await titleGenerator.generate({
946
950
  input: { text: titlePrompt },
947
951
  provider: this.config.summarizationProvider || "vertex",
@@ -17,6 +17,7 @@ import { tmpdir } from "node:os";
17
17
  import { basename, extname, join } from "node:path";
18
18
  import { estimatePostProcessingTokens } from "../context/fileTokenBudget.js";
19
19
  import { logger } from "../utils/logger.js";
20
+ import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "../utils/mimeTypeHints.js";
20
21
  import { StreamingReader } from "./streamingReader.js";
21
22
  import { SIZE_TIER_THRESHOLDS } from "../types/index.js";
22
23
  /** Default maximum files in registry before LRU eviction */
@@ -89,19 +90,33 @@ export class FileReferenceRegistry {
89
90
  const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
90
91
  throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
91
92
  }
93
+ // Normalize the caller-provided mimetype hint — shared helper drops
94
+ // `application/octet-stream` because that opaque sentinel would
95
+ // otherwise be trusted verbatim for the output mimeType and mask a
96
+ // better magic-byte-derived classification (e.g. PNG bytes hinted as
97
+ // octet-stream would record mimeType=octet-stream, not image/png).
98
+ const hintMime = normalizeMimeHint(options.mimetype);
99
+ const hintExt = hintMime ? mimeHintToExtension(hintMime) : "";
92
100
  // Detect file type from magic bytes and extension.
93
- // If the provided filename has no extension, append one guessed from magic bytes
94
- // so downstream processors (e.g., VideoProcessor) can validate by extension.
95
- let filename = options.filename || `file-${Date.now()}${this.guessExtension(buffer)}`;
96
- if (!extname(filename)) {
97
- const guessedExt = this.guessExtension(buffer);
98
- if (guessedExt) {
99
- filename = `${filename}${guessedExt}`;
100
- }
101
+ // If the provided filename has no extension, append one guessed from the
102
+ // mimetype hint first (more reliable for text formats than magic bytes),
103
+ // then fall back to magic bytes — so downstream processors (e.g.,
104
+ // VideoProcessor) can validate by extension. Compute once, reuse.
105
+ const synthDefaultExt = hintExt
106
+ ? `.${hintExt}`
107
+ : this.guessExtension(buffer);
108
+ let filename = options.filename || `file-${Date.now()}${synthDefaultExt}`;
109
+ if (!extname(filename) && synthDefaultExt) {
110
+ filename = `${filename}${synthDefaultExt}`;
101
111
  }
102
112
  const ext = extname(filename).toLowerCase().replace(".", "");
103
- const detectedType = options.fileType || this.detectType(buffer, ext);
104
- const mimeType = this.guessMimeType(detectedType, ext);
113
+ const detectedType = options.fileType ||
114
+ (hintMime && mimeHintToFileType(hintMime)) ||
115
+ this.detectType(buffer, ext);
116
+ // Prefer the caller's hint verbatim for the output mimeType, but only
117
+ // when normalizeMimeHint accepted it (i.e. it is not the opaque
118
+ // octet-stream sentinel). Otherwise derive from the detected type.
119
+ const mimeType = hintMime || this.guessMimeType(detectedType, ext);
105
120
  const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
106
121
  // Generate preview (fast — only reads first N chars)
107
122
  const preview = this.extractPreview(buffer, detectedType, options.maxPreviewChars ?? this.defaultPreviewChars);
@@ -936,12 +936,16 @@ export class RedisConversationMemoryManager {
936
936
  const titleGenerator = new NeuroLink({
937
937
  conversationMemory: { enabled: false },
938
938
  });
939
- const titlePrompt = `Generate a clear, concise, and descriptive title (5–8 words maximum) for a conversation based on the following user message.
939
+ const defaultTitlePrompt = `Generate a clear, concise, and descriptive title (20-25 letters maximum) for a conversation based on the following user message.
940
940
  The title must meaningfully reflect the topic or intent of the message.
941
941
  Do not output anything unrelated, vague, or generic.
942
942
  Do not say you cannot create a title. Always return a valid title.
943
943
 
944
944
  User message: "${userMessage}"`;
945
+ const customPrompt = process.env.NEUROLINK_TITLE_PROMPT;
946
+ const titlePrompt = customPrompt
947
+ ? customPrompt.replace(/\$\{userMessage\}/g, userMessage)
948
+ : defaultTitlePrompt;
945
949
  const result = await titleGenerator.generate({
946
950
  input: { text: titlePrompt },
947
951
  provider: this.config.summarizationProvider || "vertex",
@@ -17,6 +17,7 @@ import { tmpdir } from "node:os";
17
17
  import { basename, extname, join } from "node:path";
18
18
  import { estimatePostProcessingTokens } from "../context/fileTokenBudget.js";
19
19
  import { logger } from "../utils/logger.js";
20
+ import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "../utils/mimeTypeHints.js";
20
21
  import { StreamingReader } from "./streamingReader.js";
21
22
  import { SIZE_TIER_THRESHOLDS } from "../types/index.js";
22
23
  /** Default maximum files in registry before LRU eviction */
@@ -89,19 +90,33 @@ export class FileReferenceRegistry {
89
90
  const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
90
91
  throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
91
92
  }
93
+ // Normalize the caller-provided mimetype hint — shared helper drops
94
+ // `application/octet-stream` because that opaque sentinel would
95
+ // otherwise be trusted verbatim for the output mimeType and mask a
96
+ // better magic-byte-derived classification (e.g. PNG bytes hinted as
97
+ // octet-stream would record mimeType=octet-stream, not image/png).
98
+ const hintMime = normalizeMimeHint(options.mimetype);
99
+ const hintExt = hintMime ? mimeHintToExtension(hintMime) : "";
92
100
  // Detect file type from magic bytes and extension.
93
- // If the provided filename has no extension, append one guessed from magic bytes
94
- // so downstream processors (e.g., VideoProcessor) can validate by extension.
95
- let filename = options.filename || `file-${Date.now()}${this.guessExtension(buffer)}`;
96
- if (!extname(filename)) {
97
- const guessedExt = this.guessExtension(buffer);
98
- if (guessedExt) {
99
- filename = `${filename}${guessedExt}`;
100
- }
101
+ // If the provided filename has no extension, append one guessed from the
102
+ // mimetype hint first (more reliable for text formats than magic bytes),
103
+ // then fall back to magic bytes — so downstream processors (e.g.,
104
+ // VideoProcessor) can validate by extension. Compute once, reuse.
105
+ const synthDefaultExt = hintExt
106
+ ? `.${hintExt}`
107
+ : this.guessExtension(buffer);
108
+ let filename = options.filename || `file-${Date.now()}${synthDefaultExt}`;
109
+ if (!extname(filename) && synthDefaultExt) {
110
+ filename = `${filename}${synthDefaultExt}`;
101
111
  }
102
112
  const ext = extname(filename).toLowerCase().replace(".", "");
103
- const detectedType = options.fileType || this.detectType(buffer, ext);
104
- const mimeType = this.guessMimeType(detectedType, ext);
113
+ const detectedType = options.fileType ||
114
+ (hintMime && mimeHintToFileType(hintMime)) ||
115
+ this.detectType(buffer, ext);
116
+ // Prefer the caller's hint verbatim for the output mimeType, but only
117
+ // when normalizeMimeHint accepted it (i.e. it is not the opaque
118
+ // octet-stream sentinel). Otherwise derive from the detected type.
119
+ const mimeType = hintMime || this.guessMimeType(detectedType, ext);
105
120
  const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
106
121
  // Generate preview (fast — only reads first N chars)
107
122
  const preview = this.extractPreview(buffer, detectedType, options.maxPreviewChars ?? this.defaultPreviewChars);
@@ -307,6 +307,16 @@ export type FileDetectorOptions = {
307
307
  maxRetries?: number;
308
308
  /** Initial retry delay in milliseconds with exponential backoff (default: 1000) */
309
309
  retryDelay?: number;
310
+ /**
311
+ * Caller-provided MIME type hint (e.g. "text/plain", "application/json").
312
+ * Meant for extension-less inputs that magic-byte detection cannot identify
313
+ * (the common Slack/Curator buffer case), but applied unconditionally: a
314
+ * trustworthy, classifiable mimetype (not "application/octet-stream")
315
+ * short-circuits the detection strategy loop — before magic bytes or the
316
+ * extension are checked — with a high-confidence result, so small files on the
317
+ * eager path honor the hint (the lazy FileReferenceRegistry path handles its own).
318
+ */
319
+ mimetypeHint?: string;
310
320
  };
311
321
  /**
312
322
  * Google AI Studio Files API types
@@ -96,6 +96,15 @@ export type FileRegistrationOptions = {
96
96
  filename?: string;
97
97
  /** Override file type detection */
98
98
  fileType?: FileType;
99
+ /**
100
+ * Caller-provided MIME type hint (e.g. "text/plain", "application/json").
101
+ * Used when the filename has no extension and magic-byte detection cannot
102
+ * identify the content (common for Slack/Curator-style buffers where the
103
+ * original extension was stripped). Honored during type detection, mimeType
104
+ * assignment, and filename-extension synthesis. An explicit `fileType`
105
+ * override still wins over this hint.
106
+ */
107
+ mimetype?: string;
99
108
  /** Maximum preview length in characters */
100
109
  maxPreviewChars?: number;
101
110
  /** Skip persisting buffer to temp directory */
@@ -43,6 +43,13 @@ export declare class FileDetector {
43
43
  * Derive byte size from FileInput for tracing.
44
44
  */
45
45
  private static deriveInputSize;
46
+ /**
47
+ * Classify a FileInput into the FileSource enum used by downstream
48
+ * loaders. Keeps the mimetype-hint short-circuit in detect() able to
49
+ * produce a valid FileDetectionResult without re-implementing the
50
+ * source-inference rules scattered across loadContent().
51
+ */
52
+ private static deriveInputSource;
46
53
  /**
47
54
  * Try fallback parsing for a specific file type
48
55
  * Used when file detection returns "unknown" but we want to try parsing anyway
@@ -23,6 +23,7 @@ import { tracers, ATTR, withSpan } from "../telemetry/index.js";
23
23
  import { CSVProcessor } from "./csvProcessor.js";
24
24
  import { ImageProcessor } from "./imageProcessor.js";
25
25
  import { logger } from "./logger.js";
26
+ import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "./mimeTypeHints.js";
26
27
  import { PDFProcessor } from "./pdfProcessor.js";
27
28
  /**
28
29
  * Default retry configuration constants
@@ -320,6 +321,27 @@ export class FileDetector {
320
321
  }
321
322
  return 0;
322
323
  }
324
+ /**
325
+ * Classify a FileInput into the FileSource enum used by downstream
326
+ * loaders. Keeps the mimetype-hint short-circuit in detect() able to
327
+ * produce a valid FileDetectionResult without re-implementing the
328
+ * source-inference rules scattered across loadContent().
329
+ */
330
+ static deriveInputSource(input) {
331
+ if (Buffer.isBuffer(input)) {
332
+ return "buffer";
333
+ }
334
+ if (typeof input === "string") {
335
+ if (input.startsWith("data:")) {
336
+ return "datauri";
337
+ }
338
+ if (input.startsWith("http://") || input.startsWith("https://")) {
339
+ return "url";
340
+ }
341
+ return "path";
342
+ }
343
+ return "buffer";
344
+ }
323
345
  /**
324
346
  * Try fallback parsing for a specific file type
325
347
  * Used when file detection returns "unknown" but we want to try parsing anyway
@@ -520,6 +542,31 @@ export class FileDetector {
520
542
  * Stops at first strategy with confidence >= threshold (default: 80%)
521
543
  */
522
544
  static async detect(input, options) {
545
+ // Short-circuit on a trustworthy caller-provided mimetype hint. This is
546
+ // the eager-path counterpart to FileReferenceRegistry.register()'s hint
547
+ // handling — necessary for tiny files (<= TINY_MAX) that skip the lazy
548
+ // registry path. normalizeMimeHint drops "application/octet-stream" so a
549
+ // caller cannot hide real content behind the opaque sentinel.
550
+ const hintMime = normalizeMimeHint(options?.mimetypeHint);
551
+ if (hintMime) {
552
+ const type = mimeHintToFileType(hintMime);
553
+ if (type) {
554
+ const ext = mimeHintToExtension(hintMime);
555
+ const result = {
556
+ type,
557
+ mimeType: hintMime,
558
+ extension: ext || null,
559
+ source: FileDetector.deriveInputSource(input),
560
+ metadata: {
561
+ confidence: 95,
562
+ filename: FileDetector.deriveInputFilename(input),
563
+ size: FileDetector.deriveInputSize(input),
564
+ },
565
+ };
566
+ logger.info(`[FileDetector] Type: ${type} (95%, from mimetype hint: ${hintMime})`);
567
+ return result;
568
+ }
569
+ }
523
570
  const confidenceThreshold = options?.confidenceThreshold ?? 80;
524
571
  const strategies = [
525
572
  new MagicBytesStrategy(),
@@ -397,6 +397,9 @@ function toModelMessage(message) {
397
397
  if (message.role === "user" ||
398
398
  message.role === "assistant" ||
399
399
  message.role === "system") {
400
+ if (message.content.trim() === "") {
401
+ return null;
402
+ }
400
403
  return {
401
404
  role: message.role,
402
405
  content: message.content,
@@ -551,6 +554,7 @@ export async function buildMessagesArray(options) {
551
554
  maxSize: 50 * 1024 * 1024,
552
555
  allowedTypes: ["csv"],
553
556
  csvOptions: csvOptions,
557
+ mimetypeHint: isFileWithMetadata(file) ? file.mimetype : undefined,
554
558
  });
555
559
  if (result.type === "csv") {
556
560
  let csvSection = `\n\n## CSV Data from "${filename}":\n`;
@@ -803,6 +807,12 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
803
807
  // ─── Full processing path (current behavior) ──────────────────
804
808
  const genericFileMaxSize = Math.max(maxSize, 100 * 1024 * 1024);
805
809
  const rawFileInput = isFileWithMetadata(file) ? file.buffer : file;
810
+ // Forward the caller's mimetype hint (Slack/Curator-style
811
+ // extension-less buffers) so the eager path classifies correctly
812
+ // for tiny files — the lazy registry path has its own hint wiring.
813
+ const fileMimetypeHint = isFileWithMetadata(file)
814
+ ? file.mimetype
815
+ : undefined;
806
816
  const result = await FileDetector.detectAndProcess(rawFileInput, {
807
817
  maxSize: genericFileMaxSize,
808
818
  allowedTypes: [
@@ -821,6 +831,7 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
821
831
  ],
822
832
  csvOptions: options.csvOptions,
823
833
  provider: provider,
834
+ mimetypeHint: fileMimetypeHint,
824
835
  });
825
836
  appendDetectedFileResult(result, file, options);
826
837
  includedCount++;
@@ -1655,7 +1666,13 @@ async function tryRegisterFileReference(file, fileSize, registry, index = 0) {
1655
1666
  return false;
1656
1667
  }
1657
1668
  const filename = extractFilename(file, index);
1658
- await registry.register(buffer, getFileSource(file), { filename });
1669
+ const mimetype = typeof file === "object" && !Buffer.isBuffer(file)
1670
+ ? file.mimetype
1671
+ : undefined;
1672
+ await registry.register(buffer, getFileSource(file), {
1673
+ filename,
1674
+ mimetype,
1675
+ });
1659
1676
  logger.info(`[FileDetector] Registered "${filename}" (${(fileSize / 1024).toFixed(0)} KB) ` +
1660
1677
  `as lazy reference — skipping upfront processing`);
1661
1678
  return true;
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Shared helpers for caller-provided MIME type hints.
3
+ *
4
+ * A "MIME hint" is a mimetype string the SDK receives alongside a raw Buffer
5
+ * whose original filename is missing (e.g. Slack/Curator file-uploads that
6
+ * arrive as { buffer, filename: "Untitled", mimetype: "text/plain" }). When
7
+ * the filename has no extension and magic-byte detection cannot identify the
8
+ * content, the hint is the only signal we have.
9
+ *
10
+ * Both FileReferenceRegistry.register() and FileDetector.detect() consume
11
+ * these helpers so the trust/normalization rules stay in one place:
12
+ *
13
+ * - `application/octet-stream` is never trusted — it is the opaque
14
+ * "I don't know" sentinel and would let a caller hide real content
15
+ * behind a generic label (a PNG hinted as octet-stream would otherwise
16
+ * record mimeType="application/octet-stream" instead of "image/png").
17
+ * - Empty/undefined hints pass through as `undefined`.
18
+ * - A hint that cannot be classified maps to `null` so the caller falls
19
+ * back to magic-byte / extension detection instead of synthesising a
20
+ * wrong type.
21
+ */
22
+ import type { FileType } from "../types/index.js";
23
+ /**
24
+ * Normalize a caller-provided mimetype hint: strip any `;charset=...`
25
+ * parameter, lowercase, trim. Returns undefined for empty strings or for
26
+ * the opaque `application/octet-stream` sentinel so downstream code can
27
+ * treat the hint as absent instead of trusting it verbatim.
28
+ */
29
+ export declare function normalizeMimeHint(raw?: string): string | undefined;
30
+ /**
31
+ * Map a normalized mimetype hint to a NeuroLink FileType. Returns null when
32
+ * the mimetype is unknown or too generic to classify confidently.
33
+ */
34
+ export declare function mimeHintToFileType(mimetype: string): FileType | null;
35
+ /**
36
+ * Map a normalized mimetype hint to the canonical file extension (without
37
+ * leading dot). Returns "" when the mimetype is unknown — caller should
38
+ * then fall back to magic-byte detection.
39
+ */
40
+ export declare function mimeHintToExtension(mimetype: string): string;
@@ -0,0 +1,122 @@
1
/** The generic "unknown binary" mimetype; never trusted as a hint. */
const OPAQUE_MIMETYPE = "application/octet-stream";
/**
 * Normalize a caller-provided mimetype hint.
 *
 * Everything from the first `;` onwards (e.g. `; charset=utf-8`) is dropped,
 * then the remainder is trimmed and lowercased.
 *
 * @param {string} [raw] - Mimetype exactly as supplied by the caller.
 * @returns {string|undefined} The bare lowercase mimetype, or `undefined`
 *   when the hint is missing, not a string, empty after cleaning, or the
 *   opaque `application/octet-stream` sentinel — so downstream code can treat
 *   the hint as absent instead of trusting it verbatim.
 */
export function normalizeMimeHint(raw) {
    // Hints are read off untyped caller objects (e.g. `file.mimetype`), so a
    // non-string value is treated as "no hint" rather than crashing on `.split`.
    if (typeof raw !== "string") {
        return undefined;
    }
    const cleaned = raw.split(";")[0].trim().toLowerCase();
    if (!cleaned || cleaned === OPAQUE_MIMETYPE) {
        return undefined;
    }
    return cleaned;
}
18
/**
 * Mimetypes whose FileType cannot be derived from the top-level media type
 * alone. Held in a Map (not an object literal) so that hints such as
 * "constructor" or "__proto__" cannot resolve to inherited Object.prototype
 * members, and built once instead of on every call.
 */
const MIME_HINT_EXACT_FILE_TYPES = new Map([
    ["text/csv", "csv"],
    ["application/csv", "csv"],
    ["image/svg+xml", "svg"],
    ["application/pdf", "pdf"],
    ["application/json", "text"],
    ["application/xml", "text"],
    ["text/xml", "text"],
    ["application/yaml", "text"],
    ["application/x-yaml", "text"],
    ["text/yaml", "text"],
    ["application/javascript", "text"],
    ["application/typescript", "text"],
    ["application/zip", "archive"],
    ["application/x-tar", "archive"],
    ["application/gzip", "archive"],
    ["application/x-gzip", "archive"],
    ["application/x-7z-compressed", "archive"],
    ["application/vnd.rar", "archive"],
    ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"],
    ["application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx"],
    ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"],
]);
/**
 * Map a normalized mimetype hint to a NeuroLink FileType.
 *
 * Exact matches are consulted first so that specific subtypes win over their
 * media-type family (e.g. "text/csv" is "csv", "image/svg+xml" is "svg");
 * anything else falls back to the "text/", "image/", "audio/" or "video/"
 * prefix.
 *
 * @param {string} mimetype - Lowercase mimetype without parameters, as
 *   produced by normalizeMimeHint.
 * @returns {string|null} The matching FileType, or `null` when the mimetype
 *   is unknown or too generic to classify confidently.
 */
export function mimeHintToFileType(mimetype) {
    const exactType = MIME_HINT_EXACT_FILE_TYPES.get(mimetype);
    if (exactType) {
        return exactType;
    }
    if (mimetype.startsWith("text/")) {
        return "text";
    }
    if (mimetype.startsWith("image/")) {
        return "image";
    }
    if (mimetype.startsWith("audio/")) {
        return "audio";
    }
    if (mimetype.startsWith("video/")) {
        return "video";
    }
    return null;
}
63
/**
 * Canonical file extension (no leading dot) for each recognised mimetype.
 * Held in a Map (not an object literal) so that hints such as "constructor"
 * or "__proto__" cannot resolve to inherited Object.prototype members, and
 * built once instead of on every call.
 */
const MIME_HINT_EXTENSIONS = new Map([
    // Text
    ["text/plain", "txt"],
    ["text/html", "html"],
    ["text/css", "css"],
    ["text/javascript", "js"],
    ["application/javascript", "js"],
    ["application/typescript", "ts"],
    ["text/markdown", "md"],
    ["text/csv", "csv"],
    ["application/csv", "csv"],
    ["application/json", "json"],
    ["application/xml", "xml"],
    ["text/xml", "xml"],
    ["application/yaml", "yaml"],
    ["application/x-yaml", "yaml"],
    ["text/yaml", "yaml"],
    // Documents
    ["application/pdf", "pdf"],
    ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"],
    ["application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx"],
    ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"],
    // Images
    ["image/png", "png"],
    ["image/jpeg", "jpg"],
    ["image/gif", "gif"],
    ["image/webp", "webp"],
    ["image/bmp", "bmp"],
    ["image/tiff", "tiff"],
    ["image/svg+xml", "svg"],
    // Video
    ["video/mp4", "mp4"],
    ["video/webm", "webm"],
    ["video/quicktime", "mov"],
    ["video/x-matroska", "mkv"],
    ["video/x-msvideo", "avi"],
    // Audio
    ["audio/mpeg", "mp3"],
    ["audio/wav", "wav"],
    ["audio/ogg", "ogg"],
    ["audio/flac", "flac"],
    ["audio/mp4", "m4a"],
    ["audio/aac", "aac"],
    // Archives
    ["application/zip", "zip"],
    ["application/x-tar", "tar"],
    ["application/gzip", "gz"],
    ["application/x-gzip", "gz"],
    ["application/x-7z-compressed", "7z"],
    ["application/vnd.rar", "rar"],
]);
/**
 * Map a normalized mimetype hint to its canonical file extension.
 *
 * @param {string} mimetype - Lowercase mimetype without parameters, as
 *   produced by normalizeMimeHint.
 * @returns {string} The extension without a leading dot, or "" when the
 *   mimetype is not in the table — the caller should then fall back to
 *   magic-byte detection.
 */
export function mimeHintToExtension(mimetype) {
    return MIME_HINT_EXTENSIONS.get(mimetype) ?? "";
}
122
+ //# sourceMappingURL=mimeTypeHints.js.map
@@ -307,6 +307,16 @@ export type FileDetectorOptions = {
307
307
  maxRetries?: number;
308
308
  /** Initial retry delay in milliseconds with exponential backoff (default: 1000) */
309
309
  retryDelay?: number;
310
+ /**
311
+ * Caller-provided MIME type hint (e.g. "text/plain", "application/json").
312
+ * Meant for extension-less inputs that magic-byte detection cannot identify
313
+ * (the common Slack/Curator buffer case), but applied unconditionally: a
314
+ * trustworthy, classifiable mimetype (not "application/octet-stream")
315
+ * short-circuits the detection strategy loop — before magic bytes or the
316
+ * extension are checked — with a high-confidence result, so small files on the
317
+ * eager path honor the hint (the lazy FileReferenceRegistry path handles its own).
318
+ */
319
+ mimetypeHint?: string;
310
320
  };
311
321
  /**
312
322
  * Google AI Studio Files API types
@@ -96,6 +96,15 @@ export type FileRegistrationOptions = {
96
96
  filename?: string;
97
97
  /** Override file type detection */
98
98
  fileType?: FileType;
99
+ /**
100
+ * Caller-provided MIME type hint (e.g. "text/plain", "application/json").
101
+ * Used when the filename has no extension and magic-byte detection cannot
102
+ * identify the content (common for Slack/Curator-style buffers where the
103
+ * original extension was stripped). Honored during type detection, mimeType
104
+ * assignment, and filename-extension synthesis. An explicit `fileType`
105
+ * override still wins over this hint.
106
+ */
107
+ mimetype?: string;
99
108
  /** Maximum preview length in characters */
100
109
  maxPreviewChars?: number;
101
110
  /** Skip persisting buffer to temp directory */
@@ -43,6 +43,13 @@ export declare class FileDetector {
43
43
  * Derive byte size from FileInput for tracing.
44
44
  */
45
45
  private static deriveInputSize;
46
+ /**
47
+ * Classify a FileInput into the FileSource enum used by downstream
48
+ * loaders. Keeps the mimetype-hint short-circuit in detect() able to
49
+ * produce a valid FileDetectionResult without re-implementing the
50
+ * source-inference rules scattered across loadContent().
51
+ */
52
+ private static deriveInputSource;
46
53
  /**
47
54
  * Try fallback parsing for a specific file type
48
55
  * Used when file detection returns "unknown" but we want to try parsing anyway
@@ -23,6 +23,7 @@ import { tracers, ATTR, withSpan } from "../telemetry/index.js";
23
23
  import { CSVProcessor } from "./csvProcessor.js";
24
24
  import { ImageProcessor } from "./imageProcessor.js";
25
25
  import { logger } from "./logger.js";
26
+ import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "./mimeTypeHints.js";
26
27
  import { PDFProcessor } from "./pdfProcessor.js";
27
28
  /**
28
29
  * Default retry configuration constants
@@ -320,6 +321,27 @@ export class FileDetector {
320
321
  }
321
322
  return 0;
322
323
  }
324
+ /**
325
+ * Classify a FileInput into the FileSource enum used by downstream
326
+ * loaders. Keeps the mimetype-hint short-circuit in detect() able to
327
+ * produce a valid FileDetectionResult without re-implementing the
328
+ * source-inference rules scattered across loadContent().
329
+ */
330
+ static deriveInputSource(input) {
331
+ if (Buffer.isBuffer(input)) {
332
+ return "buffer";
333
+ }
334
+ if (typeof input === "string") {
335
+ if (input.startsWith("data:")) {
336
+ return "datauri";
337
+ }
338
+ if (input.startsWith("http://") || input.startsWith("https://")) {
339
+ return "url";
340
+ }
341
+ return "path";
342
+ }
343
+ return "buffer";
344
+ }
323
345
  /**
324
346
  * Try fallback parsing for a specific file type
325
347
  * Used when file detection returns "unknown" but we want to try parsing anyway
@@ -520,6 +542,31 @@ export class FileDetector {
520
542
  * Stops at first strategy with confidence >= threshold (default: 80%)
521
543
  */
522
544
  static async detect(input, options) {
545
+ // Short-circuit on a trustworthy caller-provided mimetype hint. This is
546
+ // the eager-path counterpart to FileReferenceRegistry.register()'s hint
547
+ // handling — necessary for tiny files (<= TINY_MAX) that skip the lazy
548
+ // registry path. normalizeMimeHint drops "application/octet-stream" so a
549
+ // caller cannot hide real content behind the opaque sentinel.
550
+ const hintMime = normalizeMimeHint(options?.mimetypeHint);
551
+ if (hintMime) {
552
+ const type = mimeHintToFileType(hintMime);
553
+ if (type) {
554
+ const ext = mimeHintToExtension(hintMime);
555
+ const result = {
556
+ type,
557
+ mimeType: hintMime,
558
+ extension: ext || null,
559
+ source: FileDetector.deriveInputSource(input),
560
+ metadata: {
561
+ confidence: 95,
562
+ filename: FileDetector.deriveInputFilename(input),
563
+ size: FileDetector.deriveInputSize(input),
564
+ },
565
+ };
566
+ logger.info(`[FileDetector] Type: ${type} (95%, from mimetype hint: ${hintMime})`);
567
+ return result;
568
+ }
569
+ }
523
570
  const confidenceThreshold = options?.confidenceThreshold ?? 80;
524
571
  const strategies = [
525
572
  new MagicBytesStrategy(),
@@ -397,6 +397,9 @@ function toModelMessage(message) {
397
397
  if (message.role === "user" ||
398
398
  message.role === "assistant" ||
399
399
  message.role === "system") {
400
+ if (message.content.trim() === "") {
401
+ return null;
402
+ }
400
403
  return {
401
404
  role: message.role,
402
405
  content: message.content,
@@ -551,6 +554,7 @@ export async function buildMessagesArray(options) {
551
554
  maxSize: 50 * 1024 * 1024,
552
555
  allowedTypes: ["csv"],
553
556
  csvOptions: csvOptions,
557
+ mimetypeHint: isFileWithMetadata(file) ? file.mimetype : undefined,
554
558
  });
555
559
  if (result.type === "csv") {
556
560
  let csvSection = `\n\n## CSV Data from "${filename}":\n`;
@@ -803,6 +807,12 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
803
807
  // ─── Full processing path (current behavior) ──────────────────
804
808
  const genericFileMaxSize = Math.max(maxSize, 100 * 1024 * 1024);
805
809
  const rawFileInput = isFileWithMetadata(file) ? file.buffer : file;
810
+ // Forward the caller's mimetype hint (Slack/Curator-style
811
+ // extension-less buffers) so the eager path classifies correctly
812
+ // for tiny files — the lazy registry path has its own hint wiring.
813
+ const fileMimetypeHint = isFileWithMetadata(file)
814
+ ? file.mimetype
815
+ : undefined;
806
816
  const result = await FileDetector.detectAndProcess(rawFileInput, {
807
817
  maxSize: genericFileMaxSize,
808
818
  allowedTypes: [
@@ -821,6 +831,7 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
821
831
  ],
822
832
  csvOptions: options.csvOptions,
823
833
  provider: provider,
834
+ mimetypeHint: fileMimetypeHint,
824
835
  });
825
836
  appendDetectedFileResult(result, file, options);
826
837
  includedCount++;
@@ -1655,7 +1666,13 @@ async function tryRegisterFileReference(file, fileSize, registry, index = 0) {
1655
1666
  return false;
1656
1667
  }
1657
1668
  const filename = extractFilename(file, index);
1658
- await registry.register(buffer, getFileSource(file), { filename });
1669
+ const mimetype = typeof file === "object" && !Buffer.isBuffer(file)
1670
+ ? file.mimetype
1671
+ : undefined;
1672
+ await registry.register(buffer, getFileSource(file), {
1673
+ filename,
1674
+ mimetype,
1675
+ });
1659
1676
  logger.info(`[FileDetector] Registered "${filename}" (${(fileSize / 1024).toFixed(0)} KB) ` +
1660
1677
  `as lazy reference — skipping upfront processing`);
1661
1678
  return true;