npm - @juspay/neurolink - Versions diffs - 9.56.0 → 9.56.2 - Mend

@juspay/neurolink 9.56.0 → 9.56.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/CHANGELOG.md +12 -0
package/dist/browser/neurolink.min.js +307 -307
package/dist/core/redisConversationMemoryManager.js +5 -1
package/dist/files/fileReferenceRegistry.js +25 -10
package/dist/lib/core/redisConversationMemoryManager.js +5 -1
package/dist/lib/files/fileReferenceRegistry.js +25 -10
package/dist/lib/types/file.d.ts +10 -0
package/dist/lib/types/fileReference.d.ts +9 -0
package/dist/lib/utils/fileDetector.d.ts +7 -0
package/dist/lib/utils/fileDetector.js +47 -0
package/dist/lib/utils/messageBuilder.js +18 -1
package/dist/lib/utils/mimeTypeHints.d.ts +40 -0
package/dist/lib/utils/mimeTypeHints.js +122 -0
package/dist/types/file.d.ts +10 -0
package/dist/types/fileReference.d.ts +9 -0
package/dist/utils/fileDetector.d.ts +7 -0
package/dist/utils/fileDetector.js +47 -0
package/dist/utils/messageBuilder.js +18 -1
package/dist/utils/mimeTypeHints.d.ts +40 -0
package/dist/utils/mimeTypeHints.js +121 -0
package/package.json +1 -1

package/dist/core/redisConversationMemoryManager.js CHANGED Viewed

@@ -936,12 +936,16 @@ export class RedisConversationMemoryManager {
             const titleGenerator = new NeuroLink({
                 conversationMemory: { enabled: false },
             });
-            const titlePrompt = `Generate a clear, concise, and descriptive title (5–8 words maximum) for a conversation based on the following user message.
+            const defaultTitlePrompt = `Generate a clear, concise, and descriptive title (20-25 letters maximum) for a conversation based on the following user message.
 The title must meaningfully reflect the topic or intent of the message.
 Do not output anything unrelated, vague, or generic.
 Do not say you cannot create a title. Always return a valid title.
 User message: "${userMessage}"`;
+            const customPrompt = process.env.NEUROLINK_TITLE_PROMPT;
+            const titlePrompt = customPrompt
+                ? customPrompt.replace(/\$\{userMessage\}/g, userMessage)
+                : defaultTitlePrompt;
             const result = await titleGenerator.generate({
                 input: { text: titlePrompt },
                 provider: this.config.summarizationProvider || "vertex",

package/dist/files/fileReferenceRegistry.js CHANGED Viewed

@@ -17,6 +17,7 @@ import { tmpdir } from "node:os";
 import { basename, extname, join } from "node:path";
 import { estimatePostProcessingTokens } from "../context/fileTokenBudget.js";
 import { logger } from "../utils/logger.js";
+import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "../utils/mimeTypeHints.js";
 import { StreamingReader } from "./streamingReader.js";
 import { SIZE_TIER_THRESHOLDS } from "../types/index.js";
 /** Default maximum files in registry before LRU eviction */
@@ -89,19 +90,33 @@ export class FileReferenceRegistry {
             const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
             throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
         }
+        // Normalize the caller-provided mimetype hint — shared helper drops
+        // `application/octet-stream` because that opaque sentinel would
+        // otherwise be trusted verbatim for the output mimeType and mask a
+        // better magic-byte-derived classification (e.g. PNG bytes hinted as
+        // octet-stream would record mimeType=octet-stream, not image/png).
+        const hintMime = normalizeMimeHint(options.mimetype);
+        const hintExt = hintMime ? mimeHintToExtension(hintMime) : "";
         // Detect file type from magic bytes and extension.
-        // If the provided filename has no extension, append one guessed from magic bytes
-        // so downstream processors (e.g., VideoProcessor) can validate by extension.
-        let filename = options.filename || `file-${Date.now()}${this.guessExtension(buffer)}`;
-        if (!extname(filename)) {
-            const guessedExt = this.guessExtension(buffer);
-            if (guessedExt) {
-                filename = `${filename}${guessedExt}`;
-            }
+        // If the provided filename has no extension, append one guessed from the
+        // mimetype hint first (more reliable for text formats than magic bytes),
+        // then fall back to magic bytes — so downstream processors (e.g.,
+        // VideoProcessor) can validate by extension. Compute once, reuse.
+        const synthDefaultExt = hintExt
+            ? `.${hintExt}`
+            : this.guessExtension(buffer);
+        let filename = options.filename || `file-${Date.now()}${synthDefaultExt}`;
+        if (!extname(filename) && synthDefaultExt) {
+            filename = `${filename}${synthDefaultExt}`;
         }
         const ext = extname(filename).toLowerCase().replace(".", "");
-        const detectedType = options.fileType || this.detectType(buffer, ext);
-        const mimeType = this.guessMimeType(detectedType, ext);
+        const detectedType = options.fileType ||
+            (hintMime && mimeHintToFileType(hintMime)) ||
+            this.detectType(buffer, ext);
+        // Prefer the caller's hint verbatim for the output mimeType, but only
+        // when normalizeMimeHint accepted it (i.e. it is not the opaque
+        // octet-stream sentinel). Otherwise derive from the detected type.
+        const mimeType = hintMime || this.guessMimeType(detectedType, ext);
         const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
         // Generate preview (fast — only reads first N chars)
         const preview = this.extractPreview(buffer, detectedType, options.maxPreviewChars ?? this.defaultPreviewChars);

package/dist/lib/core/redisConversationMemoryManager.js CHANGED Viewed

@@ -936,12 +936,16 @@ export class RedisConversationMemoryManager {
             const titleGenerator = new NeuroLink({
                 conversationMemory: { enabled: false },
             });
-            const titlePrompt = `Generate a clear, concise, and descriptive title (5–8 words maximum) for a conversation based on the following user message.
+            const defaultTitlePrompt = `Generate a clear, concise, and descriptive title (20-25 letters maximum) for a conversation based on the following user message.
 The title must meaningfully reflect the topic or intent of the message.
 Do not output anything unrelated, vague, or generic.
 Do not say you cannot create a title. Always return a valid title.
 User message: "${userMessage}"`;
+            const customPrompt = process.env.NEUROLINK_TITLE_PROMPT;
+            const titlePrompt = customPrompt
+                ? customPrompt.replace(/\$\{userMessage\}/g, userMessage)
+                : defaultTitlePrompt;
             const result = await titleGenerator.generate({
                 input: { text: titlePrompt },
                 provider: this.config.summarizationProvider || "vertex",

package/dist/lib/files/fileReferenceRegistry.js CHANGED Viewed

@@ -17,6 +17,7 @@ import { tmpdir } from "node:os";
 import { basename, extname, join } from "node:path";
 import { estimatePostProcessingTokens } from "../context/fileTokenBudget.js";
 import { logger } from "../utils/logger.js";
+import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "../utils/mimeTypeHints.js";
 import { StreamingReader } from "./streamingReader.js";
 import { SIZE_TIER_THRESHOLDS } from "../types/index.js";
 /** Default maximum files in registry before LRU eviction */
@@ -89,19 +90,33 @@ export class FileReferenceRegistry {
             const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
             throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
         }
+        // Normalize the caller-provided mimetype hint — shared helper drops
+        // `application/octet-stream` because that opaque sentinel would
+        // otherwise be trusted verbatim for the output mimeType and mask a
+        // better magic-byte-derived classification (e.g. PNG bytes hinted as
+        // octet-stream would record mimeType=octet-stream, not image/png).
+        const hintMime = normalizeMimeHint(options.mimetype);
+        const hintExt = hintMime ? mimeHintToExtension(hintMime) : "";
         // Detect file type from magic bytes and extension.
-        // If the provided filename has no extension, append one guessed from magic bytes
-        // so downstream processors (e.g., VideoProcessor) can validate by extension.
-        let filename = options.filename || `file-${Date.now()}${this.guessExtension(buffer)}`;
-        if (!extname(filename)) {
-            const guessedExt = this.guessExtension(buffer);
-            if (guessedExt) {
-                filename = `${filename}${guessedExt}`;
-            }
+        // If the provided filename has no extension, append one guessed from the
+        // mimetype hint first (more reliable for text formats than magic bytes),
+        // then fall back to magic bytes — so downstream processors (e.g.,
+        // VideoProcessor) can validate by extension. Compute once, reuse.
+        const synthDefaultExt = hintExt
+            ? `.${hintExt}`
+            : this.guessExtension(buffer);
+        let filename = options.filename || `file-${Date.now()}${synthDefaultExt}`;
+        if (!extname(filename) && synthDefaultExt) {
+            filename = `${filename}${synthDefaultExt}`;
         }
         const ext = extname(filename).toLowerCase().replace(".", "");
-        const detectedType = options.fileType || this.detectType(buffer, ext);
-        const mimeType = this.guessMimeType(detectedType, ext);
+        const detectedType = options.fileType ||
+            (hintMime && mimeHintToFileType(hintMime)) ||
+            this.detectType(buffer, ext);
+        // Prefer the caller's hint verbatim for the output mimeType, but only
+        // when normalizeMimeHint accepted it (i.e. it is not the opaque
+        // octet-stream sentinel). Otherwise derive from the detected type.
+        const mimeType = hintMime || this.guessMimeType(detectedType, ext);
         const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
         // Generate preview (fast — only reads first N chars)
         const preview = this.extractPreview(buffer, detectedType, options.maxPreviewChars ?? this.defaultPreviewChars);

package/dist/lib/types/file.d.ts CHANGED Viewed

@@ -307,6 +307,16 @@ export type FileDetectorOptions = {
     maxRetries?: number;
     /** Initial retry delay in milliseconds with exponential backoff (default: 1000) */
     retryDelay?: number;
+    /**
+     * Caller-provided MIME type hint (e.g. "text/plain", "application/json").
+     * Used when the filename has no extension and magic-byte detection cannot
+     * identify the content — the common Slack/Curator extension-less-buffer
+     * case. When set to a trustworthy mimetype (not "application/octet-stream"),
+     * it short-circuits the detection strategy loop with a high-confidence
+     * result so small files on the eager file-processing path still honor the
+     * hint (the lazy FileReferenceRegistry path has its own hint-handling).
+     */
+    mimetypeHint?: string;
 };
 /**
  * Google AI Studio Files API types

package/dist/lib/types/fileReference.d.ts CHANGED Viewed

@@ -96,6 +96,15 @@ export type FileRegistrationOptions = {
     filename?: string;
     /** Override file type detection */
     fileType?: FileType;
+    /**
+     * Caller-provided MIME type hint (e.g. "text/plain", "application/json").
+     * Used when the filename has no extension and magic-byte detection cannot
+     * identify the content (common for Slack/Curator-style buffers where the
+     * original extension was stripped). Honored during type detection, mimeType
+     * assignment, and filename-extension synthesis. An explicit `fileType`
+     * override still wins over this hint.
+     */
+    mimetype?: string;
     /** Maximum preview length in characters */
     maxPreviewChars?: number;
     /** Skip persisting buffer to temp directory */

package/dist/lib/utils/fileDetector.d.ts CHANGED Viewed

@@ -43,6 +43,13 @@ export declare class FileDetector {
      * Derive byte size from FileInput for tracing.
      */
     private static deriveInputSize;
+    /**
+     * Classify a FileInput into the FileSource enum used by downstream
+     * loaders. Keeps the mimetype-hint short-circuit in detect() able to
+     * produce a valid FileDetectionResult without re-implementing the
+     * source-inference rules scattered across loadContent().
+     */
+    private static deriveInputSource;
     /**
      * Try fallback parsing for a specific file type
      * Used when file detection returns "unknown" but we want to try parsing anyway

package/dist/lib/utils/fileDetector.js CHANGED Viewed

@@ -23,6 +23,7 @@ import { tracers, ATTR, withSpan } from "../telemetry/index.js";
 import { CSVProcessor } from "./csvProcessor.js";
 import { ImageProcessor } from "./imageProcessor.js";
 import { logger } from "./logger.js";
+import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "./mimeTypeHints.js";
 import { PDFProcessor } from "./pdfProcessor.js";
 /**
  * Default retry configuration constants
@@ -320,6 +321,27 @@ export class FileDetector {
         }
         return 0;
     }
+    /**
+     * Classify a FileInput into the FileSource enum used by downstream
+     * loaders. Keeps the mimetype-hint short-circuit in detect() able to
+     * produce a valid FileDetectionResult without re-implementing the
+     * source-inference rules scattered across loadContent().
+     */
+    static deriveInputSource(input) {
+        if (Buffer.isBuffer(input)) {
+            return "buffer";
+        }
+        if (typeof input === "string") {
+            if (input.startsWith("data:")) {
+                return "datauri";
+            }
+            if (input.startsWith("http://") || input.startsWith("https://")) {
+                return "url";
+            }
+            return "path";
+        }
+        return "buffer";
+    }
     /**
      * Try fallback parsing for a specific file type
      * Used when file detection returns "unknown" but we want to try parsing anyway
@@ -520,6 +542,31 @@ export class FileDetector {
      * Stops at first strategy with confidence >= threshold (default: 80%)
      */
     static async detect(input, options) {
+        // Short-circuit on a trustworthy caller-provided mimetype hint. This is
+        // the eager-path counterpart to FileReferenceRegistry.register()'s hint
+        // handling — necessary for tiny files (<= TINY_MAX) that skip the lazy
+        // registry path. normalizeMimeHint drops "application/octet-stream" so a
+        // caller cannot hide real content behind the opaque sentinel.
+        const hintMime = normalizeMimeHint(options?.mimetypeHint);
+        if (hintMime) {
+            const type = mimeHintToFileType(hintMime);
+            if (type) {
+                const ext = mimeHintToExtension(hintMime);
+                const result = {
+                    type,
+                    mimeType: hintMime,
+                    extension: ext || null,
+                    source: FileDetector.deriveInputSource(input),
+                    metadata: {
+                        confidence: 95,
+                        filename: FileDetector.deriveInputFilename(input),
+                        size: FileDetector.deriveInputSize(input),
+                    },
+                };
+                logger.info(`[FileDetector] Type: ${type} (95%, from mimetype hint: ${hintMime})`);
+                return result;
+            }
+        }
         const confidenceThreshold = options?.confidenceThreshold ?? 80;
         const strategies = [
             new MagicBytesStrategy(),

package/dist/lib/utils/messageBuilder.js CHANGED Viewed

@@ -397,6 +397,9 @@ function toModelMessage(message) {
     if (message.role === "user" ||
         message.role === "assistant" ||
         message.role === "system") {
+        if (message.content.trim() === "") {
+            return null;
+        }
         return {
             role: message.role,
             content: message.content,
@@ -551,6 +554,7 @@ export async function buildMessagesArray(options) {
                         maxSize: 50 * 1024 * 1024,
                         allowedTypes: ["csv"],
                         csvOptions: csvOptions,
+                        mimetypeHint: isFileWithMetadata(file) ? file.mimetype : undefined,
                     });
                     if (result.type === "csv") {
                         let csvSection = `\n\n## CSV Data from "${filename}":\n`;
@@ -803,6 +807,12 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
                 // ─── Full processing path (current behavior) ──────────────────
                 const genericFileMaxSize = Math.max(maxSize, 100 * 1024 * 1024);
                 const rawFileInput = isFileWithMetadata(file) ? file.buffer : file;
+                // Forward the caller's mimetype hint (Slack/Curator-style
+                // extension-less buffers) so the eager path classifies correctly
+                // for tiny files — the lazy registry path has its own hint wiring.
+                const fileMimetypeHint = isFileWithMetadata(file)
+                    ? file.mimetype
+                    : undefined;
                 const result = await FileDetector.detectAndProcess(rawFileInput, {
                     maxSize: genericFileMaxSize,
                     allowedTypes: [
@@ -821,6 +831,7 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
                     ],
                     csvOptions: options.csvOptions,
                     provider: provider,
+                    mimetypeHint: fileMimetypeHint,
                 });
                 appendDetectedFileResult(result, file, options);
                 includedCount++;
@@ -1655,7 +1666,13 @@ async function tryRegisterFileReference(file, fileSize, registry, index = 0) {
             return false;
         }
         const filename = extractFilename(file, index);
-        await registry.register(buffer, getFileSource(file), { filename });
+        const mimetype = typeof file === "object" && !Buffer.isBuffer(file)
+            ? file.mimetype
+            : undefined;
+        await registry.register(buffer, getFileSource(file), {
+            filename,
+            mimetype,
+        });
         logger.info(`[FileDetector] Registered "${filename}" (${(fileSize / 1024).toFixed(0)} KB) ` +
             `as lazy reference — skipping upfront processing`);
         return true;

package/dist/lib/utils/mimeTypeHints.d.ts ADDED Viewed

@@ -0,0 +1,40 @@
+/**
+ * Shared helpers for caller-provided MIME type hints.
+ *
+ * A "MIME hint" is a mimetype string the SDK receives alongside a raw Buffer
+ * whose original filename is missing (e.g. Slack/Curator file-uploads that
+ * arrive as { buffer, filename: "Untitled", mimetype: "text/plain" }). When
+ * the filename has no extension and magic-byte detection cannot identify the
+ * content, the hint is the only signal we have.
+ *
+ * Both FileReferenceRegistry.register() and FileDetector.detect() consume
+ * these helpers so the trust/normalization rules stay in one place:
+ *
+ *   - `application/octet-stream` is never trusted — it is the opaque
+ *     "I don't know" sentinel and would let a caller hide real content
+ *     behind a generic label (a PNG hinted as octet-stream would otherwise
+ *     record mimeType="application/octet-stream" instead of "image/png").
+ *   - Empty/undefined hints pass through as `undefined`.
+ *   - A hint that cannot be classified maps to `null` so the caller falls
+ *     back to magic-byte / extension detection instead of synthesising a
+ *     wrong type.
+ */
+import type { FileType } from "../types/index.js";
+/**
+ * Normalize a caller-provided mimetype hint: strip any `;charset=...`
+ * parameter, lowercase, trim. Returns undefined for empty strings or for
+ * the opaque `application/octet-stream` sentinel so downstream code can
+ * treat the hint as absent instead of trusting it verbatim.
+ */
+export declare function normalizeMimeHint(raw?: string): string | undefined;
+/**
+ * Map a normalized mimetype hint to a NeuroLink FileType. Returns null when
+ * the mimetype is unknown or too generic to classify confidently.
+ */
+export declare function mimeHintToFileType(mimetype: string): FileType | null;
+/**
+ * Map a normalized mimetype hint to the canonical file extension (without
+ * leading dot). Returns "" when the mimetype is unknown — caller should
+ * then fall back to magic-byte detection.
+ */
+export declare function mimeHintToExtension(mimetype: string): string;

package/dist/lib/utils/mimeTypeHints.js ADDED Viewed

@@ -0,0 +1,122 @@
+const OPAQUE_MIMETYPE = "application/octet-stream";
+/**
+ * Normalize a caller-provided mimetype hint: strip any `;charset=...`
+ * parameter, lowercase, trim. Returns undefined for empty strings or for
+ * the opaque `application/octet-stream` sentinel so downstream code can
+ * treat the hint as absent instead of trusting it verbatim.
+ */
+export function normalizeMimeHint(raw) {
+    if (!raw) {
+        return undefined;
+    }
+    const cleaned = raw.split(";")[0].trim().toLowerCase();
+    if (!cleaned || cleaned === OPAQUE_MIMETYPE) {
+        return undefined;
+    }
+    return cleaned;
+}
+/**
+ * Map a normalized mimetype hint to a NeuroLink FileType. Returns null when
+ * the mimetype is unknown or too generic to classify confidently.
+ */
+export function mimeHintToFileType(mimetype) {
+    const exact = {
+        "text/csv": "csv",
+        "application/csv": "csv",
+        "image/svg+xml": "svg",
+        "application/pdf": "pdf",
+        "application/json": "text",
+        "application/xml": "text",
+        "text/xml": "text",
+        "application/yaml": "text",
+        "application/x-yaml": "text",
+        "text/yaml": "text",
+        "application/javascript": "text",
+        "application/typescript": "text",
+        "application/zip": "archive",
+        "application/x-tar": "archive",
+        "application/gzip": "archive",
+        "application/x-gzip": "archive",
+        "application/x-7z-compressed": "archive",
+        "application/vnd.rar": "archive",
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
+        "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
+    };
+    if (exact[mimetype]) {
+        return exact[mimetype];
+    }
+    if (mimetype.startsWith("text/")) {
+        return "text";
+    }
+    if (mimetype.startsWith("image/")) {
+        return "image";
+    }
+    if (mimetype.startsWith("audio/")) {
+        return "audio";
+    }
+    if (mimetype.startsWith("video/")) {
+        return "video";
+    }
+    return null;
+}
+/**
+ * Map a normalized mimetype hint to the canonical file extension (without
+ * leading dot). Returns "" when the mimetype is unknown — caller should
+ * then fall back to magic-byte detection.
+ */
+export function mimeHintToExtension(mimetype) {
+    const table = {
+        // Text
+        "text/plain": "txt",
+        "text/html": "html",
+        "text/css": "css",
+        "text/javascript": "js",
+        "application/javascript": "js",
+        "application/typescript": "ts",
+        "text/markdown": "md",
+        "text/csv": "csv",
+        "application/csv": "csv",
+        "application/json": "json",
+        "application/xml": "xml",
+        "text/xml": "xml",
+        "application/yaml": "yaml",
+        "application/x-yaml": "yaml",
+        "text/yaml": "yaml",
+        // Documents
+        "application/pdf": "pdf",
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
+        "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
+        // Images
+        "image/png": "png",
+        "image/jpeg": "jpg",
+        "image/gif": "gif",
+        "image/webp": "webp",
+        "image/bmp": "bmp",
+        "image/tiff": "tiff",
+        "image/svg+xml": "svg",
+        // Video
+        "video/mp4": "mp4",
+        "video/webm": "webm",
+        "video/quicktime": "mov",
+        "video/x-matroska": "mkv",
+        "video/x-msvideo": "avi",
+        // Audio
+        "audio/mpeg": "mp3",
+        "audio/wav": "wav",
+        "audio/ogg": "ogg",
+        "audio/flac": "flac",
+        "audio/mp4": "m4a",
+        "audio/aac": "aac",
+        // Archives
+        "application/zip": "zip",
+        "application/x-tar": "tar",
+        "application/gzip": "gz",
+        "application/x-gzip": "gz",
+        "application/x-7z-compressed": "7z",
+        "application/vnd.rar": "rar",
+    };
+    return table[mimetype] || "";
+}
+//# sourceMappingURL=mimeTypeHints.js.map

package/dist/types/file.d.ts CHANGED Viewed

@@ -307,6 +307,16 @@ export type FileDetectorOptions = {
     maxRetries?: number;
     /** Initial retry delay in milliseconds with exponential backoff (default: 1000) */
     retryDelay?: number;
+    /**
+     * Caller-provided MIME type hint (e.g. "text/plain", "application/json").
+     * Used when the filename has no extension and magic-byte detection cannot
+     * identify the content — the common Slack/Curator extension-less-buffer
+     * case. When set to a trustworthy mimetype (not "application/octet-stream"),
+     * it short-circuits the detection strategy loop with a high-confidence
+     * result so small files on the eager file-processing path still honor the
+     * hint (the lazy FileReferenceRegistry path has its own hint-handling).
+     */
+    mimetypeHint?: string;
 };
 /**
  * Google AI Studio Files API types

package/dist/types/fileReference.d.ts CHANGED Viewed

@@ -96,6 +96,15 @@ export type FileRegistrationOptions = {
     filename?: string;
     /** Override file type detection */
     fileType?: FileType;
+    /**
+     * Caller-provided MIME type hint (e.g. "text/plain", "application/json").
+     * Used when the filename has no extension and magic-byte detection cannot
+     * identify the content (common for Slack/Curator-style buffers where the
+     * original extension was stripped). Honored during type detection, mimeType
+     * assignment, and filename-extension synthesis. An explicit `fileType`
+     * override still wins over this hint.
+     */
+    mimetype?: string;
     /** Maximum preview length in characters */
     maxPreviewChars?: number;
     /** Skip persisting buffer to temp directory */

package/dist/utils/fileDetector.d.ts CHANGED Viewed

@@ -43,6 +43,13 @@ export declare class FileDetector {
      * Derive byte size from FileInput for tracing.
      */
     private static deriveInputSize;
+    /**
+     * Classify a FileInput into the FileSource enum used by downstream
+     * loaders. Keeps the mimetype-hint short-circuit in detect() able to
+     * produce a valid FileDetectionResult without re-implementing the
+     * source-inference rules scattered across loadContent().
+     */
+    private static deriveInputSource;
     /**
      * Try fallback parsing for a specific file type
      * Used when file detection returns "unknown" but we want to try parsing anyway

package/dist/utils/fileDetector.js CHANGED Viewed

@@ -23,6 +23,7 @@ import { tracers, ATTR, withSpan } from "../telemetry/index.js";
 import { CSVProcessor } from "./csvProcessor.js";
 import { ImageProcessor } from "./imageProcessor.js";
 import { logger } from "./logger.js";
+import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "./mimeTypeHints.js";
 import { PDFProcessor } from "./pdfProcessor.js";
 /**
  * Default retry configuration constants
@@ -320,6 +321,27 @@ export class FileDetector {
         }
         return 0;
     }
+    /**
+     * Classify a FileInput into the FileSource enum used by downstream
+     * loaders. Keeps the mimetype-hint short-circuit in detect() able to
+     * produce a valid FileDetectionResult without re-implementing the
+     * source-inference rules scattered across loadContent().
+     */
+    static deriveInputSource(input) {
+        if (Buffer.isBuffer(input)) {
+            return "buffer";
+        }
+        if (typeof input === "string") {
+            if (input.startsWith("data:")) {
+                return "datauri";
+            }
+            if (input.startsWith("http://") || input.startsWith("https://")) {
+                return "url";
+            }
+            return "path";
+        }
+        return "buffer";
+    }
     /**
      * Try fallback parsing for a specific file type
      * Used when file detection returns "unknown" but we want to try parsing anyway
@@ -520,6 +542,31 @@ export class FileDetector {
      * Stops at first strategy with confidence >= threshold (default: 80%)
      */
     static async detect(input, options) {
+        // Short-circuit on a trustworthy caller-provided mimetype hint. This is
+        // the eager-path counterpart to FileReferenceRegistry.register()'s hint
+        // handling — necessary for tiny files (<= TINY_MAX) that skip the lazy
+        // registry path. normalizeMimeHint drops "application/octet-stream" so a
+        // caller cannot hide real content behind the opaque sentinel.
+        const hintMime = normalizeMimeHint(options?.mimetypeHint);
+        if (hintMime) {
+            const type = mimeHintToFileType(hintMime);
+            if (type) {
+                const ext = mimeHintToExtension(hintMime);
+                const result = {
+                    type,
+                    mimeType: hintMime,
+                    extension: ext || null,
+                    source: FileDetector.deriveInputSource(input),
+                    metadata: {
+                        confidence: 95,
+                        filename: FileDetector.deriveInputFilename(input),
+                        size: FileDetector.deriveInputSize(input),
+                    },
+                };
+                logger.info(`[FileDetector] Type: ${type} (95%, from mimetype hint: ${hintMime})`);
+                return result;
+            }
+        }
         const confidenceThreshold = options?.confidenceThreshold ?? 80;
         const strategies = [
             new MagicBytesStrategy(),

package/dist/utils/messageBuilder.js CHANGED Viewed

@@ -397,6 +397,9 @@ function toModelMessage(message) {
     if (message.role === "user" ||
         message.role === "assistant" ||
         message.role === "system") {
+        if (message.content.trim() === "") {
+            return null;
+        }
         return {
             role: message.role,
             content: message.content,
@@ -551,6 +554,7 @@ export async function buildMessagesArray(options) {
                         maxSize: 50 * 1024 * 1024,
                         allowedTypes: ["csv"],
                         csvOptions: csvOptions,
+                        mimetypeHint: isFileWithMetadata(file) ? file.mimetype : undefined,
                     });
                     if (result.type === "csv") {
                         let csvSection = `\n\n## CSV Data from "${filename}":\n`;
@@ -803,6 +807,12 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
                 // ─── Full processing path (current behavior) ──────────────────
                 const genericFileMaxSize = Math.max(maxSize, 100 * 1024 * 1024);
                 const rawFileInput = isFileWithMetadata(file) ? file.buffer : file;
+                // Forward the caller's mimetype hint (Slack/Curator-style
+                // extension-less buffers) so the eager path classifies correctly
+                // for tiny files — the lazy registry path has its own hint wiring.
+                const fileMimetypeHint = isFileWithMetadata(file)
+                    ? file.mimetype
+                    : undefined;
                 const result = await FileDetector.detectAndProcess(rawFileInput, {
                     maxSize: genericFileMaxSize,
                     allowedTypes: [
@@ -821,6 +831,7 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
                     ],
                     csvOptions: options.csvOptions,
                     provider: provider,
+                    mimetypeHint: fileMimetypeHint,
                 });
                 appendDetectedFileResult(result, file, options);
                 includedCount++;
@@ -1655,7 +1666,13 @@ async function tryRegisterFileReference(file, fileSize, registry, index = 0) {
             return false;
         }
         const filename = extractFilename(file, index);
-        await registry.register(buffer, getFileSource(file), { filename });
+        const mimetype = typeof file === "object" && !Buffer.isBuffer(file)
+            ? file.mimetype
+            : undefined;
+        await registry.register(buffer, getFileSource(file), {
+            filename,
+            mimetype,
+        });
         logger.info(`[FileDetector] Registered "${filename}" (${(fileSize / 1024).toFixed(0)} KB) ` +
             `as lazy reference — skipping upfront processing`);
         return true;