@juspay/neurolink 9.56.0 → 9.56.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +307 -307
- package/dist/core/redisConversationMemoryManager.js +5 -1
- package/dist/files/fileReferenceRegistry.js +25 -10
- package/dist/lib/core/redisConversationMemoryManager.js +5 -1
- package/dist/lib/files/fileReferenceRegistry.js +25 -10
- package/dist/lib/types/file.d.ts +10 -0
- package/dist/lib/types/fileReference.d.ts +9 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -0
- package/dist/lib/utils/fileDetector.js +47 -0
- package/dist/lib/utils/messageBuilder.js +18 -1
- package/dist/lib/utils/mimeTypeHints.d.ts +40 -0
- package/dist/lib/utils/mimeTypeHints.js +122 -0
- package/dist/types/file.d.ts +10 -0
- package/dist/types/fileReference.d.ts +9 -0
- package/dist/utils/fileDetector.d.ts +7 -0
- package/dist/utils/fileDetector.js +47 -0
- package/dist/utils/messageBuilder.js +18 -1
- package/dist/utils/mimeTypeHints.d.ts +40 -0
- package/dist/utils/mimeTypeHints.js +121 -0
- package/package.json +1 -1
|
@@ -936,12 +936,16 @@ export class RedisConversationMemoryManager {
|
|
|
936
936
|
const titleGenerator = new NeuroLink({
|
|
937
937
|
conversationMemory: { enabled: false },
|
|
938
938
|
});
|
|
939
|
-
const
|
|
939
|
+
const defaultTitlePrompt = `Generate a clear, concise, and descriptive title (20-25 letters maximum) for a conversation based on the following user message.
|
|
940
940
|
The title must meaningfully reflect the topic or intent of the message.
|
|
941
941
|
Do not output anything unrelated, vague, or generic.
|
|
942
942
|
Do not say you cannot create a title. Always return a valid title.
|
|
943
943
|
|
|
944
944
|
User message: "${userMessage}"`;
|
|
945
|
+
const customPrompt = process.env.NEUROLINK_TITLE_PROMPT;
|
|
946
|
+
const titlePrompt = customPrompt
|
|
947
|
+
? customPrompt.replace(/\$\{userMessage\}/g, userMessage)
|
|
948
|
+
: defaultTitlePrompt;
|
|
945
949
|
const result = await titleGenerator.generate({
|
|
946
950
|
input: { text: titlePrompt },
|
|
947
951
|
provider: this.config.summarizationProvider || "vertex",
|
|
@@ -17,6 +17,7 @@ import { tmpdir } from "node:os";
|
|
|
17
17
|
import { basename, extname, join } from "node:path";
|
|
18
18
|
import { estimatePostProcessingTokens } from "../context/fileTokenBudget.js";
|
|
19
19
|
import { logger } from "../utils/logger.js";
|
|
20
|
+
import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "../utils/mimeTypeHints.js";
|
|
20
21
|
import { StreamingReader } from "./streamingReader.js";
|
|
21
22
|
import { SIZE_TIER_THRESHOLDS } from "../types/index.js";
|
|
22
23
|
/** Default maximum files in registry before LRU eviction */
|
|
@@ -89,19 +90,33 @@ export class FileReferenceRegistry {
|
|
|
89
90
|
const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
|
|
90
91
|
throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
|
|
91
92
|
}
|
|
93
|
+
// Normalize the caller-provided mimetype hint — shared helper drops
|
|
94
|
+
// `application/octet-stream` because that opaque sentinel would
|
|
95
|
+
// otherwise be trusted verbatim for the output mimeType and mask a
|
|
96
|
+
// better magic-byte-derived classification (e.g. PNG bytes hinted as
|
|
97
|
+
// octet-stream would record mimeType=octet-stream, not image/png).
|
|
98
|
+
const hintMime = normalizeMimeHint(options.mimetype);
|
|
99
|
+
const hintExt = hintMime ? mimeHintToExtension(hintMime) : "";
|
|
92
100
|
// Detect file type from magic bytes and extension.
|
|
93
|
-
// If the provided filename has no extension, append one guessed from
|
|
94
|
-
//
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
+
// If the provided filename has no extension, append one guessed from the
|
|
102
|
+
// mimetype hint first (more reliable for text formats than magic bytes),
|
|
103
|
+
// then fall back to magic bytes — so downstream processors (e.g.,
|
|
104
|
+
// VideoProcessor) can validate by extension. Compute once, reuse.
|
|
105
|
+
const synthDefaultExt = hintExt
|
|
106
|
+
? `.${hintExt}`
|
|
107
|
+
: this.guessExtension(buffer);
|
|
108
|
+
let filename = options.filename || `file-${Date.now()}${synthDefaultExt}`;
|
|
109
|
+
if (!extname(filename) && synthDefaultExt) {
|
|
110
|
+
filename = `${filename}${synthDefaultExt}`;
|
|
101
111
|
}
|
|
102
112
|
const ext = extname(filename).toLowerCase().replace(".", "");
|
|
103
|
-
const detectedType = options.fileType ||
|
|
104
|
-
|
|
113
|
+
const detectedType = options.fileType ||
|
|
114
|
+
(hintMime && mimeHintToFileType(hintMime)) ||
|
|
115
|
+
this.detectType(buffer, ext);
|
|
116
|
+
// Prefer the caller's hint verbatim for the output mimeType, but only
|
|
117
|
+
// when normalizeMimeHint accepted it (i.e. it is not the opaque
|
|
118
|
+
// octet-stream sentinel). Otherwise derive from the detected type.
|
|
119
|
+
const mimeType = hintMime || this.guessMimeType(detectedType, ext);
|
|
105
120
|
const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
|
|
106
121
|
// Generate preview (fast — only reads first N chars)
|
|
107
122
|
const preview = this.extractPreview(buffer, detectedType, options.maxPreviewChars ?? this.defaultPreviewChars);
|
|
@@ -936,12 +936,16 @@ export class RedisConversationMemoryManager {
|
|
|
936
936
|
const titleGenerator = new NeuroLink({
|
|
937
937
|
conversationMemory: { enabled: false },
|
|
938
938
|
});
|
|
939
|
-
const
|
|
939
|
+
const defaultTitlePrompt = `Generate a clear, concise, and descriptive title (20-25 letters maximum) for a conversation based on the following user message.
|
|
940
940
|
The title must meaningfully reflect the topic or intent of the message.
|
|
941
941
|
Do not output anything unrelated, vague, or generic.
|
|
942
942
|
Do not say you cannot create a title. Always return a valid title.
|
|
943
943
|
|
|
944
944
|
User message: "${userMessage}"`;
|
|
945
|
+
const customPrompt = process.env.NEUROLINK_TITLE_PROMPT;
|
|
946
|
+
const titlePrompt = customPrompt
|
|
947
|
+
? customPrompt.replace(/\$\{userMessage\}/g, userMessage)
|
|
948
|
+
: defaultTitlePrompt;
|
|
945
949
|
const result = await titleGenerator.generate({
|
|
946
950
|
input: { text: titlePrompt },
|
|
947
951
|
provider: this.config.summarizationProvider || "vertex",
|
|
@@ -17,6 +17,7 @@ import { tmpdir } from "node:os";
|
|
|
17
17
|
import { basename, extname, join } from "node:path";
|
|
18
18
|
import { estimatePostProcessingTokens } from "../context/fileTokenBudget.js";
|
|
19
19
|
import { logger } from "../utils/logger.js";
|
|
20
|
+
import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "../utils/mimeTypeHints.js";
|
|
20
21
|
import { StreamingReader } from "./streamingReader.js";
|
|
21
22
|
import { SIZE_TIER_THRESHOLDS } from "../types/index.js";
|
|
22
23
|
/** Default maximum files in registry before LRU eviction */
|
|
@@ -89,19 +90,33 @@ export class FileReferenceRegistry {
|
|
|
89
90
|
const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
|
|
90
91
|
throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
|
|
91
92
|
}
|
|
93
|
+
// Normalize the caller-provided mimetype hint — shared helper drops
|
|
94
|
+
// `application/octet-stream` because that opaque sentinel would
|
|
95
|
+
// otherwise be trusted verbatim for the output mimeType and mask a
|
|
96
|
+
// better magic-byte-derived classification (e.g. PNG bytes hinted as
|
|
97
|
+
// octet-stream would record mimeType=octet-stream, not image/png).
|
|
98
|
+
const hintMime = normalizeMimeHint(options.mimetype);
|
|
99
|
+
const hintExt = hintMime ? mimeHintToExtension(hintMime) : "";
|
|
92
100
|
// Detect file type from magic bytes and extension.
|
|
93
|
-
// If the provided filename has no extension, append one guessed from
|
|
94
|
-
//
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
+
// If the provided filename has no extension, append one guessed from the
|
|
102
|
+
// mimetype hint first (more reliable for text formats than magic bytes),
|
|
103
|
+
// then fall back to magic bytes — so downstream processors (e.g.,
|
|
104
|
+
// VideoProcessor) can validate by extension. Compute once, reuse.
|
|
105
|
+
const synthDefaultExt = hintExt
|
|
106
|
+
? `.${hintExt}`
|
|
107
|
+
: this.guessExtension(buffer);
|
|
108
|
+
let filename = options.filename || `file-${Date.now()}${synthDefaultExt}`;
|
|
109
|
+
if (!extname(filename) && synthDefaultExt) {
|
|
110
|
+
filename = `${filename}${synthDefaultExt}`;
|
|
101
111
|
}
|
|
102
112
|
const ext = extname(filename).toLowerCase().replace(".", "");
|
|
103
|
-
const detectedType = options.fileType ||
|
|
104
|
-
|
|
113
|
+
const detectedType = options.fileType ||
|
|
114
|
+
(hintMime && mimeHintToFileType(hintMime)) ||
|
|
115
|
+
this.detectType(buffer, ext);
|
|
116
|
+
// Prefer the caller's hint verbatim for the output mimeType, but only
|
|
117
|
+
// when normalizeMimeHint accepted it (i.e. it is not the opaque
|
|
118
|
+
// octet-stream sentinel). Otherwise derive from the detected type.
|
|
119
|
+
const mimeType = hintMime || this.guessMimeType(detectedType, ext);
|
|
105
120
|
const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
|
|
106
121
|
// Generate preview (fast — only reads first N chars)
|
|
107
122
|
const preview = this.extractPreview(buffer, detectedType, options.maxPreviewChars ?? this.defaultPreviewChars);
|
package/dist/lib/types/file.d.ts
CHANGED
|
@@ -307,6 +307,16 @@ export type FileDetectorOptions = {
|
|
|
307
307
|
maxRetries?: number;
|
|
308
308
|
/** Initial retry delay in milliseconds with exponential backoff (default: 1000) */
|
|
309
309
|
retryDelay?: number;
|
|
310
|
+
/**
|
|
311
|
+
* Caller-provided MIME type hint (e.g. "text/plain", "application/json").
|
|
312
|
+
* Used when the filename has no extension and magic-byte detection cannot
|
|
313
|
+
* identify the content — the common Slack/Curator extension-less-buffer
|
|
314
|
+
* case. When set to a trustworthy mimetype (not "application/octet-stream"),
|
|
315
|
+
* it short-circuits the detection strategy loop with a high-confidence
|
|
316
|
+
* result so small files on the eager file-processing path still honor the
|
|
317
|
+
* hint (the lazy FileReferenceRegistry path has its own hint-handling).
|
|
318
|
+
*/
|
|
319
|
+
mimetypeHint?: string;
|
|
310
320
|
};
|
|
311
321
|
/**
|
|
312
322
|
* Google AI Studio Files API types
|
|
@@ -96,6 +96,15 @@ export type FileRegistrationOptions = {
|
|
|
96
96
|
filename?: string;
|
|
97
97
|
/** Override file type detection */
|
|
98
98
|
fileType?: FileType;
|
|
99
|
+
/**
|
|
100
|
+
* Caller-provided MIME type hint (e.g. "text/plain", "application/json").
|
|
101
|
+
* Used when the filename has no extension and magic-byte detection cannot
|
|
102
|
+
* identify the content (common for Slack/Curator-style buffers where the
|
|
103
|
+
* original extension was stripped). Honored during type detection, mimeType
|
|
104
|
+
* assignment, and filename-extension synthesis. An explicit `fileType`
|
|
105
|
+
* override still wins over this hint.
|
|
106
|
+
*/
|
|
107
|
+
mimetype?: string;
|
|
99
108
|
/** Maximum preview length in characters */
|
|
100
109
|
maxPreviewChars?: number;
|
|
101
110
|
/** Skip persisting buffer to temp directory */
|
|
@@ -43,6 +43,13 @@ export declare class FileDetector {
|
|
|
43
43
|
* Derive byte size from FileInput for tracing.
|
|
44
44
|
*/
|
|
45
45
|
private static deriveInputSize;
|
|
46
|
+
/**
|
|
47
|
+
* Classify a FileInput into the FileSource enum used by downstream
|
|
48
|
+
* loaders. Keeps the mimetype-hint short-circuit in detect() able to
|
|
49
|
+
* produce a valid FileDetectionResult without re-implementing the
|
|
50
|
+
* source-inference rules scattered across loadContent().
|
|
51
|
+
*/
|
|
52
|
+
private static deriveInputSource;
|
|
46
53
|
/**
|
|
47
54
|
* Try fallback parsing for a specific file type
|
|
48
55
|
* Used when file detection returns "unknown" but we want to try parsing anyway
|
|
@@ -23,6 +23,7 @@ import { tracers, ATTR, withSpan } from "../telemetry/index.js";
|
|
|
23
23
|
import { CSVProcessor } from "./csvProcessor.js";
|
|
24
24
|
import { ImageProcessor } from "./imageProcessor.js";
|
|
25
25
|
import { logger } from "./logger.js";
|
|
26
|
+
import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "./mimeTypeHints.js";
|
|
26
27
|
import { PDFProcessor } from "./pdfProcessor.js";
|
|
27
28
|
/**
|
|
28
29
|
* Default retry configuration constants
|
|
@@ -320,6 +321,27 @@ export class FileDetector {
|
|
|
320
321
|
}
|
|
321
322
|
return 0;
|
|
322
323
|
}
|
|
324
|
+
/**
|
|
325
|
+
* Classify a FileInput into the FileSource enum used by downstream
|
|
326
|
+
* loaders. Keeps the mimetype-hint short-circuit in detect() able to
|
|
327
|
+
* produce a valid FileDetectionResult without re-implementing the
|
|
328
|
+
* source-inference rules scattered across loadContent().
|
|
329
|
+
*/
|
|
330
|
+
static deriveInputSource(input) {
|
|
331
|
+
if (Buffer.isBuffer(input)) {
|
|
332
|
+
return "buffer";
|
|
333
|
+
}
|
|
334
|
+
if (typeof input === "string") {
|
|
335
|
+
if (input.startsWith("data:")) {
|
|
336
|
+
return "datauri";
|
|
337
|
+
}
|
|
338
|
+
if (input.startsWith("http://") || input.startsWith("https://")) {
|
|
339
|
+
return "url";
|
|
340
|
+
}
|
|
341
|
+
return "path";
|
|
342
|
+
}
|
|
343
|
+
return "buffer";
|
|
344
|
+
}
|
|
323
345
|
/**
|
|
324
346
|
* Try fallback parsing for a specific file type
|
|
325
347
|
* Used when file detection returns "unknown" but we want to try parsing anyway
|
|
@@ -520,6 +542,31 @@ export class FileDetector {
|
|
|
520
542
|
* Stops at first strategy with confidence >= threshold (default: 80%)
|
|
521
543
|
*/
|
|
522
544
|
static async detect(input, options) {
|
|
545
|
+
// Short-circuit on a trustworthy caller-provided mimetype hint. This is
|
|
546
|
+
// the eager-path counterpart to FileReferenceRegistry.register()'s hint
|
|
547
|
+
// handling — necessary for tiny files (<= TINY_MAX) that skip the lazy
|
|
548
|
+
// registry path. normalizeMimeHint drops "application/octet-stream" so a
|
|
549
|
+
// caller cannot hide real content behind the opaque sentinel.
|
|
550
|
+
const hintMime = normalizeMimeHint(options?.mimetypeHint);
|
|
551
|
+
if (hintMime) {
|
|
552
|
+
const type = mimeHintToFileType(hintMime);
|
|
553
|
+
if (type) {
|
|
554
|
+
const ext = mimeHintToExtension(hintMime);
|
|
555
|
+
const result = {
|
|
556
|
+
type,
|
|
557
|
+
mimeType: hintMime,
|
|
558
|
+
extension: ext || null,
|
|
559
|
+
source: FileDetector.deriveInputSource(input),
|
|
560
|
+
metadata: {
|
|
561
|
+
confidence: 95,
|
|
562
|
+
filename: FileDetector.deriveInputFilename(input),
|
|
563
|
+
size: FileDetector.deriveInputSize(input),
|
|
564
|
+
},
|
|
565
|
+
};
|
|
566
|
+
logger.info(`[FileDetector] Type: ${type} (95%, from mimetype hint: ${hintMime})`);
|
|
567
|
+
return result;
|
|
568
|
+
}
|
|
569
|
+
}
|
|
523
570
|
const confidenceThreshold = options?.confidenceThreshold ?? 80;
|
|
524
571
|
const strategies = [
|
|
525
572
|
new MagicBytesStrategy(),
|
|
@@ -397,6 +397,9 @@ function toModelMessage(message) {
|
|
|
397
397
|
if (message.role === "user" ||
|
|
398
398
|
message.role === "assistant" ||
|
|
399
399
|
message.role === "system") {
|
|
400
|
+
if (message.content.trim() === "") {
|
|
401
|
+
return null;
|
|
402
|
+
}
|
|
400
403
|
return {
|
|
401
404
|
role: message.role,
|
|
402
405
|
content: message.content,
|
|
@@ -551,6 +554,7 @@ export async function buildMessagesArray(options) {
|
|
|
551
554
|
maxSize: 50 * 1024 * 1024,
|
|
552
555
|
allowedTypes: ["csv"],
|
|
553
556
|
csvOptions: csvOptions,
|
|
557
|
+
mimetypeHint: isFileWithMetadata(file) ? file.mimetype : undefined,
|
|
554
558
|
});
|
|
555
559
|
if (result.type === "csv") {
|
|
556
560
|
let csvSection = `\n\n## CSV Data from "${filename}":\n`;
|
|
@@ -803,6 +807,12 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
|
|
|
803
807
|
// ─── Full processing path (current behavior) ──────────────────
|
|
804
808
|
const genericFileMaxSize = Math.max(maxSize, 100 * 1024 * 1024);
|
|
805
809
|
const rawFileInput = isFileWithMetadata(file) ? file.buffer : file;
|
|
810
|
+
// Forward the caller's mimetype hint (Slack/Curator-style
|
|
811
|
+
// extension-less buffers) so the eager path classifies correctly
|
|
812
|
+
// for tiny files — the lazy registry path has its own hint wiring.
|
|
813
|
+
const fileMimetypeHint = isFileWithMetadata(file)
|
|
814
|
+
? file.mimetype
|
|
815
|
+
: undefined;
|
|
806
816
|
const result = await FileDetector.detectAndProcess(rawFileInput, {
|
|
807
817
|
maxSize: genericFileMaxSize,
|
|
808
818
|
allowedTypes: [
|
|
@@ -821,6 +831,7 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
|
|
|
821
831
|
],
|
|
822
832
|
csvOptions: options.csvOptions,
|
|
823
833
|
provider: provider,
|
|
834
|
+
mimetypeHint: fileMimetypeHint,
|
|
824
835
|
});
|
|
825
836
|
appendDetectedFileResult(result, file, options);
|
|
826
837
|
includedCount++;
|
|
@@ -1655,7 +1666,13 @@ async function tryRegisterFileReference(file, fileSize, registry, index = 0) {
|
|
|
1655
1666
|
return false;
|
|
1656
1667
|
}
|
|
1657
1668
|
const filename = extractFilename(file, index);
|
|
1658
|
-
|
|
1669
|
+
const mimetype = typeof file === "object" && !Buffer.isBuffer(file)
|
|
1670
|
+
? file.mimetype
|
|
1671
|
+
: undefined;
|
|
1672
|
+
await registry.register(buffer, getFileSource(file), {
|
|
1673
|
+
filename,
|
|
1674
|
+
mimetype,
|
|
1675
|
+
});
|
|
1659
1676
|
logger.info(`[FileDetector] Registered "${filename}" (${(fileSize / 1024).toFixed(0)} KB) ` +
|
|
1660
1677
|
`as lazy reference — skipping upfront processing`);
|
|
1661
1678
|
return true;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helpers for caller-provided MIME type hints.
|
|
3
|
+
*
|
|
4
|
+
* A "MIME hint" is a mimetype string the SDK receives alongside a raw Buffer
|
|
5
|
+
* whose original filename is missing (e.g. Slack/Curator file-uploads that
|
|
6
|
+
* arrive as { buffer, filename: "Untitled", mimetype: "text/plain" }). When
|
|
7
|
+
* the filename has no extension and magic-byte detection cannot identify the
|
|
8
|
+
* content, the hint is the only signal we have.
|
|
9
|
+
*
|
|
10
|
+
* Both FileReferenceRegistry.register() and FileDetector.detect() consume
|
|
11
|
+
* these helpers so the trust/normalization rules stay in one place:
|
|
12
|
+
*
|
|
13
|
+
* - `application/octet-stream` is never trusted — it is the opaque
|
|
14
|
+
* "I don't know" sentinel and would let a caller hide real content
|
|
15
|
+
* behind a generic label (a PNG hinted as octet-stream would otherwise
|
|
16
|
+
* record mimeType="application/octet-stream" instead of "image/png").
|
|
17
|
+
* - Empty/undefined hints pass through as `undefined`.
|
|
18
|
+
* - A hint that cannot be classified maps to `null` so the caller falls
|
|
19
|
+
* back to magic-byte / extension detection instead of synthesising a
|
|
20
|
+
* wrong type.
|
|
21
|
+
*/
|
|
22
|
+
import type { FileType } from "../types/index.js";
|
|
23
|
+
/**
|
|
24
|
+
* Normalize a caller-provided mimetype hint: strip any `;charset=...`
|
|
25
|
+
* parameter, lowercase, trim. Returns undefined for empty strings or for
|
|
26
|
+
* the opaque `application/octet-stream` sentinel so downstream code can
|
|
27
|
+
* treat the hint as absent instead of trusting it verbatim.
|
|
28
|
+
*/
|
|
29
|
+
export declare function normalizeMimeHint(raw?: string): string | undefined;
|
|
30
|
+
/**
|
|
31
|
+
* Map a normalized mimetype hint to a NeuroLink FileType. Returns null when
|
|
32
|
+
* the mimetype is unknown or too generic to classify confidently.
|
|
33
|
+
*/
|
|
34
|
+
export declare function mimeHintToFileType(mimetype: string): FileType | null;
|
|
35
|
+
/**
|
|
36
|
+
* Map a normalized mimetype hint to the canonical file extension (without
|
|
37
|
+
* leading dot). Returns "" when the mimetype is unknown — caller should
|
|
38
|
+
* then fall back to magic-byte detection.
|
|
39
|
+
*/
|
|
40
|
+
export declare function mimeHintToExtension(mimetype: string): string;
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
const OPAQUE_MIMETYPE = "application/octet-stream";
/**
 * Normalize a caller-provided mimetype hint: strip any `;charset=...`
 * parameter, trim, and lowercase.
 *
 * @param {string} [raw] - Mimetype hint exactly as received from the caller.
 * @returns {string | undefined} The bare lower-cased mimetype, or undefined
 *   when the hint is missing, is not a string, is empty after cleaning, or is
 *   the opaque `application/octet-stream` sentinel — so downstream code can
 *   treat the hint as absent instead of trusting it verbatim.
 */
export function normalizeMimeHint(raw) {
    // The hint is read off untyped caller objects at runtime. A truthy
    // non-string value has no `.split` and would throw, so it is treated
    // the same as a missing hint.
    if (typeof raw !== "string") {
        return undefined;
    }
    // Everything after the first ";" is a parameter (charset, boundary, ...).
    const cleaned = raw.split(";")[0].trim().toLowerCase();
    if (!cleaned || cleaned === OPAQUE_MIMETYPE) {
        return undefined;
    }
    return cleaned;
}
|
|
18
|
+
/**
 * Exact mimetype → FileType matches. Kept in a Map (built once) so lookups
 * never fall through to Object.prototype keys such as "constructor".
 */
const MIME_HINT_EXACT_FILE_TYPES = new Map([
    ["text/csv", "csv"],
    ["application/csv", "csv"],
    ["image/svg+xml", "svg"],
    ["application/pdf", "pdf"],
    ["application/json", "text"],
    ["application/xml", "text"],
    ["text/xml", "text"],
    ["application/yaml", "text"],
    ["application/x-yaml", "text"],
    ["text/yaml", "text"],
    ["application/javascript", "text"],
    ["application/typescript", "text"],
    ["application/zip", "archive"],
    ["application/x-tar", "archive"],
    ["application/gzip", "archive"],
    ["application/x-gzip", "archive"],
    ["application/x-7z-compressed", "archive"],
    ["application/vnd.rar", "archive"],
    ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"],
    ["application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx"],
    ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"],
]);
/**
 * Map a normalized mimetype hint to a NeuroLink FileType.
 *
 * Exact matches are checked first so that specific types win over their
 * top-level family (e.g. "text/csv" is "csv", not "text"; "image/svg+xml"
 * is "svg", not "image"). Anything else is classified by its `text/`,
 * `image/`, `audio/` or `video/` prefix.
 *
 * @param {string} mimetype - Normalized (lower-cased, parameter-free) hint.
 * @returns {string | null} The FileType, or null when the mimetype is
 *   unknown, too generic to classify confidently, or not a string.
 */
export function mimeHintToFileType(mimetype) {
    if (typeof mimetype !== "string") {
        return null;
    }
    const exactMatch = MIME_HINT_EXACT_FILE_TYPES.get(mimetype);
    if (exactMatch !== undefined) {
        return exactMatch;
    }
    if (mimetype.startsWith("text/")) {
        return "text";
    }
    if (mimetype.startsWith("image/")) {
        return "image";
    }
    if (mimetype.startsWith("audio/")) {
        return "audio";
    }
    if (mimetype.startsWith("video/")) {
        return "video";
    }
    return null;
}
|
|
63
|
+
/**
 * Mimetype → canonical extension (no leading dot). Kept in a Map (built
 * once) so lookups never fall through to Object.prototype keys such as
 * "constructor", which would otherwise leak a function into a filename.
 */
const MIME_HINT_EXTENSIONS = new Map([
    // Text
    ["text/plain", "txt"],
    ["text/html", "html"],
    ["text/css", "css"],
    ["text/javascript", "js"],
    ["application/javascript", "js"],
    ["application/typescript", "ts"],
    ["text/markdown", "md"],
    ["text/csv", "csv"],
    ["application/csv", "csv"],
    ["application/json", "json"],
    ["application/xml", "xml"],
    ["text/xml", "xml"],
    ["application/yaml", "yaml"],
    ["application/x-yaml", "yaml"],
    ["text/yaml", "yaml"],
    // Documents
    ["application/pdf", "pdf"],
    ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"],
    ["application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx"],
    ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"],
    // Images
    ["image/png", "png"],
    ["image/jpeg", "jpg"],
    ["image/gif", "gif"],
    ["image/webp", "webp"],
    ["image/bmp", "bmp"],
    ["image/tiff", "tiff"],
    ["image/svg+xml", "svg"],
    // Video
    ["video/mp4", "mp4"],
    ["video/webm", "webm"],
    ["video/quicktime", "mov"],
    ["video/x-matroska", "mkv"],
    ["video/x-msvideo", "avi"],
    // Audio
    ["audio/mpeg", "mp3"],
    ["audio/wav", "wav"],
    ["audio/ogg", "ogg"],
    ["audio/flac", "flac"],
    ["audio/mp4", "m4a"],
    ["audio/aac", "aac"],
    // Archives
    ["application/zip", "zip"],
    ["application/x-tar", "tar"],
    ["application/gzip", "gz"],
    ["application/x-gzip", "gz"],
    ["application/x-7z-compressed", "7z"],
    ["application/vnd.rar", "rar"],
]);
/**
 * Map a normalized mimetype hint to the canonical file extension (without
 * leading dot).
 *
 * @param {string} mimetype - Normalized (lower-cased, parameter-free) hint.
 * @returns {string} The extension, or "" when the mimetype is unknown — the
 *   caller should then fall back to magic-byte detection.
 */
export function mimeHintToExtension(mimetype) {
    return MIME_HINT_EXTENSIONS.get(mimetype) || "";
}
|
|
122
|
+
//# sourceMappingURL=mimeTypeHints.js.map
|
package/dist/types/file.d.ts
CHANGED
|
@@ -307,6 +307,16 @@ export type FileDetectorOptions = {
|
|
|
307
307
|
maxRetries?: number;
|
|
308
308
|
/** Initial retry delay in milliseconds with exponential backoff (default: 1000) */
|
|
309
309
|
retryDelay?: number;
|
|
310
|
+
/**
|
|
311
|
+
* Caller-provided MIME type hint (e.g. "text/plain", "application/json").
|
|
312
|
+
* Used when the filename has no extension and magic-byte detection cannot
|
|
313
|
+
* identify the content — the common Slack/Curator extension-less-buffer
|
|
314
|
+
* case. When set to a trustworthy mimetype (not "application/octet-stream"),
|
|
315
|
+
* it short-circuits the detection strategy loop with a high-confidence
|
|
316
|
+
* result so small files on the eager file-processing path still honor the
|
|
317
|
+
* hint (the lazy FileReferenceRegistry path has its own hint-handling).
|
|
318
|
+
*/
|
|
319
|
+
mimetypeHint?: string;
|
|
310
320
|
};
|
|
311
321
|
/**
|
|
312
322
|
* Google AI Studio Files API types
|
|
@@ -96,6 +96,15 @@ export type FileRegistrationOptions = {
|
|
|
96
96
|
filename?: string;
|
|
97
97
|
/** Override file type detection */
|
|
98
98
|
fileType?: FileType;
|
|
99
|
+
/**
|
|
100
|
+
* Caller-provided MIME type hint (e.g. "text/plain", "application/json").
|
|
101
|
+
* Used when the filename has no extension and magic-byte detection cannot
|
|
102
|
+
* identify the content (common for Slack/Curator-style buffers where the
|
|
103
|
+
* original extension was stripped). Honored during type detection, mimeType
|
|
104
|
+
* assignment, and filename-extension synthesis. An explicit `fileType`
|
|
105
|
+
* override still wins over this hint.
|
|
106
|
+
*/
|
|
107
|
+
mimetype?: string;
|
|
99
108
|
/** Maximum preview length in characters */
|
|
100
109
|
maxPreviewChars?: number;
|
|
101
110
|
/** Skip persisting buffer to temp directory */
|
|
@@ -43,6 +43,13 @@ export declare class FileDetector {
|
|
|
43
43
|
* Derive byte size from FileInput for tracing.
|
|
44
44
|
*/
|
|
45
45
|
private static deriveInputSize;
|
|
46
|
+
/**
|
|
47
|
+
* Classify a FileInput into the FileSource enum used by downstream
|
|
48
|
+
* loaders. Keeps the mimetype-hint short-circuit in detect() able to
|
|
49
|
+
* produce a valid FileDetectionResult without re-implementing the
|
|
50
|
+
* source-inference rules scattered across loadContent().
|
|
51
|
+
*/
|
|
52
|
+
private static deriveInputSource;
|
|
46
53
|
/**
|
|
47
54
|
* Try fallback parsing for a specific file type
|
|
48
55
|
* Used when file detection returns "unknown" but we want to try parsing anyway
|
|
@@ -23,6 +23,7 @@ import { tracers, ATTR, withSpan } from "../telemetry/index.js";
|
|
|
23
23
|
import { CSVProcessor } from "./csvProcessor.js";
|
|
24
24
|
import { ImageProcessor } from "./imageProcessor.js";
|
|
25
25
|
import { logger } from "./logger.js";
|
|
26
|
+
import { mimeHintToExtension, mimeHintToFileType, normalizeMimeHint, } from "./mimeTypeHints.js";
|
|
26
27
|
import { PDFProcessor } from "./pdfProcessor.js";
|
|
27
28
|
/**
|
|
28
29
|
* Default retry configuration constants
|
|
@@ -320,6 +321,27 @@ export class FileDetector {
|
|
|
320
321
|
}
|
|
321
322
|
return 0;
|
|
322
323
|
}
|
|
324
|
+
/**
|
|
325
|
+
* Classify a FileInput into the FileSource enum used by downstream
|
|
326
|
+
* loaders. Keeps the mimetype-hint short-circuit in detect() able to
|
|
327
|
+
* produce a valid FileDetectionResult without re-implementing the
|
|
328
|
+
* source-inference rules scattered across loadContent().
|
|
329
|
+
*/
|
|
330
|
+
static deriveInputSource(input) {
|
|
331
|
+
if (Buffer.isBuffer(input)) {
|
|
332
|
+
return "buffer";
|
|
333
|
+
}
|
|
334
|
+
if (typeof input === "string") {
|
|
335
|
+
if (input.startsWith("data:")) {
|
|
336
|
+
return "datauri";
|
|
337
|
+
}
|
|
338
|
+
if (input.startsWith("http://") || input.startsWith("https://")) {
|
|
339
|
+
return "url";
|
|
340
|
+
}
|
|
341
|
+
return "path";
|
|
342
|
+
}
|
|
343
|
+
return "buffer";
|
|
344
|
+
}
|
|
323
345
|
/**
|
|
324
346
|
* Try fallback parsing for a specific file type
|
|
325
347
|
* Used when file detection returns "unknown" but we want to try parsing anyway
|
|
@@ -520,6 +542,31 @@ export class FileDetector {
|
|
|
520
542
|
* Stops at first strategy with confidence >= threshold (default: 80%)
|
|
521
543
|
*/
|
|
522
544
|
static async detect(input, options) {
|
|
545
|
+
// Short-circuit on a trustworthy caller-provided mimetype hint. This is
|
|
546
|
+
// the eager-path counterpart to FileReferenceRegistry.register()'s hint
|
|
547
|
+
// handling — necessary for tiny files (<= TINY_MAX) that skip the lazy
|
|
548
|
+
// registry path. normalizeMimeHint drops "application/octet-stream" so a
|
|
549
|
+
// caller cannot hide real content behind the opaque sentinel.
|
|
550
|
+
const hintMime = normalizeMimeHint(options?.mimetypeHint);
|
|
551
|
+
if (hintMime) {
|
|
552
|
+
const type = mimeHintToFileType(hintMime);
|
|
553
|
+
if (type) {
|
|
554
|
+
const ext = mimeHintToExtension(hintMime);
|
|
555
|
+
const result = {
|
|
556
|
+
type,
|
|
557
|
+
mimeType: hintMime,
|
|
558
|
+
extension: ext || null,
|
|
559
|
+
source: FileDetector.deriveInputSource(input),
|
|
560
|
+
metadata: {
|
|
561
|
+
confidence: 95,
|
|
562
|
+
filename: FileDetector.deriveInputFilename(input),
|
|
563
|
+
size: FileDetector.deriveInputSize(input),
|
|
564
|
+
},
|
|
565
|
+
};
|
|
566
|
+
logger.info(`[FileDetector] Type: ${type} (95%, from mimetype hint: ${hintMime})`);
|
|
567
|
+
return result;
|
|
568
|
+
}
|
|
569
|
+
}
|
|
523
570
|
const confidenceThreshold = options?.confidenceThreshold ?? 80;
|
|
524
571
|
const strategies = [
|
|
525
572
|
new MagicBytesStrategy(),
|
|
@@ -397,6 +397,9 @@ function toModelMessage(message) {
|
|
|
397
397
|
if (message.role === "user" ||
|
|
398
398
|
message.role === "assistant" ||
|
|
399
399
|
message.role === "system") {
|
|
400
|
+
if (message.content.trim() === "") {
|
|
401
|
+
return null;
|
|
402
|
+
}
|
|
400
403
|
return {
|
|
401
404
|
role: message.role,
|
|
402
405
|
content: message.content,
|
|
@@ -551,6 +554,7 @@ export async function buildMessagesArray(options) {
|
|
|
551
554
|
maxSize: 50 * 1024 * 1024,
|
|
552
555
|
allowedTypes: ["csv"],
|
|
553
556
|
csvOptions: csvOptions,
|
|
557
|
+
mimetypeHint: isFileWithMetadata(file) ? file.mimetype : undefined,
|
|
554
558
|
});
|
|
555
559
|
if (result.type === "csv") {
|
|
556
560
|
let csvSection = `\n\n## CSV Data from "${filename}":\n`;
|
|
@@ -803,6 +807,12 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
|
|
|
803
807
|
// ─── Full processing path (current behavior) ──────────────────
|
|
804
808
|
const genericFileMaxSize = Math.max(maxSize, 100 * 1024 * 1024);
|
|
805
809
|
const rawFileInput = isFileWithMetadata(file) ? file.buffer : file;
|
|
810
|
+
// Forward the caller's mimetype hint (Slack/Curator-style
|
|
811
|
+
// extension-less buffers) so the eager path classifies correctly
|
|
812
|
+
// for tiny files — the lazy registry path has its own hint wiring.
|
|
813
|
+
const fileMimetypeHint = isFileWithMetadata(file)
|
|
814
|
+
? file.mimetype
|
|
815
|
+
: undefined;
|
|
806
816
|
const result = await FileDetector.detectAndProcess(rawFileInput, {
|
|
807
817
|
maxSize: genericFileMaxSize,
|
|
808
818
|
allowedTypes: [
|
|
@@ -821,6 +831,7 @@ async function processUnifiedFilesArray(options, maxSize, provider) {
|
|
|
821
831
|
],
|
|
822
832
|
csvOptions: options.csvOptions,
|
|
823
833
|
provider: provider,
|
|
834
|
+
mimetypeHint: fileMimetypeHint,
|
|
824
835
|
});
|
|
825
836
|
appendDetectedFileResult(result, file, options);
|
|
826
837
|
includedCount++;
|
|
@@ -1655,7 +1666,13 @@ async function tryRegisterFileReference(file, fileSize, registry, index = 0) {
|
|
|
1655
1666
|
return false;
|
|
1656
1667
|
}
|
|
1657
1668
|
const filename = extractFilename(file, index);
|
|
1658
|
-
|
|
1669
|
+
const mimetype = typeof file === "object" && !Buffer.isBuffer(file)
|
|
1670
|
+
? file.mimetype
|
|
1671
|
+
: undefined;
|
|
1672
|
+
await registry.register(buffer, getFileSource(file), {
|
|
1673
|
+
filename,
|
|
1674
|
+
mimetype,
|
|
1675
|
+
});
|
|
1659
1676
|
logger.info(`[FileDetector] Registered "${filename}" (${(fileSize / 1024).toFixed(0)} KB) ` +
|
|
1660
1677
|
`as lazy reference — skipping upfront processing`);
|
|
1661
1678
|
return true;
|