@ljoukov/llm 6.0.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -11
- package/dist/index.cjs +509 -232
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +512 -235
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -3240,19 +3240,16 @@ function getCurrentAgentLoggingSession() {
|
|
|
3240
3240
|
|
|
3241
3241
|
// src/files.ts
|
|
3242
3242
|
import { AsyncLocalStorage as AsyncLocalStorage2 } from "async_hooks";
|
|
3243
|
-
import { Buffer as Buffer4
|
|
3243
|
+
import { Buffer as Buffer4 } from "buffer";
|
|
3244
3244
|
import { createHash } from "crypto";
|
|
3245
|
-
import { createReadStream
|
|
3246
|
-
import { copyFile, mkdir as mkdir2,
|
|
3245
|
+
import { createReadStream } from "fs";
|
|
3246
|
+
import { copyFile, mkdir as mkdir2, readFile, stat, unlink, writeFile as writeFile2 } from "fs/promises";
|
|
3247
3247
|
import os3 from "os";
|
|
3248
3248
|
import path4 from "path";
|
|
3249
|
-
import { Readable } from "stream";
|
|
3250
3249
|
import { pipeline } from "stream/promises";
|
|
3251
3250
|
import { Storage } from "@google-cloud/storage";
|
|
3252
3251
|
import mime from "mime";
|
|
3253
3252
|
var DEFAULT_FILE_TTL_SECONDS = 48 * 60 * 60;
|
|
3254
|
-
var OPENAI_FILE_CREATE_MAX_BYTES = 512 * 1024 * 1024;
|
|
3255
|
-
var OPENAI_UPLOAD_PART_MAX_BYTES = 64 * 1024 * 1024;
|
|
3256
3253
|
var GEMINI_FILE_POLL_INTERVAL_MS = 1e3;
|
|
3257
3254
|
var GEMINI_FILE_POLL_TIMEOUT_MS = 6e4;
|
|
3258
3255
|
var FILES_TEMP_ROOT = path4.join(os3.tmpdir(), "ljoukov-llm-files");
|
|
@@ -3261,7 +3258,7 @@ var FILES_CACHE_CONTENT_ROOT = path4.join(FILES_CACHE_ROOT, "content");
|
|
|
3261
3258
|
var FILES_CACHE_METADATA_ROOT = path4.join(FILES_CACHE_ROOT, "metadata");
|
|
3262
3259
|
var filesState = getRuntimeSingleton(/* @__PURE__ */ Symbol.for("@ljoukov/llm.filesState"), () => ({
|
|
3263
3260
|
metadataById: /* @__PURE__ */ new Map(),
|
|
3264
|
-
|
|
3261
|
+
canonicalUploadCacheByKey: /* @__PURE__ */ new Map(),
|
|
3265
3262
|
materializedById: /* @__PURE__ */ new Map(),
|
|
3266
3263
|
geminiMirrorById: /* @__PURE__ */ new Map(),
|
|
3267
3264
|
vertexMirrorById: /* @__PURE__ */ new Map(),
|
|
@@ -3342,7 +3339,7 @@ function formatUploadLogLine(event) {
|
|
|
3342
3339
|
}
|
|
3343
3340
|
function recordUploadEvent(event) {
|
|
3344
3341
|
const scope = fileUploadScopeStorage.getStore();
|
|
3345
|
-
const resolvedSource = event.source ?? scope?.source ?? (event.backend === "
|
|
3342
|
+
const resolvedSource = event.source ?? scope?.source ?? (event.backend === "gcs" ? "files_api" : "provider_mirror");
|
|
3346
3343
|
const timestampedEvent = {
|
|
3347
3344
|
...event,
|
|
3348
3345
|
source: resolvedSource,
|
|
@@ -3389,16 +3386,117 @@ async function computeFileSha256Hex(filePath) {
|
|
|
3389
3386
|
}
|
|
3390
3387
|
return hash.digest("hex");
|
|
3391
3388
|
}
|
|
3392
|
-
function
|
|
3389
|
+
/**
 * Derive the deterministic canonical file id for an upload.
 *
 * The id is the literal prefix `file_` followed by the hex SHA-256 of
 * `filename`, a NUL byte, `mimeType`, a NUL byte and `sha256Hex`, fed to the
 * hash in that order. Identical inputs therefore always yield the same id.
 *
 * @param {string} filename - File name that takes part in the identity.
 * @param {string} mimeType - MIME type that takes part in the identity.
 * @param {string} sha256Hex - Hex digest of the file content.
 * @returns {string} `file_` plus 64 lowercase hex characters.
 */
function buildCanonicalFileId(filename, mimeType, sha256Hex) {
  const digest = createHash("sha256");
  // NUL separators keep the three fields from running into each other.
  for (const segment of [filename, "\0", mimeType, "\0", sha256Hex]) {
    digest.update(segment);
  }
  return `file_${digest.digest("hex")}`;
}
|
|
3392
|
+
/**
 * Resolve the GCS bucket name used for canonical file storage.
 *
 * Candidates are read, in priority order, from `LLM_FILES_GCS_BUCKET`,
 * `VERTEX_GCS_BUCKET` and `LLM_VERTEX_GCS_BUCKET`. Each candidate is trimmed,
 * a leading `gs://` is removed and trailing slashes are dropped. The first
 * candidate that is non-empty after this normalisation wins.
 *
 * A variable that is set but blank (for example `LLM_FILES_GCS_BUCKET=` in a
 * .env file) or that contains only `gs://` is treated as unset, so it neither
 * shadows a lower-priority variable nor produces an empty bucket name.
 *
 * @returns {string} Bare bucket name without scheme or trailing slash.
 * @throws {Error} When none of the variables yields a usable bucket name.
 */
function resolveCanonicalFilesBucket() {
  const candidates = [
    process.env.LLM_FILES_GCS_BUCKET,
    process.env.VERTEX_GCS_BUCKET,
    process.env.LLM_VERTEX_GCS_BUCKET
  ];
  for (const raw of candidates) {
    const bucket = raw?.trim().replace(/^gs:\/\//u, "").replace(/\/+$/u, "");
    if (bucket) {
      return bucket;
    }
  }
  throw new Error(
    "LLM_FILES_GCS_BUCKET (or VERTEX_GCS_BUCKET) must be set to use the canonical files API."
  );
}
|
|
3402
|
+
/**
 * Resolve the object-name prefix under which canonical files are stored.
 *
 * Reads `LLM_FILES_GCS_PREFIX`, trims it and strips leading and trailing
 * slashes. A non-empty result is returned with exactly one trailing slash;
 * otherwise the default `canonical-files/` is used.
 *
 * @returns {string} Prefix ending in `/`.
 */
function resolveCanonicalFilesPrefix() {
  const configured = process.env.LLM_FILES_GCS_PREFIX;
  if (configured == null) {
    return "canonical-files/";
  }
  const stripped = configured.trim().replace(/^\/+/u, "").replace(/\/+$/u, "");
  if (!stripped) {
    return "canonical-files/";
  }
  return `${stripped}/`;
}
|
|
3407
|
+
/**
 * Report whether a file looks like LaTeX source.
 *
 * The check is case-insensitive and matches either the file extension
 * (`.tex`, `.ltx`, `.latex`) or the MIME type (`application/x-tex`,
 * `text/x-tex`).
 *
 * @param {string} filename - File name whose extension is inspected.
 * @param {string} mimeType - Declared MIME type.
 * @returns {boolean} True when the extension or the MIME type matches.
 */
function isLatexLikeFile(filename, mimeType) {
  const latexExtensions = [".tex", ".ltx", ".latex"];
  const latexMimeTypes = ["application/x-tex", "text/x-tex"];
  const suffix = path4.extname(filename).trim().toLowerCase();
  const declaredType = mimeType.trim().toLowerCase();
  if (latexExtensions.includes(suffix)) {
    return true;
  }
  return latexMimeTypes.includes(declaredType);
}
|
|
3412
|
+
/**
 * Choose the content type to record on the stored object.
 *
 * Files that `isLatexLikeFile` recognises are stored as `text/plain`; every
 * other file keeps the MIME type it was given, unchanged.
 *
 * @param {string} filename - File name used for the LaTeX check.
 * @param {string} mimeType - Declared MIME type.
 * @returns {string} `text/plain` for LaTeX-like files, otherwise `mimeType`.
 */
function resolveCanonicalStorageContentType(filename, mimeType) {
  return isLatexLikeFile(filename, mimeType) ? "text/plain" : mimeType;
}
|
|
3418
|
+
/**
 * Pick the extension (without a leading dot) for a canonical object name.
 *
 * Resolution order:
 *   1. `txt` when `isLatexLikeFile` matches,
 *   2. the lower-cased extension of `filename`, if it has one,
 *   3. the extension the `mime` package reports for `mimeType`, if any,
 *   4. `bin` as the last resort.
 *
 * @param {string} filename - File name whose extension is preferred.
 * @param {string} mimeType - MIME type used when the name has no extension.
 * @returns {string} Lower-case extension without a dot.
 */
function resolveCanonicalObjectExtension(filename, mimeType) {
  if (isLatexLikeFile(filename, mimeType)) {
    return "txt";
  }
  const filenameExtension = path4.extname(filename).replace(/^\./u, "").trim().toLowerCase();
  // `||` skips empty strings, so each source is consulted only when the previous one had nothing.
  return filenameExtension || mime.getExtension(mimeType)?.trim().toLowerCase() || "bin";
}
|
|
3432
|
+
/**
 * Build the object name for a canonical file:
 * `<prefix><fileId>.<extension>`.
 *
 * The prefix comes from `resolveCanonicalFilesPrefix` and the extension from
 * `resolveCanonicalObjectExtension`.
 *
 * @param {string} fileId - Canonical file id.
 * @param {string} filename - File name used to derive the extension.
 * @param {string} mimeType - MIME type used to derive the extension.
 * @returns {string} Object name inside the bucket.
 */
function buildCanonicalObjectName(fileId, filename, mimeType) {
  const suffix = resolveCanonicalObjectExtension(filename, mimeType);
  const prefix = resolveCanonicalFilesPrefix();
  return `${prefix}${fileId}.${suffix}`;
}
|
|
3436
|
+
/**
 * Produce a file name that is safe to embed in storage headers.
 *
 * The name is first passed through `normaliseFilename`. Every run of
 * characters outside `\w`, `.` and `-` is then replaced by a single `-`.
 * An empty result falls back to `attachment.bin`.
 *
 * @param {string} filename - Original file name.
 * @returns {string} Sanitised, non-empty file name.
 */
function toSafeStorageFilename(filename) {
  const sanitised = normaliseFilename(filename).replace(/[^\w.-]+/gu, "-");
  if (sanitised.length > 0) {
    return sanitised;
  }
  return "attachment.bin";
}
|
|
3440
|
+
/**
 * Resolve a creation time in unix seconds.
 *
 * Preference order:
 *   1. `value` parsed as a base-10 integer, when that is finite and positive,
 *   2. `fallback` parsed with `Date.parse`, floored to whole seconds,
 *   3. the current time.
 *
 * @param {string | undefined} value - Unix seconds in string form.
 * @param {string | undefined} fallback - Date string understood by `Date.parse`.
 * @returns {number} Unix timestamp in seconds.
 */
function parseUnixSeconds(value, fallback) {
  const explicitSeconds = value ? Number.parseInt(value, 10) : Number.NaN;
  if (Number.isFinite(explicitSeconds) && explicitSeconds > 0) {
    return explicitSeconds;
  }
  const fallbackMillis = fallback ? Date.parse(fallback) : Number.NaN;
  const millis = Number.isFinite(fallbackMillis) ? fallbackMillis : Date.now();
  return Math.floor(millis / 1e3);
}
|
|
3455
|
+
/**
 * Parse an optional timestamp into unix seconds.
 *
 * The value is first tried as a date string via `Date.parse`. If that fails
 * it is tried as a base-10 integer, which must be finite and positive.
 *
 * @param {string | undefined} value - Date string or integer string.
 * @returns {number | undefined} Unix seconds, or `undefined` when the value is
 *   empty or cannot be parsed either way.
 */
function parseOptionalUnixSeconds(value) {
  if (!value) {
    return void 0;
  }
  const parsedMillis = Date.parse(value);
  if (Number.isFinite(parsedMillis)) {
    return Math.floor(parsedMillis / 1e3);
  }
  const asInteger = Number.parseInt(value, 10);
  if (Number.isFinite(asInteger) && asInteger > 0) {
    return asInteger;
  }
  return void 0;
}
|
|
3466
|
+
/**
 * Convert a storage object's metadata into the stored-file record used by
 * the files cache.
 *
 * Fields are taken from the object's custom metadata (`objectMetadata.metadata`)
 * when present and non-blank, with these fallbacks:
 *   - filename: base name of `objectName`,
 *   - created_at: `objectMetadata.timeCreated`, then the current time,
 *   - mimeType: `objectMetadata.contentType`, then `resolveMimeType(filename)`,
 *   - bytes: `objectMetadata.size` (string or number), else 0.
 *
 * @param {object} options
 * @param {string} options.fileId - Canonical file id.
 * @param {string} options.bucketName - Bucket holding the object.
 * @param {string} options.objectName - Object name inside the bucket.
 * @param {object} options.objectMetadata - Metadata object read from storage.
 * @param {string} [options.localPath] - Path of a local copy, if known.
 * @returns {object} Record with `file`, `filename`, `bytes`, `mimeType`,
 *   `sha256Hex`, `localPath`, `bucketName` and `objectName`.
 */
function toStoredFileFromCanonicalMetadata(options) {
  const { objectMetadata } = options;
  const custom = objectMetadata.metadata;
  // Returns the trimmed text for non-blank strings and undefined for anything else.
  const pickText = (candidate) => typeof candidate === "string" && candidate.trim().length > 0 ? candidate.trim() : void 0;
  const pickString = (candidate) => typeof candidate === "string" ? candidate : void 0;

  const filename = normaliseFilename(pickText(custom?.filename) ?? path4.basename(options.objectName));

  const sizeValue = objectMetadata.size;
  let parsedSize = 0;
  if (typeof sizeValue === "string") {
    parsedSize = Number.parseInt(sizeValue, 10);
  } else if (typeof sizeValue === "number") {
    parsedSize = sizeValue;
  }
  const bytes = Number.isFinite(parsedSize) ? parsedSize : 0;

  // Only "user_data" is ever produced, whatever purpose the object metadata records.
  const purpose = "user_data";
  const createdAt = parseUnixSeconds(
    pickString(custom?.createdAtUnix),
    pickString(objectMetadata.timeCreated)
  );
  const expiresAt = parseOptionalUnixSeconds(pickString(custom?.expiresAt));
  const mimeType = pickText(custom?.mimeType) ?? pickText(objectMetadata.contentType) ?? resolveMimeType(filename, void 0);
  const sha256Hex = pickText(custom?.sha256);

  const file = {
    id: options.fileId,
    bytes,
    created_at: createdAt,
    filename,
    object: "file",
    purpose,
    status: "processed"
  };
  if (expiresAt) {
    file.expires_at = expiresAt;
  }
  return {
    file,
    filename,
    bytes,
    mimeType,
    sha256Hex,
    localPath: options.localPath,
    bucketName: options.bucketName,
    objectName: options.objectName
  };
}
|
|
3404
3502
|
function buildCacheKey(filename, mimeType, sha256Hex) {
|
|
@@ -3419,7 +3517,7 @@ function isFresh(file) {
|
|
|
3419
3517
|
function recordMetadata(metadata) {
|
|
3420
3518
|
filesState.metadataById.set(metadata.file.id, metadata);
|
|
3421
3519
|
if (metadata.sha256Hex) {
|
|
3422
|
-
filesState.
|
|
3520
|
+
filesState.canonicalUploadCacheByKey.set(
|
|
3423
3521
|
buildCacheKey(
|
|
3424
3522
|
metadata.filename,
|
|
3425
3523
|
metadata.mimeType ?? "application/octet-stream",
|
|
@@ -3468,7 +3566,9 @@ async function persistMetadataToDisk(metadata) {
|
|
|
3468
3566
|
bytes: metadata.bytes,
|
|
3469
3567
|
mimeType: metadata.mimeType,
|
|
3470
3568
|
sha256Hex: metadata.sha256Hex,
|
|
3471
|
-
localPath: metadata.localPath
|
|
3569
|
+
localPath: metadata.localPath,
|
|
3570
|
+
bucketName: metadata.bucketName,
|
|
3571
|
+
objectName: metadata.objectName
|
|
3472
3572
|
};
|
|
3473
3573
|
await writeFile2(
|
|
3474
3574
|
buildCachedMetadataPath(metadata.file.id),
|
|
@@ -3500,175 +3600,271 @@ async function loadPersistedMetadata(fileId) {
|
|
|
3500
3600
|
bytes: payload.bytes,
|
|
3501
3601
|
mimeType: payload.mimeType,
|
|
3502
3602
|
sha256Hex: payload.sha256Hex,
|
|
3503
|
-
localPath: payload.localPath
|
|
3603
|
+
localPath: payload.localPath,
|
|
3604
|
+
bucketName: payload.bucketName,
|
|
3605
|
+
objectName: payload.objectName
|
|
3504
3606
|
});
|
|
3505
3607
|
} catch {
|
|
3506
3608
|
return void 0;
|
|
3507
3609
|
}
|
|
3508
3610
|
}
|
|
3509
|
-
async function
|
|
3510
|
-
const
|
|
3511
|
-
const
|
|
3512
|
-
|
|
3513
|
-
|
|
3611
|
+
/**
 * Stream a local file into the canonical storage object.
 *
 * The write carries the precondition `ifGenerationMatch: 0` and switches to a
 * resumable upload when `options.bytes` is at least 10 MiB. The object's
 * content type comes from `resolveCanonicalStorageContentType`, and its
 * content disposition uses the sanitised file name.
 *
 * @param {object} options
 * @param {string} options.filePath - Local file to upload.
 * @param {string} options.bucketName - Destination bucket.
 * @param {string} options.objectName - Destination object name.
 * @param {number} options.bytes - File size in bytes.
 * @param {string} options.mimeType - Declared MIME type.
 * @param {object} options.metadata - Custom metadata; `filename` is read from it.
 * @returns {Promise<boolean>} `true` when the write completed, `false` when it
 *   failed with error code 412 (presumably the precondition rejecting an
 *   already existing object — confirm against the GCS documentation).
 * @throws Re-throws any error whose code is not 412.
 */
async function writeCanonicalFileFromPath(options) {
  const target = getStorageClient().bucket(options.bucketName).file(options.objectName);
  const displayName = options.metadata.filename ?? "attachment.bin";
  const contentType = resolveCanonicalStorageContentType(displayName, options.mimeType);
  try {
    const source = createReadStream(options.filePath);
    const sink = target.createWriteStream({
      resumable: options.bytes >= 10 * 1024 * 1024,
      preconditionOpts: { ifGenerationMatch: 0 },
      metadata: {
        contentType,
        contentDisposition: `inline; filename="${toSafeStorageFilename(displayName)}"`,
        metadata: options.metadata
      }
    });
    await pipeline(source, sink);
    return true;
  } catch (error) {
    const { code } = error;
    // The code may arrive as a number or as a string.
    if ([412, "412"].includes(code)) {
      return false;
    }
    throw error;
  }
}
|
|
3639
|
+
/**
 * Save an in-memory buffer as the canonical storage object.
 *
 * The save carries the precondition `ifGenerationMatch: 0` and switches to a
 * resumable upload when the buffer is at least 10 MiB. The object's content
 * type comes from `resolveCanonicalStorageContentType`, and its content
 * disposition uses the sanitised file name.
 *
 * @param {object} options
 * @param {Buffer} options.bytes - Content to store; `byteLength` is read from it.
 * @param {string} options.bucketName - Destination bucket.
 * @param {string} options.objectName - Destination object name.
 * @param {string} options.mimeType - Declared MIME type.
 * @param {object} options.metadata - Custom metadata; `filename` is read from it.
 * @returns {Promise<boolean>} `true` when the save completed, `false` when it
 *   failed with error code 412 (presumably the precondition rejecting an
 *   already existing object — confirm against the GCS documentation).
 * @throws Re-throws any error whose code is not 412.
 */
async function writeCanonicalFileFromBytes(options) {
  const target = getStorageClient().bucket(options.bucketName).file(options.objectName);
  const displayName = options.metadata.filename ?? "attachment.bin";
  const contentType = resolveCanonicalStorageContentType(displayName, options.mimeType);
  try {
    const saveOptions = {
      resumable: options.bytes.byteLength >= 10 * 1024 * 1024,
      preconditionOpts: { ifGenerationMatch: 0 },
      metadata: {
        contentType,
        contentDisposition: `inline; filename="${toSafeStorageFilename(displayName)}"`,
        metadata: options.metadata
      }
    };
    await target.save(options.bytes, saveOptions);
    return true;
  } catch (error) {
    const { code } = error;
    // The code may arrive as a number or as a string.
    if ([412, "412"].includes(code)) {
      return false;
    }
    throw error;
  }
}
|
|
3664
|
+
/**
 * Overwrite the metadata of an existing canonical storage object.
 *
 * Sets the content type (via `resolveCanonicalStorageContentType`), an inline
 * content disposition with the sanitised file name, and the custom metadata.
 *
 * @param {object} options
 * @param {string} options.bucketName - Bucket holding the object.
 * @param {string} options.objectName - Object name inside the bucket.
 * @param {string} options.mimeType - Declared MIME type.
 * @param {object} options.metadata - Custom metadata; `filename` is read from it.
 * @returns {Promise<void>}
 */
async function refreshCanonicalObjectMetadata(options) {
  const displayName = options.metadata.filename ?? "attachment.bin";
  const contentType = resolveCanonicalStorageContentType(displayName, options.mimeType);
  const target = getStorageClient().bucket(options.bucketName).file(options.objectName);
  await target.setMetadata({
    contentType,
    contentDisposition: `inline; filename="${toSafeStorageFilename(displayName)}"`,
    metadata: options.metadata
  });
}
|
|
3675
|
+
/**
 * Create, register and persist the metadata record for a canonical file.
 *
 * The file object is stamped with the current time as `created_at` and with
 * `created_at + expiresAfterSeconds` as `expires_at`. The record goes through
 * `recordMetadata` and is then written out with `persistMetadataToDisk`.
 *
 * @param {object} options
 * @param {string} options.fileId - Canonical file id.
 * @param {string} options.filename - File name.
 * @param {string} options.mimeType - MIME type.
 * @param {string} options.purpose - Purpose stored on the file object.
 * @param {number} options.expiresAfterSeconds - Lifetime in seconds.
 * @param {string} options.sha256Hex - Hex digest of the content.
 * @param {number} options.bytes - Content size in bytes.
 * @param {string} options.bucketName - Bucket holding the object.
 * @param {string} options.objectName - Object name inside the bucket.
 * @param {string} [options.localPath] - Path of the local copy.
 * @returns {Promise<object>} The value returned by `recordMetadata`.
 */
async function createCanonicalMetadata(options) {
  const nowSeconds = Math.floor(Date.now() / 1e3);
  const record = recordMetadata({
    file: {
      id: options.fileId,
      bytes: options.bytes,
      created_at: nowSeconds,
      filename: options.filename,
      object: "file",
      purpose: options.purpose,
      status: "processed",
      expires_at: nowSeconds + options.expiresAfterSeconds
    },
    filename: options.filename,
    bytes: options.bytes,
    mimeType: options.mimeType,
    sha256Hex: options.sha256Hex,
    localPath: options.localPath,
    bucketName: options.bucketName,
    objectName: options.objectName
  });
  await persistMetadataToDisk(record);
  return record;
}
|
|
3578
|
-
async function
|
|
3701
|
+
/**
 * Upload an in-memory buffer as a canonical file and return its metadata.
 *
 * Steps:
 *   1. Return the cached record when one exists for the same
 *      filename / MIME type / digest and `isFresh` accepts it.
 *   2. Derive the file id, bucket and object name.
 *   3. Write the object; when the write reports it did not happen
 *      (`false`), refresh the existing object's metadata instead.
 *   4. Cache the bytes locally, then create and persist the metadata record.
 *   5. Record an upload event, but only when the object was actually written.
 *
 * @param {object} params
 * @param {Buffer} params.bytes - Content to upload.
 * @param {string} params.filename - File name.
 * @param {string} params.mimeType - MIME type.
 * @param {string} params.purpose - Purpose stored with the file.
 * @param {number} params.expiresAfterSeconds - Lifetime in seconds.
 * @param {string} params.sha256Hex - Hex digest of `bytes`.
 * @returns {Promise<object>} Cached or newly created metadata record.
 */
async function uploadCanonicalFileFromBytes(params) {
  const { filename, mimeType, sha256Hex } = params;
  const cachedEntry = filesState.canonicalUploadCacheByKey.get(
    buildCacheKey(filename, mimeType, sha256Hex)
  );
  if (cachedEntry && isFresh(cachedEntry.file)) {
    return cachedEntry;
  }

  const fileId = buildCanonicalFileId(filename, mimeType, sha256Hex);
  const bucketName = resolveCanonicalFilesBucket();
  const objectName = buildCanonicalObjectName(fileId, filename, mimeType);
  const objectMetadata = {
    fileId,
    filename,
    mimeType,
    purpose: params.purpose,
    sha256: sha256Hex,
    createdAtUnix: Math.floor(Date.now() / 1e3).toString(),
    expiresAt: new Date(Date.now() + params.expiresAfterSeconds * 1e3).toISOString()
  };
  const startedAtMs = Date.now();

  const wasWritten = await writeCanonicalFileFromBytes({
    bytes: params.bytes,
    bucketName,
    objectName,
    mimeType,
    metadata: objectMetadata
  });
  if (!wasWritten) {
    // The write was reported as skipped, so only the metadata is updated.
    await refreshCanonicalObjectMetadata({
      bucketName,
      objectName,
      mimeType,
      metadata: objectMetadata
    });
  }

  const localPath = await cacheBufferLocally(params.bytes, sha256Hex);
  const record = await createCanonicalMetadata({
    fileId,
    filename,
    mimeType,
    purpose: params.purpose,
    expiresAfterSeconds: params.expiresAfterSeconds,
    sha256Hex,
    bytes: params.bytes.byteLength,
    bucketName,
    objectName,
    localPath
  });

  if (wasWritten) {
    recordUploadEvent({
      backend: "gcs",
      mode: "gcs",
      filename,
      bytes: params.bytes.byteLength,
      durationMs: Math.max(0, Date.now() - startedAtMs),
      mimeType,
      fileId,
      fileUri: `gs://${bucketName}/${objectName}`
    });
  }
  return record;
}
|
|
3762
|
+
/**
 * Upload a local file as a canonical file and return its metadata.
 *
 * Steps:
 *   1. Return the cached record when one exists for the same
 *      filename / MIME type / digest and `isFresh` accepts it.
 *   2. Derive the file id, bucket and object name.
 *   3. Stream the file into the object; when the write reports it did not
 *      happen (`false`), refresh the existing object's metadata instead.
 *   4. Cache the file locally, then create and persist the metadata record.
 *   5. Record an upload event, but only when the object was actually written.
 *
 * @param {object} params
 * @param {string} params.filePath - Local file to upload.
 * @param {number} params.bytes - File size in bytes.
 * @param {string} params.filename - File name.
 * @param {string} params.mimeType - MIME type.
 * @param {string} params.purpose - Purpose stored with the file.
 * @param {number} params.expiresAfterSeconds - Lifetime in seconds.
 * @param {string} params.sha256Hex - Hex digest of the file content.
 * @returns {Promise<object>} Cached or newly created metadata record.
 */
async function uploadCanonicalFileFromPath(params) {
  const { filename, mimeType, sha256Hex } = params;
  const cachedEntry = filesState.canonicalUploadCacheByKey.get(
    buildCacheKey(filename, mimeType, sha256Hex)
  );
  if (cachedEntry && isFresh(cachedEntry.file)) {
    return cachedEntry;
  }

  const fileId = buildCanonicalFileId(filename, mimeType, sha256Hex);
  const bucketName = resolveCanonicalFilesBucket();
  const objectName = buildCanonicalObjectName(fileId, filename, mimeType);
  const objectMetadata = {
    fileId,
    filename,
    mimeType,
    purpose: params.purpose,
    sha256: sha256Hex,
    createdAtUnix: Math.floor(Date.now() / 1e3).toString(),
    expiresAt: new Date(Date.now() + params.expiresAfterSeconds * 1e3).toISOString()
  };
  const startedAtMs = Date.now();

  const wasWritten = await writeCanonicalFileFromPath({
    filePath: params.filePath,
    bucketName,
    objectName,
    bytes: params.bytes,
    mimeType,
    metadata: objectMetadata
  });
  if (!wasWritten) {
    // The write was reported as skipped, so only the metadata is updated.
    await refreshCanonicalObjectMetadata({
      bucketName,
      objectName,
      mimeType,
      metadata: objectMetadata
    });
  }

  const localPath = await cacheFileLocally(params.filePath, sha256Hex);
  const record = await createCanonicalMetadata({
    fileId,
    filename,
    mimeType,
    purpose: params.purpose,
    expiresAfterSeconds: params.expiresAfterSeconds,
    sha256Hex,
    bytes: params.bytes,
    bucketName,
    objectName,
    localPath
  });

  if (wasWritten) {
    recordUploadEvent({
      backend: "gcs",
      mode: "gcs",
      filename,
      bytes: params.bytes,
      durationMs: Math.max(0, Date.now() - startedAtMs),
      mimeType,
      fileId,
      fileUri: `gs://${bucketName}/${objectName}`
    });
  }
  return record;
}
|
|
3652
|
-
async function
|
|
3824
|
+
/**
 * Find the bucket and object name that hold a canonical file.
 *
 * The in-memory metadata map is consulted first, then the persisted metadata.
 * When neither records both `bucketName` and `objectName`, the configured
 * bucket is listed for the first object whose name starts with
 * `<prefix><fileId>.`.
 *
 * @param {string} fileId - Canonical file id.
 * @returns {Promise<{bucketName: string, objectName: string}>}
 * @throws {Error} When no matching object is listed in the bucket.
 */
async function resolveCanonicalStorageLocation(fileId) {
  const known = filesState.metadataById.get(fileId) ?? await loadPersistedMetadata(fileId);
  if (known?.bucketName && known.objectName) {
    return {
      bucketName: known.bucketName,
      objectName: known.objectName
    };
  }

  const bucketName = resolveCanonicalFilesBucket();
  // The trailing dot restricts the listing to this id followed by an extension.
  const listingQuery = {
    prefix: `${resolveCanonicalFilesPrefix()}${fileId}.`,
    maxResults: 1,
    autoPaginate: false
  };
  const [matches] = await getStorageClient().bucket(bucketName).getFiles(listingQuery);
  const firstMatch = matches[0];
  if (!firstMatch) {
    throw new Error(`Canonical file ${fileId} was not found in GCS.`);
  }
  return {
    bucketName,
    objectName: firstMatch.name
  };
}
|
|
3847
|
+
/**
 * Load the metadata record for a canonical file.
 *
 * A record from memory, or failing that from disk, is returned as-is when
 * `isFresh` accepts it and it carries both `bucketName` and `objectName`.
 * Otherwise the object's metadata is fetched from storage, converted with
 * `toStoredFileFromCanonicalMetadata`, registered and persisted. A local path
 * already known from memory or disk is carried over to the rebuilt record.
 *
 * @param {string} fileId - Canonical file id.
 * @returns {Promise<object>} Metadata record for the file.
 */
async function retrieveCanonicalFile(fileId) {
  const isUsable = (entry) => Boolean(
    entry && isFresh(entry.file) && entry.bucketName && entry.objectName
  );

  const inMemory = filesState.metadataById.get(fileId);
  if (isUsable(inMemory)) {
    return inMemory;
  }
  const onDisk = await loadPersistedMetadata(fileId);
  if (isUsable(onDisk)) {
    return onDisk;
  }

  const knownLocalPath = inMemory?.localPath ?? onDisk?.localPath;
  const location = await resolveCanonicalStorageLocation(fileId);
  const remoteObject = getStorageClient().bucket(location.bucketName).file(location.objectName);
  const [objectMetadata] = await remoteObject.getMetadata();
  const rebuilt = recordMetadata(
    toStoredFileFromCanonicalMetadata({
      fileId,
      bucketName: location.bucketName,
      objectName: location.objectName,
      objectMetadata,
      localPath: knownLocalPath
    })
  );
  await persistMetadataToDisk(rebuilt);
  return rebuilt;
}
|
|
@@ -3696,7 +3892,7 @@ function resolveVertexMirrorBucket() {
|
|
|
3696
3892
|
const trimmed = raw?.trim();
|
|
3697
3893
|
if (!trimmed) {
|
|
3698
3894
|
throw new Error(
|
|
3699
|
-
"VERTEX_GCS_BUCKET must be set to use
|
|
3895
|
+
"VERTEX_GCS_BUCKET must be set to use canonical file ids with Vertex Gemini models."
|
|
3700
3896
|
);
|
|
3701
3897
|
}
|
|
3702
3898
|
return trimmed.replace(/^gs:\/\//u, "").replace(/\/+$/u, "");
|
|
@@ -3726,61 +3922,41 @@ function getGeminiMirrorClient() {
|
|
|
3726
3922
|
}
|
|
3727
3923
|
return filesState.geminiClientPromise;
|
|
3728
3924
|
}
|
|
3729
|
-
async function
|
|
3925
|
+
async function materializeCanonicalFile(fileId) {
|
|
3730
3926
|
const cachedPromise = filesState.materializedById.get(fileId);
|
|
3731
3927
|
if (cachedPromise) {
|
|
3732
3928
|
return await cachedPromise;
|
|
3733
3929
|
}
|
|
3734
3930
|
const promise = (async () => {
|
|
3735
|
-
const metadata = await
|
|
3736
|
-
if (metadata.localPath && metadata.sha256Hex && metadata.mimeType) {
|
|
3931
|
+
const metadata = await retrieveCanonicalFile(fileId);
|
|
3932
|
+
if (metadata.localPath && metadata.sha256Hex && metadata.mimeType && metadata.bucketName && metadata.objectName) {
|
|
3737
3933
|
return {
|
|
3738
3934
|
file: metadata.file,
|
|
3739
3935
|
filename: metadata.filename,
|
|
3740
3936
|
bytes: metadata.bytes,
|
|
3741
3937
|
mimeType: metadata.mimeType,
|
|
3742
3938
|
sha256Hex: metadata.sha256Hex,
|
|
3743
|
-
localPath: metadata.localPath
|
|
3939
|
+
localPath: metadata.localPath,
|
|
3940
|
+
bucketName: metadata.bucketName,
|
|
3941
|
+
objectName: metadata.objectName
|
|
3744
3942
|
};
|
|
3745
3943
|
}
|
|
3746
|
-
|
|
3747
|
-
|
|
3748
|
-
path4.join(FILES_TEMP_ROOT, `${fileId.replace(/[^a-z0-9_-]/giu, "")}-`)
|
|
3749
|
-
);
|
|
3750
|
-
const localPath = path4.join(tempDir, normaliseFilename(metadata.filename, `${fileId}.bin`));
|
|
3751
|
-
const response = await getOpenAiClient().files.content(fileId);
|
|
3752
|
-
if (!response.ok) {
|
|
3753
|
-
throw new Error(
|
|
3754
|
-
`Failed to download OpenAI file ${fileId}: ${response.status} ${response.statusText}`
|
|
3755
|
-
);
|
|
3944
|
+
if (!metadata.bucketName || !metadata.objectName) {
|
|
3945
|
+
throw new Error(`Canonical file ${fileId} is missing GCS location metadata.`);
|
|
3756
3946
|
}
|
|
3757
|
-
const
|
|
3758
|
-
const mimeType = resolveMimeType(metadata.filename,
|
|
3759
|
-
const
|
|
3760
|
-
|
|
3761
|
-
if (response.body) {
|
|
3762
|
-
const source = Readable.fromWeb(response.body);
|
|
3763
|
-
const writable = createWriteStream(localPath, { flags: "wx" });
|
|
3764
|
-
source.on("data", (chunk) => {
|
|
3765
|
-
const buffer = Buffer4.isBuffer(chunk) ? chunk : Buffer4.from(chunk);
|
|
3766
|
-
hash.update(buffer);
|
|
3767
|
-
bytes += buffer.byteLength;
|
|
3768
|
-
});
|
|
3769
|
-
await pipeline(source, writable);
|
|
3770
|
-
} else {
|
|
3771
|
-
const buffer = Buffer4.from(await response.arrayBuffer());
|
|
3772
|
-
hash.update(buffer);
|
|
3773
|
-
bytes = buffer.byteLength;
|
|
3774
|
-
await writeFile2(localPath, buffer);
|
|
3775
|
-
}
|
|
3776
|
-
const sha256Hex = hash.digest("hex");
|
|
3947
|
+
const [downloadedBytes] = await getStorageClient().bucket(metadata.bucketName).file(metadata.objectName).download();
|
|
3948
|
+
const mimeType = metadata.mimeType ?? resolveMimeType(metadata.filename, void 0);
|
|
3949
|
+
const sha256Hex = metadata.sha256Hex ?? computeSha256Hex(downloadedBytes);
|
|
3950
|
+
const localPath = await cacheBufferLocally(downloadedBytes, sha256Hex);
|
|
3777
3951
|
const updated = recordMetadata({
|
|
3778
3952
|
file: metadata.file,
|
|
3779
3953
|
filename: metadata.filename,
|
|
3780
|
-
bytes:
|
|
3954
|
+
bytes: downloadedBytes.byteLength || metadata.bytes,
|
|
3781
3955
|
mimeType,
|
|
3782
3956
|
sha256Hex,
|
|
3783
|
-
localPath
|
|
3957
|
+
localPath,
|
|
3958
|
+
bucketName: metadata.bucketName,
|
|
3959
|
+
objectName: metadata.objectName
|
|
3784
3960
|
});
|
|
3785
3961
|
await persistMetadataToDisk(updated);
|
|
3786
3962
|
return {
|
|
@@ -3789,7 +3965,9 @@ async function materializeOpenAiFile(fileId) {
|
|
|
3789
3965
|
bytes: updated.bytes,
|
|
3790
3966
|
mimeType: updated.mimeType ?? mimeType,
|
|
3791
3967
|
sha256Hex,
|
|
3792
|
-
localPath
|
|
3968
|
+
localPath,
|
|
3969
|
+
bucketName: metadata.bucketName,
|
|
3970
|
+
objectName: metadata.objectName
|
|
3793
3971
|
};
|
|
3794
3972
|
})();
|
|
3795
3973
|
filesState.materializedById.set(fileId, promise);
|
|
@@ -3805,14 +3983,14 @@ async function ensureGeminiFileMirror(fileId) {
|
|
|
3805
3983
|
if (cached) {
|
|
3806
3984
|
return cached;
|
|
3807
3985
|
}
|
|
3808
|
-
const materialized = await
|
|
3986
|
+
const materialized = await materializeCanonicalFile(fileId);
|
|
3809
3987
|
const client = await getGeminiMirrorClient();
|
|
3810
3988
|
const name = buildGeminiMirrorName(materialized.sha256Hex);
|
|
3811
3989
|
try {
|
|
3812
3990
|
const existing = await client.files.get({ name });
|
|
3813
3991
|
if (existing.name && existing.uri && existing.mimeType) {
|
|
3814
3992
|
const mirror2 = {
|
|
3815
|
-
|
|
3993
|
+
canonicalFileId: fileId,
|
|
3816
3994
|
name: existing.name,
|
|
3817
3995
|
uri: existing.uri,
|
|
3818
3996
|
mimeType: existing.mimeType,
|
|
@@ -3840,7 +4018,7 @@ async function ensureGeminiFileMirror(fileId) {
|
|
|
3840
4018
|
throw new Error("Gemini file upload completed without a usable URI.");
|
|
3841
4019
|
}
|
|
3842
4020
|
const mirror = {
|
|
3843
|
-
|
|
4021
|
+
canonicalFileId: fileId,
|
|
3844
4022
|
name: resolved.name,
|
|
3845
4023
|
uri: resolved.uri,
|
|
3846
4024
|
mimeType: resolved.mimeType,
|
|
@@ -3865,7 +4043,7 @@ async function ensureVertexFileMirror(fileId) {
|
|
|
3865
4043
|
if (cached) {
|
|
3866
4044
|
return cached;
|
|
3867
4045
|
}
|
|
3868
|
-
const materialized = await
|
|
4046
|
+
const materialized = await materializeCanonicalFile(fileId);
|
|
3869
4047
|
const bucketName = resolveVertexMirrorBucket();
|
|
3870
4048
|
const prefix = resolveVertexMirrorPrefix();
|
|
3871
4049
|
const extension = mime.getExtension(materialized.mimeType) ?? path4.extname(materialized.filename).replace(/^\./u, "") ?? "bin";
|
|
@@ -3906,7 +4084,7 @@ async function ensureVertexFileMirror(fileId) {
|
|
|
3906
4084
|
}
|
|
3907
4085
|
}
|
|
3908
4086
|
const mirror = {
|
|
3909
|
-
|
|
4087
|
+
canonicalFileId: fileId,
|
|
3910
4088
|
bucket: bucketName,
|
|
3911
4089
|
objectName,
|
|
3912
4090
|
fileUri: `gs://${bucketName}/${objectName}`,
|
|
@@ -3937,7 +4115,7 @@ async function filesCreate(params) {
|
|
|
3937
4115
|
const filename2 = normaliseFilename(params.filename, path4.basename(filePath));
|
|
3938
4116
|
const mimeType2 = resolveMimeType(filename2, params.mimeType);
|
|
3939
4117
|
const sha256Hex2 = await computeFileSha256Hex(filePath);
|
|
3940
|
-
const uploaded2 = await
|
|
4118
|
+
const uploaded2 = await uploadCanonicalFileFromPath({
|
|
3941
4119
|
filePath,
|
|
3942
4120
|
filename: filename2,
|
|
3943
4121
|
mimeType: mimeType2,
|
|
@@ -3946,19 +4124,13 @@ async function filesCreate(params) {
|
|
|
3946
4124
|
sha256Hex: sha256Hex2,
|
|
3947
4125
|
bytes: info.size
|
|
3948
4126
|
});
|
|
3949
|
-
|
|
3950
|
-
const cached2 = recordMetadata({
|
|
3951
|
-
...uploaded2,
|
|
3952
|
-
localPath: localPath2
|
|
3953
|
-
});
|
|
3954
|
-
await persistMetadataToDisk(cached2);
|
|
3955
|
-
return cached2.file;
|
|
4127
|
+
return uploaded2.file;
|
|
3956
4128
|
}
|
|
3957
4129
|
const filename = normaliseFilename(params.filename);
|
|
3958
4130
|
const bytes = toBuffer(params.data);
|
|
3959
4131
|
const mimeType = resolveMimeType(filename, params.mimeType, "text/plain");
|
|
3960
4132
|
const sha256Hex = computeSha256Hex(bytes);
|
|
3961
|
-
const uploaded = await
|
|
4133
|
+
const uploaded = await uploadCanonicalFileFromBytes({
|
|
3962
4134
|
bytes,
|
|
3963
4135
|
filename,
|
|
3964
4136
|
mimeType,
|
|
@@ -3966,16 +4138,10 @@ async function filesCreate(params) {
|
|
|
3966
4138
|
expiresAfterSeconds,
|
|
3967
4139
|
sha256Hex
|
|
3968
4140
|
});
|
|
3969
|
-
|
|
3970
|
-
const cached = recordMetadata({
|
|
3971
|
-
...uploaded,
|
|
3972
|
-
localPath
|
|
3973
|
-
});
|
|
3974
|
-
await persistMetadataToDisk(cached);
|
|
3975
|
-
return cached.file;
|
|
4141
|
+
return uploaded.file;
|
|
3976
4142
|
}
|
|
3977
4143
|
/**
 * Look up a canonical file and return its public `file` record.
 *
 * @param {string} fileId - Identifier passed through to `retrieveCanonicalFile`.
 * @returns {Promise<object>} The `file` property of the retrieved metadata.
 */
async function filesRetrieve(fileId) {
  const metadata = await retrieveCanonicalFile(fileId);
  return metadata.file;
}
|
|
3980
4146
|
async function filesDelete(fileId) {
|
|
3981
4147
|
const cachedGemini = filesState.geminiMirrorById.get(fileId);
|
|
@@ -4002,34 +4168,73 @@ async function filesDelete(fileId) {
|
|
|
4002
4168
|
} catch {
|
|
4003
4169
|
}
|
|
4004
4170
|
}
|
|
4005
|
-
|
|
4171
|
+
try {
|
|
4172
|
+
const { bucketName, objectName } = await resolveCanonicalStorageLocation(fileId);
|
|
4173
|
+
await getStorageClient().bucket(bucketName).file(objectName).delete({ ignoreNotFound: true });
|
|
4174
|
+
} catch {
|
|
4175
|
+
}
|
|
4006
4176
|
filesState.metadataById.delete(fileId);
|
|
4177
|
+
filesState.canonicalUploadCacheByKey.forEach((value, key) => {
|
|
4178
|
+
if (value.file.id === fileId) {
|
|
4179
|
+
filesState.canonicalUploadCacheByKey.delete(key);
|
|
4180
|
+
}
|
|
4181
|
+
});
|
|
4007
4182
|
filesState.materializedById.delete(fileId);
|
|
4008
4183
|
try {
|
|
4009
4184
|
await unlink(buildCachedMetadataPath(fileId));
|
|
4010
4185
|
} catch {
|
|
4011
4186
|
}
|
|
4012
4187
|
return {
|
|
4013
|
-
id:
|
|
4014
|
-
deleted:
|
|
4188
|
+
id: fileId,
|
|
4189
|
+
deleted: true,
|
|
4015
4190
|
object: "file"
|
|
4016
4191
|
};
|
|
4017
4192
|
}
|
|
4018
4193
|
/**
 * Download the bytes of a canonical file from its storage object and wrap
 * them in a `Response`.
 *
 * The response has status 200 and carries `content-type`, `content-length`
 * and an inline `content-disposition` header.
 *
 * @param {string} fileId - Canonical file identifier.
 * @returns {Promise<Response>} Response whose body is the downloaded bytes.
 * @throws {Error} When the stored metadata has no bucket or object name.
 */
async function filesContent(fileId) {
  const metadata = await retrieveCanonicalFile(fileId);
  if (!(metadata.bucketName && metadata.objectName)) {
    throw new Error(`Canonical file ${fileId} is missing GCS location metadata.`);
  }
  const storageObject = getStorageClient().bucket(metadata.bucketName).file(metadata.objectName);
  const [bytes] = await storageObject.download();
  // Fall back to a filename-derived MIME type when none was recorded.
  const contentType = metadata.mimeType ?? resolveMimeType(metadata.filename, void 0);
  const contentLength = bytes.byteLength.toString();
  const disposition = `inline; filename="${toSafeStorageFilename(metadata.filename)}"`;
  const headers = new Headers();
  headers.set("content-type", contentType);
  headers.set("content-length", contentLength);
  headers.set("content-disposition", disposition);
  return new Response(bytes, { status: 200, headers });
}
|
|
4021
4211
|
/**
 * Retrieve canonical file metadata with a MIME type and storage location
 * guaranteed to be present.
 *
 * If the stored record has no MIME type, one is resolved from the filename
 * and the completed record is passed to `recordMetadata`.
 *
 * @param {string} fileId - Canonical file identifier.
 * @returns {Promise<object>} Metadata including `mimeType`, `bucketName` and `objectName`.
 * @throws {Error} When the metadata has no bucket or object name.
 */
async function getCanonicalFileMetadata(fileId) {
  const retrieved = await retrieveCanonicalFile(fileId);
  const mimeType = retrieved.mimeType ?? resolveMimeType(retrieved.filename, void 0);
  let updated = retrieved;
  if (retrieved.mimeType !== mimeType) {
    // Only re-record when the MIME type had to be filled in.
    updated = recordMetadata({
      ...retrieved,
      mimeType
    });
  }
  if (!updated.bucketName || !updated.objectName) {
    throw new Error(`Canonical file ${fileId} is missing GCS location metadata.`);
  }
  const { bucketName, objectName } = updated;
  return {
    ...updated,
    mimeType,
    bucketName,
    objectName
  };
}
|
|
4228
|
+
/**
 * Create a V4 signed read URL for a canonical file's storage object.
 *
 * @param {object} options
 * @param {string} options.fileId - Canonical file identifier.
 * @param {number} [options.expiresAfterSeconds] - URL lifetime in seconds;
 *   defaults to 15 minutes when null or undefined.
 * @returns {Promise<string>} The signed URL.
 */
async function getCanonicalFileSignedUrl(options) {
  const metadata = await getCanonicalFileMetadata(options.fileId);
  const storageObject = getStorageClient().bucket(metadata.bucketName).file(metadata.objectName);
  const lifetimeSeconds = options.expiresAfterSeconds ?? 15 * 60;
  const signingConfig = {
    version: "v4",
    action: "read",
    expires: Date.now() + lifetimeSeconds * 1e3,
    responseType: resolveCanonicalStorageContentType(metadata.filename, metadata.mimeType)
  };
  const [signedUrl] = await storageObject.getSignedUrl(signingConfig);
  return signedUrl;
}
|
|
4033
4238
|
var files = {
|
|
4034
4239
|
create: filesCreate,
|
|
4035
4240
|
retrieve: filesRetrieve,
|
|
@@ -4391,6 +4596,7 @@ function isJsonSchemaObject(schema) {
|
|
|
4391
4596
|
return false;
|
|
4392
4597
|
}
|
|
4393
4598
|
var CANONICAL_GEMINI_FILE_URI_PREFIX = "openai://file/";
|
|
4599
|
+
var CANONICAL_LLM_FILE_ID_PATTERN = /^file_[a-f0-9]{64}$/u;
|
|
4394
4600
|
/**
 * Build the canonical file URI for a file id by prepending
 * `CANONICAL_GEMINI_FILE_URI_PREFIX`.
 *
 * @param {string} fileId - File identifier to embed in the URI.
 * @returns {string} Prefix followed by the file id.
 */
function buildCanonicalGeminiFileUri(fileId) {
  const canonicalUri = CANONICAL_GEMINI_FILE_URI_PREFIX.concat(fileId);
  return canonicalUri;
}
|
|
@@ -4401,6 +4607,9 @@ function parseCanonicalGeminiFileId(fileUri) {
|
|
|
4401
4607
|
const fileId = fileUri.slice(CANONICAL_GEMINI_FILE_URI_PREFIX.length).trim();
|
|
4402
4608
|
return fileId.length > 0 ? fileId : void 0;
|
|
4403
4609
|
}
|
|
4610
|
+
/**
 * Report whether a value is a string that, once trimmed, matches
 * `CANONICAL_LLM_FILE_ID_PATTERN`.
 *
 * @param {unknown} fileId - Candidate identifier.
 * @returns {boolean} `true` only for strings matching the pattern.
 */
function isCanonicalLlmFileId(fileId) {
  if (typeof fileId !== "string") {
    return false;
  }
  const candidate = fileId.trim();
  return CANONICAL_LLM_FILE_ID_PATTERN.test(candidate);
}
|
|
4404
4613
|
/**
 * Report whether a value is one of the recognised media-resolution keywords:
 * "auto", "low", "medium", "high" or "original".
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` when the value is exactly one of the keywords.
 */
function isLlmMediaResolution(value) {
  switch (value) {
    case "auto":
    case "low":
    case "medium":
    case "high":
    case "original":
      return true;
    default:
      return false;
  }
}
|
|
@@ -4888,7 +5097,21 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
4888
5097
|
if (!isOpenAiNativeContentItem(item)) {
|
|
4889
5098
|
return item;
|
|
4890
5099
|
}
|
|
4891
|
-
if (item.type === "input_image"
|
|
5100
|
+
if (item.type === "input_image") {
|
|
5101
|
+
if (isCanonicalLlmFileId(item.file_id)) {
|
|
5102
|
+
const signedUrl2 = await getCanonicalFileSignedUrl({ fileId: item.file_id });
|
|
5103
|
+
return {
|
|
5104
|
+
type: "input_image",
|
|
5105
|
+
image_url: signedUrl2,
|
|
5106
|
+
detail: toOpenAiImageDetail(
|
|
5107
|
+
isLlmMediaResolution(item.detail) ? item.detail : void 0,
|
|
5108
|
+
options?.model
|
|
5109
|
+
)
|
|
5110
|
+
};
|
|
5111
|
+
}
|
|
5112
|
+
if (options?.offloadInlineData !== true || typeof item.image_url !== "string" || !item.image_url.trim().toLowerCase().startsWith("data:")) {
|
|
5113
|
+
return item;
|
|
5114
|
+
}
|
|
4892
5115
|
const parsed = parseDataUrlPayload(item.image_url);
|
|
4893
5116
|
if (!parsed) {
|
|
4894
5117
|
return item;
|
|
@@ -4901,16 +5124,27 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
4901
5124
|
guessInlineDataFilename(parsed.mimeType)
|
|
4902
5125
|
)
|
|
4903
5126
|
});
|
|
5127
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: uploaded.fileId });
|
|
4904
5128
|
return {
|
|
4905
5129
|
type: "input_image",
|
|
5130
|
+
image_url: signedUrl,
|
|
4906
5131
|
detail: toOpenAiImageDetail(
|
|
4907
5132
|
isLlmMediaResolution(item.detail) ? item.detail : void 0,
|
|
4908
5133
|
options?.model
|
|
4909
|
-
)
|
|
4910
|
-
|
|
5134
|
+
)
|
|
5135
|
+
};
|
|
5136
|
+
}
|
|
5137
|
+
if (item.type !== "input_file") {
|
|
5138
|
+
return item;
|
|
5139
|
+
}
|
|
5140
|
+
if (isCanonicalLlmFileId(item.file_id)) {
|
|
5141
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: item.file_id });
|
|
5142
|
+
return {
|
|
5143
|
+
type: "input_file",
|
|
5144
|
+
file_url: signedUrl
|
|
4911
5145
|
};
|
|
4912
5146
|
}
|
|
4913
|
-
if (
|
|
5147
|
+
if (options?.offloadInlineData !== true) {
|
|
4914
5148
|
return item;
|
|
4915
5149
|
}
|
|
4916
5150
|
if (typeof item.file_data === "string" && item.file_data.trim().length > 0) {
|
|
@@ -4924,7 +5158,11 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
4924
5158
|
mimeType,
|
|
4925
5159
|
filename
|
|
4926
5160
|
});
|
|
4927
|
-
|
|
5161
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: uploaded.fileId });
|
|
5162
|
+
return {
|
|
5163
|
+
type: "input_file",
|
|
5164
|
+
file_url: signedUrl
|
|
5165
|
+
};
|
|
4928
5166
|
}
|
|
4929
5167
|
if (typeof item.file_url === "string" && item.file_url.trim().toLowerCase().startsWith("data:")) {
|
|
4930
5168
|
const parsed = parseDataUrlPayload(item.file_url);
|
|
@@ -4939,7 +5177,11 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
4939
5177
|
guessInlineDataFilename(parsed.mimeType)
|
|
4940
5178
|
)
|
|
4941
5179
|
});
|
|
4942
|
-
|
|
5180
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: uploaded.fileId });
|
|
5181
|
+
return {
|
|
5182
|
+
type: "input_file",
|
|
5183
|
+
file_url: signedUrl
|
|
5184
|
+
};
|
|
4943
5185
|
}
|
|
4944
5186
|
return item;
|
|
4945
5187
|
}
|
|
@@ -4969,11 +5211,40 @@ async function prepareOpenAiPromptInput(input, options) {
|
|
|
4969
5211
|
};
|
|
4970
5212
|
return await Promise.all(input.map((item) => prepareItem(item)));
|
|
4971
5213
|
}
|
|
5214
|
+
/**
 * Report whether any item in the input, or in an item's nested `content` or
 * `output` arrays, is an `input_image` or `input_file` whose `file_id` is a
 * canonical LLM file id.
 *
 * @param {Iterable<unknown>} input - Prompt input items to scan.
 * @returns {boolean} `true` as soon as one canonical file reference is found.
 */
function hasCanonicalOpenAiFileReferences(input) {
  const referencesCanonicalFile = (entry) => {
    if (!isOpenAiNativeContentItem(entry)) {
      return false;
    }
    const isFileBearingType = entry.type === "input_image" || entry.type === "input_file";
    return isFileBearingType && isCanonicalLlmFileId(entry.file_id);
  };
  const containsReference = (entries) => {
    for (const entry of entries) {
      // Primitives and null cannot hold a reference.
      if (!entry || typeof entry !== "object") {
        continue;
      }
      if (Array.isArray(entry.content) && containsReference(entry.content)) {
        return true;
      }
      if (Array.isArray(entry.output) && containsReference(entry.output)) {
        return true;
      }
      if (referencesCanonicalFile(entry)) {
        return true;
      }
    }
    return false;
  };
  return containsReference(input);
}
|
|
4972
5239
|
/**
 * Prepare prompt input only when it needs it.
 *
 * Preparation runs when the estimated inline payload exceeds
 * `INLINE_ATTACHMENT_PROMPT_THRESHOLD_BYTES`, or when the input contains
 * canonical file references. Otherwise a shallow copy of the input is returned.
 *
 * @param {Iterable<unknown>} input - Prompt input items.
 * @param {object} [options] - Forwarded to `prepareOpenAiPromptInput`; its
 *   `offloadInlineData` is always replaced by the value computed here.
 * @returns {Promise<Array<unknown>>} The prepared items, or a copy of the input.
 */
async function maybePrepareOpenAiPromptInput(input, options) {
  const inlineBytes = estimateOpenAiInlinePromptBytes(input);
  const offloadInlineData = inlineBytes > INLINE_ATTACHMENT_PROMPT_THRESHOLD_BYTES;
  // The reference scan is skipped when offloading already forces preparation.
  const needsPreparation = offloadInlineData || hasCanonicalOpenAiFileReferences(input);
  if (needsPreparation) {
    return await prepareOpenAiPromptInput(input, {
      ...options,
      offloadInlineData
    });
  }
  return Array.from(input);
}
|
|
4978
5249
|
function estimateGeminiInlinePromptBytes(contents) {
|
|
4979
5250
|
let total = 0;
|
|
@@ -6275,9 +6546,6 @@ async function maybeSpillToolOutputItem(item, toolName, options) {
|
|
|
6275
6546
|
return item;
|
|
6276
6547
|
}
|
|
6277
6548
|
async function maybeSpillToolOutput(value, toolName, options) {
|
|
6278
|
-
if (options?.provider === "chatgpt") {
|
|
6279
|
-
return value;
|
|
6280
|
-
}
|
|
6281
6549
|
if (typeof value === "string") {
|
|
6282
6550
|
if (options?.force !== true && Buffer5.byteLength(value, "utf8") <= TOOL_OUTPUT_SPILL_THRESHOLD_BYTES) {
|
|
6283
6551
|
return value;
|
|
@@ -7360,7 +7628,7 @@ async function runTextCall(params) {
|
|
|
7360
7628
|
defaultMediaResolution: request.mediaResolution,
|
|
7361
7629
|
model: request.model
|
|
7362
7630
|
}),
|
|
7363
|
-
{ model: request.model }
|
|
7631
|
+
{ model: request.model, provider: "openai" }
|
|
7364
7632
|
);
|
|
7365
7633
|
const openAiTools = toOpenAiTools(request.tools);
|
|
7366
7634
|
const reasoningEffort = resolveOpenAiReasoningEffort(
|
|
@@ -7439,6 +7707,10 @@ async function runTextCall(params) {
|
|
|
7439
7707
|
defaultMediaResolution: request.mediaResolution,
|
|
7440
7708
|
model: request.model
|
|
7441
7709
|
});
|
|
7710
|
+
const preparedChatGptInput = await maybePrepareOpenAiPromptInput(chatGptInput.input, {
|
|
7711
|
+
model: request.model,
|
|
7712
|
+
provider: "chatgpt"
|
|
7713
|
+
});
|
|
7442
7714
|
const reasoningEffort = resolveOpenAiReasoningEffort(request.model, request.thinkingLevel);
|
|
7443
7715
|
const openAiTools = toOpenAiTools(request.tools);
|
|
7444
7716
|
const requestPayload = {
|
|
@@ -7447,7 +7719,7 @@ async function runTextCall(params) {
|
|
|
7447
7719
|
stream: true,
|
|
7448
7720
|
...providerInfo.serviceTier ? { service_tier: providerInfo.serviceTier } : {},
|
|
7449
7721
|
instructions: chatGptInput.instructions ?? "You are a helpful assistant.",
|
|
7450
|
-
input:
|
|
7722
|
+
input: preparedChatGptInput,
|
|
7451
7723
|
include: ["reasoning.encrypted_content"],
|
|
7452
7724
|
reasoning: {
|
|
7453
7725
|
effort: toOpenAiReasoningEffort(reasoningEffort),
|
|
@@ -8255,7 +8527,8 @@ async function runToolLoop(request) {
|
|
|
8255
8527
|
let stepToolCallText;
|
|
8256
8528
|
let stepToolCallPayload;
|
|
8257
8529
|
const preparedInput = await maybePrepareOpenAiPromptInput(input, {
|
|
8258
|
-
model: request.model
|
|
8530
|
+
model: request.model,
|
|
8531
|
+
provider: "openai"
|
|
8259
8532
|
});
|
|
8260
8533
|
const stepRequestPayload = {
|
|
8261
8534
|
model: providerInfo.model,
|
|
@@ -8625,6 +8898,10 @@ async function runToolLoop(request) {
|
|
|
8625
8898
|
let reasoningSummaryText = "";
|
|
8626
8899
|
let stepToolCallText;
|
|
8627
8900
|
let stepToolCallPayload;
|
|
8901
|
+
const preparedInput = await maybePrepareOpenAiPromptInput(input, {
|
|
8902
|
+
model: request.model,
|
|
8903
|
+
provider: "chatgpt"
|
|
8904
|
+
});
|
|
8628
8905
|
const markFirstModelEvent = () => {
|
|
8629
8906
|
if (firstModelEventAtMs === void 0) {
|
|
8630
8907
|
firstModelEventAtMs = Date.now();
|
|
@@ -8636,7 +8913,7 @@ async function runToolLoop(request) {
|
|
|
8636
8913
|
stream: true,
|
|
8637
8914
|
...providerInfo.serviceTier ? { service_tier: providerInfo.serviceTier } : {},
|
|
8638
8915
|
instructions: toolLoopInput.instructions ?? "You are a helpful assistant.",
|
|
8639
|
-
input,
|
|
8916
|
+
input: preparedInput,
|
|
8640
8917
|
prompt_cache_key: promptCacheKey,
|
|
8641
8918
|
include: ["reasoning.encrypted_content"],
|
|
8642
8919
|
tools: openAiTools,
|