@ljoukov/llm 6.0.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -11
- package/dist/index.cjs +509 -232
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +512 -235
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -3358,13 +3358,10 @@ var import_node_fs3 = require("fs");
|
|
|
3358
3358
|
var import_promises2 = require("fs/promises");
|
|
3359
3359
|
var import_node_os3 = __toESM(require("os"), 1);
|
|
3360
3360
|
var import_node_path4 = __toESM(require("path"), 1);
|
|
3361
|
-
var import_node_stream = require("stream");
|
|
3362
3361
|
var import_promises3 = require("stream/promises");
|
|
3363
3362
|
var import_storage = require("@google-cloud/storage");
|
|
3364
3363
|
var import_mime = __toESM(require("mime"), 1);
|
|
3365
3364
|
var DEFAULT_FILE_TTL_SECONDS = 48 * 60 * 60;
|
|
3366
|
-
var OPENAI_FILE_CREATE_MAX_BYTES = 512 * 1024 * 1024;
|
|
3367
|
-
var OPENAI_UPLOAD_PART_MAX_BYTES = 64 * 1024 * 1024;
|
|
3368
3365
|
var GEMINI_FILE_POLL_INTERVAL_MS = 1e3;
|
|
3369
3366
|
var GEMINI_FILE_POLL_TIMEOUT_MS = 6e4;
|
|
3370
3367
|
var FILES_TEMP_ROOT = import_node_path4.default.join(import_node_os3.default.tmpdir(), "ljoukov-llm-files");
|
|
@@ -3373,7 +3370,7 @@ var FILES_CACHE_CONTENT_ROOT = import_node_path4.default.join(FILES_CACHE_ROOT,
|
|
|
3373
3370
|
var FILES_CACHE_METADATA_ROOT = import_node_path4.default.join(FILES_CACHE_ROOT, "metadata");
|
|
3374
3371
|
var filesState = getRuntimeSingleton(/* @__PURE__ */ Symbol.for("@ljoukov/llm.filesState"), () => ({
|
|
3375
3372
|
metadataById: /* @__PURE__ */ new Map(),
|
|
3376
|
-
|
|
3373
|
+
canonicalUploadCacheByKey: /* @__PURE__ */ new Map(),
|
|
3377
3374
|
materializedById: /* @__PURE__ */ new Map(),
|
|
3378
3375
|
geminiMirrorById: /* @__PURE__ */ new Map(),
|
|
3379
3376
|
vertexMirrorById: /* @__PURE__ */ new Map(),
|
|
@@ -3454,7 +3451,7 @@ function formatUploadLogLine(event) {
|
|
|
3454
3451
|
}
|
|
3455
3452
|
function recordUploadEvent(event) {
|
|
3456
3453
|
const scope = fileUploadScopeStorage.getStore();
|
|
3457
|
-
const resolvedSource = event.source ?? scope?.source ?? (event.backend === "
|
|
3454
|
+
const resolvedSource = event.source ?? scope?.source ?? (event.backend === "gcs" ? "files_api" : "provider_mirror");
|
|
3458
3455
|
const timestampedEvent = {
|
|
3459
3456
|
...event,
|
|
3460
3457
|
source: resolvedSource,
|
|
@@ -3501,16 +3498,117 @@ async function computeFileSha256Hex(filePath) {
|
|
|
3501
3498
|
}
|
|
3502
3499
|
return hash.digest("hex");
|
|
3503
3500
|
}
|
|
3504
|
-
function
|
|
3501
|
+
/**
 * Derives the canonical file id for a (filename, mime type, content hash) triple.
 *
 * The id is the literal prefix `file_` followed by the hex SHA-256 of the three
 * fields fed to the hash in order, with a NUL character between them so the
 * field boundaries stay unambiguous.
 *
 * @param {string} filename
 * @param {string} mimeType
 * @param {string} sha256Hex - hex digest of the file content
 * @returns {string} id of the form `file_<64 hex chars>`
 */
function buildCanonicalFileId(filename, mimeType, sha256Hex) {
  const hasher = import_node_crypto.createHash("sha256");
  hasher.update(filename);
  hasher.update("\0");
  hasher.update(mimeType);
  hasher.update("\0");
  hasher.update(sha256Hex);
  const digest = hasher.digest("hex");
  return `file_${digest}`;
}
|
|
3504
|
+
/**
 * Resolves the name of the GCS bucket used by the canonical files API.
 *
 * Reads LLM_FILES_GCS_BUCKET, VERTEX_GCS_BUCKET and LLM_VERTEX_GCS_BUCKET in
 * that order and uses the first one that holds a non-blank value. A variable
 * that is set but empty or whitespace-only is treated as unset, so it no longer
 * hides a usable value in a later variable.
 *
 * @returns {string} bucket name with any leading `gs://` and trailing slashes removed
 * @throws {Error} when none of the variables holds a non-blank value
 */
function resolveCanonicalFilesBucket() {
  const candidates = [
    process.env.LLM_FILES_GCS_BUCKET,
    process.env.VERTEX_GCS_BUCKET,
    process.env.LLM_VERTEX_GCS_BUCKET
  ];
  const bucket = candidates
    .map((candidate) => candidate?.trim())
    .find((candidate) => Boolean(candidate));
  if (!bucket) {
    throw new Error(
      "LLM_FILES_GCS_BUCKET (or VERTEX_GCS_BUCKET) must be set to use the canonical files API."
    );
  }
  return bucket.replace(/^gs:\/\//u, "").replace(/\/+$/u, "");
}
|
|
3514
|
+
/**
 * Resolves the object-name prefix under which canonical files are stored.
 *
 * Uses LLM_FILES_GCS_PREFIX with surrounding whitespace and leading/trailing
 * slashes removed, followed by a single `/`. Falls back to `canonical-files/`
 * when the variable is unset or nothing remains after trimming.
 *
 * @returns {string} prefix ending in `/`
 */
function resolveCanonicalFilesPrefix() {
  const configured = process.env.LLM_FILES_GCS_PREFIX;
  if (configured === undefined || configured === null) {
    return "canonical-files/";
  }
  const stripped = configured.trim().replace(/^\/+/u, "").replace(/\/+$/u, "");
  if (stripped) {
    return `${stripped}/`;
  }
  return "canonical-files/";
}
|
|
3519
|
+
/**
 * Tells whether a file looks like LaTeX source, judged by either its filename
 * extension (`.tex`, `.ltx`, `.latex`) or its mime type
 * (`application/x-tex`, `text/x-tex`). Both checks are case-insensitive.
 *
 * @param {string} filename
 * @param {string} mimeType
 * @returns {boolean}
 */
function isLatexLikeFile(filename, mimeType) {
  const ext = import_node_path4.default.extname(filename).trim().toLowerCase();
  const mime = mimeType.trim().toLowerCase();
  if ([".tex", ".ltx", ".latex"].includes(ext)) {
    return true;
  }
  return ["application/x-tex", "text/x-tex"].includes(mime);
}
|
|
3524
|
+
/**
 * Picks the content type recorded on the stored object: `text/plain` for
 * LaTeX-like files (as decided by isLatexLikeFile), otherwise the given mime
 * type unchanged.
 *
 * @param {string} filename
 * @param {string} mimeType
 * @returns {string}
 */
function resolveCanonicalStorageContentType(filename, mimeType) {
  return isLatexLikeFile(filename, mimeType) ? "text/plain" : mimeType;
}
|
|
3530
|
+
/**
 * Chooses the extension (without the dot) used in a canonical object name.
 *
 * Order of preference: `txt` for LaTeX-like files, then the lower-cased
 * extension of the filename, then the extension the `mime` package reports for
 * the mime type, and finally `bin`.
 *
 * @param {string} filename
 * @param {string} mimeType
 * @returns {string}
 */
function resolveCanonicalObjectExtension(filename, mimeType) {
  if (isLatexLikeFile(filename, mimeType)) {
    return "txt";
  }
  const extFromName = import_node_path4.default.extname(filename).replace(/^\./u, "").trim().toLowerCase();
  // `||` keeps the mime lookup lazy: it only runs when the filename gave nothing.
  return extFromName || import_mime.default.getExtension(mimeType)?.trim().toLowerCase() || "bin";
}
|
|
3544
|
+
/**
 * Builds the object name for a canonical file:
 * `<prefix><fileId>.<extension>`, where the prefix comes from
 * resolveCanonicalFilesPrefix and the extension from
 * resolveCanonicalObjectExtension.
 *
 * @param {string} fileId
 * @param {string} filename
 * @param {string} mimeType
 * @returns {string}
 */
function buildCanonicalObjectName(fileId, filename, mimeType) {
  const ext = resolveCanonicalObjectExtension(filename, mimeType);
  const prefix = resolveCanonicalFilesPrefix();
  return `${prefix}${fileId}.${ext}`;
}
|
|
3548
|
+
/**
 * Produces a filename safe to embed in a Content-Disposition header: the
 * filename is passed through normaliseFilename and every run of characters
 * other than word characters, `.` and `-` is collapsed to a single `-`.
 * Returns `attachment.bin` when the result is empty.
 *
 * @param {string} filename
 * @returns {string}
 */
function toSafeStorageFilename(filename) {
  const sanitized = normaliseFilename(filename).replace(/[^\w.-]+/gu, "-");
  if (sanitized.length === 0) {
    return "attachment.bin";
  }
  return sanitized;
}
|
|
3552
|
+
/**
 * Resolves a Unix timestamp in seconds.
 *
 * Tries `value` as a base-10 integer (must be finite and positive), then
 * `fallback` as a date string understood by Date.parse, and finally the
 * current time.
 *
 * @param {string | undefined} value - numeric string holding Unix seconds
 * @param {string | undefined} fallback - date string used when `value` is unusable
 * @returns {number} Unix seconds
 */
function parseUnixSeconds(value, fallback) {
  const fromValue = value ? Number.parseInt(value, 10) : Number.NaN;
  if (Number.isFinite(fromValue) && fromValue > 0) {
    return fromValue;
  }
  const fromFallbackMs = fallback ? Date.parse(fallback) : Number.NaN;
  if (Number.isFinite(fromFallbackMs)) {
    return Math.floor(fromFallbackMs / 1e3);
  }
  return Math.floor(Date.now() / 1e3);
}
|
|
3567
|
+
/**
 * Parses an optional timestamp into Unix seconds.
 *
 * A string made only of digits is read as Unix seconds directly. It is checked
 * first because Date.parse handles non-ISO input in an implementation-defined
 * way and may read a short numeric string as a calendar date. Any other value
 * is tried as a date string via Date.parse, then as a base-10 integer prefix.
 *
 * @param {string | undefined} value
 * @returns {number | undefined} Unix seconds, or undefined when the value is
 *   missing, unparseable, or not a positive number
 */
function parseOptionalUnixSeconds(value) {
  if (!value) {
    return void 0;
  }
  if (/^\d+$/u.test(value)) {
    const seconds = Number.parseInt(value, 10);
    return Number.isFinite(seconds) && seconds > 0 ? seconds : void 0;
  }
  const millis = Date.parse(value);
  if (Number.isFinite(millis)) {
    return Math.floor(millis / 1e3);
  }
  const numeric = Number.parseInt(value, 10);
  return Number.isFinite(numeric) && numeric > 0 ? numeric : void 0;
}
|
|
3578
|
+
/**
 * Converts GCS object metadata into the stored-file record used by the files API.
 *
 * Custom metadata (`options.objectMetadata.metadata`) is preferred for the
 * filename, mime type, creation time, expiry and content hash. Where it is
 * missing, the object name, the object's `contentType`, its `timeCreated`, or
 * resolveMimeType are used instead.
 *
 * @param {object} options
 * @param {string} options.fileId
 * @param {string} options.bucketName
 * @param {string} options.objectName
 * @param {object} options.objectMetadata - metadata object returned for the GCS object
 * @param {string} [options.localPath]
 * @returns {object} record with `file`, `filename`, `bytes`, `mimeType`,
 *   `sha256Hex`, `localPath`, `bucketName` and `objectName`
 */
function toStoredFileFromCanonicalMetadata(options) {
  const { objectMetadata } = options;
  const custom = objectMetadata.metadata;
  // Returns the trimmed string when it is a non-blank string, otherwise undefined.
  const nonBlank = (candidate) => typeof candidate === "string" && candidate.trim().length > 0 ? candidate.trim() : void 0;
  const filename = normaliseFilename(
    nonBlank(custom?.filename) ?? import_node_path4.default.basename(options.objectName)
  );
  const rawSize = objectMetadata.size;
  const parsedSize = typeof rawSize === "string" ? Number.parseInt(rawSize, 10) : typeof rawSize === "number" ? rawSize : 0;
  // Normalised once; used for both the file record and the top-level field.
  const bytes = Number.isFinite(parsedSize) ? parsedSize : 0;
  // The original ternary on the stored purpose returned "user_data" from both
  // branches, so the value is a constant.
  const purpose = "user_data";
  const createdAt = parseUnixSeconds(
    typeof custom?.createdAtUnix === "string" ? custom.createdAtUnix : void 0,
    typeof objectMetadata.timeCreated === "string" ? objectMetadata.timeCreated : void 0
  );
  const expiresAt = parseOptionalUnixSeconds(
    typeof custom?.expiresAt === "string" ? custom.expiresAt : void 0
  );
  const mimeType = nonBlank(custom?.mimeType) ?? nonBlank(objectMetadata.contentType) ?? resolveMimeType(filename, void 0);
  const sha256Hex = nonBlank(custom?.sha256);
  return {
    file: {
      id: options.fileId,
      bytes,
      created_at: createdAt,
      filename,
      object: "file",
      purpose,
      status: "processed",
      // expires_at is only present when an expiry could be parsed.
      ...expiresAt ? { expires_at: expiresAt } : {}
    },
    filename,
    bytes,
    mimeType,
    sha256Hex,
    localPath: options.localPath,
    bucketName: options.bucketName,
    objectName: options.objectName
  };
}
|
|
3516
3614
|
function buildCacheKey(filename, mimeType, sha256Hex) {
|
|
@@ -3531,7 +3629,7 @@ function isFresh(file) {
|
|
|
3531
3629
|
function recordMetadata(metadata) {
|
|
3532
3630
|
filesState.metadataById.set(metadata.file.id, metadata);
|
|
3533
3631
|
if (metadata.sha256Hex) {
|
|
3534
|
-
filesState.
|
|
3632
|
+
filesState.canonicalUploadCacheByKey.set(
|
|
3535
3633
|
buildCacheKey(
|
|
3536
3634
|
metadata.filename,
|
|
3537
3635
|
metadata.mimeType ?? "application/octet-stream",
|
|
@@ -3580,7 +3678,9 @@ async function persistMetadataToDisk(metadata) {
|
|
|
3580
3678
|
bytes: metadata.bytes,
|
|
3581
3679
|
mimeType: metadata.mimeType,
|
|
3582
3680
|
sha256Hex: metadata.sha256Hex,
|
|
3583
|
-
localPath: metadata.localPath
|
|
3681
|
+
localPath: metadata.localPath,
|
|
3682
|
+
bucketName: metadata.bucketName,
|
|
3683
|
+
objectName: metadata.objectName
|
|
3584
3684
|
};
|
|
3585
3685
|
await (0, import_promises2.writeFile)(
|
|
3586
3686
|
buildCachedMetadataPath(metadata.file.id),
|
|
@@ -3612,175 +3712,271 @@ async function loadPersistedMetadata(fileId) {
|
|
|
3612
3712
|
bytes: payload.bytes,
|
|
3613
3713
|
mimeType: payload.mimeType,
|
|
3614
3714
|
sha256Hex: payload.sha256Hex,
|
|
3615
|
-
localPath: payload.localPath
|
|
3715
|
+
localPath: payload.localPath,
|
|
3716
|
+
bucketName: payload.bucketName,
|
|
3717
|
+
objectName: payload.objectName
|
|
3616
3718
|
});
|
|
3617
3719
|
} catch {
|
|
3618
3720
|
return void 0;
|
|
3619
3721
|
}
|
|
3620
3722
|
}
|
|
3621
|
-
async function
|
|
3622
|
-
const
|
|
3623
|
-
const
|
|
3624
|
-
|
|
3625
|
-
|
|
3723
|
+
/**
 * Streams a local file into a GCS object, creating it only if it does not
 * already satisfy the `ifGenerationMatch: 0` precondition.
 *
 * @param {object} options
 * @param {string} options.filePath - local file to read
 * @param {string} options.bucketName
 * @param {string} options.objectName
 * @param {number} options.bytes - file size; uploads of 10 MiB or more are resumable
 * @param {string} options.mimeType
 * @param {object} options.metadata - custom metadata; `filename` is used for the headers
 * @returns {Promise<boolean>} true when the object was written, false when the
 *   write failed with error code 412 (presumably the object already exists)
 * @throws any other error raised by the upload
 */
async function writeCanonicalFileFromPath(options) {
  const { bucketName, objectName, filePath, bytes, mimeType, metadata } = options;
  const target = getStorageClient().bucket(bucketName).file(objectName);
  const contentType = resolveCanonicalStorageContentType(
    metadata.filename ?? "attachment.bin",
    mimeType
  );
  try {
    const source = (0, import_node_fs3.createReadStream)(filePath);
    const sink = target.createWriteStream({
      resumable: bytes >= 10 * 1024 * 1024,
      preconditionOpts: { ifGenerationMatch: 0 },
      metadata: {
        contentType,
        contentDisposition: `inline; filename="${toSafeStorageFilename(metadata.filename ?? "attachment.bin")}"`,
        metadata
      }
    });
    await (0, import_promises3.pipeline)(source, sink);
    return true;
  } catch (error) {
    const { code } = error;
    // Only a failed precondition is an expected outcome; everything else propagates.
    if (code !== 412 && code !== "412") {
      throw error;
    }
    return false;
  }
}
|
|
3751
|
+
/**
 * Saves an in-memory buffer as a GCS object, creating it only if it does not
 * already satisfy the `ifGenerationMatch: 0` precondition.
 *
 * @param {object} options
 * @param {Buffer} options.bytes - content; buffers of 10 MiB or more upload resumably
 * @param {string} options.bucketName
 * @param {string} options.objectName
 * @param {string} options.mimeType
 * @param {object} options.metadata - custom metadata; `filename` is used for the headers
 * @returns {Promise<boolean>} true when the object was written, false when the
 *   write failed with error code 412 (presumably the object already exists)
 * @throws any other error raised by the upload
 */
async function writeCanonicalFileFromBytes(options) {
  const { bucketName, objectName, bytes, mimeType, metadata } = options;
  const target = getStorageClient().bucket(bucketName).file(objectName);
  const contentType = resolveCanonicalStorageContentType(
    metadata.filename ?? "attachment.bin",
    mimeType
  );
  try {
    const saveOptions = {
      resumable: bytes.byteLength >= 10 * 1024 * 1024,
      preconditionOpts: { ifGenerationMatch: 0 },
      metadata: {
        contentType,
        contentDisposition: `inline; filename="${toSafeStorageFilename(metadata.filename ?? "attachment.bin")}"`,
        metadata
      }
    };
    await target.save(bytes, saveOptions);
    return true;
  } catch (error) {
    const { code } = error;
    // Only a failed precondition is an expected outcome; everything else propagates.
    if (code !== 412 && code !== "412") {
      throw error;
    }
    return false;
  }
}
|
|
3776
|
+
/**
 * Overwrites the content type, content disposition and custom metadata of an
 * existing GCS object.
 *
 * @param {object} options
 * @param {string} options.bucketName
 * @param {string} options.objectName
 * @param {string} options.mimeType
 * @param {object} options.metadata - custom metadata; `filename` is used for the headers
 * @returns {Promise<void>}
 */
async function refreshCanonicalObjectMetadata(options) {
  const { bucketName, objectName, mimeType, metadata } = options;
  const contentType = resolveCanonicalStorageContentType(
    metadata.filename ?? "attachment.bin",
    mimeType
  );
  const target = getStorageClient().bucket(bucketName).file(objectName);
  const nextMetadata = {
    contentType,
    contentDisposition: `inline; filename="${toSafeStorageFilename(metadata.filename ?? "attachment.bin")}"`,
    metadata
  };
  await target.setMetadata(nextMetadata);
}
|
|
3787
|
+
/**
 * Builds the stored-file record for a canonical upload, registers it through
 * recordMetadata and writes it to the on-disk metadata cache.
 *
 * `created_at` is the current time in Unix seconds and `expires_at` is that
 * value plus `options.expiresAfterSeconds`.
 *
 * @param {object} options - fileId, filename, mimeType, purpose,
 *   expiresAfterSeconds, sha256Hex, bytes, bucketName, objectName, localPath
 * @returns {Promise<object>} the value returned by recordMetadata
 */
async function createCanonicalMetadata(options) {
  const {
    fileId,
    filename,
    mimeType,
    purpose,
    expiresAfterSeconds,
    sha256Hex,
    bytes,
    bucketName,
    objectName,
    localPath
  } = options;
  const nowSeconds = Math.floor(Date.now() / 1e3);
  const record = recordMetadata({
    file: {
      id: fileId,
      bytes,
      created_at: nowSeconds,
      filename,
      object: "file",
      purpose,
      status: "processed",
      expires_at: nowSeconds + expiresAfterSeconds
    },
    filename,
    bytes,
    mimeType,
    sha256Hex,
    localPath,
    bucketName,
    objectName
  });
  await persistMetadataToDisk(record);
  return record;
}
|
|
3690
|
-
async function
|
|
3813
|
+
/**
 * Uploads an in-memory buffer as a canonical file in GCS.
 *
 * Returns the cached record when a fresh one exists for the same
 * (filename, mime type, hash) key. Otherwise it writes the object; if the
 * write reports that nothing was written, it refreshes the existing object's
 * metadata instead. The bytes are then cached locally and the metadata record
 * is created and persisted. An upload event is recorded only when the object
 * was actually written.
 *
 * @param {object} params - bytes, filename, mimeType, purpose,
 *   expiresAfterSeconds, sha256Hex
 * @returns {Promise<object>} the canonical metadata record
 */
async function uploadCanonicalFileFromBytes(params) {
  const { bytes, filename, mimeType, purpose, expiresAfterSeconds, sha256Hex } = params;
  const memoized = filesState.canonicalUploadCacheByKey.get(
    buildCacheKey(filename, mimeType, sha256Hex)
  );
  if (memoized && isFresh(memoized.file)) {
    return memoized;
  }
  const fileId = buildCanonicalFileId(filename, mimeType, sha256Hex);
  const bucketName = resolveCanonicalFilesBucket();
  const objectName = buildCanonicalObjectName(fileId, filename, mimeType);
  const metadataFields = {
    fileId,
    filename,
    mimeType,
    purpose,
    sha256: sha256Hex,
    createdAtUnix: Math.floor(Date.now() / 1e3).toString(),
    expiresAt: new Date(Date.now() + expiresAfterSeconds * 1e3).toISOString()
  };
  const startedAtMs = Date.now();
  const destination = { bucketName, objectName, mimeType, metadata: metadataFields };
  const wasWritten = await writeCanonicalFileFromBytes({ bytes, ...destination });
  if (!wasWritten) {
    // Nothing was written: bring the existing object's metadata up to date.
    await refreshCanonicalObjectMetadata({ ...destination });
  }
  const localPath = await cacheBufferLocally(bytes, sha256Hex);
  const canonical = await createCanonicalMetadata({
    fileId,
    filename,
    mimeType,
    purpose,
    expiresAfterSeconds,
    sha256Hex,
    bytes: bytes.byteLength,
    bucketName,
    objectName,
    localPath
  });
  if (!wasWritten) {
    return canonical;
  }
  recordUploadEvent({
    backend: "gcs",
    mode: "gcs",
    filename,
    bytes: bytes.byteLength,
    durationMs: Math.max(0, Date.now() - startedAtMs),
    mimeType,
    fileId,
    fileUri: `gs://${bucketName}/${objectName}`
  });
  return canonical;
}
|
|
3874
|
+
/**
 * Uploads a local file as a canonical file in GCS.
 *
 * Returns the cached record when a fresh one exists for the same
 * (filename, mime type, hash) key. Otherwise it streams the file to the
 * object; if the write reports that nothing was written, it refreshes the
 * existing object's metadata instead. The file is then cached locally and the
 * metadata record is created and persisted. An upload event is recorded only
 * when the object was actually written.
 *
 * @param {object} params - filePath, filename, mimeType, purpose,
 *   expiresAfterSeconds, sha256Hex, bytes (file size)
 * @returns {Promise<object>} the canonical metadata record
 */
async function uploadCanonicalFileFromPath(params) {
  const { filePath, filename, mimeType, purpose, expiresAfterSeconds, sha256Hex, bytes } = params;
  const memoized = filesState.canonicalUploadCacheByKey.get(
    buildCacheKey(filename, mimeType, sha256Hex)
  );
  if (memoized && isFresh(memoized.file)) {
    return memoized;
  }
  const fileId = buildCanonicalFileId(filename, mimeType, sha256Hex);
  const bucketName = resolveCanonicalFilesBucket();
  const objectName = buildCanonicalObjectName(fileId, filename, mimeType);
  const metadataFields = {
    fileId,
    filename,
    mimeType,
    purpose,
    sha256: sha256Hex,
    createdAtUnix: Math.floor(Date.now() / 1e3).toString(),
    expiresAt: new Date(Date.now() + expiresAfterSeconds * 1e3).toISOString()
  };
  const startedAtMs = Date.now();
  const destination = { bucketName, objectName, mimeType, metadata: metadataFields };
  const wasWritten = await writeCanonicalFileFromPath({ filePath, bytes, ...destination });
  if (!wasWritten) {
    // Nothing was written: bring the existing object's metadata up to date.
    await refreshCanonicalObjectMetadata({ ...destination });
  }
  const localPath = await cacheFileLocally(filePath, sha256Hex);
  const canonical = await createCanonicalMetadata({
    fileId,
    filename,
    mimeType,
    purpose,
    expiresAfterSeconds,
    sha256Hex,
    bytes,
    bucketName,
    objectName,
    localPath
  });
  if (!wasWritten) {
    return canonical;
  }
  recordUploadEvent({
    backend: "gcs",
    mode: "gcs",
    filename,
    bytes,
    durationMs: Math.max(0, Date.now() - startedAtMs),
    mimeType,
    fileId,
    fileUri: `gs://${bucketName}/${objectName}`
  });
  return canonical;
}
|
|
3764
|
-
async function
|
|
3936
|
+
/**
 * Finds the bucket and object name that hold a canonical file.
 *
 * Uses the in-memory metadata, then the persisted metadata, when either
 * carries both a bucket name and an object name. Otherwise it lists the
 * configured bucket for the first object whose name starts with
 * `<prefix><fileId>.`.
 *
 * @param {string} fileId
 * @returns {Promise<{ bucketName: string, objectName: string }>}
 * @throws {Error} when no matching object is listed
 */
async function resolveCanonicalStorageLocation(fileId) {
  let known = filesState.metadataById.get(fileId);
  if (known === undefined || known === null) {
    known = await loadPersistedMetadata(fileId);
  }
  if (known && known.bucketName && known.objectName) {
    return {
      bucketName: known.bucketName,
      objectName: known.objectName
    };
  }
  const bucketName = resolveCanonicalFilesBucket();
  const listing = await getStorageClient().bucket(bucketName).getFiles({
    prefix: `${resolveCanonicalFilesPrefix()}${fileId}.`,
    maxResults: 1,
    autoPaginate: false
  });
  const firstMatch = listing[0][0];
  if (!firstMatch) {
    throw new Error(`Canonical file ${fileId} was not found in GCS.`);
  }
  return { bucketName, objectName: firstMatch.name };
}
|
|
3959
|
+
/**
 * Returns the metadata record for a canonical file.
 *
 * An in-memory or persisted record is returned as-is when it is fresh and
 * carries both a bucket name and an object name. Otherwise the object's
 * metadata is fetched from GCS, converted into a record (keeping any known
 * local path), registered through recordMetadata and persisted to disk.
 *
 * @param {string} fileId
 * @returns {Promise<object>} the metadata record
 */
async function retrieveCanonicalFile(fileId) {
  const isUsable = (entry) => Boolean(entry && isFresh(entry.file) && entry.bucketName && entry.objectName);
  const inMemory = filesState.metadataById.get(fileId);
  if (isUsable(inMemory)) {
    return inMemory;
  }
  const onDisk = await loadPersistedMetadata(fileId);
  if (isUsable(onDisk)) {
    return onDisk;
  }
  const existingLocalPath = inMemory?.localPath ?? onDisk?.localPath;
  const location = await resolveCanonicalStorageLocation(fileId);
  const target = getStorageClient().bucket(location.bucketName).file(location.objectName);
  const [objectMetadata] = await target.getMetadata();
  const refreshed = recordMetadata(
    toStoredFileFromCanonicalMetadata({
      fileId,
      bucketName: location.bucketName,
      objectName: location.objectName,
      objectMetadata,
      localPath: existingLocalPath
    })
  );
  await persistMetadataToDisk(refreshed);
  return refreshed;
}
|
|
@@ -3808,7 +4004,7 @@ function resolveVertexMirrorBucket() {
|
|
|
3808
4004
|
const trimmed = raw?.trim();
|
|
3809
4005
|
if (!trimmed) {
|
|
3810
4006
|
throw new Error(
|
|
3811
|
-
"VERTEX_GCS_BUCKET must be set to use
|
|
4007
|
+
"VERTEX_GCS_BUCKET must be set to use canonical file ids with Vertex Gemini models."
|
|
3812
4008
|
);
|
|
3813
4009
|
}
|
|
3814
4010
|
return trimmed.replace(/^gs:\/\//u, "").replace(/\/+$/u, "");
|
|
@@ -3838,61 +4034,41 @@ function getGeminiMirrorClient() {
|
|
|
3838
4034
|
}
|
|
3839
4035
|
return filesState.geminiClientPromise;
|
|
3840
4036
|
}
|
|
3841
|
-
async function
|
|
4037
|
+
async function materializeCanonicalFile(fileId) {
|
|
3842
4038
|
const cachedPromise = filesState.materializedById.get(fileId);
|
|
3843
4039
|
if (cachedPromise) {
|
|
3844
4040
|
return await cachedPromise;
|
|
3845
4041
|
}
|
|
3846
4042
|
const promise = (async () => {
|
|
3847
|
-
const metadata = await
|
|
3848
|
-
if (metadata.localPath && metadata.sha256Hex && metadata.mimeType) {
|
|
4043
|
+
const metadata = await retrieveCanonicalFile(fileId);
|
|
4044
|
+
if (metadata.localPath && metadata.sha256Hex && metadata.mimeType && metadata.bucketName && metadata.objectName) {
|
|
3849
4045
|
return {
|
|
3850
4046
|
file: metadata.file,
|
|
3851
4047
|
filename: metadata.filename,
|
|
3852
4048
|
bytes: metadata.bytes,
|
|
3853
4049
|
mimeType: metadata.mimeType,
|
|
3854
4050
|
sha256Hex: metadata.sha256Hex,
|
|
3855
|
-
localPath: metadata.localPath
|
|
4051
|
+
localPath: metadata.localPath,
|
|
4052
|
+
bucketName: metadata.bucketName,
|
|
4053
|
+
objectName: metadata.objectName
|
|
3856
4054
|
};
|
|
3857
4055
|
}
|
|
3858
|
-
|
|
3859
|
-
|
|
3860
|
-
import_node_path4.default.join(FILES_TEMP_ROOT, `${fileId.replace(/[^a-z0-9_-]/giu, "")}-`)
|
|
3861
|
-
);
|
|
3862
|
-
const localPath = import_node_path4.default.join(tempDir, normaliseFilename(metadata.filename, `${fileId}.bin`));
|
|
3863
|
-
const response = await getOpenAiClient().files.content(fileId);
|
|
3864
|
-
if (!response.ok) {
|
|
3865
|
-
throw new Error(
|
|
3866
|
-
`Failed to download OpenAI file ${fileId}: ${response.status} ${response.statusText}`
|
|
3867
|
-
);
|
|
4056
|
+
if (!metadata.bucketName || !metadata.objectName) {
|
|
4057
|
+
throw new Error(`Canonical file ${fileId} is missing GCS location metadata.`);
|
|
3868
4058
|
}
|
|
3869
|
-
const
|
|
3870
|
-
const mimeType = resolveMimeType(metadata.filename,
|
|
3871
|
-
const
|
|
3872
|
-
|
|
3873
|
-
if (response.body) {
|
|
3874
|
-
const source = import_node_stream.Readable.fromWeb(response.body);
|
|
3875
|
-
const writable = (0, import_node_fs3.createWriteStream)(localPath, { flags: "wx" });
|
|
3876
|
-
source.on("data", (chunk) => {
|
|
3877
|
-
const buffer = import_node_buffer3.Buffer.isBuffer(chunk) ? chunk : import_node_buffer3.Buffer.from(chunk);
|
|
3878
|
-
hash.update(buffer);
|
|
3879
|
-
bytes += buffer.byteLength;
|
|
3880
|
-
});
|
|
3881
|
-
await (0, import_promises3.pipeline)(source, writable);
|
|
3882
|
-
} else {
|
|
3883
|
-
const buffer = import_node_buffer3.Buffer.from(await response.arrayBuffer());
|
|
3884
|
-
hash.update(buffer);
|
|
3885
|
-
bytes = buffer.byteLength;
|
|
3886
|
-
await (0, import_promises2.writeFile)(localPath, buffer);
|
|
3887
|
-
}
|
|
3888
|
-
const sha256Hex = hash.digest("hex");
|
|
4059
|
+
const [downloadedBytes] = await getStorageClient().bucket(metadata.bucketName).file(metadata.objectName).download();
|
|
4060
|
+
const mimeType = metadata.mimeType ?? resolveMimeType(metadata.filename, void 0);
|
|
4061
|
+
const sha256Hex = metadata.sha256Hex ?? computeSha256Hex(downloadedBytes);
|
|
4062
|
+
const localPath = await cacheBufferLocally(downloadedBytes, sha256Hex);
|
|
3889
4063
|
const updated = recordMetadata({
|
|
3890
4064
|
file: metadata.file,
|
|
3891
4065
|
filename: metadata.filename,
|
|
3892
|
-
bytes:
|
|
4066
|
+
bytes: downloadedBytes.byteLength || metadata.bytes,
|
|
3893
4067
|
mimeType,
|
|
3894
4068
|
sha256Hex,
|
|
3895
|
-
localPath
|
|
4069
|
+
localPath,
|
|
4070
|
+
bucketName: metadata.bucketName,
|
|
4071
|
+
objectName: metadata.objectName
|
|
3896
4072
|
});
|
|
3897
4073
|
await persistMetadataToDisk(updated);
|
|
3898
4074
|
return {
|
|
@@ -3901,7 +4077,9 @@ async function materializeOpenAiFile(fileId) {
|
|
|
3901
4077
|
bytes: updated.bytes,
|
|
3902
4078
|
mimeType: updated.mimeType ?? mimeType,
|
|
3903
4079
|
sha256Hex,
|
|
3904
|
-
localPath
|
|
4080
|
+
localPath,
|
|
4081
|
+
bucketName: metadata.bucketName,
|
|
4082
|
+
objectName: metadata.objectName
|
|
3905
4083
|
};
|
|
3906
4084
|
})();
|
|
3907
4085
|
filesState.materializedById.set(fileId, promise);
|
|
@@ -3917,14 +4095,14 @@ async function ensureGeminiFileMirror(fileId) {
|
|
|
3917
4095
|
if (cached) {
|
|
3918
4096
|
return cached;
|
|
3919
4097
|
}
|
|
3920
|
-
const materialized = await
|
|
4098
|
+
const materialized = await materializeCanonicalFile(fileId);
|
|
3921
4099
|
const client = await getGeminiMirrorClient();
|
|
3922
4100
|
const name = buildGeminiMirrorName(materialized.sha256Hex);
|
|
3923
4101
|
try {
|
|
3924
4102
|
const existing = await client.files.get({ name });
|
|
3925
4103
|
if (existing.name && existing.uri && existing.mimeType) {
|
|
3926
4104
|
const mirror2 = {
|
|
3927
|
-
|
|
4105
|
+
canonicalFileId: fileId,
|
|
3928
4106
|
name: existing.name,
|
|
3929
4107
|
uri: existing.uri,
|
|
3930
4108
|
mimeType: existing.mimeType,
|
|
@@ -3952,7 +4130,7 @@ async function ensureGeminiFileMirror(fileId) {
|
|
|
3952
4130
|
throw new Error("Gemini file upload completed without a usable URI.");
|
|
3953
4131
|
}
|
|
3954
4132
|
const mirror = {
|
|
3955
|
-
|
|
4133
|
+
canonicalFileId: fileId,
|
|
3956
4134
|
name: resolved.name,
|
|
3957
4135
|
uri: resolved.uri,
|
|
3958
4136
|
mimeType: resolved.mimeType,
|
|
@@ -3977,7 +4155,7 @@ async function ensureVertexFileMirror(fileId) {
|
|
|
3977
4155
|
if (cached) {
|
|
3978
4156
|
return cached;
|
|
3979
4157
|
}
|
|
3980
|
-
const materialized = await
|
|
4158
|
+
const materialized = await materializeCanonicalFile(fileId);
|
|
3981
4159
|
const bucketName = resolveVertexMirrorBucket();
|
|
3982
4160
|
const prefix = resolveVertexMirrorPrefix();
|
|
3983
4161
|
const extension = import_mime.default.getExtension(materialized.mimeType) ?? import_node_path4.default.extname(materialized.filename).replace(/^\./u, "") ?? "bin";
|
|
@@ -4018,7 +4196,7 @@ async function ensureVertexFileMirror(fileId) {
|
|
|
4018
4196
|
}
|
|
4019
4197
|
}
|
|
4020
4198
|
const mirror = {
|
|
4021
|
-
|
|
4199
|
+
canonicalFileId: fileId,
|
|
4022
4200
|
bucket: bucketName,
|
|
4023
4201
|
objectName,
|
|
4024
4202
|
fileUri: `gs://${bucketName}/${objectName}`,
|
|
@@ -4049,7 +4227,7 @@ async function filesCreate(params) {
|
|
|
4049
4227
|
const filename2 = normaliseFilename(params.filename, import_node_path4.default.basename(filePath));
|
|
4050
4228
|
const mimeType2 = resolveMimeType(filename2, params.mimeType);
|
|
4051
4229
|
const sha256Hex2 = await computeFileSha256Hex(filePath);
|
|
4052
|
-
const uploaded2 = await
|
|
4230
|
+
const uploaded2 = await uploadCanonicalFileFromPath({
|
|
4053
4231
|
filePath,
|
|
4054
4232
|
filename: filename2,
|
|
4055
4233
|
mimeType: mimeType2,
|
|
@@ -4058,19 +4236,13 @@ async function filesCreate(params) {
|
|
|
4058
4236
|
sha256Hex: sha256Hex2,
|
|
4059
4237
|
bytes: info.size
|
|
4060
4238
|
});
|
|
4061
|
-
|
|
4062
|
-
const cached2 = recordMetadata({
|
|
4063
|
-
...uploaded2,
|
|
4064
|
-
localPath: localPath2
|
|
4065
|
-
});
|
|
4066
|
-
await persistMetadataToDisk(cached2);
|
|
4067
|
-
return cached2.file;
|
|
4239
|
+
return uploaded2.file;
|
|
4068
4240
|
}
|
|
4069
4241
|
const filename = normaliseFilename(params.filename);
|
|
4070
4242
|
const bytes = toBuffer(params.data);
|
|
4071
4243
|
const mimeType = resolveMimeType(filename, params.mimeType, "text/plain");
|
|
4072
4244
|
const sha256Hex = computeSha256Hex(bytes);
|
|
4073
|
-
const uploaded = await
|
|
4245
|
+
const uploaded = await uploadCanonicalFileFromBytes({
|
|
4074
4246
|
bytes,
|
|
4075
4247
|
filename,
|
|
4076
4248
|
mimeType,
|
|
@@ -4078,16 +4250,10 @@ async function filesCreate(params) {
|
|
|
4078
4250
|
expiresAfterSeconds,
|
|
4079
4251
|
sha256Hex
|
|
4080
4252
|
});
|
|
4081
|
-
|
|
4082
|
-
const cached = recordMetadata({
|
|
4083
|
-
...uploaded,
|
|
4084
|
-
localPath
|
|
4085
|
-
});
|
|
4086
|
-
await persistMetadataToDisk(cached);
|
|
4087
|
-
return cached.file;
|
|
4253
|
+
return uploaded.file;
|
|
4088
4254
|
}
|
|
4089
4255
|
/**
 * Looks up a canonical file and returns its public file object.
 *
 * @param fileId Identifier forwarded unchanged to `retrieveCanonicalFile`.
 * @returns The `file` property of the record resolved by `retrieveCanonicalFile`.
 */
async function filesRetrieve(fileId) {
  const { file } = await retrieveCanonicalFile(fileId);
  return file;
}
|
|
4092
4258
|
async function filesDelete(fileId) {
|
|
4093
4259
|
const cachedGemini = filesState.geminiMirrorById.get(fileId);
|
|
@@ -4114,34 +4280,73 @@ async function filesDelete(fileId) {
|
|
|
4114
4280
|
} catch {
|
|
4115
4281
|
}
|
|
4116
4282
|
}
|
|
4117
|
-
|
|
4283
|
+
try {
|
|
4284
|
+
const { bucketName, objectName } = await resolveCanonicalStorageLocation(fileId);
|
|
4285
|
+
await getStorageClient().bucket(bucketName).file(objectName).delete({ ignoreNotFound: true });
|
|
4286
|
+
} catch {
|
|
4287
|
+
}
|
|
4118
4288
|
filesState.metadataById.delete(fileId);
|
|
4289
|
+
filesState.canonicalUploadCacheByKey.forEach((value, key) => {
|
|
4290
|
+
if (value.file.id === fileId) {
|
|
4291
|
+
filesState.canonicalUploadCacheByKey.delete(key);
|
|
4292
|
+
}
|
|
4293
|
+
});
|
|
4119
4294
|
filesState.materializedById.delete(fileId);
|
|
4120
4295
|
try {
|
|
4121
4296
|
await (0, import_promises2.unlink)(buildCachedMetadataPath(fileId));
|
|
4122
4297
|
} catch {
|
|
4123
4298
|
}
|
|
4124
4299
|
return {
|
|
4125
|
-
id:
|
|
4126
|
-
deleted:
|
|
4300
|
+
id: fileId,
|
|
4301
|
+
deleted: true,
|
|
4127
4302
|
object: "file"
|
|
4128
4303
|
};
|
|
4129
4304
|
}
|
|
4130
4305
|
/**
 * Downloads the stored bytes of a canonical file and wraps them in a `Response`.
 *
 * The record is resolved through `retrieveCanonicalFile`; its `bucketName` and
 * `objectName` identify the object fetched through the storage client.
 *
 * @param fileId Identifier of the canonical file to download.
 * @returns A 200 `Response` whose body is the downloaded bytes, with
 *   `content-type`, `content-length` and `content-disposition` headers set.
 * @throws {Error} When the record has no bucket or object name.
 */
async function filesContent(fileId) {
  const record = await retrieveCanonicalFile(fileId);
  if (!record.bucketName || !record.objectName) {
    throw new Error(`Canonical file ${fileId} is missing GCS location metadata.`);
  }
  const storedObject = getStorageClient().bucket(record.bucketName).file(record.objectName);
  const [bytes] = await storedObject.download();
  // Fall back to a filename-derived type only when the record carries none.
  const contentType = record.mimeType ?? resolveMimeType(record.filename, void 0);
  const headers = new Headers({
    "content-type": contentType,
    "content-length": bytes.byteLength.toString(),
    "content-disposition": `inline; filename="${toSafeStorageFilename(record.filename)}"`
  });
  return new Response(bytes, { status: 200, headers });
}
|
|
4133
4323
|
/**
 * Resolves a canonical file record with a MIME type and storage location.
 *
 * When the stored record has no `mimeType`, one is derived from the filename
 * via `resolveMimeType`; if that changes the value, the amended record is
 * passed through `recordMetadata` and its result is used from then on.
 *
 * @param fileId Identifier of the canonical file.
 * @returns The record spread into a new object together with `mimeType`,
 *   `bucketName` and `objectName`.
 * @throws {Error} When the record has no bucket or object name.
 */
async function getCanonicalFileMetadata(fileId) {
  const stored = await retrieveCanonicalFile(fileId);
  const mimeType = stored.mimeType ?? resolveMimeType(stored.filename, void 0);
  let current = stored;
  if (stored.mimeType !== mimeType) {
    current = recordMetadata({
      ...stored,
      mimeType
    });
  }
  const { bucketName, objectName } = current;
  if (!bucketName || !objectName) {
    throw new Error(`Canonical file ${fileId} is missing GCS location metadata.`);
  }
  return {
    ...current,
    mimeType,
    bucketName,
    objectName
  };
}
|
|
4340
|
+
/**
 * Creates a V4 signed read URL for a canonical file's storage object.
 *
 * @param options.fileId Identifier of the canonical file.
 * @param options.expiresAfterSeconds Lifetime of the URL in seconds; when
 *   `null`/`undefined` it falls back to 15 minutes.
 * @returns The first element of the tuple resolved by `getSignedUrl`.
 */
async function getCanonicalFileSignedUrl(options) {
  const record = await getCanonicalFileMetadata(options.fileId);
  const storedObject = getStorageClient().bucket(record.bucketName).file(record.objectName);
  const signingConfig = {
    version: "v4",
    action: "read",
    // Expiry is an absolute timestamp in milliseconds.
    expires: Date.now() + (options.expiresAfterSeconds ?? 15 * 60) * 1e3,
    responseType: resolveCanonicalStorageContentType(record.filename, record.mimeType)
  };
  const [url] = await storedObject.getSignedUrl(signingConfig);
  return url;
}
|
|
4145
4350
|
var files = {
|
|
4146
4351
|
create: filesCreate,
|
|
4147
4352
|
retrieve: filesRetrieve,
|
|
@@ -4503,6 +4708,7 @@ function isJsonSchemaObject(schema) {
|
|
|
4503
4708
|
return false;
|
|
4504
4709
|
}
|
|
4505
4710
|
var CANONICAL_GEMINI_FILE_URI_PREFIX = "openai://file/";
|
|
4711
|
+
var CANONICAL_LLM_FILE_ID_PATTERN = /^file_[a-f0-9]{64}$/u;
|
|
4506
4712
|
/**
 * Builds the canonical file URI for a file id by prepending
 * `CANONICAL_GEMINI_FILE_URI_PREFIX`.
 *
 * @param fileId Identifier appended after the prefix.
 * @returns The prefixed URI string.
 */
function buildCanonicalGeminiFileUri(fileId) {
  const uri = `${CANONICAL_GEMINI_FILE_URI_PREFIX}${fileId}`;
  return uri;
}
|
|
@@ -4513,6 +4719,9 @@ function parseCanonicalGeminiFileId(fileUri) {
|
|
|
4513
4719
|
const fileId = fileUri.slice(CANONICAL_GEMINI_FILE_URI_PREFIX.length).trim();
|
|
4514
4720
|
return fileId.length > 0 ? fileId : void 0;
|
|
4515
4721
|
}
|
|
4722
|
+
/**
 * Reports whether a value is a string that, once trimmed, matches
 * `CANONICAL_LLM_FILE_ID_PATTERN`.
 *
 * @param fileId Candidate value of any type.
 * @returns `true` only for strings matching the pattern; `false` otherwise.
 */
function isCanonicalLlmFileId(fileId) {
  if (typeof fileId !== "string") {
    return false;
  }
  return CANONICAL_LLM_FILE_ID_PATTERN.test(fileId.trim());
}
|
|
4516
4725
|
/**
 * Reports whether a value is one of the recognised media-resolution keywords:
 * "auto", "low", "medium", "high" or "original".
 *
 * @param value Candidate value of any type; compared with strict equality.
 * @returns `true` when the value is exactly one of the keywords.
 */
function isLlmMediaResolution(value) {
  switch (value) {
    case "auto":
    case "low":
    case "medium":
    case "high":
    case "original":
      return true;
    default:
      return false;
  }
}
|
|
@@ -5000,7 +5209,21 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
5000
5209
|
if (!isOpenAiNativeContentItem(item)) {
|
|
5001
5210
|
return item;
|
|
5002
5211
|
}
|
|
5003
|
-
if (item.type === "input_image"
|
|
5212
|
+
if (item.type === "input_image") {
|
|
5213
|
+
if (isCanonicalLlmFileId(item.file_id)) {
|
|
5214
|
+
const signedUrl2 = await getCanonicalFileSignedUrl({ fileId: item.file_id });
|
|
5215
|
+
return {
|
|
5216
|
+
type: "input_image",
|
|
5217
|
+
image_url: signedUrl2,
|
|
5218
|
+
detail: toOpenAiImageDetail(
|
|
5219
|
+
isLlmMediaResolution(item.detail) ? item.detail : void 0,
|
|
5220
|
+
options?.model
|
|
5221
|
+
)
|
|
5222
|
+
};
|
|
5223
|
+
}
|
|
5224
|
+
if (options?.offloadInlineData !== true || typeof item.image_url !== "string" || !item.image_url.trim().toLowerCase().startsWith("data:")) {
|
|
5225
|
+
return item;
|
|
5226
|
+
}
|
|
5004
5227
|
const parsed = parseDataUrlPayload(item.image_url);
|
|
5005
5228
|
if (!parsed) {
|
|
5006
5229
|
return item;
|
|
@@ -5013,16 +5236,27 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
5013
5236
|
guessInlineDataFilename(parsed.mimeType)
|
|
5014
5237
|
)
|
|
5015
5238
|
});
|
|
5239
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: uploaded.fileId });
|
|
5016
5240
|
return {
|
|
5017
5241
|
type: "input_image",
|
|
5242
|
+
image_url: signedUrl,
|
|
5018
5243
|
detail: toOpenAiImageDetail(
|
|
5019
5244
|
isLlmMediaResolution(item.detail) ? item.detail : void 0,
|
|
5020
5245
|
options?.model
|
|
5021
|
-
)
|
|
5022
|
-
|
|
5246
|
+
)
|
|
5247
|
+
};
|
|
5248
|
+
}
|
|
5249
|
+
if (item.type !== "input_file") {
|
|
5250
|
+
return item;
|
|
5251
|
+
}
|
|
5252
|
+
if (isCanonicalLlmFileId(item.file_id)) {
|
|
5253
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: item.file_id });
|
|
5254
|
+
return {
|
|
5255
|
+
type: "input_file",
|
|
5256
|
+
file_url: signedUrl
|
|
5023
5257
|
};
|
|
5024
5258
|
}
|
|
5025
|
-
if (
|
|
5259
|
+
if (options?.offloadInlineData !== true) {
|
|
5026
5260
|
return item;
|
|
5027
5261
|
}
|
|
5028
5262
|
if (typeof item.file_data === "string" && item.file_data.trim().length > 0) {
|
|
@@ -5036,7 +5270,11 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
5036
5270
|
mimeType,
|
|
5037
5271
|
filename
|
|
5038
5272
|
});
|
|
5039
|
-
|
|
5273
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: uploaded.fileId });
|
|
5274
|
+
return {
|
|
5275
|
+
type: "input_file",
|
|
5276
|
+
file_url: signedUrl
|
|
5277
|
+
};
|
|
5040
5278
|
}
|
|
5041
5279
|
if (typeof item.file_url === "string" && item.file_url.trim().toLowerCase().startsWith("data:")) {
|
|
5042
5280
|
const parsed = parseDataUrlPayload(item.file_url);
|
|
@@ -5051,7 +5289,11 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
5051
5289
|
guessInlineDataFilename(parsed.mimeType)
|
|
5052
5290
|
)
|
|
5053
5291
|
});
|
|
5054
|
-
|
|
5292
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: uploaded.fileId });
|
|
5293
|
+
return {
|
|
5294
|
+
type: "input_file",
|
|
5295
|
+
file_url: signedUrl
|
|
5296
|
+
};
|
|
5055
5297
|
}
|
|
5056
5298
|
return item;
|
|
5057
5299
|
}
|
|
@@ -5081,11 +5323,40 @@ async function prepareOpenAiPromptInput(input, options) {
|
|
|
5081
5323
|
};
|
|
5082
5324
|
return await Promise.all(input.map((item) => prepareItem(item)));
|
|
5083
5325
|
}
|
|
5326
|
+
/**
 * Reports whether a prompt input contains an `input_image` or `input_file`
 * item whose `file_id` satisfies `isCanonicalLlmFileId`.
 *
 * The walk descends into any array-valued `content` or `output` property, so
 * references nested inside those arrays are found as well.
 *
 * @param input Iterable of prompt input items.
 * @returns `true` as soon as one matching item is found, otherwise `false`.
 */
function hasCanonicalOpenAiFileReferences(input) {
  const containsReference = (items) => {
    for (const entry of items) {
      if (!entry || typeof entry !== "object") {
        continue;
      }
      // Nested arrays are searched before the entry itself is inspected.
      if (Array.isArray(entry.content) && containsReference(entry.content)) {
        return true;
      }
      if (Array.isArray(entry.output) && containsReference(entry.output)) {
        return true;
      }
      if (!isOpenAiNativeContentItem(entry)) {
        continue;
      }
      const isFileLikeItem = entry.type === "input_image" || entry.type === "input_file";
      if (isFileLikeItem && isCanonicalLlmFileId(entry.file_id)) {
        return true;
      }
    }
    return false;
  };
  return containsReference(input);
}
|
|
5084
5351
|
/**
 * Runs `prepareOpenAiPromptInput` only when the input needs it.
 *
 * Preparation is needed when the estimated inline byte size exceeds
 * `INLINE_ATTACHMENT_PROMPT_THRESHOLD_BYTES`, or when the input contains
 * canonical file references. Otherwise a shallow array copy is returned.
 *
 * @param input Iterable of prompt input items.
 * @param options Options forwarded to `prepareOpenAiPromptInput`, with
 *   `offloadInlineData` added to reflect the size check.
 * @returns The prepared input, or `Array.from(input)` when nothing needs preparing.
 */
async function maybePrepareOpenAiPromptInput(input, options) {
  const estimatedInlineBytes = estimateOpenAiInlinePromptBytes(input);
  const offloadInlineData = estimatedInlineBytes > INLINE_ATTACHMENT_PROMPT_THRESHOLD_BYTES;
  // The reference scan is skipped when the size check already requires preparation.
  const needsPreparation = offloadInlineData || hasCanonicalOpenAiFileReferences(input);
  if (!needsPreparation) {
    return Array.from(input);
  }
  const preparationOptions = {
    ...options,
    offloadInlineData
  };
  return await prepareOpenAiPromptInput(input, preparationOptions);
}
|
|
5090
5361
|
function estimateGeminiInlinePromptBytes(contents) {
|
|
5091
5362
|
let total = 0;
|
|
@@ -6387,9 +6658,6 @@ async function maybeSpillToolOutputItem(item, toolName, options) {
|
|
|
6387
6658
|
return item;
|
|
6388
6659
|
}
|
|
6389
6660
|
async function maybeSpillToolOutput(value, toolName, options) {
|
|
6390
|
-
if (options?.provider === "chatgpt") {
|
|
6391
|
-
return value;
|
|
6392
|
-
}
|
|
6393
6661
|
if (typeof value === "string") {
|
|
6394
6662
|
if (options?.force !== true && import_node_buffer4.Buffer.byteLength(value, "utf8") <= TOOL_OUTPUT_SPILL_THRESHOLD_BYTES) {
|
|
6395
6663
|
return value;
|
|
@@ -7472,7 +7740,7 @@ async function runTextCall(params) {
|
|
|
7472
7740
|
defaultMediaResolution: request.mediaResolution,
|
|
7473
7741
|
model: request.model
|
|
7474
7742
|
}),
|
|
7475
|
-
{ model: request.model }
|
|
7743
|
+
{ model: request.model, provider: "openai" }
|
|
7476
7744
|
);
|
|
7477
7745
|
const openAiTools = toOpenAiTools(request.tools);
|
|
7478
7746
|
const reasoningEffort = resolveOpenAiReasoningEffort(
|
|
@@ -7551,6 +7819,10 @@ async function runTextCall(params) {
|
|
|
7551
7819
|
defaultMediaResolution: request.mediaResolution,
|
|
7552
7820
|
model: request.model
|
|
7553
7821
|
});
|
|
7822
|
+
const preparedChatGptInput = await maybePrepareOpenAiPromptInput(chatGptInput.input, {
|
|
7823
|
+
model: request.model,
|
|
7824
|
+
provider: "chatgpt"
|
|
7825
|
+
});
|
|
7554
7826
|
const reasoningEffort = resolveOpenAiReasoningEffort(request.model, request.thinkingLevel);
|
|
7555
7827
|
const openAiTools = toOpenAiTools(request.tools);
|
|
7556
7828
|
const requestPayload = {
|
|
@@ -7559,7 +7831,7 @@ async function runTextCall(params) {
|
|
|
7559
7831
|
stream: true,
|
|
7560
7832
|
...providerInfo.serviceTier ? { service_tier: providerInfo.serviceTier } : {},
|
|
7561
7833
|
instructions: chatGptInput.instructions ?? "You are a helpful assistant.",
|
|
7562
|
-
input:
|
|
7834
|
+
input: preparedChatGptInput,
|
|
7563
7835
|
include: ["reasoning.encrypted_content"],
|
|
7564
7836
|
reasoning: {
|
|
7565
7837
|
effort: toOpenAiReasoningEffort(reasoningEffort),
|
|
@@ -8367,7 +8639,8 @@ async function runToolLoop(request) {
|
|
|
8367
8639
|
let stepToolCallText;
|
|
8368
8640
|
let stepToolCallPayload;
|
|
8369
8641
|
const preparedInput = await maybePrepareOpenAiPromptInput(input, {
|
|
8370
|
-
model: request.model
|
|
8642
|
+
model: request.model,
|
|
8643
|
+
provider: "openai"
|
|
8371
8644
|
});
|
|
8372
8645
|
const stepRequestPayload = {
|
|
8373
8646
|
model: providerInfo.model,
|
|
@@ -8737,6 +9010,10 @@ async function runToolLoop(request) {
|
|
|
8737
9010
|
let reasoningSummaryText = "";
|
|
8738
9011
|
let stepToolCallText;
|
|
8739
9012
|
let stepToolCallPayload;
|
|
9013
|
+
const preparedInput = await maybePrepareOpenAiPromptInput(input, {
|
|
9014
|
+
model: request.model,
|
|
9015
|
+
provider: "chatgpt"
|
|
9016
|
+
});
|
|
8740
9017
|
const markFirstModelEvent = () => {
|
|
8741
9018
|
if (firstModelEventAtMs === void 0) {
|
|
8742
9019
|
firstModelEventAtMs = Date.now();
|
|
@@ -8748,7 +9025,7 @@ async function runToolLoop(request) {
|
|
|
8748
9025
|
stream: true,
|
|
8749
9026
|
...providerInfo.serviceTier ? { service_tier: providerInfo.serviceTier } : {},
|
|
8750
9027
|
instructions: toolLoopInput.instructions ?? "You are a helpful assistant.",
|
|
8751
|
-
input,
|
|
9028
|
+
input: preparedInput,
|
|
8752
9029
|
prompt_cache_key: promptCacheKey,
|
|
8753
9030
|
include: ["reasoning.encrypted_content"],
|
|
8754
9031
|
tools: openAiTools,
|