@ljoukov/llm 6.0.0 → 7.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -11
- package/dist/index.cjs +525 -237
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +528 -240
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -206,6 +206,14 @@ function getGeminiImagePricing(modelId) {
|
|
|
206
206
|
}
|
|
207
207
|
|
|
208
208
|
// src/openai/pricing.ts
|
|
209
|
+
var OPENAI_GPT_54_FAST_MODEL_IDS = ["gpt-5.4-fast", "chatgpt-gpt-5.4-fast"];
|
|
210
|
+
var OPENAI_GPT_54_MINI_MODEL_IDS = ["gpt-5.4-mini", "chatgpt-gpt-5.4-mini"];
|
|
211
|
+
var OPENAI_GPT_54_NANO_MODEL_IDS = ["gpt-5.4-nano"];
|
|
212
|
+
var OPENAI_GPT_53_CODEX_SPARK_MODEL_IDS = [
|
|
213
|
+
"gpt-5.3-codex-spark",
|
|
214
|
+
"chatgpt-gpt-5.3-codex-spark"
|
|
215
|
+
];
|
|
216
|
+
var OPENAI_GPT_54_STANDARD_MODEL_IDS = ["gpt-5.4", "chatgpt-gpt-5.4"];
|
|
209
217
|
var OPENAI_GPT_54_PRICING = {
|
|
210
218
|
inputRate: 2.5 / 1e6,
|
|
211
219
|
cachedRate: 0.25 / 1e6,
|
|
@@ -227,19 +235,19 @@ var OPENAI_GPT_54_NANO_PRICING = {
|
|
|
227
235
|
outputRate: 0.4 / 1e6
|
|
228
236
|
};
|
|
229
237
|
function getOpenAiPricing(modelId) {
|
|
230
|
-
if (
|
|
238
|
+
if (OPENAI_GPT_54_FAST_MODEL_IDS.includes(modelId)) {
|
|
231
239
|
return OPENAI_GPT_54_PRIORITY_PRICING;
|
|
232
240
|
}
|
|
233
|
-
if (
|
|
241
|
+
if (OPENAI_GPT_54_MINI_MODEL_IDS.includes(modelId)) {
|
|
234
242
|
return OPENAI_GPT_54_MINI_PRICING;
|
|
235
243
|
}
|
|
236
|
-
if (
|
|
244
|
+
if (OPENAI_GPT_54_NANO_MODEL_IDS.includes(modelId)) {
|
|
237
245
|
return OPENAI_GPT_54_NANO_PRICING;
|
|
238
246
|
}
|
|
239
|
-
if (
|
|
247
|
+
if (OPENAI_GPT_53_CODEX_SPARK_MODEL_IDS.includes(modelId)) {
|
|
240
248
|
return OPENAI_GPT_54_MINI_PRICING;
|
|
241
249
|
}
|
|
242
|
-
if (
|
|
250
|
+
if (OPENAI_GPT_54_STANDARD_MODEL_IDS.includes(modelId)) {
|
|
243
251
|
return OPENAI_GPT_54_PRICING;
|
|
244
252
|
}
|
|
245
253
|
return void 0;
|
|
@@ -2242,6 +2250,9 @@ function normaliseConfigValue(value) {
|
|
|
2242
2250
|
}
|
|
2243
2251
|
function resolveGeminiApiKey() {
|
|
2244
2252
|
loadLocalEnv();
|
|
2253
|
+
if (normaliseConfigValue(process.env.GOOGLE_SERVICE_ACCOUNT_JSON)) {
|
|
2254
|
+
return void 0;
|
|
2255
|
+
}
|
|
2245
2256
|
const raw = process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY;
|
|
2246
2257
|
return normaliseConfigValue(raw);
|
|
2247
2258
|
}
|
|
@@ -3240,19 +3251,16 @@ function getCurrentAgentLoggingSession() {
|
|
|
3240
3251
|
|
|
3241
3252
|
// src/files.ts
|
|
3242
3253
|
import { AsyncLocalStorage as AsyncLocalStorage2 } from "async_hooks";
|
|
3243
|
-
import { Buffer as Buffer4
|
|
3254
|
+
import { Buffer as Buffer4 } from "buffer";
|
|
3244
3255
|
import { createHash } from "crypto";
|
|
3245
|
-
import { createReadStream
|
|
3246
|
-
import { copyFile, mkdir as mkdir2,
|
|
3256
|
+
import { createReadStream } from "fs";
|
|
3257
|
+
import { copyFile, mkdir as mkdir2, readFile, stat, unlink, writeFile as writeFile2 } from "fs/promises";
|
|
3247
3258
|
import os3 from "os";
|
|
3248
3259
|
import path4 from "path";
|
|
3249
|
-
import { Readable } from "stream";
|
|
3250
3260
|
import { pipeline } from "stream/promises";
|
|
3251
3261
|
import { Storage } from "@google-cloud/storage";
|
|
3252
3262
|
import mime from "mime";
|
|
3253
3263
|
var DEFAULT_FILE_TTL_SECONDS = 48 * 60 * 60;
|
|
3254
|
-
var OPENAI_FILE_CREATE_MAX_BYTES = 512 * 1024 * 1024;
|
|
3255
|
-
var OPENAI_UPLOAD_PART_MAX_BYTES = 64 * 1024 * 1024;
|
|
3256
3264
|
var GEMINI_FILE_POLL_INTERVAL_MS = 1e3;
|
|
3257
3265
|
var GEMINI_FILE_POLL_TIMEOUT_MS = 6e4;
|
|
3258
3266
|
var FILES_TEMP_ROOT = path4.join(os3.tmpdir(), "ljoukov-llm-files");
|
|
@@ -3261,7 +3269,7 @@ var FILES_CACHE_CONTENT_ROOT = path4.join(FILES_CACHE_ROOT, "content");
|
|
|
3261
3269
|
var FILES_CACHE_METADATA_ROOT = path4.join(FILES_CACHE_ROOT, "metadata");
|
|
3262
3270
|
var filesState = getRuntimeSingleton(/* @__PURE__ */ Symbol.for("@ljoukov/llm.filesState"), () => ({
|
|
3263
3271
|
metadataById: /* @__PURE__ */ new Map(),
|
|
3264
|
-
|
|
3272
|
+
canonicalUploadCacheByKey: /* @__PURE__ */ new Map(),
|
|
3265
3273
|
materializedById: /* @__PURE__ */ new Map(),
|
|
3266
3274
|
geminiMirrorById: /* @__PURE__ */ new Map(),
|
|
3267
3275
|
vertexMirrorById: /* @__PURE__ */ new Map(),
|
|
@@ -3342,7 +3350,7 @@ function formatUploadLogLine(event) {
|
|
|
3342
3350
|
}
|
|
3343
3351
|
function recordUploadEvent(event) {
|
|
3344
3352
|
const scope = fileUploadScopeStorage.getStore();
|
|
3345
|
-
const resolvedSource = event.source ?? scope?.source ?? (event.backend === "
|
|
3353
|
+
const resolvedSource = event.source ?? scope?.source ?? (event.backend === "gcs" ? "files_api" : "provider_mirror");
|
|
3346
3354
|
const timestampedEvent = {
|
|
3347
3355
|
...event,
|
|
3348
3356
|
source: resolvedSource,
|
|
@@ -3389,16 +3397,117 @@ async function computeFileSha256Hex(filePath) {
|
|
|
3389
3397
|
}
|
|
3390
3398
|
return hash.digest("hex");
|
|
3391
3399
|
}
|
|
3392
|
-
function
|
|
3400
|
+
function buildCanonicalFileId(filename, mimeType, sha256Hex) {
|
|
3401
|
+
return `file_${createHash("sha256").update(filename).update("\0").update(mimeType).update("\0").update(sha256Hex).digest("hex")}`;
|
|
3402
|
+
}
|
|
3403
|
+
function resolveCanonicalFilesBucket() {
|
|
3404
|
+
const raw = process.env.LLM_FILES_GCS_BUCKET ?? process.env.VERTEX_GCS_BUCKET ?? process.env.LLM_VERTEX_GCS_BUCKET;
|
|
3405
|
+
const trimmed = raw?.trim();
|
|
3406
|
+
if (!trimmed) {
|
|
3407
|
+
throw new Error(
|
|
3408
|
+
"LLM_FILES_GCS_BUCKET (or VERTEX_GCS_BUCKET) must be set to use the canonical files API."
|
|
3409
|
+
);
|
|
3410
|
+
}
|
|
3411
|
+
return trimmed.replace(/^gs:\/\//u, "").replace(/\/+$/u, "");
|
|
3412
|
+
}
|
|
3413
|
+
function resolveCanonicalFilesPrefix() {
|
|
3414
|
+
const raw = process.env.LLM_FILES_GCS_PREFIX;
|
|
3415
|
+
const trimmed = raw?.trim().replace(/^\/+/u, "").replace(/\/+$/u, "");
|
|
3416
|
+
return trimmed ? `${trimmed}/` : "canonical-files/";
|
|
3417
|
+
}
|
|
3418
|
+
function isLatexLikeFile(filename, mimeType) {
|
|
3419
|
+
const extension = path4.extname(filename).trim().toLowerCase();
|
|
3420
|
+
const normalisedMimeType = mimeType.trim().toLowerCase();
|
|
3421
|
+
return extension === ".tex" || extension === ".ltx" || extension === ".latex" || normalisedMimeType === "application/x-tex" || normalisedMimeType === "text/x-tex";
|
|
3422
|
+
}
|
|
3423
|
+
function resolveCanonicalStorageContentType(filename, mimeType) {
|
|
3424
|
+
if (isLatexLikeFile(filename, mimeType)) {
|
|
3425
|
+
return "text/plain";
|
|
3426
|
+
}
|
|
3427
|
+
return mimeType;
|
|
3428
|
+
}
|
|
3429
|
+
function resolveCanonicalObjectExtension(filename, mimeType) {
|
|
3430
|
+
if (isLatexLikeFile(filename, mimeType)) {
|
|
3431
|
+
return "txt";
|
|
3432
|
+
}
|
|
3433
|
+
const fromFilename = path4.extname(filename).replace(/^\./u, "").trim().toLowerCase();
|
|
3434
|
+
if (fromFilename) {
|
|
3435
|
+
return fromFilename;
|
|
3436
|
+
}
|
|
3437
|
+
const fromMimeType = mime.getExtension(mimeType)?.trim().toLowerCase();
|
|
3438
|
+
if (fromMimeType) {
|
|
3439
|
+
return fromMimeType;
|
|
3440
|
+
}
|
|
3441
|
+
return "bin";
|
|
3442
|
+
}
|
|
3443
|
+
function buildCanonicalObjectName(fileId, filename, mimeType) {
|
|
3444
|
+
const extension = resolveCanonicalObjectExtension(filename, mimeType);
|
|
3445
|
+
return `${resolveCanonicalFilesPrefix()}${fileId}.${extension}`;
|
|
3446
|
+
}
|
|
3447
|
+
function toSafeStorageFilename(filename) {
|
|
3448
|
+
const normalized = normaliseFilename(filename).replace(/[^\w.-]+/gu, "-");
|
|
3449
|
+
return normalized.length > 0 ? normalized : "attachment.bin";
|
|
3450
|
+
}
|
|
3451
|
+
function parseUnixSeconds(value, fallback) {
|
|
3452
|
+
if (value) {
|
|
3453
|
+
const numeric = Number.parseInt(value, 10);
|
|
3454
|
+
if (Number.isFinite(numeric) && numeric > 0) {
|
|
3455
|
+
return numeric;
|
|
3456
|
+
}
|
|
3457
|
+
}
|
|
3458
|
+
if (fallback) {
|
|
3459
|
+
const millis = Date.parse(fallback);
|
|
3460
|
+
if (Number.isFinite(millis)) {
|
|
3461
|
+
return Math.floor(millis / 1e3);
|
|
3462
|
+
}
|
|
3463
|
+
}
|
|
3464
|
+
return Math.floor(Date.now() / 1e3);
|
|
3465
|
+
}
|
|
3466
|
+
function parseOptionalUnixSeconds(value) {
|
|
3467
|
+
if (!value) {
|
|
3468
|
+
return void 0;
|
|
3469
|
+
}
|
|
3470
|
+
const millis = Date.parse(value);
|
|
3471
|
+
if (Number.isFinite(millis)) {
|
|
3472
|
+
return Math.floor(millis / 1e3);
|
|
3473
|
+
}
|
|
3474
|
+
const numeric = Number.parseInt(value, 10);
|
|
3475
|
+
return Number.isFinite(numeric) && numeric > 0 ? numeric : void 0;
|
|
3476
|
+
}
|
|
3477
|
+
function toStoredFileFromCanonicalMetadata(options) {
|
|
3478
|
+
const metadata = options.objectMetadata.metadata;
|
|
3479
|
+
const filenameRaw = typeof metadata?.filename === "string" && metadata.filename.trim().length > 0 ? metadata.filename.trim() : path4.basename(options.objectName);
|
|
3480
|
+
const filename = normaliseFilename(filenameRaw);
|
|
3481
|
+
const bytesRaw = options.objectMetadata.size;
|
|
3482
|
+
const bytes = typeof bytesRaw === "string" ? Number.parseInt(bytesRaw, 10) : typeof bytesRaw === "number" ? bytesRaw : 0;
|
|
3483
|
+
const purpose = metadata?.purpose === "user_data" ? "user_data" : "user_data";
|
|
3484
|
+
const createdAt = parseUnixSeconds(
|
|
3485
|
+
typeof metadata?.createdAtUnix === "string" ? metadata.createdAtUnix : void 0,
|
|
3486
|
+
typeof options.objectMetadata.timeCreated === "string" ? options.objectMetadata.timeCreated : void 0
|
|
3487
|
+
);
|
|
3488
|
+
const expiresAt = parseOptionalUnixSeconds(
|
|
3489
|
+
typeof metadata?.expiresAt === "string" ? metadata.expiresAt : void 0
|
|
3490
|
+
);
|
|
3491
|
+
const mimeType = typeof metadata?.mimeType === "string" && metadata.mimeType.trim().length > 0 ? metadata.mimeType.trim() : typeof options.objectMetadata.contentType === "string" && options.objectMetadata.contentType.trim().length > 0 ? options.objectMetadata.contentType.trim() : resolveMimeType(filename, void 0);
|
|
3492
|
+
const sha256Hex = typeof metadata?.sha256 === "string" && metadata.sha256.trim().length > 0 ? metadata.sha256.trim() : void 0;
|
|
3393
3493
|
return {
|
|
3394
|
-
|
|
3395
|
-
|
|
3396
|
-
|
|
3397
|
-
|
|
3398
|
-
|
|
3399
|
-
|
|
3400
|
-
|
|
3401
|
-
|
|
3494
|
+
file: {
|
|
3495
|
+
id: options.fileId,
|
|
3496
|
+
bytes: Number.isFinite(bytes) ? bytes : 0,
|
|
3497
|
+
created_at: createdAt,
|
|
3498
|
+
filename,
|
|
3499
|
+
object: "file",
|
|
3500
|
+
purpose,
|
|
3501
|
+
status: "processed",
|
|
3502
|
+
...expiresAt ? { expires_at: expiresAt } : {}
|
|
3503
|
+
},
|
|
3504
|
+
filename,
|
|
3505
|
+
bytes: Number.isFinite(bytes) ? bytes : 0,
|
|
3506
|
+
mimeType,
|
|
3507
|
+
sha256Hex,
|
|
3508
|
+
localPath: options.localPath,
|
|
3509
|
+
bucketName: options.bucketName,
|
|
3510
|
+
objectName: options.objectName
|
|
3402
3511
|
};
|
|
3403
3512
|
}
|
|
3404
3513
|
function buildCacheKey(filename, mimeType, sha256Hex) {
|
|
@@ -3419,7 +3528,7 @@ function isFresh(file) {
|
|
|
3419
3528
|
function recordMetadata(metadata) {
|
|
3420
3529
|
filesState.metadataById.set(metadata.file.id, metadata);
|
|
3421
3530
|
if (metadata.sha256Hex) {
|
|
3422
|
-
filesState.
|
|
3531
|
+
filesState.canonicalUploadCacheByKey.set(
|
|
3423
3532
|
buildCacheKey(
|
|
3424
3533
|
metadata.filename,
|
|
3425
3534
|
metadata.mimeType ?? "application/octet-stream",
|
|
@@ -3468,7 +3577,9 @@ async function persistMetadataToDisk(metadata) {
|
|
|
3468
3577
|
bytes: metadata.bytes,
|
|
3469
3578
|
mimeType: metadata.mimeType,
|
|
3470
3579
|
sha256Hex: metadata.sha256Hex,
|
|
3471
|
-
localPath: metadata.localPath
|
|
3580
|
+
localPath: metadata.localPath,
|
|
3581
|
+
bucketName: metadata.bucketName,
|
|
3582
|
+
objectName: metadata.objectName
|
|
3472
3583
|
};
|
|
3473
3584
|
await writeFile2(
|
|
3474
3585
|
buildCachedMetadataPath(metadata.file.id),
|
|
@@ -3500,175 +3611,271 @@ async function loadPersistedMetadata(fileId) {
|
|
|
3500
3611
|
bytes: payload.bytes,
|
|
3501
3612
|
mimeType: payload.mimeType,
|
|
3502
3613
|
sha256Hex: payload.sha256Hex,
|
|
3503
|
-
localPath: payload.localPath
|
|
3614
|
+
localPath: payload.localPath,
|
|
3615
|
+
bucketName: payload.bucketName,
|
|
3616
|
+
objectName: payload.objectName
|
|
3504
3617
|
});
|
|
3505
3618
|
} catch {
|
|
3506
3619
|
return void 0;
|
|
3507
3620
|
}
|
|
3508
3621
|
}
|
|
3509
|
-
async function
|
|
3510
|
-
const
|
|
3511
|
-
const
|
|
3512
|
-
|
|
3513
|
-
|
|
3622
|
+
async function writeCanonicalFileFromPath(options) {
|
|
3623
|
+
const file = getStorageClient().bucket(options.bucketName).file(options.objectName);
|
|
3624
|
+
const storageContentType = resolveCanonicalStorageContentType(
|
|
3625
|
+
options.metadata.filename ?? "attachment.bin",
|
|
3626
|
+
options.mimeType
|
|
3627
|
+
);
|
|
3628
|
+
try {
|
|
3629
|
+
await pipeline(
|
|
3630
|
+
createReadStream(options.filePath),
|
|
3631
|
+
file.createWriteStream({
|
|
3632
|
+
resumable: options.bytes >= 10 * 1024 * 1024,
|
|
3633
|
+
preconditionOpts: { ifGenerationMatch: 0 },
|
|
3634
|
+
metadata: {
|
|
3635
|
+
contentType: storageContentType,
|
|
3636
|
+
contentDisposition: `inline; filename="${toSafeStorageFilename(options.metadata.filename ?? "attachment.bin")}"`,
|
|
3637
|
+
metadata: options.metadata
|
|
3638
|
+
}
|
|
3639
|
+
})
|
|
3640
|
+
);
|
|
3641
|
+
return true;
|
|
3642
|
+
} catch (error) {
|
|
3643
|
+
const code = error.code;
|
|
3644
|
+
if (code === 412 || code === "412") {
|
|
3645
|
+
return false;
|
|
3646
|
+
}
|
|
3647
|
+
throw error;
|
|
3514
3648
|
}
|
|
3515
|
-
|
|
3516
|
-
|
|
3517
|
-
|
|
3518
|
-
|
|
3519
|
-
|
|
3520
|
-
|
|
3521
|
-
|
|
3522
|
-
|
|
3523
|
-
|
|
3524
|
-
|
|
3525
|
-
|
|
3526
|
-
|
|
3527
|
-
|
|
3528
|
-
|
|
3649
|
+
}
|
|
3650
|
+
async function writeCanonicalFileFromBytes(options) {
|
|
3651
|
+
const file = getStorageClient().bucket(options.bucketName).file(options.objectName);
|
|
3652
|
+
const storageContentType = resolveCanonicalStorageContentType(
|
|
3653
|
+
options.metadata.filename ?? "attachment.bin",
|
|
3654
|
+
options.mimeType
|
|
3655
|
+
);
|
|
3656
|
+
try {
|
|
3657
|
+
await file.save(options.bytes, {
|
|
3658
|
+
resumable: options.bytes.byteLength >= 10 * 1024 * 1024,
|
|
3659
|
+
preconditionOpts: { ifGenerationMatch: 0 },
|
|
3660
|
+
metadata: {
|
|
3661
|
+
contentType: storageContentType,
|
|
3662
|
+
contentDisposition: `inline; filename="${toSafeStorageFilename(options.metadata.filename ?? "attachment.bin")}"`,
|
|
3663
|
+
metadata: options.metadata
|
|
3529
3664
|
}
|
|
3530
3665
|
});
|
|
3531
|
-
|
|
3532
|
-
|
|
3533
|
-
const
|
|
3534
|
-
|
|
3535
|
-
|
|
3536
|
-
mime_type: params.mimeType,
|
|
3537
|
-
purpose: params.purpose
|
|
3538
|
-
});
|
|
3539
|
-
const partIds = [];
|
|
3540
|
-
for (let offset = 0; offset < params.bytes.byteLength; offset += OPENAI_UPLOAD_PART_MAX_BYTES) {
|
|
3541
|
-
const chunk = params.bytes.subarray(
|
|
3542
|
-
offset,
|
|
3543
|
-
Math.min(offset + OPENAI_UPLOAD_PART_MAX_BYTES, params.bytes.byteLength)
|
|
3544
|
-
);
|
|
3545
|
-
const uploadPart = await client.uploads.parts.create(upload.id, {
|
|
3546
|
-
data: new NodeFile([new Uint8Array(chunk)], `${params.sha256Hex}.part`, {
|
|
3547
|
-
type: params.mimeType
|
|
3548
|
-
})
|
|
3549
|
-
});
|
|
3550
|
-
partIds.push(uploadPart.id);
|
|
3551
|
-
}
|
|
3552
|
-
const completed = await client.uploads.complete(upload.id, { part_ids: partIds });
|
|
3553
|
-
const fileId = completed.file?.id;
|
|
3554
|
-
if (!fileId) {
|
|
3555
|
-
throw new Error("OpenAI upload completed without a file id.");
|
|
3666
|
+
return true;
|
|
3667
|
+
} catch (error) {
|
|
3668
|
+
const code = error.code;
|
|
3669
|
+
if (code === 412 || code === "412") {
|
|
3670
|
+
return false;
|
|
3556
3671
|
}
|
|
3557
|
-
|
|
3672
|
+
throw error;
|
|
3558
3673
|
}
|
|
3559
|
-
|
|
3560
|
-
|
|
3561
|
-
|
|
3562
|
-
filename
|
|
3563
|
-
|
|
3564
|
-
|
|
3565
|
-
|
|
3674
|
+
}
|
|
3675
|
+
async function refreshCanonicalObjectMetadata(options) {
|
|
3676
|
+
const storageContentType = resolveCanonicalStorageContentType(
|
|
3677
|
+
options.metadata.filename ?? "attachment.bin",
|
|
3678
|
+
options.mimeType
|
|
3679
|
+
);
|
|
3680
|
+
await getStorageClient().bucket(options.bucketName).file(options.objectName).setMetadata({
|
|
3681
|
+
contentType: storageContentType,
|
|
3682
|
+
contentDisposition: `inline; filename="${toSafeStorageFilename(options.metadata.filename ?? "attachment.bin")}"`,
|
|
3683
|
+
metadata: options.metadata
|
|
3566
3684
|
});
|
|
3567
|
-
|
|
3568
|
-
|
|
3569
|
-
|
|
3570
|
-
|
|
3571
|
-
|
|
3572
|
-
|
|
3573
|
-
|
|
3574
|
-
|
|
3685
|
+
}
|
|
3686
|
+
async function createCanonicalMetadata(options) {
|
|
3687
|
+
const createdAt = Math.floor(Date.now() / 1e3);
|
|
3688
|
+
const expiresAt = createdAt + options.expiresAfterSeconds;
|
|
3689
|
+
const storedFile = {
|
|
3690
|
+
id: options.fileId,
|
|
3691
|
+
bytes: options.bytes,
|
|
3692
|
+
created_at: createdAt,
|
|
3693
|
+
filename: options.filename,
|
|
3694
|
+
object: "file",
|
|
3695
|
+
purpose: options.purpose,
|
|
3696
|
+
status: "processed",
|
|
3697
|
+
expires_at: expiresAt
|
|
3698
|
+
};
|
|
3699
|
+
const metadata = recordMetadata({
|
|
3700
|
+
file: storedFile,
|
|
3701
|
+
filename: options.filename,
|
|
3702
|
+
bytes: options.bytes,
|
|
3703
|
+
mimeType: options.mimeType,
|
|
3704
|
+
sha256Hex: options.sha256Hex,
|
|
3705
|
+
localPath: options.localPath,
|
|
3706
|
+
bucketName: options.bucketName,
|
|
3707
|
+
objectName: options.objectName
|
|
3575
3708
|
});
|
|
3709
|
+
await persistMetadataToDisk(metadata);
|
|
3576
3710
|
return metadata;
|
|
3577
3711
|
}
|
|
3578
|
-
async function
|
|
3712
|
+
async function uploadCanonicalFileFromBytes(params) {
|
|
3579
3713
|
const cacheKey = buildCacheKey(params.filename, params.mimeType, params.sha256Hex);
|
|
3580
|
-
const cached = filesState.
|
|
3714
|
+
const cached = filesState.canonicalUploadCacheByKey.get(cacheKey);
|
|
3581
3715
|
if (cached && isFresh(cached.file)) {
|
|
3582
3716
|
return cached;
|
|
3583
3717
|
}
|
|
3584
|
-
const
|
|
3718
|
+
const fileId = buildCanonicalFileId(params.filename, params.mimeType, params.sha256Hex);
|
|
3719
|
+
const bucketName = resolveCanonicalFilesBucket();
|
|
3720
|
+
const objectName = buildCanonicalObjectName(fileId, params.filename, params.mimeType);
|
|
3721
|
+
const metadataFields = {
|
|
3722
|
+
fileId,
|
|
3723
|
+
filename: params.filename,
|
|
3724
|
+
mimeType: params.mimeType,
|
|
3725
|
+
purpose: params.purpose,
|
|
3726
|
+
sha256: params.sha256Hex,
|
|
3727
|
+
createdAtUnix: Math.floor(Date.now() / 1e3).toString(),
|
|
3728
|
+
expiresAt: new Date(Date.now() + params.expiresAfterSeconds * 1e3).toISOString()
|
|
3729
|
+
};
|
|
3585
3730
|
const startedAtMs = Date.now();
|
|
3586
|
-
|
|
3587
|
-
|
|
3588
|
-
|
|
3589
|
-
|
|
3590
|
-
|
|
3591
|
-
|
|
3592
|
-
|
|
3593
|
-
|
|
3594
|
-
|
|
3595
|
-
|
|
3596
|
-
|
|
3597
|
-
|
|
3731
|
+
const uploaded = await writeCanonicalFileFromBytes({
|
|
3732
|
+
bytes: params.bytes,
|
|
3733
|
+
bucketName,
|
|
3734
|
+
objectName,
|
|
3735
|
+
mimeType: params.mimeType,
|
|
3736
|
+
metadata: metadataFields
|
|
3737
|
+
});
|
|
3738
|
+
if (!uploaded) {
|
|
3739
|
+
await refreshCanonicalObjectMetadata({
|
|
3740
|
+
bucketName,
|
|
3741
|
+
objectName,
|
|
3742
|
+
mimeType: params.mimeType,
|
|
3743
|
+
metadata: metadataFields
|
|
3598
3744
|
});
|
|
3599
|
-
}
|
|
3600
|
-
|
|
3601
|
-
|
|
3602
|
-
|
|
3745
|
+
}
|
|
3746
|
+
const localPath = await cacheBufferLocally(params.bytes, params.sha256Hex);
|
|
3747
|
+
const canonical = await createCanonicalMetadata({
|
|
3748
|
+
fileId,
|
|
3749
|
+
filename: params.filename,
|
|
3750
|
+
mimeType: params.mimeType,
|
|
3751
|
+
purpose: params.purpose,
|
|
3752
|
+
expiresAfterSeconds: params.expiresAfterSeconds,
|
|
3753
|
+
sha256Hex: params.sha256Hex,
|
|
3754
|
+
bytes: params.bytes.byteLength,
|
|
3755
|
+
bucketName,
|
|
3756
|
+
objectName,
|
|
3757
|
+
localPath
|
|
3758
|
+
});
|
|
3759
|
+
if (uploaded) {
|
|
3760
|
+
recordUploadEvent({
|
|
3761
|
+
backend: "gcs",
|
|
3762
|
+
mode: "gcs",
|
|
3603
3763
|
filename: params.filename,
|
|
3604
|
-
|
|
3605
|
-
|
|
3606
|
-
|
|
3607
|
-
|
|
3608
|
-
|
|
3609
|
-
highWaterMark: OPENAI_UPLOAD_PART_MAX_BYTES
|
|
3764
|
+
bytes: params.bytes.byteLength,
|
|
3765
|
+
durationMs: Math.max(0, Date.now() - startedAtMs),
|
|
3766
|
+
mimeType: params.mimeType,
|
|
3767
|
+
fileId,
|
|
3768
|
+
fileUri: `gs://${bucketName}/${objectName}`
|
|
3610
3769
|
});
|
|
3611
|
-
let partIndex = 0;
|
|
3612
|
-
for await (const chunk of stream) {
|
|
3613
|
-
const buffer = Buffer4.isBuffer(chunk) ? chunk : Buffer4.from(chunk);
|
|
3614
|
-
const uploadPart = await client.uploads.parts.create(upload.id, {
|
|
3615
|
-
data: new NodeFile(
|
|
3616
|
-
[new Uint8Array(buffer)],
|
|
3617
|
-
`${params.sha256Hex}.${partIndex.toString()}.part`,
|
|
3618
|
-
{
|
|
3619
|
-
type: params.mimeType
|
|
3620
|
-
}
|
|
3621
|
-
)
|
|
3622
|
-
});
|
|
3623
|
-
partIds.push(uploadPart.id);
|
|
3624
|
-
partIndex += 1;
|
|
3625
|
-
}
|
|
3626
|
-
const completed = await client.uploads.complete(upload.id, { part_ids: partIds });
|
|
3627
|
-
const fileId = completed.file?.id;
|
|
3628
|
-
if (!fileId) {
|
|
3629
|
-
throw new Error("OpenAI upload completed without a file id.");
|
|
3630
|
-
}
|
|
3631
|
-
uploaded = await client.files.retrieve(fileId);
|
|
3632
3770
|
}
|
|
3633
|
-
|
|
3634
|
-
|
|
3635
|
-
|
|
3636
|
-
|
|
3637
|
-
|
|
3771
|
+
return canonical;
|
|
3772
|
+
}
|
|
3773
|
+
async function uploadCanonicalFileFromPath(params) {
|
|
3774
|
+
const cacheKey = buildCacheKey(params.filename, params.mimeType, params.sha256Hex);
|
|
3775
|
+
const cached = filesState.canonicalUploadCacheByKey.get(cacheKey);
|
|
3776
|
+
if (cached && isFresh(cached.file)) {
|
|
3777
|
+
return cached;
|
|
3778
|
+
}
|
|
3779
|
+
const fileId = buildCanonicalFileId(params.filename, params.mimeType, params.sha256Hex);
|
|
3780
|
+
const bucketName = resolveCanonicalFilesBucket();
|
|
3781
|
+
const objectName = buildCanonicalObjectName(fileId, params.filename, params.mimeType);
|
|
3782
|
+
const metadataFields = {
|
|
3783
|
+
fileId,
|
|
3784
|
+
filename: params.filename,
|
|
3785
|
+
mimeType: params.mimeType,
|
|
3786
|
+
purpose: params.purpose,
|
|
3787
|
+
sha256: params.sha256Hex,
|
|
3788
|
+
createdAtUnix: Math.floor(Date.now() / 1e3).toString(),
|
|
3789
|
+
expiresAt: new Date(Date.now() + params.expiresAfterSeconds * 1e3).toISOString()
|
|
3790
|
+
};
|
|
3791
|
+
const startedAtMs = Date.now();
|
|
3792
|
+
const uploaded = await writeCanonicalFileFromPath({
|
|
3793
|
+
filePath: params.filePath,
|
|
3794
|
+
bucketName,
|
|
3795
|
+
objectName,
|
|
3796
|
+
bytes: params.bytes,
|
|
3638
3797
|
mimeType: params.mimeType,
|
|
3639
|
-
|
|
3798
|
+
metadata: metadataFields
|
|
3640
3799
|
});
|
|
3641
|
-
|
|
3642
|
-
|
|
3643
|
-
|
|
3644
|
-
|
|
3645
|
-
|
|
3646
|
-
|
|
3800
|
+
if (!uploaded) {
|
|
3801
|
+
await refreshCanonicalObjectMetadata({
|
|
3802
|
+
bucketName,
|
|
3803
|
+
objectName,
|
|
3804
|
+
mimeType: params.mimeType,
|
|
3805
|
+
metadata: metadataFields
|
|
3806
|
+
});
|
|
3807
|
+
}
|
|
3808
|
+
const localPath = await cacheFileLocally(params.filePath, params.sha256Hex);
|
|
3809
|
+
const canonical = await createCanonicalMetadata({
|
|
3810
|
+
fileId,
|
|
3811
|
+
filename: params.filename,
|
|
3647
3812
|
mimeType: params.mimeType,
|
|
3648
|
-
|
|
3813
|
+
purpose: params.purpose,
|
|
3814
|
+
expiresAfterSeconds: params.expiresAfterSeconds,
|
|
3815
|
+
sha256Hex: params.sha256Hex,
|
|
3816
|
+
bytes: params.bytes,
|
|
3817
|
+
bucketName,
|
|
3818
|
+
objectName,
|
|
3819
|
+
localPath
|
|
3649
3820
|
});
|
|
3650
|
-
|
|
3821
|
+
if (uploaded) {
|
|
3822
|
+
recordUploadEvent({
|
|
3823
|
+
backend: "gcs",
|
|
3824
|
+
mode: "gcs",
|
|
3825
|
+
filename: params.filename,
|
|
3826
|
+
bytes: params.bytes,
|
|
3827
|
+
durationMs: Math.max(0, Date.now() - startedAtMs),
|
|
3828
|
+
mimeType: params.mimeType,
|
|
3829
|
+
fileId,
|
|
3830
|
+
fileUri: `gs://${bucketName}/${objectName}`
|
|
3831
|
+
});
|
|
3832
|
+
}
|
|
3833
|
+
return canonical;
|
|
3834
|
+
}
|
|
3835
|
+
async function resolveCanonicalStorageLocation(fileId) {
|
|
3836
|
+
const cached = filesState.metadataById.get(fileId) ?? await loadPersistedMetadata(fileId);
|
|
3837
|
+
if (cached?.bucketName && cached.objectName) {
|
|
3838
|
+
return {
|
|
3839
|
+
bucketName: cached.bucketName,
|
|
3840
|
+
objectName: cached.objectName
|
|
3841
|
+
};
|
|
3842
|
+
}
|
|
3843
|
+
const bucketName = resolveCanonicalFilesBucket();
|
|
3844
|
+
const [files2] = await getStorageClient().bucket(bucketName).getFiles({
|
|
3845
|
+
prefix: `${resolveCanonicalFilesPrefix()}${fileId}.`,
|
|
3846
|
+
maxResults: 1,
|
|
3847
|
+
autoPaginate: false
|
|
3848
|
+
});
|
|
3849
|
+
const file = files2[0];
|
|
3850
|
+
if (!file) {
|
|
3851
|
+
throw new Error(`Canonical file ${fileId} was not found in GCS.`);
|
|
3852
|
+
}
|
|
3853
|
+
return {
|
|
3854
|
+
bucketName,
|
|
3855
|
+
objectName: file.name
|
|
3856
|
+
};
|
|
3651
3857
|
}
|
|
3652
|
-
async function
|
|
3858
|
+
async function retrieveCanonicalFile(fileId) {
|
|
3653
3859
|
const cached = filesState.metadataById.get(fileId);
|
|
3654
|
-
if (cached && isFresh(cached.file)) {
|
|
3860
|
+
if (cached && isFresh(cached.file) && cached.bucketName && cached.objectName) {
|
|
3655
3861
|
return cached;
|
|
3656
3862
|
}
|
|
3657
3863
|
const persisted = await loadPersistedMetadata(fileId);
|
|
3658
|
-
if (persisted && isFresh(persisted.file)) {
|
|
3864
|
+
if (persisted && isFresh(persisted.file) && persisted.bucketName && persisted.objectName) {
|
|
3659
3865
|
return persisted;
|
|
3660
3866
|
}
|
|
3661
|
-
const
|
|
3662
|
-
const
|
|
3663
|
-
const
|
|
3664
|
-
const metadata = recordMetadata(
|
|
3665
|
-
|
|
3666
|
-
|
|
3667
|
-
|
|
3668
|
-
|
|
3669
|
-
|
|
3670
|
-
|
|
3671
|
-
|
|
3867
|
+
const existingLocalPath = cached?.localPath ?? persisted?.localPath;
|
|
3868
|
+
const { bucketName, objectName } = await resolveCanonicalStorageLocation(fileId);
|
|
3869
|
+
const [objectMetadata] = await getStorageClient().bucket(bucketName).file(objectName).getMetadata();
|
|
3870
|
+
const metadata = recordMetadata(
|
|
3871
|
+
toStoredFileFromCanonicalMetadata({
|
|
3872
|
+
fileId,
|
|
3873
|
+
bucketName,
|
|
3874
|
+
objectName,
|
|
3875
|
+
objectMetadata,
|
|
3876
|
+
localPath: existingLocalPath
|
|
3877
|
+
})
|
|
3878
|
+
);
|
|
3672
3879
|
await persistMetadataToDisk(metadata);
|
|
3673
3880
|
return metadata;
|
|
3674
3881
|
}
|
|
@@ -3696,7 +3903,7 @@ function resolveVertexMirrorBucket() {
|
|
|
3696
3903
|
const trimmed = raw?.trim();
|
|
3697
3904
|
if (!trimmed) {
|
|
3698
3905
|
throw new Error(
|
|
3699
|
-
"VERTEX_GCS_BUCKET must be set to use
|
|
3906
|
+
"VERTEX_GCS_BUCKET must be set to use canonical file ids with Vertex Gemini models."
|
|
3700
3907
|
);
|
|
3701
3908
|
}
|
|
3702
3909
|
return trimmed.replace(/^gs:\/\//u, "").replace(/\/+$/u, "");
|
|
@@ -3726,61 +3933,41 @@ function getGeminiMirrorClient() {
|
|
|
3726
3933
|
}
|
|
3727
3934
|
return filesState.geminiClientPromise;
|
|
3728
3935
|
}
|
|
3729
|
-
async function
|
|
3936
|
+
async function materializeCanonicalFile(fileId) {
|
|
3730
3937
|
const cachedPromise = filesState.materializedById.get(fileId);
|
|
3731
3938
|
if (cachedPromise) {
|
|
3732
3939
|
return await cachedPromise;
|
|
3733
3940
|
}
|
|
3734
3941
|
const promise = (async () => {
|
|
3735
|
-
const metadata = await
|
|
3736
|
-
if (metadata.localPath && metadata.sha256Hex && metadata.mimeType) {
|
|
3942
|
+
const metadata = await retrieveCanonicalFile(fileId);
|
|
3943
|
+
if (metadata.localPath && metadata.sha256Hex && metadata.mimeType && metadata.bucketName && metadata.objectName) {
|
|
3737
3944
|
return {
|
|
3738
3945
|
file: metadata.file,
|
|
3739
3946
|
filename: metadata.filename,
|
|
3740
3947
|
bytes: metadata.bytes,
|
|
3741
3948
|
mimeType: metadata.mimeType,
|
|
3742
3949
|
sha256Hex: metadata.sha256Hex,
|
|
3743
|
-
localPath: metadata.localPath
|
|
3950
|
+
localPath: metadata.localPath,
|
|
3951
|
+
bucketName: metadata.bucketName,
|
|
3952
|
+
objectName: metadata.objectName
|
|
3744
3953
|
};
|
|
3745
3954
|
}
|
|
3746
|
-
|
|
3747
|
-
|
|
3748
|
-
path4.join(FILES_TEMP_ROOT, `${fileId.replace(/[^a-z0-9_-]/giu, "")}-`)
|
|
3749
|
-
);
|
|
3750
|
-
const localPath = path4.join(tempDir, normaliseFilename(metadata.filename, `${fileId}.bin`));
|
|
3751
|
-
const response = await getOpenAiClient().files.content(fileId);
|
|
3752
|
-
if (!response.ok) {
|
|
3753
|
-
throw new Error(
|
|
3754
|
-
`Failed to download OpenAI file ${fileId}: ${response.status} ${response.statusText}`
|
|
3755
|
-
);
|
|
3756
|
-
}
|
|
3757
|
-
const responseMimeType = response.headers.get("content-type")?.trim() || void 0;
|
|
3758
|
-
const mimeType = resolveMimeType(metadata.filename, responseMimeType);
|
|
3759
|
-
const hash = createHash("sha256");
|
|
3760
|
-
let bytes = 0;
|
|
3761
|
-
if (response.body) {
|
|
3762
|
-
const source = Readable.fromWeb(response.body);
|
|
3763
|
-
const writable = createWriteStream(localPath, { flags: "wx" });
|
|
3764
|
-
source.on("data", (chunk) => {
|
|
3765
|
-
const buffer = Buffer4.isBuffer(chunk) ? chunk : Buffer4.from(chunk);
|
|
3766
|
-
hash.update(buffer);
|
|
3767
|
-
bytes += buffer.byteLength;
|
|
3768
|
-
});
|
|
3769
|
-
await pipeline(source, writable);
|
|
3770
|
-
} else {
|
|
3771
|
-
const buffer = Buffer4.from(await response.arrayBuffer());
|
|
3772
|
-
hash.update(buffer);
|
|
3773
|
-
bytes = buffer.byteLength;
|
|
3774
|
-
await writeFile2(localPath, buffer);
|
|
3955
|
+
if (!metadata.bucketName || !metadata.objectName) {
|
|
3956
|
+
throw new Error(`Canonical file ${fileId} is missing GCS location metadata.`);
|
|
3775
3957
|
}
|
|
3776
|
-
const
|
|
3958
|
+
const [downloadedBytes] = await getStorageClient().bucket(metadata.bucketName).file(metadata.objectName).download();
|
|
3959
|
+
const mimeType = metadata.mimeType ?? resolveMimeType(metadata.filename, void 0);
|
|
3960
|
+
const sha256Hex = metadata.sha256Hex ?? computeSha256Hex(downloadedBytes);
|
|
3961
|
+
const localPath = await cacheBufferLocally(downloadedBytes, sha256Hex);
|
|
3777
3962
|
const updated = recordMetadata({
|
|
3778
3963
|
file: metadata.file,
|
|
3779
3964
|
filename: metadata.filename,
|
|
3780
|
-
bytes:
|
|
3965
|
+
bytes: downloadedBytes.byteLength || metadata.bytes,
|
|
3781
3966
|
mimeType,
|
|
3782
3967
|
sha256Hex,
|
|
3783
|
-
localPath
|
|
3968
|
+
localPath,
|
|
3969
|
+
bucketName: metadata.bucketName,
|
|
3970
|
+
objectName: metadata.objectName
|
|
3784
3971
|
});
|
|
3785
3972
|
await persistMetadataToDisk(updated);
|
|
3786
3973
|
return {
|
|
@@ -3789,7 +3976,9 @@ async function materializeOpenAiFile(fileId) {
|
|
|
3789
3976
|
bytes: updated.bytes,
|
|
3790
3977
|
mimeType: updated.mimeType ?? mimeType,
|
|
3791
3978
|
sha256Hex,
|
|
3792
|
-
localPath
|
|
3979
|
+
localPath,
|
|
3980
|
+
bucketName: metadata.bucketName,
|
|
3981
|
+
objectName: metadata.objectName
|
|
3793
3982
|
};
|
|
3794
3983
|
})();
|
|
3795
3984
|
filesState.materializedById.set(fileId, promise);
|
|
@@ -3805,14 +3994,14 @@ async function ensureGeminiFileMirror(fileId) {
|
|
|
3805
3994
|
if (cached) {
|
|
3806
3995
|
return cached;
|
|
3807
3996
|
}
|
|
3808
|
-
const materialized = await
|
|
3997
|
+
const materialized = await materializeCanonicalFile(fileId);
|
|
3809
3998
|
const client = await getGeminiMirrorClient();
|
|
3810
3999
|
const name = buildGeminiMirrorName(materialized.sha256Hex);
|
|
3811
4000
|
try {
|
|
3812
4001
|
const existing = await client.files.get({ name });
|
|
3813
4002
|
if (existing.name && existing.uri && existing.mimeType) {
|
|
3814
4003
|
const mirror2 = {
|
|
3815
|
-
|
|
4004
|
+
canonicalFileId: fileId,
|
|
3816
4005
|
name: existing.name,
|
|
3817
4006
|
uri: existing.uri,
|
|
3818
4007
|
mimeType: existing.mimeType,
|
|
@@ -3840,7 +4029,7 @@ async function ensureGeminiFileMirror(fileId) {
|
|
|
3840
4029
|
throw new Error("Gemini file upload completed without a usable URI.");
|
|
3841
4030
|
}
|
|
3842
4031
|
const mirror = {
|
|
3843
|
-
|
|
4032
|
+
canonicalFileId: fileId,
|
|
3844
4033
|
name: resolved.name,
|
|
3845
4034
|
uri: resolved.uri,
|
|
3846
4035
|
mimeType: resolved.mimeType,
|
|
@@ -3865,7 +4054,7 @@ async function ensureVertexFileMirror(fileId) {
|
|
|
3865
4054
|
if (cached) {
|
|
3866
4055
|
return cached;
|
|
3867
4056
|
}
|
|
3868
|
-
const materialized = await
|
|
4057
|
+
const materialized = await materializeCanonicalFile(fileId);
|
|
3869
4058
|
const bucketName = resolveVertexMirrorBucket();
|
|
3870
4059
|
const prefix = resolveVertexMirrorPrefix();
|
|
3871
4060
|
const extension = mime.getExtension(materialized.mimeType) ?? path4.extname(materialized.filename).replace(/^\./u, "") ?? "bin";
|
|
@@ -3906,7 +4095,7 @@ async function ensureVertexFileMirror(fileId) {
|
|
|
3906
4095
|
}
|
|
3907
4096
|
}
|
|
3908
4097
|
const mirror = {
|
|
3909
|
-
|
|
4098
|
+
canonicalFileId: fileId,
|
|
3910
4099
|
bucket: bucketName,
|
|
3911
4100
|
objectName,
|
|
3912
4101
|
fileUri: `gs://${bucketName}/${objectName}`,
|
|
@@ -3937,7 +4126,7 @@ async function filesCreate(params) {
|
|
|
3937
4126
|
const filename2 = normaliseFilename(params.filename, path4.basename(filePath));
|
|
3938
4127
|
const mimeType2 = resolveMimeType(filename2, params.mimeType);
|
|
3939
4128
|
const sha256Hex2 = await computeFileSha256Hex(filePath);
|
|
3940
|
-
const uploaded2 = await
|
|
4129
|
+
const uploaded2 = await uploadCanonicalFileFromPath({
|
|
3941
4130
|
filePath,
|
|
3942
4131
|
filename: filename2,
|
|
3943
4132
|
mimeType: mimeType2,
|
|
@@ -3946,19 +4135,13 @@ async function filesCreate(params) {
|
|
|
3946
4135
|
sha256Hex: sha256Hex2,
|
|
3947
4136
|
bytes: info.size
|
|
3948
4137
|
});
|
|
3949
|
-
|
|
3950
|
-
const cached2 = recordMetadata({
|
|
3951
|
-
...uploaded2,
|
|
3952
|
-
localPath: localPath2
|
|
3953
|
-
});
|
|
3954
|
-
await persistMetadataToDisk(cached2);
|
|
3955
|
-
return cached2.file;
|
|
4138
|
+
return uploaded2.file;
|
|
3956
4139
|
}
|
|
3957
4140
|
const filename = normaliseFilename(params.filename);
|
|
3958
4141
|
const bytes = toBuffer(params.data);
|
|
3959
4142
|
const mimeType = resolveMimeType(filename, params.mimeType, "text/plain");
|
|
3960
4143
|
const sha256Hex = computeSha256Hex(bytes);
|
|
3961
|
-
const uploaded = await
|
|
4144
|
+
const uploaded = await uploadCanonicalFileFromBytes({
|
|
3962
4145
|
bytes,
|
|
3963
4146
|
filename,
|
|
3964
4147
|
mimeType,
|
|
@@ -3966,16 +4149,10 @@ async function filesCreate(params) {
|
|
|
3966
4149
|
expiresAfterSeconds,
|
|
3967
4150
|
sha256Hex
|
|
3968
4151
|
});
|
|
3969
|
-
|
|
3970
|
-
const cached = recordMetadata({
|
|
3971
|
-
...uploaded,
|
|
3972
|
-
localPath
|
|
3973
|
-
});
|
|
3974
|
-
await persistMetadataToDisk(cached);
|
|
3975
|
-
return cached.file;
|
|
4152
|
+
return uploaded.file;
|
|
3976
4153
|
}
|
|
3977
4154
|
async function filesRetrieve(fileId) {
|
|
3978
|
-
return (await
|
|
4155
|
+
return (await retrieveCanonicalFile(fileId)).file;
|
|
3979
4156
|
}
|
|
3980
4157
|
async function filesDelete(fileId) {
|
|
3981
4158
|
const cachedGemini = filesState.geminiMirrorById.get(fileId);
|
|
@@ -4002,34 +4179,73 @@ async function filesDelete(fileId) {
|
|
|
4002
4179
|
} catch {
|
|
4003
4180
|
}
|
|
4004
4181
|
}
|
|
4005
|
-
|
|
4182
|
+
try {
|
|
4183
|
+
const { bucketName, objectName } = await resolveCanonicalStorageLocation(fileId);
|
|
4184
|
+
await getStorageClient().bucket(bucketName).file(objectName).delete({ ignoreNotFound: true });
|
|
4185
|
+
} catch {
|
|
4186
|
+
}
|
|
4006
4187
|
filesState.metadataById.delete(fileId);
|
|
4188
|
+
filesState.canonicalUploadCacheByKey.forEach((value, key) => {
|
|
4189
|
+
if (value.file.id === fileId) {
|
|
4190
|
+
filesState.canonicalUploadCacheByKey.delete(key);
|
|
4191
|
+
}
|
|
4192
|
+
});
|
|
4007
4193
|
filesState.materializedById.delete(fileId);
|
|
4008
4194
|
try {
|
|
4009
4195
|
await unlink(buildCachedMetadataPath(fileId));
|
|
4010
4196
|
} catch {
|
|
4011
4197
|
}
|
|
4012
4198
|
return {
|
|
4013
|
-
id:
|
|
4014
|
-
deleted:
|
|
4199
|
+
id: fileId,
|
|
4200
|
+
deleted: true,
|
|
4015
4201
|
object: "file"
|
|
4016
4202
|
};
|
|
4017
4203
|
}
|
|
4018
4204
|
async function filesContent(fileId) {
|
|
4019
|
-
|
|
4205
|
+
const metadata = await retrieveCanonicalFile(fileId);
|
|
4206
|
+
if (!metadata.bucketName || !metadata.objectName) {
|
|
4207
|
+
throw new Error(`Canonical file ${fileId} is missing GCS location metadata.`);
|
|
4208
|
+
}
|
|
4209
|
+
const [bytes] = await getStorageClient().bucket(metadata.bucketName).file(metadata.objectName).download();
|
|
4210
|
+
const headers = new Headers();
|
|
4211
|
+
headers.set("content-type", metadata.mimeType ?? resolveMimeType(metadata.filename, void 0));
|
|
4212
|
+
headers.set("content-length", bytes.byteLength.toString());
|
|
4213
|
+
headers.set(
|
|
4214
|
+
"content-disposition",
|
|
4215
|
+
`inline; filename="${toSafeStorageFilename(metadata.filename)}"`
|
|
4216
|
+
);
|
|
4217
|
+
return new Response(bytes, {
|
|
4218
|
+
status: 200,
|
|
4219
|
+
headers
|
|
4220
|
+
});
|
|
4020
4221
|
}
|
|
4021
4222
|
async function getCanonicalFileMetadata(fileId) {
|
|
4022
|
-
const metadata = await
|
|
4223
|
+
const metadata = await retrieveCanonicalFile(fileId);
|
|
4023
4224
|
const mimeType = metadata.mimeType ?? resolveMimeType(metadata.filename, void 0);
|
|
4024
4225
|
const updated = metadata.mimeType === mimeType ? metadata : recordMetadata({
|
|
4025
4226
|
...metadata,
|
|
4026
4227
|
mimeType
|
|
4027
4228
|
});
|
|
4229
|
+
if (!updated.bucketName || !updated.objectName) {
|
|
4230
|
+
throw new Error(`Canonical file ${fileId} is missing GCS location metadata.`);
|
|
4231
|
+
}
|
|
4028
4232
|
return {
|
|
4029
4233
|
...updated,
|
|
4030
|
-
mimeType
|
|
4234
|
+
mimeType,
|
|
4235
|
+
bucketName: updated.bucketName,
|
|
4236
|
+
objectName: updated.objectName
|
|
4031
4237
|
};
|
|
4032
4238
|
}
|
|
4239
|
+
async function getCanonicalFileSignedUrl(options) {
|
|
4240
|
+
const metadata = await getCanonicalFileMetadata(options.fileId);
|
|
4241
|
+
const [signedUrl] = await getStorageClient().bucket(metadata.bucketName).file(metadata.objectName).getSignedUrl({
|
|
4242
|
+
version: "v4",
|
|
4243
|
+
action: "read",
|
|
4244
|
+
expires: Date.now() + (options.expiresAfterSeconds ?? 15 * 60) * 1e3,
|
|
4245
|
+
responseType: resolveCanonicalStorageContentType(metadata.filename, metadata.mimeType)
|
|
4246
|
+
});
|
|
4247
|
+
return signedUrl;
|
|
4248
|
+
}
|
|
4033
4249
|
var files = {
|
|
4034
4250
|
create: filesCreate,
|
|
4035
4251
|
retrieve: filesRetrieve,
|
|
@@ -4391,6 +4607,7 @@ function isJsonSchemaObject(schema) {
|
|
|
4391
4607
|
return false;
|
|
4392
4608
|
}
|
|
4393
4609
|
var CANONICAL_GEMINI_FILE_URI_PREFIX = "openai://file/";
|
|
4610
|
+
var CANONICAL_LLM_FILE_ID_PATTERN = /^file_[a-f0-9]{64}$/u;
|
|
4394
4611
|
function buildCanonicalGeminiFileUri(fileId) {
|
|
4395
4612
|
return `${CANONICAL_GEMINI_FILE_URI_PREFIX}${fileId}`;
|
|
4396
4613
|
}
|
|
@@ -4401,6 +4618,9 @@ function parseCanonicalGeminiFileId(fileUri) {
|
|
|
4401
4618
|
const fileId = fileUri.slice(CANONICAL_GEMINI_FILE_URI_PREFIX.length).trim();
|
|
4402
4619
|
return fileId.length > 0 ? fileId : void 0;
|
|
4403
4620
|
}
|
|
4621
|
+
function isCanonicalLlmFileId(fileId) {
|
|
4622
|
+
return typeof fileId === "string" && CANONICAL_LLM_FILE_ID_PATTERN.test(fileId.trim());
|
|
4623
|
+
}
|
|
4404
4624
|
function isLlmMediaResolution(value) {
|
|
4405
4625
|
return value === "auto" || value === "low" || value === "medium" || value === "high" || value === "original";
|
|
4406
4626
|
}
|
|
@@ -4888,7 +5108,21 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
4888
5108
|
if (!isOpenAiNativeContentItem(item)) {
|
|
4889
5109
|
return item;
|
|
4890
5110
|
}
|
|
4891
|
-
if (item.type === "input_image"
|
|
5111
|
+
if (item.type === "input_image") {
|
|
5112
|
+
if (isCanonicalLlmFileId(item.file_id)) {
|
|
5113
|
+
const signedUrl2 = await getCanonicalFileSignedUrl({ fileId: item.file_id });
|
|
5114
|
+
return {
|
|
5115
|
+
type: "input_image",
|
|
5116
|
+
image_url: signedUrl2,
|
|
5117
|
+
detail: toOpenAiImageDetail(
|
|
5118
|
+
isLlmMediaResolution(item.detail) ? item.detail : void 0,
|
|
5119
|
+
options?.model
|
|
5120
|
+
)
|
|
5121
|
+
};
|
|
5122
|
+
}
|
|
5123
|
+
if (options?.offloadInlineData !== true || typeof item.image_url !== "string" || !item.image_url.trim().toLowerCase().startsWith("data:")) {
|
|
5124
|
+
return item;
|
|
5125
|
+
}
|
|
4892
5126
|
const parsed = parseDataUrlPayload(item.image_url);
|
|
4893
5127
|
if (!parsed) {
|
|
4894
5128
|
return item;
|
|
@@ -4901,16 +5135,27 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
4901
5135
|
guessInlineDataFilename(parsed.mimeType)
|
|
4902
5136
|
)
|
|
4903
5137
|
});
|
|
5138
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: uploaded.fileId });
|
|
4904
5139
|
return {
|
|
4905
5140
|
type: "input_image",
|
|
5141
|
+
image_url: signedUrl,
|
|
4906
5142
|
detail: toOpenAiImageDetail(
|
|
4907
5143
|
isLlmMediaResolution(item.detail) ? item.detail : void 0,
|
|
4908
5144
|
options?.model
|
|
4909
|
-
)
|
|
4910
|
-
file_id: uploaded.fileId
|
|
5145
|
+
)
|
|
4911
5146
|
};
|
|
4912
5147
|
}
|
|
4913
|
-
if (item.type !== "input_file"
|
|
5148
|
+
if (item.type !== "input_file") {
|
|
5149
|
+
return item;
|
|
5150
|
+
}
|
|
5151
|
+
if (isCanonicalLlmFileId(item.file_id)) {
|
|
5152
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: item.file_id });
|
|
5153
|
+
return {
|
|
5154
|
+
type: "input_file",
|
|
5155
|
+
file_url: signedUrl
|
|
5156
|
+
};
|
|
5157
|
+
}
|
|
5158
|
+
if (options?.offloadInlineData !== true) {
|
|
4914
5159
|
return item;
|
|
4915
5160
|
}
|
|
4916
5161
|
if (typeof item.file_data === "string" && item.file_data.trim().length > 0) {
|
|
@@ -4924,7 +5169,11 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
4924
5169
|
mimeType,
|
|
4925
5170
|
filename
|
|
4926
5171
|
});
|
|
4927
|
-
|
|
5172
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: uploaded.fileId });
|
|
5173
|
+
return {
|
|
5174
|
+
type: "input_file",
|
|
5175
|
+
file_url: signedUrl
|
|
5176
|
+
};
|
|
4928
5177
|
}
|
|
4929
5178
|
if (typeof item.file_url === "string" && item.file_url.trim().toLowerCase().startsWith("data:")) {
|
|
4930
5179
|
const parsed = parseDataUrlPayload(item.file_url);
|
|
@@ -4939,7 +5188,11 @@ async function prepareOpenAiPromptContentItem(item, options) {
|
|
|
4939
5188
|
guessInlineDataFilename(parsed.mimeType)
|
|
4940
5189
|
)
|
|
4941
5190
|
});
|
|
4942
|
-
|
|
5191
|
+
const signedUrl = await getCanonicalFileSignedUrl({ fileId: uploaded.fileId });
|
|
5192
|
+
return {
|
|
5193
|
+
type: "input_file",
|
|
5194
|
+
file_url: signedUrl
|
|
5195
|
+
};
|
|
4943
5196
|
}
|
|
4944
5197
|
return item;
|
|
4945
5198
|
}
|
|
@@ -4969,11 +5222,40 @@ async function prepareOpenAiPromptInput(input, options) {
|
|
|
4969
5222
|
};
|
|
4970
5223
|
return await Promise.all(input.map((item) => prepareItem(item)));
|
|
4971
5224
|
}
|
|
5225
|
+
function hasCanonicalOpenAiFileReferences(input) {
|
|
5226
|
+
let found = false;
|
|
5227
|
+
const visitItems = (items) => {
|
|
5228
|
+
for (const item of items) {
|
|
5229
|
+
if (found || !item || typeof item !== "object") {
|
|
5230
|
+
continue;
|
|
5231
|
+
}
|
|
5232
|
+
if (Array.isArray(item.content)) {
|
|
5233
|
+
visitItems(item.content);
|
|
5234
|
+
}
|
|
5235
|
+
if (Array.isArray(item.output)) {
|
|
5236
|
+
visitItems(item.output);
|
|
5237
|
+
}
|
|
5238
|
+
if (!isOpenAiNativeContentItem(item)) {
|
|
5239
|
+
continue;
|
|
5240
|
+
}
|
|
5241
|
+
if ((item.type === "input_image" || item.type === "input_file") && isCanonicalLlmFileId(item.file_id)) {
|
|
5242
|
+
found = true;
|
|
5243
|
+
return;
|
|
5244
|
+
}
|
|
5245
|
+
}
|
|
5246
|
+
};
|
|
5247
|
+
visitItems(input);
|
|
5248
|
+
return found;
|
|
5249
|
+
}
|
|
4972
5250
|
async function maybePrepareOpenAiPromptInput(input, options) {
|
|
4973
|
-
|
|
5251
|
+
const offloadInlineData = estimateOpenAiInlinePromptBytes(input) > INLINE_ATTACHMENT_PROMPT_THRESHOLD_BYTES;
|
|
5252
|
+
if (!offloadInlineData && !hasCanonicalOpenAiFileReferences(input)) {
|
|
4974
5253
|
return Array.from(input);
|
|
4975
5254
|
}
|
|
4976
|
-
return await prepareOpenAiPromptInput(input,
|
|
5255
|
+
return await prepareOpenAiPromptInput(input, {
|
|
5256
|
+
...options,
|
|
5257
|
+
offloadInlineData
|
|
5258
|
+
});
|
|
4977
5259
|
}
|
|
4978
5260
|
function estimateGeminiInlinePromptBytes(contents) {
|
|
4979
5261
|
let total = 0;
|
|
@@ -6275,9 +6557,6 @@ async function maybeSpillToolOutputItem(item, toolName, options) {
|
|
|
6275
6557
|
return item;
|
|
6276
6558
|
}
|
|
6277
6559
|
async function maybeSpillToolOutput(value, toolName, options) {
|
|
6278
|
-
if (options?.provider === "chatgpt") {
|
|
6279
|
-
return value;
|
|
6280
|
-
}
|
|
6281
6560
|
if (typeof value === "string") {
|
|
6282
6561
|
if (options?.force !== true && Buffer5.byteLength(value, "utf8") <= TOOL_OUTPUT_SPILL_THRESHOLD_BYTES) {
|
|
6283
6562
|
return value;
|
|
@@ -7360,7 +7639,7 @@ async function runTextCall(params) {
|
|
|
7360
7639
|
defaultMediaResolution: request.mediaResolution,
|
|
7361
7640
|
model: request.model
|
|
7362
7641
|
}),
|
|
7363
|
-
{ model: request.model }
|
|
7642
|
+
{ model: request.model, provider: "openai" }
|
|
7364
7643
|
);
|
|
7365
7644
|
const openAiTools = toOpenAiTools(request.tools);
|
|
7366
7645
|
const reasoningEffort = resolveOpenAiReasoningEffort(
|
|
@@ -7439,6 +7718,10 @@ async function runTextCall(params) {
|
|
|
7439
7718
|
defaultMediaResolution: request.mediaResolution,
|
|
7440
7719
|
model: request.model
|
|
7441
7720
|
});
|
|
7721
|
+
const preparedChatGptInput = await maybePrepareOpenAiPromptInput(chatGptInput.input, {
|
|
7722
|
+
model: request.model,
|
|
7723
|
+
provider: "chatgpt"
|
|
7724
|
+
});
|
|
7442
7725
|
const reasoningEffort = resolveOpenAiReasoningEffort(request.model, request.thinkingLevel);
|
|
7443
7726
|
const openAiTools = toOpenAiTools(request.tools);
|
|
7444
7727
|
const requestPayload = {
|
|
@@ -7447,7 +7730,7 @@ async function runTextCall(params) {
|
|
|
7447
7730
|
stream: true,
|
|
7448
7731
|
...providerInfo.serviceTier ? { service_tier: providerInfo.serviceTier } : {},
|
|
7449
7732
|
instructions: chatGptInput.instructions ?? "You are a helpful assistant.",
|
|
7450
|
-
input:
|
|
7733
|
+
input: preparedChatGptInput,
|
|
7451
7734
|
include: ["reasoning.encrypted_content"],
|
|
7452
7735
|
reasoning: {
|
|
7453
7736
|
effort: toOpenAiReasoningEffort(reasoningEffort),
|
|
@@ -8255,7 +8538,8 @@ async function runToolLoop(request) {
|
|
|
8255
8538
|
let stepToolCallText;
|
|
8256
8539
|
let stepToolCallPayload;
|
|
8257
8540
|
const preparedInput = await maybePrepareOpenAiPromptInput(input, {
|
|
8258
|
-
model: request.model
|
|
8541
|
+
model: request.model,
|
|
8542
|
+
provider: "openai"
|
|
8259
8543
|
});
|
|
8260
8544
|
const stepRequestPayload = {
|
|
8261
8545
|
model: providerInfo.model,
|
|
@@ -8625,6 +8909,10 @@ async function runToolLoop(request) {
|
|
|
8625
8909
|
let reasoningSummaryText = "";
|
|
8626
8910
|
let stepToolCallText;
|
|
8627
8911
|
let stepToolCallPayload;
|
|
8912
|
+
const preparedInput = await maybePrepareOpenAiPromptInput(input, {
|
|
8913
|
+
model: request.model,
|
|
8914
|
+
provider: "chatgpt"
|
|
8915
|
+
});
|
|
8628
8916
|
const markFirstModelEvent = () => {
|
|
8629
8917
|
if (firstModelEventAtMs === void 0) {
|
|
8630
8918
|
firstModelEventAtMs = Date.now();
|
|
@@ -8636,7 +8924,7 @@ async function runToolLoop(request) {
|
|
|
8636
8924
|
stream: true,
|
|
8637
8925
|
...providerInfo.serviceTier ? { service_tier: providerInfo.serviceTier } : {},
|
|
8638
8926
|
instructions: toolLoopInput.instructions ?? "You are a helpful assistant.",
|
|
8639
|
-
input,
|
|
8927
|
+
input: preparedInput,
|
|
8640
8928
|
prompt_cache_key: promptCacheKey,
|
|
8641
8929
|
include: ["reasoning.encrypted_content"],
|
|
8642
8930
|
tools: openAiTools,
|