@transcribe-api/sdk 0.1.2 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +389 -54
- package/index.js +1185 -1377
- package/package.json +15 -15
- package/worker.js +567 -301
package/index.js
CHANGED
|
@@ -1,100 +1,109 @@
|
|
|
1
|
-
const DEFAULT_BASE_URL = "https://api.transcribeapi.com/v1";
|
|
2
|
-
const MAX_BATCH_FILES = 10000;
|
|
3
|
-
const MAX_BATCH_TOTAL_SIZE_BYTES = 10 * 1024 * 1024 * 1024;
|
|
4
|
-
const MAX_SYNC_AUDIO_BYTES = 30 * 1024 * 1024;
|
|
5
|
-
const MAX_SYNC_AUDIO_SECONDS = 10 * 60;
|
|
6
|
-
const MULTIPART_UPLOAD_THRESHOLD_BYTES =
|
|
7
|
-
const
|
|
8
|
-
const
|
|
1
|
+
const DEFAULT_BASE_URL = "https://api.transcribeapi.com/v1";
|
|
2
|
+
const MAX_BATCH_FILES = 10000;
|
|
3
|
+
const MAX_BATCH_TOTAL_SIZE_BYTES = 10 * 1024 * 1024 * 1024;
|
|
4
|
+
const MAX_SYNC_AUDIO_BYTES = 30 * 1024 * 1024;
|
|
5
|
+
const MAX_SYNC_AUDIO_SECONDS = 10 * 60;
|
|
6
|
+
const MULTIPART_UPLOAD_THRESHOLD_BYTES = 128 * 1024 * 1024;
|
|
7
|
+
const DEFAULT_UPLOAD_CONCURRENCY = 1;
|
|
8
|
+
const MAX_UPLOAD_CONCURRENCY = 32;
|
|
9
9
|
const MAX_MULTIPART_ADAPTIVE_ATTEMPTS = 12;
|
|
10
10
|
const MULTIPART_IDLE_WAIT_MS = 50;
|
|
11
|
-
const MULTIPART_RESUME_STATE_VERSION = 1;
|
|
12
11
|
const MIN_POLLING_INTERVAL_SECONDS = 10;
|
|
13
12
|
const DEFAULT_POLLING_SPINNER_INTERVAL_MS = 150;
|
|
14
13
|
const TERMINAL_JOB_STATUSES = new Set(["completed", "failed", "insufficient_funds"]);
|
|
15
14
|
const BATCH_MP4_UNSUPPORTED_MESSAGE = "Batch uploads do not support .mp4 for MVP. Supported batch audio formats: mp3, mpeg, mpga, m4a, wav, webm.";
|
|
16
15
|
const BATCH_UNSUPPORTED_MESSAGE = "Unsupported batch audio format. Supported batch audio formats: mp3, mpeg, mpga, m4a, wav, webm.";
|
|
17
|
-
|
|
18
|
-
export class TranscribeAPIError extends Error {
|
|
19
|
-
constructor(
|
|
20
|
-
super(
|
|
21
|
-
this.name = "TranscribeAPIError";
|
|
22
|
-
this.status = status;
|
|
23
|
-
this.code = code;
|
|
24
|
-
this.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
function
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|| code
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
16
|
+
|
|
17
|
+
/**
 * Error type thrown by the SDK for failed requests and invalid arguments.
 *
 * Besides the standard `message`, instances carry `status`, `code`,
 * `response`, and every non-reserved field copied from `extra`.
 */
export class TranscribeAPIError extends Error {
  /**
   * @param {string} message - Human-readable description of the failure.
   * @param {object} [options]
   * @param {number|null} [options.status] - Status value stored on the error.
   * @param {string|null} [options.code] - Machine-readable error code.
   * @param {object|null} [options.extra] - Additional fields copied onto the instance.
   * @param {object|null} [options.response] - Payload stored as `response`; when
   *   falsy, a `{ message, code? }` object is synthesized instead.
   */
  constructor(message, { status = null, code = null, extra = null, response = null } = {}) {
    super(message);
    this.name = "TranscribeAPIError";
    this.status = status;
    this.code = code;
    this.response = response || { message, ...(code ? { code } : {}) };
    if (extra && typeof extra === "object") {
      // Keys that must never be overwritten from `extra`. `__proto__` is listed
      // because assigning it through `this[key] = value` invokes the prototype
      // setter and would silently re-parent the error (breaking `instanceof`)
      // when `extra` is a JSON-parsed payload containing that key.
      const reservedKeys = ["status", "stack", "name", "message", "code", "__proto__"];
      for (const [key, value] of Object.entries(extra)) {
        if (reservedKeys.includes(key)) {
          continue;
        }
        this[key] = value;
      }
    }
  }

  /**
   * Serializes the error as `{ message, code?, ...extraFields }`.
   * `name`, `status`, and `response` are intentionally left out.
   *
   * @returns {object} Plain object suitable for `JSON.stringify`.
   */
  toJSON() {
    const hiddenKeys = ["name", "status", "code", "response"];
    return {
      message: this.message,
      ...(this.code ? { code: this.code } : {}),
      ...Object.fromEntries(
        Object.entries(this)
          .filter(([key]) => !hiddenKeys.includes(key)),
      ),
    };
  }
}
|
|
45
|
+
|
|
46
|
+
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
49
|
+
|
|
50
|
+
/**
 * Flattens an arbitrary thrown value into a plain, loggable record.
 *
 * @param {*} error - Anything that was thrown.
 * @returns {{name: (string|null), message: string, code: *, status: *, cause_message: (string|null)}}
 *   Missing (falsy) fields are reported as `null`; `message` falls back to
 *   `String(error)`; `code` falls back to the cause's code.
 */
function extractErrorInfo(error) {
  const cause = error?.cause;
  const name = error?.name || null;
  const message = error?.message || String(error);
  const code = error?.code || cause?.code || null;
  const status = error?.status || null;
  const causeMessage = cause?.message || null;
  return {
    name,
    message,
    code,
    status,
    cause_message: causeMessage,
  };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Decides whether a failed operation is worth retrying.
 *
 * An error is retryable when its code (or its cause's code) is one of the
 * listed connection-level codes, when its message contains "fetch failed",
 * when it has no truthy `status`, or when `status` is 429 or >= 500.
 *
 * @param {*} error - The thrown value.
 * @returns {boolean}
 */
function isRetryableError(error) {
  const transientCodes = [
    "ECONNRESET",
    "ETIMEDOUT",
    "ECONNREFUSED",
    "EPIPE",
    "UND_ERR_SOCKET",
    "UND_ERR_CONNECT_TIMEOUT",
    "ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC",
  ];
  const code = String(error?.code || error?.cause?.code || "");
  if (transientCodes.includes(code)) {
    return true;
  }

  const message = String(error?.message || "");
  if (message.includes("fetch failed")) {
    return true;
  }

  // Errors without a status are treated as retryable as well.
  const status = error?.status;
  if (!status) {
    return true;
  }
  return status === 429 || status >= 500;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Runs `operation` until it succeeds, retrying retryable failures with
 * exponential backoff (`baseDelayMs * 2 ** attemptIndex`).
 *
 * @param {function(number): *} operation - Called with the zero-based attempt index.
 * @param {object} [options]
 * @param {number} [options.attempts=3] - Maximum number of calls.
 * @param {number} [options.baseDelayMs=250] - Delay before the second attempt.
 * @returns {Promise<*>} Whatever `operation` resolves to.
 * @throws The error of the last attempt, or the first non-retryable error.
 */
async function retry(operation, { attempts = 3, baseDelayMs = 250 } = {}) {
  let lastError;
  let attempt = 0;
  while (attempt < attempts) {
    try {
      return await operation(attempt);
    } catch (error) {
      lastError = error;
      const retryable = isRetryableError(error);
      const isFinalAttempt = attempt === attempts - 1;
      if (isFinalAttempt || !retryable) {
        throw error;
      }
      await sleep(baseDelayMs * (2 ** attempt));
    }
    attempt += 1;
  }
  throw lastError;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Resolves the caller-supplied upload concurrency to a usable integer.
 *
 * `undefined`, `null`, and `""` select the default. Anything else is parsed
 * with `Number.parseInt(..., 10)` and capped at `MAX_UPLOAD_CONCURRENCY`.
 *
 * @param {*} value - Raw option value.
 * @returns {number} Concurrency between 1 and `MAX_UPLOAD_CONCURRENCY`.
 * @throws {TranscribeAPIError} `invalid_upload_concurrency` when the parsed
 *   value is not an integer >= 1.
 */
function normalizeUploadConcurrency(value) {
  const isUnset = value === undefined || value === null || value === "";
  if (isUnset) {
    return DEFAULT_UPLOAD_CONCURRENCY;
  }

  const requested = Number.parseInt(String(value), 10);
  if (Number.isInteger(requested) && requested >= 1) {
    return Math.min(requested, MAX_UPLOAD_CONCURRENCY);
  }

  throw new TranscribeAPIError("`uploadConcurrency` must be an integer >= 1.", {
    code: "invalid_upload_concurrency",
  });
}
|
|
99
108
|
|
|
100
109
|
function normalizePollingConfig(polling) {
|
|
@@ -133,212 +142,97 @@ function normalizePollingConfig(polling) {
|
|
|
133
142
|
timeout,
|
|
134
143
|
};
|
|
135
144
|
}
|
|
136
|
-
|
|
137
|
-
function
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
return;
|
|
228
|
-
}
|
|
229
|
-
const fs = await import("node:fs/promises");
|
|
230
|
-
await fs.writeFile(
|
|
231
|
-
multipartResumeStatePath(file.path),
|
|
232
|
-
JSON.stringify(state, null, 2),
|
|
233
|
-
"utf8",
|
|
234
|
-
);
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
/**
 * Removes the resume-state file associated with `file`, if there is one.
 * Does nothing for inputs that `isNodePathFile` rejects; unlink failures are
 * deliberately ignored.
 *
 * @param {object} file - File descriptor; `file.path` locates the state file.
 * @returns {Promise<void>}
 */
async function deleteMultipartResumeState(file) {
  if (isNodePathFile(file)) {
    try {
      const { unlink } = await import("node:fs/promises");
      await unlink(multipartResumeStatePath(file.path));
    } catch {
      // Ignore missing or inaccessible state files.
    }
  }
}
|
|
248
|
-
|
|
249
|
-
/**
 * Assembles the serializable record used to resume a multipart upload.
 *
 * @param {object} params
 * @param {string} params.baseUrl - Stored as `api_base_url`.
 * @param {string} params.jobId - Stored as `job_id`.
 * @param {string} [params.model] - Stored as `model`; falsy values become `null`.
 * @param {object} params.file - Source of `file_path`, `file_name`, `file_size`.
 * @param {object} params.upload - Passed through `serializeUploadForResume`.
 * @param {*} params.completedParts - Passed through `normalizeCompletedParts`.
 * @returns {object} The resume-state object.
 */
function buildMultipartResumeState({ baseUrl, jobId, model, file, upload, completedParts }) {
  const { path: filePath, name: fileName, size: fileSize } = file;
  const state = {
    version: MULTIPART_RESUME_STATE_VERSION,
    api_base_url: baseUrl,
    job_id: jobId,
    model: model || null,
    file_path: filePath,
    file_name: fileName,
    file_size: fileSize,
  };
  state.upload = serializeUploadForResume(upload);
  state.completed_parts = normalizeCompletedParts(completedParts);
  return state;
}
|
|
262
|
-
|
|
263
|
-
/**
 * Guesses a MIME type from a file name's extension (case-insensitive).
 *
 * @param {string} [name=""] - File name or path.
 * @returns {string} The mapped audio type, or "application/octet-stream"
 *   when the extension is not recognized.
 */
function contentTypeFromName(name = "") {
  const lowered = String(name).toLowerCase();
  const knownTypes = [
    [".mp3", "audio/mpeg"],
    [".mpeg", "audio/mpeg"],
    [".mpga", "audio/mpeg"],
    [".wav", "audio/wav"],
    [".m4a", "audio/mp4"],
    [".webm", "audio/webm"],
  ];
  for (const [extension, mimeType] of knownTypes) {
    if (lowered.endsWith(extension)) {
      return mimeType;
    }
  }
  return "application/octet-stream";
}
|
|
273
|
-
|
|
274
|
-
/**
 * Tells whether `input` is an object carrying a string `url` property.
 *
 * @param {*} input
 * @returns {boolean}
 */
function isRemoteFileInput(input) {
  if (!input || typeof input !== "object") {
    return false;
  }
  return typeof input.url === "string";
}
|
|
277
|
-
|
|
278
|
-
/**
 * Tells whether a batch item is an object carrying a string `url` property.
 *
 * @param {*} input
 * @returns {boolean}
 */
function isRemoteBatchItem(input) {
  const isObject = Boolean(input) && typeof input === "object";
  return isObject && typeof input.url === "string";
}
|
|
281
|
-
|
|
282
|
-
/**
 * Builds the fallback reference id for the item at `index`:
 * "file_" followed by the one-based position zero-padded to six digits.
 *
 * @param {number} index - Zero-based position.
 * @returns {string}
 */
function defaultReferenceId(index) {
  const digits = String(index + 1).padStart(6, "0");
  return `file_${digits}`;
}
|
|
285
|
-
|
|
286
|
-
/**
 * Validates one batch entry and converts it to the SDK's internal shape.
 *
 * @param {object} item - Must carry `reference_id` and exactly one of `file`
 *   (own property) or a non-blank string `url`.
 * @param {number} index - Position in the input list; used in error messages.
 * @returns {{referenceId: string, file: *, url: (string|null), durationEstimateSec: *}}
 * @throws {TranscribeAPIError} `invalid_batch_item` or `missing_reference_id`.
 */
function normalizeBatchInputItem(item, index) {
  const isObjectItem = Boolean(item) && typeof item === "object" && !Array.isArray(item);
  if (!isObjectItem) {
    throw new TranscribeAPIError("Each batch item must be an object with `reference_id` and either `file` or `url`.", {
      code: "invalid_batch_item",
    });
  }

  const referenceId = String(item.reference_id || "").trim();
  if (referenceId === "") {
    throw new TranscribeAPIError(`files[${index}].reference_id is required.`, {
      code: "missing_reference_id",
    });
  }

  // `file` counts as present whenever the key exists, whatever its value.
  const hasFile = Object.prototype.hasOwnProperty.call(item, "file");
  const trimmedUrl = typeof item.url === "string" ? item.url.trim() : "";
  const hasUrl = trimmedUrl !== "";

  if (hasFile && hasUrl) {
    throw new TranscribeAPIError(`files[${index}] must include either \`file\` or \`url\`, not both.`, {
      code: "invalid_batch_item",
    });
  }
  if (!hasFile && !hasUrl) {
    throw new TranscribeAPIError(`files[${index}] must include either \`file\` or \`url\`.`, {
      code: "invalid_batch_item",
    });
  }

  return {
    referenceId,
    file: hasFile ? item.file : null,
    url: hasUrl ? trimmedUrl : null,
    durationEstimateSec: item.durationEstimateSec || item.duration_estimate_sec || null,
  };
}
|
|
320
|
-
|
|
321
|
-
/**
 * Builds the upload descriptor sent for one file.
 * `size_bytes` is only included when the file size reaches
 * `MULTIPART_UPLOAD_THRESHOLD_BYTES`.
 *
 * @param {string} referenceId - Stored as `reference_id`.
 * @param {object} file - Object whose `size` is read.
 * @returns {{reference_id: string, size_bytes?: number}}
 */
function uploadDescriptorForFile(referenceId, file) {
  const sizeBytes = Number(file?.size || 0);
  const reachesThreshold = sizeBytes >= MULTIPART_UPLOAD_THRESHOLD_BYTES;
  return {
    reference_id: referenceId,
    ...(reachesThreshold ? { size_bytes: sizeBytes } : {}),
  };
}
|
|
328
|
-
|
|
329
|
-
/**
 * Returns the upload list of a job-creation response in array form.
 *
 * @param {object} response - May contain `uploads` (array) or a single `upload`.
 * @returns {Array} `response.uploads` when it is an array; a one-element list
 *   wrapping `response.upload` when that is set; otherwise an empty array.
 */
function normalizeResponseUploads(response) {
  const uploads = response?.uploads;
  if (Array.isArray(uploads)) {
    return uploads;
  }

  const single = response?.upload;
  if (!single) {
    return [];
  }
  return [{
    reference_id: response.reference_id || defaultReferenceId(0),
    upload: single,
  }];
}
|
|
341
|
-
|
|
145
|
+
|
|
146
|
+
/**
 * Maps a file name to a MIME type based on its extension (case-insensitive).
 *
 * @param {string} [name=""] - File name or path.
 * @returns {string} The matching audio type, or "application/octet-stream"
 *   for unknown extensions.
 */
function contentTypeFromName(name = "") {
  const lowered = String(name).toLowerCase();
  const extensionTable = [
    [".mp3", "audio/mpeg"],
    [".mpeg", "audio/mpeg"],
    [".mpga", "audio/mpeg"],
    [".wav", "audio/wav"],
    [".m4a", "audio/mp4"],
    [".webm", "audio/webm"],
  ];
  const match = extensionTable.find(([extension]) => lowered.endsWith(extension));
  return match ? match[1] : "application/octet-stream";
}
|
|
156
|
+
|
|
157
|
+
/**
 * Tells whether a batch item is an object carrying a string `url` property.
 *
 * @param {*} input
 * @returns {boolean}
 */
function isRemoteBatchItem(input) {
  if (!input || typeof input !== "object") {
    return false;
  }
  return typeof input.url === "string";
}
|
|
160
|
+
|
|
161
|
+
/**
 * Tells whether the caller passed a list of inputs (an array) rather than a
 * single input.
 *
 * @param {*} input
 * @returns {boolean}
 */
function isFilesInput(input) {
  const isList = Array.isArray(input);
  return isList;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Normalizes a language option to a lowercase two-letter code.
 *
 * @param {*} language - Raw value; it is trimmed and lowercased before checking.
 * @param {string} [fieldName="`language`"] - Label used in the error message.
 * @returns {string|null} The two-letter code, or `null` for empty/"auto".
 * @throws {TranscribeAPIError} `invalid_language` when the value is not
 *   exactly two letters a-z.
 */
function normalizeLanguageCode(language, fieldName = "`language`") {
  const candidate = String(language || "").trim().toLowerCase();
  const wantsAutoDetect = !candidate || candidate === "auto";
  if (wantsAutoDetect) {
    return null;
  }

  const isTwoLetterCode = /^[a-z]{2}$/i.test(candidate);
  if (isTwoLetterCode) {
    return candidate;
  }

  throw new TranscribeAPIError(`${fieldName} must be a two-letter language code such as \`en\` or \`fr\`.`, {
    code: "invalid_language",
  });
}
|
|
177
|
+
|
|
178
|
+
/**
 * Builds the fallback reference id for the item at `index`: the one-based
 * position zero-padded to five digits.
 *
 * @param {number} index - Zero-based position.
 * @returns {string}
 */
function defaultReferenceId(index) {
  const ordinal = String(index + 1);
  return ordinal.padStart(5, "0");
}
|
|
181
|
+
|
|
182
|
+
/**
 * Validates one batch entry and converts it to the SDK's internal shape.
 *
 * @param {object} item - Must carry exactly one of `file` (own property) or a
 *   non-blank string `url`; `reference_id` and `language` are optional.
 * @param {number} index - Position in the input list; used in error messages.
 * @returns {{referenceId: (string|null), file: *, url: (string|null),
 *   durationEstimateSec: *, hasLanguage: boolean, language: (string|null)}}
 * @throws {TranscribeAPIError} `invalid_batch_item`, or whatever
 *   `normalizeLanguageCode` throws for a bad `language`.
 */
function normalizeBatchInputItem(item, index) {
  const isObjectItem = Boolean(item) && typeof item === "object" && !Array.isArray(item);
  if (!isObjectItem) {
    throw new TranscribeAPIError("Each batch item must be an object with either `file` or `url`.", {
      code: "invalid_batch_item",
    });
  }

  // A blank or missing reference id is reported as null.
  const trimmedReference = String(item.reference_id || "").trim();
  const referenceId = trimmedReference || null;

  // `file` counts as present whenever the key exists, whatever its value.
  const hasFile = Object.prototype.hasOwnProperty.call(item, "file");
  const trimmedUrl = typeof item.url === "string" ? item.url.trim() : "";
  const hasUrl = trimmedUrl !== "";

  if (hasFile && hasUrl) {
    throw new TranscribeAPIError(`files[${index}] must include either \`file\` or \`url\`, not both.`, {
      code: "invalid_batch_item",
    });
  }
  if (!hasFile && !hasUrl) {
    throw new TranscribeAPIError(`files[${index}] must include either \`file\` or \`url\`.`, {
      code: "invalid_batch_item",
    });
  }

  return {
    referenceId,
    file: hasFile ? item.file : null,
    url: hasUrl ? trimmedUrl : null,
    durationEstimateSec: item.durationEstimateSec || item.duration_estimate_sec || null,
    hasLanguage: Object.prototype.hasOwnProperty.call(item, "language"),
    language: normalizeLanguageCode(item.language, `files[${index}].language`),
  };
}
|
|
213
|
+
|
|
214
|
+
/**
 * Builds the upload descriptor sent for one file.
 * `reference_id` is included only when truthy; `size_bytes` only when the
 * file size reaches `MULTIPART_UPLOAD_THRESHOLD_BYTES`.
 *
 * @param {string|null} referenceId
 * @param {object} file - Object whose `size` is read.
 * @returns {{reference_id?: string, size_bytes?: number}}
 */
function uploadDescriptorForFile(referenceId, file) {
  const sizeBytes = Number(file?.size || 0);
  const reachesThreshold = sizeBytes >= MULTIPART_UPLOAD_THRESHOLD_BYTES;
  return {
    ...(referenceId ? { reference_id: referenceId } : {}),
    ...(reachesThreshold ? { size_bytes: sizeBytes } : {}),
  };
}
|
|
222
|
+
|
|
223
|
+
/**
 * Returns the upload list of a job-creation response in array form.
 *
 * @param {object} response - May contain `uploads` (array) or a single `upload`.
 * @returns {Array} `response.uploads` when it is an array; a one-element list
 *   wrapping `response.upload` when that is set; otherwise an empty array.
 */
function normalizeResponseUploads(response) {
  const uploads = response?.uploads;
  if (Array.isArray(uploads)) {
    return uploads;
  }

  const single = response?.upload;
  if (!single) {
    return [];
  }
  const referenceId = response.reference_id || defaultReferenceId(0);
  return [{ reference_id: referenceId, upload: single }];
}
|
|
235
|
+
|
|
342
236
|
function uploadFromResponse(response, referenceId) {
|
|
343
237
|
if (response?.upload) {
|
|
344
238
|
return response.upload;
|
|
@@ -374,11 +268,14 @@ function formatBytes(bytes) {
|
|
|
374
268
|
|
|
375
269
|
function createSdkLoggerProgressHandler(logger = console) {
|
|
376
270
|
let activeProgressLine = false;
|
|
271
|
+
let lastRenderedLength = 0;
|
|
377
272
|
|
|
378
273
|
const writeLine = (line) => {
|
|
379
274
|
if (activeProgressLine && typeof process !== "undefined" && process?.stdout?.write) {
|
|
275
|
+
process.stdout.write("\r".padEnd(lastRenderedLength + 1, " "));
|
|
380
276
|
process.stdout.write("\n");
|
|
381
277
|
activeProgressLine = false;
|
|
278
|
+
lastRenderedLength = 0;
|
|
382
279
|
}
|
|
383
280
|
if (typeof logger?.log === "function") {
|
|
384
281
|
logger.log(line);
|
|
@@ -387,7 +284,9 @@ function createSdkLoggerProgressHandler(logger = console) {
|
|
|
387
284
|
|
|
388
285
|
const writeProgress = (line) => {
|
|
389
286
|
if (typeof process !== "undefined" && process?.stdout?.write) {
|
|
390
|
-
|
|
287
|
+
const paddedLine = line.padEnd(lastRenderedLength, " ");
|
|
288
|
+
lastRenderedLength = paddedLine.length;
|
|
289
|
+
process.stdout.write(`\r${paddedLine}`);
|
|
391
290
|
activeProgressLine = true;
|
|
392
291
|
return;
|
|
393
292
|
}
|
|
@@ -401,7 +300,7 @@ function createSdkLoggerProgressHandler(logger = console) {
|
|
|
401
300
|
return;
|
|
402
301
|
}
|
|
403
302
|
if (event.event === "upload_started") {
|
|
404
|
-
writeLine(`Uploading ${event.uploadFiles}
|
|
303
|
+
writeLine(`Uploading ${event.uploadFiles} file(s) for ${event.jobId}`);
|
|
405
304
|
return;
|
|
406
305
|
}
|
|
407
306
|
if (event.event === "upload_progress") {
|
|
@@ -412,11 +311,17 @@ function createSdkLoggerProgressHandler(logger = console) {
|
|
|
412
311
|
if (total && loaded >= total && activeProgressLine && typeof process !== "undefined" && process?.stdout?.write) {
|
|
413
312
|
process.stdout.write("\n");
|
|
414
313
|
activeProgressLine = false;
|
|
314
|
+
lastRenderedLength = 0;
|
|
415
315
|
}
|
|
416
316
|
return;
|
|
417
317
|
}
|
|
418
318
|
if (event.event === "upload_completed") {
|
|
419
319
|
if (event.suppressLog) {
|
|
320
|
+
if (activeProgressLine && typeof process !== "undefined" && process?.stdout?.write) {
|
|
321
|
+
process.stdout.write("\n");
|
|
322
|
+
activeProgressLine = false;
|
|
323
|
+
lastRenderedLength = 0;
|
|
324
|
+
}
|
|
420
325
|
return;
|
|
421
326
|
}
|
|
422
327
|
writeLine(`Uploaded completed: ${JSON.stringify(event.response, null, 2)}`);
|
|
@@ -487,9 +392,7 @@ function createSdkPollingLogger(logger = console) {
|
|
|
487
392
|
},
|
|
488
393
|
finish({ jobStatus, resultUrl }) {
|
|
489
394
|
currentStatus = jobStatus || currentStatus;
|
|
490
|
-
writeLine(resultUrl
|
|
491
|
-
? `Polling complete: ${currentStatus} - ${resultUrl}`
|
|
492
|
-
: `Polling complete: ${currentStatus}`);
|
|
395
|
+
writeLine(resultUrl || `Polling complete: ${currentStatus}`);
|
|
493
396
|
},
|
|
494
397
|
timeout({ timeoutSeconds, jobStatus }) {
|
|
495
398
|
currentStatus = jobStatus || currentStatus;
|
|
@@ -498,6 +401,17 @@ function createSdkPollingLogger(logger = console) {
|
|
|
498
401
|
};
|
|
499
402
|
}
|
|
500
403
|
|
|
404
|
+
/**
 * Logs the outcome of a finished async job: the `result_url` when present,
 * otherwise the whole result pretty-printed as JSON.
 * Does nothing when `logger.log` is not a function or `result` is not an object.
 *
 * @param {object} result - Final job payload.
 * @param {{log: function}} [logger=console]
 * @returns {void}
 */
function logTerminalAsyncResult(result, logger = console) {
  const canLog = typeof logger?.log === "function";
  const hasResult = Boolean(result) && typeof result === "object";
  if (!canLog || !hasResult) {
    return;
  }

  const line = result.result_url
    ? String(result.result_url)
    : JSON.stringify(result, null, 2);
  logger.log(line);
}
|
|
414
|
+
|
|
501
415
|
function composeProgressHandler({ onProgress, showLogs = false, logger = console } = {}) {
|
|
502
416
|
const logHandler = showLogs ? createSdkLoggerProgressHandler(logger) : null;
|
|
503
417
|
if (!logHandler) {
|
|
@@ -508,836 +422,742 @@ function composeProgressHandler({ onProgress, showLogs = false, logger = console
|
|
|
508
422
|
emitProgress(onProgress, event);
|
|
509
423
|
};
|
|
510
424
|
}
|
|
511
|
-
|
|
512
|
-
/**
 * Throws unless `file` looks like a supported batch audio format, judged by
 * its name's extension or its (possibly inferred) content type.
 *
 * @param {{name?: string, type?: string}} file
 * @returns {void}
 * @throws {TranscribeAPIError} `unsupported_batch_format` for .mp4 /
 *   "video/mp4" inputs and for anything not on the allow-lists below.
 */
function assertSupportedBatchFormat(file) {
  const lowerName = String(file?.name || "").toLowerCase();
  const lowerType = String(file?.type || contentTypeFromName(file?.name)).toLowerCase();

  // .mp4 gets its own, more specific message.
  const isMp4 = lowerName.endsWith(".mp4") || lowerType === "video/mp4";
  if (isMp4) {
    throw new TranscribeAPIError(BATCH_MP4_UNSUPPORTED_MESSAGE, { code: "unsupported_batch_format" });
  }

  const allowedExtensions = [".mp3", ".mpeg", ".mpga", ".m4a", ".wav", ".webm"];
  const allowedTypeFragments = [
    "audio/mpeg",
    "mpga",
    "audio/mp4",
    "audio/x-m4a",
    "audio/wav",
    "audio/wave",
    "audio/webm",
    "video/webm",
  ];
  const supported = allowedExtensions.some((extension) => lowerName.endsWith(extension))
    || allowedTypeFragments.some((fragment) => lowerType.includes(fragment));
  if (!supported) {
    throw new TranscribeAPIError(BATCH_UNSUPPORTED_MESSAGE, { code: "unsupported_batch_format" });
  }
}
|
|
541
|
-
|
|
542
|
-
/**
 * Coarse duration estimate derived from byte size alone: one second per
 * 16000 bytes, rounded up, never less than 1.
 *
 * @param {number} sizeBytes
 * @returns {number} Estimated whole seconds (>= 1).
 */
function estimateDurationFromSize(sizeBytes) {
  const bytes = Number(sizeBytes || 0);
  const seconds = Math.ceil(bytes / 16000);
  return Math.max(1, seconds);
}
|
|
545
|
-
|
|
546
|
-
/**
 * Decodes `length` bytes starting at `offset` into a string, one character
 * per byte.
 *
 * @param {Uint8Array} bytes
 * @param {number} offset
 * @param {number} length
 * @returns {string}
 */
function ascii(bytes, offset, length) {
  let text = "";
  for (const byte of bytes.slice(offset, offset + length)) {
    text += String.fromCharCode(byte);
  }
  return text;
}
|
|
549
|
-
|
|
550
|
-
/**
 * Reads an unsigned 32-bit integer from `bytes` at `offset`.
 *
 * @param {Uint8Array} bytes - Typed array; its underlying buffer is read.
 * @param {number} offset - Position relative to the start of `bytes`.
 * @param {boolean} [littleEndian=false] - Byte order; big-endian by default.
 * @returns {number}
 */
function readUint32(bytes, offset, littleEndian = false) {
  const absoluteOffset = bytes.byteOffset + offset;
  const view = new DataView(bytes.buffer, absoluteOffset, 4);
  return view.getUint32(0, littleEndian);
}
|
|
553
|
-
|
|
554
|
-
/**
 * Reads a big-endian unsigned 64-bit integer as a JavaScript number
 * (high word * 2^32 + low word).
 *
 * @param {Uint8Array} bytes
 * @param {number} offset - Position relative to the start of `bytes`.
 * @returns {number}
 */
function readUint64(bytes, offset) {
  const view = new DataView(bytes.buffer, bytes.byteOffset + offset, 8);
  const highWord = view.getUint32(0, false);
  const lowWord = view.getUint32(4, false);
  return highWord * 2 ** 32 + lowWord;
}
|
|
559
|
-
|
|
560
|
-
/**
 * Decodes four bytes into a 28-bit integer using only the low 7 bits of each
 * byte, most significant byte first.
 *
 * @param {Uint8Array} bytes
 * @param {number} offset - Index of the first of the four bytes.
 * @returns {number}
 */
function syncSafeInteger(bytes, offset) {
  let value = 0;
  for (let i = 0; i < 4; i += 1) {
    value = (value << 7) | (bytes[offset + i] & 0x7f);
  }
  return value;
}
|
|
566
|
-
|
|
567
|
-
/**
 * Returns how many leading bytes to skip when the buffer starts with an
 * "ID3" marker: the 10-byte header plus the size decoded from bytes 6..9.
 *
 * @param {Uint8Array} bytes
 * @returns {number} Offset just past the tag, or 0 when no tag is present.
 */
function id3Offset(bytes) {
  const hasTag = bytes.length >= 10 && ascii(bytes, 0, 3) === "ID3";
  if (!hasTag) {
    return 0;
  }
  return 10 + syncSafeInteger(bytes, 6);
}
|
|
573
|
-
|
|
574
|
-
// Bitrates in kbps indexed by the 4-bit bitrate field of a frame header.
// V1L3 is used when the version bits equal 3, V2L3 for the other accepted versions.
const MP3_BITRATES = {
  V1L3: [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320],
  V2L3: [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160],
};

// Sample rates in Hz, keyed by the header's version bits and indexed by the
// 2-bit sample-rate field.
const MP3_SAMPLE_RATES = {
  3: [44100, 48000, 32000],
  2: [22050, 24000, 16000],
  0: [11025, 12000, 8000],
};

/**
 * Tries to decode a frame header at `offset`.
 *
 * @param {Uint8Array} bytes
 * @param {number} offset - Candidate header position.
 * @returns {{bitrate: number, sampleRate: number, frameLength: number}|null}
 *   `bitrate` in bits per second, or `null` when the four bytes at `offset`
 *   are not an accepted header.
 */
function mp3FrameInfoAt(bytes, offset) {
  const headerFits = offset + 4 <= bytes.length;
  // Sync pattern: first byte 0xff and the top three bits of the second byte set.
  if (!headerFits || bytes[offset] !== 0xff || (bytes[offset + 1] & 0xe0) !== 0xe0) {
    return null;
  }

  const second = bytes[offset + 1];
  const third = bytes[offset + 2];
  const versionBits = (second >> 3) & 0x03;
  const layerBits = (second >> 1) & 0x03;
  const bitrateIndex = (third >> 4) & 0x0f;
  const sampleRateIndex = (third >> 2) & 0x03;
  const padding = (third >> 1) & 0x01;

  // Only layer bits === 1 are accepted; version 1 and the bitrate/sample-rate
  // sentinel indices are rejected.
  const rejected = versionBits === 1
    || layerBits !== 1
    || bitrateIndex === 0
    || bitrateIndex === 15
    || sampleRateIndex === 3;
  if (rejected) {
    return null;
  }

  const isVersionOne = versionBits === 3;
  const sampleRate = MP3_SAMPLE_RATES[versionBits]?.[sampleRateIndex];
  const bitrateTable = isVersionOne ? MP3_BITRATES.V1L3 : MP3_BITRATES.V2L3;
  const bitrateKbps = bitrateTable[bitrateIndex];
  if (!sampleRate || !bitrateKbps) {
    return null;
  }

  const bitrate = bitrateKbps * 1000;
  const samplesFactor = isVersionOne ? 144 : 72;
  const frameLength = Math.floor((samplesFactor * bitrate) / sampleRate + padding);
  if (frameLength <= 0) {
    return null;
  }

  return { bitrate, sampleRate, frameLength };
}
|
|
619
|
-
|
|
620
|
-
/**
 * Scans forward from `startOffset` for the first position that
 * `mp3FrameInfoAt` accepts as a frame header.
 *
 * @param {Uint8Array} bytes
 * @param {number} [startOffset=0] - Where to begin; negative values are clamped to 0.
 * @returns {{offset: number, bitrate: number, sampleRate: number, frameLength: number}|null}
 *   The header info plus its offset, or `null` when none is found.
 */
function findMp3Frame(bytes, startOffset = 0) {
  // A header needs four bytes, so the last candidate is length - 4 inclusive.
  // (The previous `offset < bytes.length - 4` bound skipped that position even
  // though `mp3FrameInfoAt` accepts it.)
  const lastHeaderOffset = bytes.length - 4;
  for (let offset = Math.max(0, startOffset); offset <= lastHeaderOffset; offset += 1) {
    const info = mp3FrameInfoAt(bytes, offset);
    if (info) {
      return { offset, ...info };
    }
  }
  return null;
}
|
|
629
|
-
|
|
630
|
-
/**
 * Estimates duration from the bitrate of the first frame found after any
 * leading ID3 tag, assuming that bitrate holds for the rest of the object.
 *
 * @param {Uint8Array} bytes - Leading bytes of the file.
 * @param {number} objectSize - Total file size; falls back to `bytes.length`.
 * @returns {number|null} Seconds, or `null` when no frame is found or the
 *   result is not a positive finite number.
 */
function parseMp3Duration(bytes, objectSize) {
  const firstFrame = findMp3Frame(bytes, id3Offset(bytes));
  if (!firstFrame) {
    return null;
  }

  const totalBytes = Number(objectSize || bytes.length);
  const audioBytes = Math.max(0, totalBytes - firstFrame.offset);
  const seconds = (audioBytes * 8) / firstFrame.bitrate;
  if (Number.isFinite(seconds) && seconds > 0) {
    return seconds;
  }
  return null;
}
|
|
640
|
-
|
|
641
|
-
/**
 * Computes duration from a RIFF/WAVE header: the `data` chunk size divided by
 * the byte rate read from the `fmt ` chunk.
 *
 * @param {Uint8Array} bytes - Leading bytes of the file.
 * @returns {number|null} Seconds, or `null` when the header is not RIFF/WAVE
 *   or either value is missing/zero.
 */
function parseWavDuration(bytes) {
  const looksLikeWav = bytes.length >= 44
    && ascii(bytes, 0, 4) === "RIFF"
    && ascii(bytes, 8, 4) === "WAVE";
  if (!looksLikeWav) {
    return null;
  }

  let byteRate = 0;
  let dataSize = 0;
  let cursor = 12;
  while (cursor + 8 <= bytes.length) {
    const chunkId = ascii(bytes, cursor, 4);
    const chunkSize = readUint32(bytes, cursor + 4, true);
    if (chunkId === "data") {
      dataSize = chunkSize;
      break;
    }
    if (chunkId === "fmt " && cursor + 20 <= bytes.length) {
      byteRate = readUint32(bytes, cursor + 16, true);
    }
    // Advance past the chunk header and body; odd-sized bodies carry one pad byte.
    cursor += 8 + chunkSize + (chunkSize % 2);
  }

  return byteRate && dataSize ? dataSize / byteRate : null;
}
|
|
666
|
-
|
|
667
|
-
/**
 * Scans the buffer byte by byte for an `mvhd` box and returns
 * duration / timescale from it.
 *
 * @param {Uint8Array} bytes - Leading bytes of the file.
 * @returns {number|null} Seconds, or `null` when no usable `mvhd` box is
 *   found or its timescale is not positive.
 */
function parseMp4Duration(bytes) {
  const total = bytes.length;
  for (let boxStart = 0; boxStart + 32 < total; boxStart += 1) {
    if (ascii(bytes, boxStart + 4, 4) === "mvhd") {
      const boxSize = readUint32(bytes, boxStart);
      const plausibleSize = boxSize >= 32 && boxStart + boxSize <= total + 8;
      if (plausibleSize) {
        const version = bytes[boxStart + 8];
        // Version 0 stores 32-bit fields, version 1 a 64-bit duration.
        if (version === 0 && boxStart + 28 <= total) {
          const timescale = readUint32(bytes, boxStart + 20);
          const duration = readUint32(bytes, boxStart + 24);
          return timescale > 0 ? duration / timescale : null;
        }
        if (version === 1 && boxStart + 40 <= total) {
          const timescale = readUint32(bytes, boxStart + 28);
          const duration = readUint64(bytes, boxStart + 32);
          return timescale > 0 ? duration / timescale : null;
        }
      }
    }
  }
  return null;
}
|
|
690
|
-
|
|
691
|
-
/**
 * Picks which duration parser family applies to a file.
 *
 * @param {string} [name=""] - File name.
 * @param {string} [contentType=""] - MIME type.
 * @returns {"wav"|"m4a"|"mp3"} "mp3" is the fallback for everything else.
 */
function typeFromNameOrContentType(name = "", contentType = "") {
  const lowerName = String(name || "").toLowerCase();
  const lowerType = String(contentType || "").toLowerCase();

  const isWav = lowerType.includes("wav") || lowerName.endsWith(".wav");
  if (isWav) {
    return "wav";
  }

  const m4aTypeHints = ["mp4", "m4a", "audio/x-m4a"];
  const isM4a = m4aTypeHints.some((hint) => lowerType.includes(hint))
    || lowerName.endsWith(".m4a");
  return isM4a ? "m4a" : "mp3";
}
|
|
707
|
-
|
|
708
|
-
/**
 * Estimates a file's duration in whole seconds by parsing up to the first
 * 2 MiB of it, falling back to the size-based heuristic when the header
 * cannot be read or parsed.
 *
 * @param {object} file - Needs `size` and either `readSlice(start, end)` or
 *   `slice(start, end)`; `name`/`type` select the parser.
 * @returns {Promise<number>} Seconds, at least 1.
 */
async function estimateDurationFromFile(file) {
  const maxHeaderBytes = 2 * 1024 * 1024;
  const headerLength = Math.min(Number(file?.size || 0), maxHeaderBytes);
  if (!headerLength) {
    return estimateDurationFromSize(file?.size);
  }

  try {
    let header;
    if (typeof file?.readSlice === "function") {
      header = new Uint8Array(await file.readSlice(0, headerLength));
    } else if (typeof file?.slice === "function") {
      header = new Uint8Array(await file.slice(0, headerLength).arrayBuffer());
    } else {
      return estimateDurationFromSize(file?.size);
    }

    const kind = typeFromNameOrContentType(file?.name, file?.type || contentTypeFromName(file?.name));
    let seconds;
    if (kind === "wav") {
      seconds = parseWavDuration(header);
    } else if (kind === "m4a") {
      seconds = parseMp4Duration(header);
    } else {
      seconds = parseMp3Duration(header, file.size);
    }

    if (Number.isFinite(seconds) && seconds > 0) {
      return Math.max(1, Math.ceil(seconds));
    }
  } catch {
    // Fall back to the coarse size heuristic when metadata parsing fails.
  }

  return estimateDurationFromSize(file?.size);
}
|
|
738
|
-
|
|
739
|
-
/**
 * Builds a lightweight file descriptor for a path on disk without loading
 * its contents.
 *
 * @param {string} pathValue - Path to the file.
 * @returns {Promise<{name: string, size: number, type: string, path: string,
 *   readSlice: function(number, number): Promise<Buffer>}>}
 *   `readSlice(start, end)` opens the file, reads that byte range, and
 *   returns only the bytes actually read.
 */
async function fileFromPath(pathValue) {
  const fs = await import("node:fs/promises");
  const { basename } = await import("node:path");
  const { size } = await fs.stat(pathValue);
  const name = basename(pathValue);

  const readSlice = async (start, end) => {
    const handle = await fs.open(pathValue, "r");
    try {
      const length = Math.max(0, end - start);
      const buffer = Buffer.alloc(length);
      const { bytesRead } = await handle.read(buffer, 0, length, start);
      return buffer.subarray(0, bytesRead);
    } finally {
      // Always release the descriptor, even when the read fails.
      await handle.close();
    }
  };

  return {
    name,
    size,
    type: contentTypeFromName(name),
    path: pathValue,
    readSlice,
  };
}
|
|
762
|
-
|
|
763
|
-
/**
 * Creates a named file-like object from raw parts: a `File` when that global
 * exists, otherwise a `Blob` with a `name` property attached.
 *
 * @param {Array} parts - Blob parts.
 * @param {string} name - File name.
 * @param {string} type - MIME type.
 * @returns {File|Blob}
 */
function makeFile(parts, name, type) {
  const options = { type };
  if (typeof File === "undefined") {
    const namedBlob = new Blob(parts, options);
    Object.defineProperty(namedBlob, "name", { value: name });
    return namedBlob;
  }
  return new File(parts, name, options);
}
|
|
771
|
-
|
|
772
|
-
/**
 * Converts any accepted file input into a file-like object.
 *
 * Accepted inputs, checked in this order: an object that already looks like
 * a file (`name`, finite `size`, `type`, and `readSlice` or `slice`), a path
 * string, a `File`, a `Blob`, or `{ data, name, type }` where `data` is a
 * `Uint8Array` or `ArrayBuffer`.
 *
 * @param {*} input
 * @param {string} [fallbackName="audio.mp3"] - Name used when the input has none.
 * @returns {Promise<object>} The file-like object.
 * @throws {TranscribeAPIError} When the input matches none of the shapes.
 */
async function normalizeFile(input, fallbackName = "audio.mp3") {
  const looksLikeFile = Boolean(input)
    && typeof input === "object"
    && typeof input.name === "string"
    && Number.isFinite(Number(input.size))
    && typeof input.type === "string"
    && (typeof input.readSlice === "function" || typeof input.slice === "function");
  if (looksLikeFile) {
    return input;
  }

  if (typeof input === "string") {
    return fileFromPath(input);
  }

  if (typeof File !== "undefined" && input instanceof File) {
    return input;
  }

  if (input instanceof Blob) {
    if (input.name) {
      return input;
    }
    return makeFile([input], fallbackName, input.type || contentTypeFromName(fallbackName));
  }

  const data = input?.data;
  if (data instanceof Uint8Array || data instanceof ArrayBuffer) {
    const name = input.name || fallbackName;
    return makeFile([input.data], name, input.type || contentTypeFromName(input.name || fallbackName));
  }

  throw new TranscribeAPIError("Invalid file. Provide a path, File, Blob, or { data, name, type }.");
}
|
|
797
|
-
|
|
798
|
-
/**
 * Reads a fetch-style response body and returns the parsed JSON payload.
 *
 * @param {{ok: boolean, status: number, text: function(): Promise<string>}} response
 * @returns {Promise<*>} Parsed JSON; `{}` for an empty body; `{ raw: text }`
 *   when the body is not valid JSON.
 * @throws {TranscribeAPIError} When `response.ok` is false. The message is
 *   the payload's `error`, else the raw text, else `HTTP <status>`.
 */
async function parseApiResponse(response) {
  const text = await response.text();
  let data = {};
  try {
    data = text ? JSON.parse(text) : {};
  } catch {
    data = { raw: text };
  }
  if (!response.ok) {
    // `data` can be null (a body of literal `null` is valid JSON), so read it
    // with optional chaining; otherwise a TypeError would mask the API failure.
    throw new TranscribeAPIError(data?.error || text || `HTTP ${response.status}`, {
      status: response.status,
      code: data?.code || null,
      extra: data,
      response: data,
    });
  }
  return data;
}
|
|
816
|
-
|
|
817
|
-
/**
 * Produce a plain header map with string values.
 *
 * Entries whose value is `undefined` or `null` are dropped; every other value
 * is converted with `String()`. A `null` or `undefined` argument is treated as
 * "no headers" instead of throwing.
 *
 * @param {Object<string, *>|null} [headers={}] - Raw header map.
 * @returns {Object<string, string>} Cleaned header map.
 */
function normalizeHeaders(headers = {}) {
  // The default parameter only covers `undefined`; `??` also covers `null`.
  const entries = Object.entries(headers ?? {});
  return Object.fromEntries(
    entries
      .filter(([, value]) => value !== undefined && value !== null)
      .map(([key, value]) => [key, String(value)]),
  );
}
|
|
824
|
-
|
|
825
|
-
/**
 * Report whether `body` looks like a Node.js readable stream, i.e. it is a
 * non-null object exposing a `pipe` function.
 *
 * @param {*} body - Candidate request body.
 * @returns {boolean} True when `body` can be piped.
 */
function isNodeStreamBody(body) {
  if (!body || typeof body !== "object") {
    return false;
  }
  return typeof body.pipe === "function";
}
|
|
828
|
-
|
|
829
|
-
/**
 * PUT a Node.js readable stream to a URL using the core http/https modules.
 *
 * The promise settles exactly once: it resolves when the response has been
 * fully drained, and rejects on the first error from the request, its socket,
 * the response, the source stream, the pipeline, or the 120 s idle timeout.
 *
 * @param {{ url: string }|string} upload - Target URL, or an object carrying it.
 * @param {object} streamBody - Readable stream to send as the request body.
 * @param {Object<string, string>} headers - Request headers.
 * @returns {Promise<{ ok: boolean, status: number, headers: { get: function(string): (string|null) } }>}
 *   A minimal fetch-like response.
 */
async function putNodeStream(upload, streamBody, headers) {
  const http = await import("node:http");
  const https = await import("node:https");
  const { pipeline } = await import("node:stream/promises");
  const target = new URL(upload.url || upload);
  const transport = target.protocol === "https:" ? https : http;

  return new Promise((resolve, reject) => {
    let settled = false;
    // Wrap a settle function so only the first outcome wins.
    const once = (settle) => (value) => {
      if (!settled) {
        settled = true;
        settle(value);
      }
    };
    const finishResolve = once(resolve);
    const finishReject = once(reject);

    const readHeader = (response, name) => {
      const value = response.headers[String(name || "").toLowerCase()];
      return Array.isArray(value) ? value[0] : value || null;
    };

    const handleResponse = (response) => {
      // Drain the body so the "end" event fires.
      response.resume();
      response.on("error", finishReject);
      response.on("end", () => {
        finishResolve({
          ok: response.statusCode >= 200 && response.statusCode < 300,
          status: response.statusCode || 0,
          headers: {
            get: (name) => readHeader(response, name),
          },
        });
      });
    };

    const requestOptions = { method: "PUT", headers, agent: false };
    const request = transport.request(target, requestOptions, handleResponse);

    request.on("error", finishReject);
    request.on("socket", (socket) => {
      socket.on("error", finishReject);
    });
    request.setTimeout(120000, () => {
      request.destroy(new Error("stream_upload_timeout"));
    });
    if (typeof streamBody.on === "function") {
      streamBody.on("error", finishReject);
    }
    pipeline(streamBody, request).catch(finishReject);
  });
}
|
|
886
|
-
|
|
887
|
-
/**
 * PUT a body to an upload URL through `retry` (5 attempts, 1000 ms base
 * delay) and report progress once the upload has succeeded.
 *
 * `body` may be a value or a factory function; a factory is invoked on every
 * attempt so each retry gets a fresh body, and such requests are sent with
 * `Connection: close`. Stream bodies go through `putNodeStream`, everything
 * else through `fetch`.
 *
 * @param {{ url: string, headers?: object }|string} upload - Upload target.
 * @param {*|function(): *} body - Request body, or a factory producing one.
 * @param {object} [options]
 * @param {function(object): void} [options.onProgress] - Called once after success.
 * @param {number} [options.loadedOffset=0] - Bytes already uploaded before this call.
 * @param {number} [options.totalBytes=0] - Total size reported to `onProgress`.
 * @param {number|null} [options.contentLength=null] - Explicit Content-Length.
 * @param {object|null} [options.progressMeta=null] - Extra fields merged into progress events and errors.
 * @param {object|null} [options.debugContext=null] - Extra fields merged into errors.
 * @returns {Promise<object>} The successful response.
 * @throws {TranscribeAPIError} With code "upload_failed" on a non-2xx response.
 */
async function putObjectWithRetry(upload, body, {
  onProgress,
  loadedOffset = 0,
  totalBytes = 0,
  contentLength = null,
  progressMeta = null,
  debugContext = null,
} = {}) {
  const bodyIsFactory = typeof body === "function";

  const attemptPut = async (attemptIndex) => {
    const resolvedBody = bodyIsFactory ? await body() : body;

    const headerSource = { ...(upload.headers || {}) };
    if (contentLength !== null) {
      headerSource["Content-Length"] = contentLength;
    }
    if (bodyIsFactory) {
      headerSource.Connection = "close";
    }
    const headers = normalizeHeaders(headerSource);

    let putResponse;
    if (isNodeStreamBody(resolvedBody)) {
      putResponse = await putNodeStream(upload, resolvedBody, headers);
    } else {
      putResponse = await fetch(upload.url || upload, {
        method: "PUT",
        headers,
        body: resolvedBody,
      });
    }

    if (putResponse.ok) {
      return putResponse;
    }
    throw new TranscribeAPIError(`R2 upload failed with HTTP ${putResponse.status}.`, {
      status: putResponse.status,
      code: "upload_failed",
      extra: {
        ...(progressMeta || {}),
        ...(debugContext || {}),
        content_length: contentLength,
      },
    });
  };

  const response = await retry(attemptPut, {
    attempts: 5,
    baseDelayMs: 1000,
  });

  // Prefer the explicit length, then fall back to what the body itself reports.
  const transferredBytes = Number(contentLength ?? body?.size ?? body?.byteLength ?? 0);
  const loaded = loadedOffset + transferredBytes;
  if (onProgress) {
    onProgress({
      loaded,
      total: totalBytes || loaded,
      ...(progressMeta || {}),
    });
  }
  return response;
}
|
|
941
|
-
|
|
942
|
-
/**
 * Produce an upload body for `file`.
 *
 * Objects without a `path` are returned unchanged. Path-backed files are
 * opened as a read stream over bytes `[start, end)`; when `end` is null the
 * stream runs from `start` to the end of the file.
 *
 * @param {object} file - File-like object, optionally carrying `path`.
 * @param {number} [start=0] - First byte to read.
 * @param {number|null} [end=null] - Exclusive end offset, or null for "to EOF".
 * @returns {Promise<object>} The original object or a read stream.
 */
async function openFileBody(file, start = 0, end = null) {
  if (file?.path) {
    const { createReadStream } = await import("node:fs");
    // createReadStream takes an inclusive `end`, hence the -1.
    const range = end === null ? { start } : { start, end: end - 1 };
    return createReadStream(file.path, range);
  }
  return file;
}
|
|
949
|
-
|
|
950
|
-
/**
 * Run `worker` over every item with at most `concurrency` invocations in
 * flight at a time.
 *
 * Items are claimed in index order from a shared cursor. The returned promise
 * rejects with the first worker error (via `Promise.all`).
 *
 * @param {Array} items - Items to process.
 * @param {number} concurrency - Maximum parallel workers. Values that are not
 *   a number, or are below 1, fall back to 1 so that every item is still
 *   processed.
 * @param {function(*, number): Promise<*>} worker - Receives `(item, index)`.
 * @returns {Promise<void>}
 */
async function runWithConcurrency(items, concurrency, worker) {
  // A NaN/undefined limit would propagate through Math.min/Math.max and give
  // Array.from a length of 0: no workers, and silently nothing processed.
  const requested = Math.floor(Number(concurrency));
  const limit = Number.isNaN(requested) || requested < 1 ? 1 : requested;
  const workerCount = Math.min(items.length || 1, limit);

  let nextIndex = 0;
  const runners = Array.from({ length: workerCount }, async () => {
    while (nextIndex < items.length) {
      const current = nextIndex;
      nextIndex += 1;
      await worker(items[current], current);
    }
  });
  await Promise.all(runners);
}
|
|
965
|
-
|
|
966
|
-
/**
 * Build the `<CompleteMultipartUpload>` XML body listing every uploaded part
 * in ascending part-number order.
 *
 * The input array is left untouched: sorting happens on a copy, so callers
 * that keep using the same array (for progress or resume state) never see it
 * reordered.
 *
 * @param {Array<{ partNumber: number, etag: string }>} parts - Uploaded parts.
 * @returns {string} XML document body.
 */
function multipartCompleteXml(parts) {
  const ordered = [...parts].sort((a, b) => a.partNumber - b.partNumber);
  const rows = ordered
    .map((part) => `<Part><PartNumber>${part.partNumber}</PartNumber><ETag>${part.etag}</ETag></Part>`)
    .join("");
  return `<CompleteMultipartUpload>${rows}</CompleteMultipartUpload>`;
}
|
|
973
|
-
|
|
974
|
-
/**
 * Upload `file` in parts to the pre-signed part URLs in `upload.parts`, then
 * POST the completion XML to `upload.complete_url`.
 *
 * Parts listed in `resumeState.completed_parts` are skipped. A fixed pool of
 * workers pulls parts from a shared queue; when a part fails with a retryable
 * error the target concurrency is halved (never below 1), the part is put back
 * on the queue and the worker backs off before continuing. A non-retryable
 * error, or a part reaching MAX_MULTIPART_ADAPTIVE_ATTEMPTS failures, stops
 * all workers and is thrown after they have drained.
 *
 * @param {object} upload - Instructions: `parts` (each with `part_number` and
 *   `url`), `part_size` and `complete_url`.
 * @param {object} file - File-like object; `file.path` selects streaming reads,
 *   otherwise `file.slice` is used.
 * @param {object} [options]
 * @param {function(object): void} [options.onProgress] - Called after each
 *   finished part, and once up front when resuming with bytes already done.
 * @param {number} [options.multipartConcurrency] - Requested worker count,
 *   passed through `normalizeMultipartConcurrency`.
 * @param {object|null} [options.resumeState] - Previously persisted state.
 * @param {function(object): Promise<void>|null} [options.onPartComplete] -
 *   Awaited after each part; its failures are ignored.
 * @param {function(object): void|null} [options.onConcurrencyChange] - Called
 *   when the target concurrency drops; its failures are ignored.
 * @returns {Promise<object>} The response of the completion request.
 * @throws {TranscribeAPIError} When a part fails fatally or completion fails.
 */
async function uploadMultipart(upload, file, {
  onProgress,
  multipartConcurrency = DEFAULT_MULTIPART_CONCURRENCY,
  resumeState = null,
  onPartComplete = null,
  onConcurrencyChange = null,
} = {}) {
  // Seed the completed list from any resume state (snake_case -> camelCase).
  const completed = normalizeCompletedParts(resumeState?.completed_parts).map((part) => ({
    partNumber: part.part_number,
    etag: part.etag,
  }));
  const totalParts = upload.parts.length;
  const completedPartNumbers = new Set(completed.map((part) => part.partNumber));
  // Bytes covered by the already-completed parts; the last part may be short.
  let completedBytes = completed.reduce((total, part) => {
    const start = (part.partNumber - 1) * upload.part_size;
    const end = Math.min(file.size, start + upload.part_size);
    return total + Math.max(0, end - start);
  }, 0);
  const concurrency = normalizeMultipartConcurrency(multipartConcurrency);
  // Number of workers currently allowed to pull parts; only ever lowered.
  let targetConcurrency = concurrency;
  // Workers that are in the middle of uploading a part.
  let activeWorkers = 0;
  let fatalError = null;
  const pendingParts = upload.parts.filter((part) => !completedPartNumbers.has(part.part_number));
  // part_number -> number of failed attempts made at this level.
  const partAttempts = new Map();

  if (onProgress && completedBytes > 0) {
    onProgress({
      loaded: completedBytes,
      total: file.size,
      uploadType: "multipart",
      totalParts,
      multipartConcurrency: targetConcurrency,
      resumed: true,
    });
  }

  const workers = Array.from({ length: concurrency }, async (_, workerIndex) => {
    while (true) {
      if (fatalError) {
        return;
      }
      if (completedPartNumbers.size >= totalParts) {
        return;
      }
      // Workers above the current target idle instead of exiting.
      if (workerIndex >= targetConcurrency) {
        await sleep(MULTIPART_IDLE_WAIT_MS);
        continue;
      }
      const part = pendingParts.shift();
      if (!part) {
        // Queue is empty: exit only when no other worker is mid-upload,
        // because a failing upload may put its part back on the queue.
        if (!activeWorkers) {
          return;
        }
        await sleep(MULTIPART_IDLE_WAIT_MS);
        continue;
      }

      activeWorkers += 1;
      const start = (part.part_number - 1) * upload.part_size;
      const end = Math.min(file.size, start + upload.part_size);
      const chunkSize = end - start;
      try {
        const response = await putObjectWithRetry(
          { url: part.url },
          // Path-backed files pass a factory so each attempt opens a fresh body.
          file.path ? (() => openFileBody(file, start, end)) : file.slice(start, end),
          {
            // Progress is reported below, per finished part.
            onProgress: null,
            loadedOffset: 0,
            totalBytes: file.size,
            contentLength: chunkSize,
            progressMeta: {
              uploadType: "multipart",
              partNumber: part.part_number,
              totalParts,
              chunkBytes: chunkSize,
            },
            debugContext: {
              file_name: file?.name || null,
              file_size: file?.size || null,
              range_start: start,
              range_end_exclusive: end,
            },
          },
        );
        const etag = response.headers.get("ETag") || response.headers.get("etag");
        completed.push({
          partNumber: part.part_number,
          etag,
        });
        completedPartNumbers.add(part.part_number);
        completedBytes += chunkSize;
        if (onPartComplete) {
          try {
            await onPartComplete({
              partNumber: part.part_number,
              etag,
              completedParts: completed,
            });
          } catch {
            // Resume-state persistence is best-effort.
          }
        }
        if (onProgress) {
          onProgress({
            loaded: completedBytes,
            total: file.size,
            uploadType: "multipart",
            partNumber: part.part_number,
            totalParts,
            chunkBytes: chunkSize,
            multipartConcurrency: targetConcurrency,
          });
        }
      } catch (error) {
        const attempts = (partAttempts.get(part.part_number) || 0) + 1;
        partAttempts.set(part.part_number, attempts);
        const retryable = isRetryableError(error);
        if (retryable && attempts < MAX_MULTIPART_ADAPTIVE_ATTEMPTS) {
          // Back off: halve the allowed parallelism and requeue the part.
          const previousConcurrency = targetConcurrency;
          targetConcurrency = Math.max(1, Math.floor(targetConcurrency / 2));
          pendingParts.push(part);
          if (onConcurrencyChange && targetConcurrency !== previousConcurrency) {
            try {
              onConcurrencyChange({
                previousConcurrency,
                nextConcurrency: targetConcurrency,
                partNumber: part.part_number,
                attempts,
                error: extractErrorInfo(error),
              });
            } catch {
              // Ignore observer failures.
            }
          }
          activeWorkers -= 1;
          // Delay grows 1 s, 2 s, 4 s, then stays at 8 s.
          await sleep(Math.min(10000, 1000 * (2 ** Math.min(attempts - 1, 3))));
          continue;
        }
        // Setting fatalError makes every worker return on its next loop pass.
        fatalError = new TranscribeAPIError(
          `Multipart upload failed for part ${part.part_number}/${totalParts}.`,
          {
            status: error?.status || null,
            code: error?.code || "multipart_part_upload_failed",
            extra: {
              cause_error: error?.error || error?.message || String(error),
              cause_code: error?.code || error?.cause?.code || null,
              cause_status: error?.status || null,
              upload_type: "multipart",
              part_number: part.part_number,
              total_parts: totalParts,
              chunk_bytes: chunkSize,
              completed_bytes_before_failure: completedBytes,
              multipart_concurrency: targetConcurrency,
              file_name: file?.name || null,
              file_size: file?.size || null,
              range_start: start,
              range_end_exclusive: end,
              attempts,
            },
          },
        );
        activeWorkers -= 1;
        return;
      }
      activeWorkers -= 1;
    }
  });

  await Promise.all(workers);
  if (fatalError) {
    throw fatalError;
  }

  // All parts are uploaded: ask the storage service to assemble the object.
  const completeResponse = await retry(async () => {
    const response = await fetch(upload.complete_url, {
      method: "POST",
      headers: { "Content-Type": "application/xml" },
      body: multipartCompleteXml(completed),
    });
    if (!response.ok) {
      throw new TranscribeAPIError(`R2 multipart complete failed with HTTP ${response.status}.`, {
        status: response.status,
        code: "multipart_complete_failed",
      });
    }
    return response;
  });
  return completeResponse;
}
|
|
1163
|
-
|
|
1164
|
-
/**
 * Upload `file` according to server-issued instructions.
 *
 * `upload.type === "multipart"` delegates to `uploadMultipart` with the
 * caller's options. Anything else is sent as a single PUT; path-backed files
 * are supplied as a factory so that each attempt opens a fresh body.
 *
 * @param {object} upload - Upload instructions from the API.
 * @param {object} file - File-like object (`size`, optional `path` / `name`).
 * @param {object} [options={}] - Upload options; `onProgress` is forwarded.
 * @returns {Promise<object>} Result of the underlying upload call.
 */
async function uploadUsingInstructions(upload, file, options = {}) {
  if (upload.type !== "multipart") {
    const body = file.path ? (() => openFileBody(file)) : file;
    const { size } = file;
    return putObjectWithRetry(upload, body, {
      onProgress: options.onProgress,
      loadedOffset: 0,
      totalBytes: size,
      contentLength: size,
      progressMeta: {
        uploadType: "single_put",
        chunkBytes: size,
      },
      debugContext: {
        file_name: file?.name || null,
        file_size: file?.size || null,
      },
    });
  }
  return uploadMultipart(upload, file, options);
}
|
|
1184
|
-
|
|
1185
|
-
/**
 * Reject batches that exceed the file-count or total-size limits.
 *
 * @param {number} totalFiles - Number of files in the batch.
 * @param {number} totalSizeBytes - Combined size of all files, in bytes.
 * @throws {TranscribeAPIError} Code "too_many_files" when the count is above
 *   MAX_BATCH_FILES; code "batch_too_large" when the size is above
 *   MAX_BATCH_TOTAL_SIZE_BYTES.
 */
function assertBatchLimits(totalFiles, totalSizeBytes) {
  const tooManyFiles = totalFiles > MAX_BATCH_FILES;
  if (tooManyFiles) {
    const message = `Batch jobs support up to ${MAX_BATCH_FILES} files.`;
    throw new TranscribeAPIError(message, { code: "too_many_files" });
  }

  const tooLarge = totalSizeBytes > MAX_BATCH_TOTAL_SIZE_BYTES;
  if (tooLarge) {
    throw new TranscribeAPIError("Batch jobs support up to 10GB total.", { code: "batch_too_large" });
  }
}
|
|
1197
|
-
|
|
1198
|
-
class BigFileJob {
|
|
1199
|
-
constructor(client, file, createResponse, options) {
|
|
1200
|
-
this.client = client;
|
|
1201
|
-
this.file = file;
|
|
1202
|
-
this.createResponse = createResponse;
|
|
1203
|
-
this.referenceId = options.referenceId || defaultReferenceId(0);
|
|
1204
|
-
this.jobId = createResponse.job_id;
|
|
1205
|
-
this.jobStatus = createResponse.job_status;
|
|
1206
|
-
this.model = createResponse.model || options.model || null;
|
|
1207
|
-
this.uploadInfo = uploadFromResponse(createResponse, this.referenceId);
|
|
1208
|
-
this.options = options;
|
|
1209
|
-
this.resumeState = options.resumeState || null;
|
|
1210
|
-
this.resumeWriteChain = Promise.resolve();
|
|
1211
|
-
}
|
|
1212
|
-
|
|
1213
|
-
async persistResumeState(completedParts = this.resumeState?.completed_parts || []) {
|
|
1214
|
-
if (!isNodePathFile(this.file) || this.uploadInfo?.type !== "multipart") {
|
|
1215
|
-
return;
|
|
1216
|
-
}
|
|
1217
|
-
const state = buildMultipartResumeState({
|
|
1218
|
-
baseUrl: this.client.baseUrl,
|
|
1219
|
-
jobId: this.jobId,
|
|
1220
|
-
model: this.model,
|
|
1221
|
-
file: this.file,
|
|
1222
|
-
upload: this.uploadInfo,
|
|
1223
|
-
completedParts,
|
|
1224
|
-
});
|
|
1225
|
-
this.resumeState = state;
|
|
1226
|
-
this.resumeWriteChain = this.resumeWriteChain
|
|
1227
|
-
.catch(() => {})
|
|
1228
|
-
.then(() => writeMultipartResumeState(this.file, state))
|
|
1229
|
-
.catch(() => {});
|
|
1230
|
-
await this.resumeWriteChain;
|
|
1231
|
-
}
|
|
1232
|
-
|
|
1233
|
-
async clearResumeState() {
|
|
1234
|
-
await this.resumeWriteChain.catch(() => {});
|
|
1235
|
-
await deleteMultipartResumeState(this.file);
|
|
1236
|
-
this.resumeState = null;
|
|
1237
|
-
}
|
|
1238
|
-
|
|
1239
|
-
async upload({ onProgress } = {}) {
|
|
1240
|
-
const progress = onProgress || this.options.onProgress;
|
|
1241
|
-
if (!this.uploadInfo) {
|
|
1242
|
-
return this.createResponse;
|
|
425
|
+
|
|
426
|
+
/**
 * Validate that a file may be submitted in a batch job.
 *
 * `.mp4` names and the exact type `video/mp4` are rejected with a dedicated
 * message. Otherwise the file passes when either its lower-cased name ends in
 * a supported extension or its lower-cased content type contains a supported
 * fragment. When the file has no `type`, one is derived from its name.
 *
 * @param {{ name?: string, type?: string }} file - File-like object.
 * @throws {TranscribeAPIError} Code "unsupported_batch_format" for MP4 input
 *   or for anything that matches no supported format.
 */
function assertSupportedBatchFormat(file) {
  const lowerName = String(file?.name || "").toLowerCase();
  const lowerType = String(file?.type || contentTypeFromName(file?.name)).toLowerCase();

  const isMp4 = lowerName.endsWith(".mp4") || lowerType === "video/mp4";
  if (isMp4) {
    throw new TranscribeAPIError(BATCH_MP4_UNSUPPORTED_MESSAGE, { code: "unsupported_batch_format" });
  }

  const supportedExtensions = [".mp3", ".mpeg", ".mpga", ".m4a", ".wav", ".webm"];
  const supportedTypeFragments = [
    "audio/mpeg",
    "mpga",
    "audio/mp4",
    "audio/x-m4a",
    "audio/wav",
    "audio/wave",
    "audio/webm",
    "video/webm",
  ];
  const nameMatches = supportedExtensions.some((extension) => lowerName.endsWith(extension));
  const typeMatches = supportedTypeFragments.some((fragment) => lowerType.includes(fragment));
  if (nameMatches || typeMatches) {
    return;
  }

  throw new TranscribeAPIError(BATCH_UNSUPPORTED_MESSAGE, { code: "unsupported_batch_format" });
}
|
|
455
|
+
|
|
456
|
+
/**
 * Coarse duration estimate from file size alone: one second per 16000 bytes,
 * rounded up, and never less than one second.
 *
 * Missing, non-numeric, non-finite or non-positive sizes yield the minimum of
 * 1 instead of propagating `NaN` to the caller.
 *
 * @param {number|string|null|undefined} sizeBytes - File size in bytes.
 * @returns {number} Estimated duration in whole seconds (>= 1).
 */
function estimateDurationFromSize(sizeBytes) {
  const size = Number(sizeBytes || 0);
  // Math.max(1, NaN) is NaN, so reject unusable sizes before the arithmetic.
  if (!Number.isFinite(size) || size <= 0) {
    return 1;
  }
  return Math.max(1, Math.ceil(size / 16000));
}
|
|
459
|
+
|
|
460
|
+
/**
 * Decode `length` bytes starting at `offset` into a string, one character per
 * byte value. Ranges running past the end of `bytes` are truncated.
 *
 * @param {Uint8Array} bytes - Source bytes.
 * @param {number} offset - Index of the first byte.
 * @param {number} length - Number of bytes to decode.
 * @returns {string} The decoded text.
 */
function ascii(bytes, offset, length) {
  let text = "";
  for (const byte of bytes.slice(offset, offset + length)) {
    text += String.fromCharCode(byte);
  }
  return text;
}
|
|
463
|
+
|
|
464
|
+
/**
 * Read an unsigned 32-bit integer from `bytes` at `offset`.
 *
 * The view's own `byteOffset` is honoured, so subarrays read correctly.
 *
 * @param {Uint8Array} bytes - Source bytes.
 * @param {number} offset - Index of the first of the four bytes.
 * @param {boolean} [littleEndian=false] - Byte order; big-endian by default.
 * @returns {number} The decoded value.
 * @throws {RangeError} When the four bytes are not inside the buffer.
 */
function readUint32(bytes, offset, littleEndian = false) {
  const { buffer, byteOffset } = bytes;
  const view = new DataView(buffer, byteOffset + offset, 4);
  return view.getUint32(0, littleEndian);
}
|
|
467
|
+
|
|
468
|
+
/**
 * Read a big-endian unsigned 64-bit integer from `bytes` at `offset` as a
 * JavaScript number (high word times 2^32 plus low word). Values above 2^53
 * therefore lose precision.
 *
 * @param {Uint8Array} bytes - Source bytes.
 * @param {number} offset - Index of the first of the eight bytes.
 * @returns {number} The decoded value.
 * @throws {RangeError} When the eight bytes are not inside the buffer.
 */
function readUint64(bytes, offset) {
  const view = new DataView(bytes.buffer, bytes.byteOffset + offset, 8);
  const highWord = view.getUint32(0, false);
  const lowWord = view.getUint32(4, false);
  return highWord * 2 ** 32 + lowWord;
}
|
|
473
|
+
|
|
474
|
+
/**
 * Decode a 4-byte "sync-safe" integer: each byte contributes its low 7 bits,
 * most significant byte first, giving a 28-bit value.
 *
 * @param {Uint8Array} bytes - Source bytes.
 * @param {number} offset - Index of the first of the four bytes.
 * @returns {number} The decoded value.
 */
function syncSafeInteger(bytes, offset) {
  let value = 0;
  for (let index = 0; index < 4; index += 1) {
    value = (value << 7) | (bytes[offset + index] & 0x7f);
  }
  return value;
}
|
|
480
|
+
|
|
481
|
+
/**
 * Return the byte offset at which audio data starts after a leading ID3v2
 * tag, or 0 when the buffer does not begin with one.
 *
 * The tag occupies a 10-byte header plus the sync-safe size stored in bytes
 * 6-9. An ID3v2.4 tag may also carry a 10-byte footer, signalled by bit 4 of
 * the flags byte; the stored size excludes it, so it is added here.
 *
 * @param {Uint8Array} bytes - Start of the file.
 * @returns {number} Offset of the first byte after the tag.
 */
function id3Offset(bytes) {
  if (bytes.length < 10 || ascii(bytes, 0, 3) !== "ID3") {
    return 0;
  }
  const majorVersion = bytes[3];
  const flags = bytes[5];
  // The footer flag is only defined for v2.4; earlier versions leave bit 4 unused.
  const footerLength = majorVersion === 4 && (flags & 0x10) !== 0 ? 10 : 0;
  return 10 + syncSafeInteger(bytes, 6) + footerLength;
}
|
|
487
|
+
|
|
488
|
+
// Layer III bitrate tables in kbit/s, indexed by the 4-bit bitrate index.
// V1L3 is used when the version bits are 3, V2L3 for every other version.
const MP3_BITRATES = {
  V1L3: [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320],
  V2L3: [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160],
};

// Sample rates in Hz, keyed by the 2-bit version field of the frame header
// and indexed by the 2-bit sample-rate index.
const MP3_SAMPLE_RATES = {
  3: [44100, 48000, 32000],
  2: [22050, 24000, 16000],
  0: [11025, 12000, 8000],
};

/**
 * Decode the frame header at `offset`, if there is a usable one.
 *
 * Only Layer III headers (layer bits equal to 1) are accepted. The header is
 * rejected when fewer than four bytes remain, the sync bits are not set, the
 * version bits are 1, the bitrate index is 0 or 15, or the sample-rate index
 * is 3.
 *
 * @param {Uint8Array} bytes - Buffer to inspect.
 * @param {number} offset - Index of the candidate header.
 * @returns {{ bitrate: number, sampleRate: number, frameLength: number }|null}
 *   Bitrate in bit/s, sample rate in Hz and frame length in bytes, or null.
 */
function mp3FrameInfoAt(bytes, offset) {
  const headerFits = offset + 4 <= bytes.length;
  if (!headerFits || bytes[offset] !== 0xff || (bytes[offset + 1] & 0xe0) !== 0xe0) {
    return null;
  }

  const secondByte = bytes[offset + 1];
  const thirdByte = bytes[offset + 2];
  const versionBits = (secondByte >> 3) & 0x03;
  const layerBits = (secondByte >> 1) & 0x03;
  const bitrateIndex = (thirdByte >> 4) & 0x0f;
  const sampleRateIndex = (thirdByte >> 2) & 0x03;
  const padding = (thirdByte >> 1) & 0x01;

  const unusable = versionBits === 1
    || layerBits !== 1
    || bitrateIndex === 0
    || bitrateIndex === 15
    || sampleRateIndex === 3;
  if (unusable) {
    return null;
  }

  const isVersionThree = versionBits === 3;
  const sampleRate = MP3_SAMPLE_RATES[versionBits]?.[sampleRateIndex];
  const bitrateTable = isVersionThree ? MP3_BITRATES.V1L3 : MP3_BITRATES.V2L3;
  const bitrateKbps = bitrateTable[bitrateIndex];
  if (!sampleRate || !bitrateKbps) {
    return null;
  }

  const bitrate = bitrateKbps * 1000;
  const coefficient = isVersionThree ? 144 : 72;
  const frameLength = Math.floor((coefficient * bitrate) / sampleRate + padding);
  if (frameLength <= 0) {
    return null;
  }

  return { bitrate, sampleRate, frameLength };
}
|
|
533
|
+
|
|
534
|
+
/**
 * Scan forward from `startOffset` for the first MP3 frame header.
 *
 * A candidate is preferred when another valid header sits exactly one frame
 * length after it, which filters out stray bytes that merely look like a
 * sync word. When the following header would fall outside the buffer the
 * candidate is accepted as-is. If no candidate can be confirmed, the first
 * plausible one is returned, so a result is produced whenever any header
 * decodes.
 *
 * @param {Uint8Array} bytes - Buffer to scan.
 * @param {number} [startOffset=0] - Index to start from; negatives count as 0.
 * @returns {{ offset: number, bitrate: number, sampleRate: number, frameLength: number }|null}
 *   The located frame, or null when nothing in the buffer decodes.
 */
function findMp3Frame(bytes, startOffset = 0) {
  let firstUnconfirmed = null;
  // `offset + 4 <= length` also lets a header in the final four bytes be found.
  for (let offset = Math.max(0, startOffset); offset + 4 <= bytes.length; offset += 1) {
    const info = mp3FrameInfoAt(bytes, offset);
    if (!info) {
      continue;
    }
    const candidate = { offset, ...info };
    const nextOffset = offset + info.frameLength;
    const nextHeaderVisible = nextOffset + 4 <= bytes.length;
    if (!nextHeaderVisible || mp3FrameInfoAt(bytes, nextOffset)) {
      return candidate;
    }
    if (!firstUnconfirmed) {
      firstUnconfirmed = candidate;
    }
  }
  return firstUnconfirmed;
}
|
|
543
|
+
|
|
544
|
+
/**
 * Estimate an MP3's duration from the bitrate of its first frame.
 *
 * The audio payload is taken to be everything from the first frame to the end
 * of the object, and the first frame's bitrate is applied to all of it.
 *
 * @param {Uint8Array} bytes - Leading bytes of the file.
 * @param {number} objectSize - Full size of the file in bytes; when falsy,
 *   `bytes.length` is used instead.
 * @returns {number|null} Duration in seconds, or null when no frame is found
 *   or the result is not a positive finite number.
 */
function parseMp3Duration(bytes, objectSize) {
  const firstFrame = findMp3Frame(bytes, id3Offset(bytes));
  if (!firstFrame) {
    return null;
  }
  const totalBytes = Number(objectSize || bytes.length);
  const audioBytes = Math.max(0, totalBytes - firstFrame.offset);
  const seconds = (audioBytes * 8) / firstFrame.bitrate;
  if (!Number.isFinite(seconds) || seconds <= 0) {
    return null;
  }
  return seconds;
}
|
|
554
|
+
|
|
555
|
+
/**
 * Compute a WAV file's duration from its RIFF header.
 *
 * Chunks are walked from byte 12. The byte rate is read from the `fmt ` chunk
 * and the payload length from the `data` chunk header, at which point the
 * walk stops. Duration is payload length divided by byte rate.
 *
 * Encoders that write to a non-seekable output commonly leave the data size
 * as the placeholder 0xFFFFFFFF. That value is treated as "unknown" and null
 * is returned rather than a bogus multi-hour duration.
 *
 * @param {Uint8Array} bytes - Leading bytes of the file.
 * @returns {number|null} Duration in seconds, or null when the header is not
 *   RIFF/WAVE or the byte rate / data size is missing or unknown.
 */
function parseWavDuration(bytes) {
  if (bytes.length < 44 || ascii(bytes, 0, 4) !== "RIFF" || ascii(bytes, 8, 4) !== "WAVE") {
    return null;
  }

  const UNKNOWN_DATA_SIZE = 0xffffffff;
  let offset = 12;
  let byteRate = 0;
  let dataSize = 0;
  while (offset + 8 <= bytes.length) {
    const chunkId = ascii(bytes, offset, 4);
    const chunkSize = readUint32(bytes, offset + 4, true);
    if (chunkId === "fmt " && offset + 20 <= bytes.length) {
      // Byte rate sits 8 bytes into the fmt payload, i.e. 16 past the chunk start.
      byteRate = readUint32(bytes, offset + 16, true);
    } else if (chunkId === "data") {
      dataSize = chunkSize;
      break;
    }
    // Chunks are word-aligned: odd sizes are followed by one pad byte.
    offset += 8 + chunkSize + (chunkSize % 2);
  }

  if (!byteRate || !dataSize || dataSize === UNKNOWN_DATA_SIZE) {
    return null;
  }
  return dataSize / byteRate;
}
|
|
580
|
+
|
|
581
|
+
function parseMp4Duration(bytes) {
|
|
582
|
+
for (let offset = 0; offset + 32 < bytes.length; offset += 1) {
|
|
583
|
+
if (ascii(bytes, offset + 4, 4) !== "mvhd") {
|
|
584
|
+
continue;
|
|
1243
585
|
}
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
});
|
|
1248
|
-
if (persisted?.job_id === this.jobId && persisted.upload?.type === "multipart") {
|
|
1249
|
-
this.resumeState = persisted;
|
|
1250
|
-
try {
|
|
1251
|
-
const refreshed = await this.client.refreshBigFileUpload(this.jobId, {
|
|
1252
|
-
upload: persisted.upload,
|
|
1253
|
-
});
|
|
1254
|
-
this.uploadInfo = refreshed;
|
|
1255
|
-
} catch (error) {
|
|
1256
|
-
if (error?.status === 404 || error?.code === "not_found") {
|
|
1257
|
-
await this.clearResumeState();
|
|
1258
|
-
}
|
|
1259
|
-
if (error?.status === 409) {
|
|
1260
|
-
const currentJob = await this.client.jobs.get(this.jobId);
|
|
1261
|
-
this.jobStatus = currentJob?.job_status || this.jobStatus;
|
|
1262
|
-
this.uploadInfo = null;
|
|
1263
|
-
await this.clearResumeState();
|
|
1264
|
-
return currentJob;
|
|
1265
|
-
}
|
|
1266
|
-
if (!(error?.status === 404 || error?.code === "not_found")) {
|
|
1267
|
-
throw error;
|
|
1268
|
-
}
|
|
1269
|
-
}
|
|
1270
|
-
}
|
|
1271
|
-
await this.persistResumeState(this.resumeState?.completed_parts || []);
|
|
586
|
+
const size = readUint32(bytes, offset);
|
|
587
|
+
if (size < 32 || offset + size > bytes.length + 8) {
|
|
588
|
+
continue;
|
|
1272
589
|
}
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
590
|
+
const version = bytes[offset + 8];
|
|
591
|
+
if (version === 0 && offset + 28 <= bytes.length) {
|
|
592
|
+
const timescale = readUint32(bytes, offset + 20);
|
|
593
|
+
const duration = readUint32(bytes, offset + 24);
|
|
594
|
+
return timescale > 0 ? duration / timescale : null;
|
|
595
|
+
}
|
|
596
|
+
if (version === 1 && offset + 40 <= bytes.length) {
|
|
597
|
+
const timescale = readUint32(bytes, offset + 28);
|
|
598
|
+
const duration = readUint64(bytes, offset + 32);
|
|
599
|
+
return timescale > 0 ? duration / timescale : null;
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
return null;
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
/**
 * Pick which duration parser to use for a file, from its name and content
 * type (both compared case-insensitively).
 *
 * @param {string} [name=""] - File name.
 * @param {string} [contentType=""] - MIME type.
 * @returns {"wav"|"m4a"|"mp3"} "wav" for WAV input, "m4a" for MP4-family
 *   input, and "mp3" for everything else.
 */
function typeFromNameOrContentType(name = "", contentType = "") {
  const lowerName = String(name || "").toLowerCase();
  const lowerType = String(contentType || "").toLowerCase();

  const isWav = lowerType.includes("wav") || lowerName.endsWith(".wav");
  if (isWav) {
    return "wav";
  }

  const mp4TypeFragments = ["mp4", "m4a", "audio/x-m4a"];
  const isMp4Family = mp4TypeFragments.some((fragment) => lowerType.includes(fragment))
    || lowerName.endsWith(".m4a");
  return isMp4Family ? "m4a" : "mp3";
}
|
|
621
|
+
|
|
622
|
+
/**
 * Estimate a file's duration in whole seconds.
 *
 * Up to the first 2 MiB are read (through `readSlice` when present, otherwise
 * `slice(...).arrayBuffer()`) and handed to the WAV, MP4 or MP3 parser chosen
 * from the file's name and type. When the file is empty, cannot be read, the
 * parser yields nothing usable, or anything throws along the way, the
 * size-based heuristic is used instead.
 *
 * @param {object} file - File-like object (`size`, `name`, `type`, and
 *   `readSlice` or `slice`).
 * @returns {Promise<number>} Estimated duration in seconds.
 */
async function estimateDurationFromFile(file) {
  const estimateFromSize = () => estimateDurationFromSize(file?.size);

  const headerLength = Math.min(Number(file?.size || 0), 2 * 1024 * 1024);
  if (!headerLength) {
    return estimateFromSize();
  }

  try {
    let raw;
    if (typeof file?.readSlice === "function") {
      raw = await file.readSlice(0, headerLength);
    } else if (typeof file?.slice === "function") {
      raw = await file.slice(0, headerLength).arrayBuffer();
    } else {
      return estimateFromSize();
    }
    const bytes = new Uint8Array(raw);

    const type = typeFromNameOrContentType(file?.name, file?.type || contentTypeFromName(file?.name));
    let duration;
    switch (type) {
      case "wav":
        duration = parseWavDuration(bytes);
        break;
      case "m4a":
        duration = parseMp4Duration(bytes);
        break;
      default:
        duration = parseMp3Duration(bytes, file.size);
        break;
    }
    if (Number.isFinite(duration) && duration > 0) {
      return Math.max(1, Math.ceil(duration));
    }
  } catch {
    // Fall back to the coarse size heuristic when metadata parsing fails.
  }

  return estimateFromSize();
}
|
|
652
|
+
|
|
653
|
+
/**
 * Describe a file on disk as a File-like object without loading its contents.
 *
 * The result carries `name` (base name of the path), `size` (from `stat`),
 * `type` (derived from the name), `path`, and a `readSlice(start, end)` method
 * that opens the file, reads bytes `[start, end)` and closes it again.
 *
 * @param {string} pathValue - Filesystem path.
 * @returns {Promise<{ name: string, size: number, type: string, path: string,
 *   readSlice: function(number, number): Promise<Buffer> }>}
 */
async function fileFromPath(pathValue) {
  const fsPromises = await import("node:fs/promises");
  const nodePath = await import("node:path");
  const { size } = await fsPromises.stat(pathValue);
  const name = nodePath.basename(pathValue);

  const readSlice = async (start, end) => {
    const handle = await fsPromises.open(pathValue, "r");
    try {
      const length = Math.max(0, end - start);
      const buffer = Buffer.alloc(length);
      const { bytesRead } = await handle.read(buffer, 0, length, start);
      // Trim to what was actually read (e.g. when the range runs past EOF).
      return buffer.subarray(0, bytesRead);
    } finally {
      await handle.close();
    }
  };

  return {
    name,
    size,
    type: contentTypeFromName(name),
    path: pathValue,
    readSlice,
  };
}
|
|
676
|
+
|
|
677
|
+
/**
 * Wrap raw parts in a File-like object.
 *
 * Uses the global `File` constructor when the runtime provides one; otherwise
 * builds a `Blob` and attaches `name` to it as a read-only property.
 *
 * @param {Array} parts - Blob parts (strings, buffers, blobs).
 * @param {string} name - File name to expose on the result.
 * @param {string} type - MIME type of the content.
 * @returns {File|Blob} A File, or a Blob carrying a `name` property.
 */
function makeFile(parts, name, type) {
  const hasFileConstructor = typeof File !== "undefined";
  if (!hasFileConstructor) {
    const fallback = new Blob(parts, { type });
    Object.defineProperty(fallback, "name", { value: name });
    return fallback;
  }
  return new File(parts, name, { type });
}
|
|
685
|
+
|
|
686
|
+
/**
 * Convert any supported file input into a File-like object.
 *
 * Accepted inputs, checked in this order:
 *  1. An object that already looks like a file (`name`, `size`, `type` and a
 *     `readSlice` or `slice` function) - returned unchanged.
 *  2. A string - treated as a filesystem path and passed to `fileFromPath`.
 *  3. A `File` or `Blob` - a nameless Blob is wrapped under `fallbackName`.
 *  4. `{ data, name, type }` where `data` is an `ArrayBuffer` or any
 *     ArrayBuffer view (`Uint8Array`, `Buffer`, `DataView`, ...).
 *
 * @param {*} input - The value supplied by the caller.
 * @param {string} [fallbackName="audio.mp3"] - Name used when the input has none.
 * @returns {Promise<object>} A File-like object.
 * @throws {TranscribeAPIError} When the input matches none of the shapes above.
 */
async function normalizeFile(input, fallbackName = "audio.mp3") {
  const isFileLike = Boolean(input)
    && typeof input === "object"
    && typeof input.name === "string"
    && Number.isFinite(Number(input.size))
    && typeof input.type === "string"
    && (typeof input.readSlice === "function" || typeof input.slice === "function");
  if (isFileLike) {
    return input;
  }
  if (typeof input === "string") {
    return fileFromPath(input);
  }
  if (typeof File !== "undefined" && input instanceof File) {
    return input;
  }
  // Guard the global: referencing an undefined `Blob` would throw a ReferenceError.
  if (typeof Blob !== "undefined" && input instanceof Blob) {
    return input.name
      ? input
      : makeFile([input], fallbackName, input.type || contentTypeFromName(fallbackName));
  }
  const data = input?.data;
  // Any ArrayBuffer view is a valid Blob part, not only Uint8Array.
  if (data instanceof ArrayBuffer || ArrayBuffer.isView(data)) {
    const name = input.name || fallbackName;
    return makeFile([data], name, input.type || contentTypeFromName(name));
  }
  throw new TranscribeAPIError("Invalid file. Provide a path, File, Blob, or { data, name, type }.");
}
|
|
711
|
+
|
|
712
|
+
/**
 * Read an API response body and decode it.
 *
 * An empty body decodes to `{}`; a body that is not valid JSON is kept as the
 * raw text. Successful responses return the decoded value. Non-2xx responses
 * raise a `TranscribeAPIError` whose message is the payload's `message`, then
 * `error`, then the raw text, then `HTTP <status>`. The remaining payload
 * fields (everything except `message`, `error` and `code`) are attached as
 * `extra`, and `response` holds the message, the code (when present) and
 * those extra fields.
 *
 * @param {{ ok: boolean, status: number, text: function(): Promise<string> }} response
 * @returns {Promise<*>} The decoded payload for successful responses.
 * @throws {TranscribeAPIError} When `response.ok` is false.
 */
async function parseApiResponse(response) {
  const text = await response.text();

  let data;
  if (!text) {
    data = {};
  } else {
    try {
      data = JSON.parse(text);
    } catch {
      data = text;
    }
  }

  if (response.ok) {
    return data;
  }

  // Only object payloads can carry structured error details.
  const body = data && typeof data === "object" ? data : {};
  const message = body.message || body.error || text || `HTTP ${response.status}`;
  const {
    message: ignoredMessage,
    error: ignoredError,
    code: ignoredCode,
    ...extra
  } = body;
  const codeField = body.code ? { code: body.code } : {};

  throw new TranscribeAPIError(message, {
    status: response.status,
    code: body.code || null,
    extra,
    response: {
      message,
      ...codeField,
      ...extra,
    },
  });
}
|
|
740
|
+
|
|
741
|
+
/**
 * Produce a plain header map with string values.
 *
 * Entries whose value is `undefined` or `null` are dropped; every other value
 * is converted with `String()`. A `null` or `undefined` argument is treated as
 * "no headers" instead of throwing.
 *
 * @param {Object<string, *>|null} [headers={}] - Raw header map.
 * @returns {Object<string, string>} Cleaned header map.
 */
function normalizeHeaders(headers = {}) {
  // The default parameter only covers `undefined`; `??` also covers `null`.
  const entries = Object.entries(headers ?? {});
  return Object.fromEntries(
    entries
      .filter(([, value]) => value !== undefined && value !== null)
      .map(([key, value]) => [key, String(value)]),
  );
}
|
|
748
|
+
|
|
749
|
+
/**
 * Report whether `body` looks like a Node.js readable stream, i.e. it is a
 * non-null object exposing a `pipe` function.
 *
 * @param {*} body - Candidate request body.
 * @returns {boolean} True when `body` can be piped.
 */
function isNodeStreamBody(body) {
  if (!body || typeof body !== "object") {
    return false;
  }
  return typeof body.pipe === "function";
}
|
|
752
|
+
|
|
753
|
+
/**
 * PUT a Node.js readable stream to a URL using the core http/https modules.
 *
 * The promise settles exactly once: it resolves when the response has been
 * fully drained, and rejects on the first error from the request, its socket,
 * the response, the source stream, the pipeline, or the 120 s idle timeout.
 *
 * @param {{ url: string }|string} upload - Target URL, or an object carrying it.
 * @param {object} streamBody - Readable stream to send as the request body.
 * @param {Object<string, string>} headers - Request headers.
 * @returns {Promise<{ ok: boolean, status: number, headers: { get: function(string): (string|null) } }>}
 *   A minimal fetch-like response.
 */
async function putNodeStream(upload, streamBody, headers) {
  const http = await import("node:http");
  const https = await import("node:https");
  const { pipeline } = await import("node:stream/promises");
  const target = new URL(upload.url || upload);
  const transport = target.protocol === "https:" ? https : http;

  return new Promise((resolve, reject) => {
    let settled = false;
    // Wrap a settle function so only the first outcome wins.
    const once = (settle) => (value) => {
      if (!settled) {
        settled = true;
        settle(value);
      }
    };
    const finishResolve = once(resolve);
    const finishReject = once(reject);

    const readHeader = (response, name) => {
      const value = response.headers[String(name || "").toLowerCase()];
      return Array.isArray(value) ? value[0] : value || null;
    };

    const handleResponse = (response) => {
      // Drain the body so the "end" event fires.
      response.resume();
      response.on("error", finishReject);
      response.on("end", () => {
        finishResolve({
          ok: response.statusCode >= 200 && response.statusCode < 300,
          status: response.statusCode || 0,
          headers: {
            get: (name) => readHeader(response, name),
          },
        });
      });
    };

    const requestOptions = { method: "PUT", headers, agent: false };
    const request = transport.request(target, requestOptions, handleResponse);

    request.on("error", finishReject);
    request.on("socket", (socket) => {
      socket.on("error", finishReject);
    });
    request.setTimeout(120000, () => {
      request.destroy(new Error("stream_upload_timeout"));
    });
    if (typeof streamBody.on === "function") {
      streamBody.on("error", finishReject);
    }
    pipeline(streamBody, request).catch(finishReject);
  });
}
|
|
810
|
+
|
|
811
|
+
/**
 * PUT a body to an upload URL, retrying failed attempts through `retry`
 * (5 attempts, 1000 ms base delay), then report progress once on success.
 *
 * @param {{url: string, headers?: object}|string} upload - Upload descriptor or bare URL.
 * @param {*} body - Request body, or a function returning (a promise of) the
 *   body. A function is invoked again on every attempt so each attempt gets a
 *   fresh body.
 * @param {object} [options]
 * @param {Function} [options.onProgress] - Called once after a successful PUT.
 * @param {number} [options.loadedOffset=0] - Bytes already counted before this PUT.
 * @param {number} [options.totalBytes=0] - Total reported to `onProgress`; falls back to `loaded`.
 * @param {number|null} [options.contentLength=null] - Sent as `Content-Length` when not null.
 * @param {object|null} [options.progressMeta=null] - Extra fields merged into progress events and errors.
 * @param {object|null} [options.debugContext=null] - Extra fields merged into error details.
 * @param {{withToken: Function}|null} [options.uploadLimiter=null] - Optional limiter wrapping each attempt.
 * @returns {Promise<object>} The successful (fetch-like) response.
 * @throws {TranscribeAPIError} With code `upload_failed` when an attempt gets a non-2xx response.
 */
async function putObjectWithRetry(upload, body, {
  onProgress,
  loadedOffset = 0,
  totalBytes = 0,
  contentLength = null,
  progressMeta = null,
  debugContext = null,
  uploadLimiter = null,
} = {}) {
  const bodyIsFactory = typeof body === "function";

  // One PUT attempt. Throws on a non-OK status, so callers never see a failed response.
  const executePut = async () => {
    const resolvedBody = bodyIsFactory ? await body() : body;
    const headers = normalizeHeaders({
      ...(upload.headers || {}),
      ...(contentLength !== null ? { "Content-Length": contentLength } : {}),
      ...(bodyIsFactory ? { Connection: "close" } : {}),
    });
    const putResponse = isNodeStreamBody(resolvedBody)
      ? await putNodeStream(upload, resolvedBody, headers)
      : await fetch(upload.url || upload, {
        method: "PUT",
        headers,
        body: resolvedBody,
      });
    if (!putResponse.ok) {
      throw new TranscribeAPIError(`R2 upload failed with HTTP ${putResponse.status}.`, {
        status: putResponse.status,
        code: "upload_failed",
        extra: {
          ...(progressMeta || {}),
          ...(debugContext || {}),
          content_length: contentLength,
        },
      });
    }
    return putResponse;
  };

  const response = await retry(
    async () => (uploadLimiter?.withToken
      ? uploadLimiter.withToken(executePut)
      : executePut()),
    {
      attempts: 5,
      baseDelayMs: 1000,
    },
  );

  const transferredBytes = Number(
    contentLength
      ?? body?.size
      ?? body?.byteLength
      ?? 0,
  );
  const loaded = loadedOffset + transferredBytes;
  if (onProgress) {
    onProgress({
      loaded,
      total: totalBytes || loaded,
      ...(progressMeta || {}),
    });
  }
  return response;
}
|
|
883
|
+
|
|
884
|
+
/**
 * Produce an upload body for `file`.
 *
 * Values without a `path` property are returned unchanged. For path-backed
 * files a read stream over the byte range is opened instead.
 *
 * @param {*} file - File-like value; `file.path` selects the stream branch.
 * @param {number} [start=0] - First byte offset to read.
 * @param {number|null} [end=null] - Exclusive end offset; `null` reads to the end of the file.
 * @returns {Promise<*>} The original value or an `fs.ReadStream`.
 */
async function openFileBody(file, start = 0, end = null) {
  if (file?.path) {
    const { createReadStream } = await import("node:fs");
    const range = { start };
    if (end !== null) {
      // `createReadStream` treats `end` as inclusive, the parameter is exclusive.
      range.end = end - 1;
    }
    return createReadStream(file.path, range);
  }
  return file;
}
|
|
891
|
+
|
|
892
|
+
/**
 * Run `worker(item, index)` for every item with at most `concurrency` calls
 * in flight at a time.
 *
 * A missing, non-numeric or sub-1 `concurrency` is treated as 1. Previously a
 * `NaN`/`undefined` value produced zero workers and silently skipped every
 * item.
 *
 * @param {Array} items - Items to process; each is claimed in index order.
 * @param {number} concurrency - Maximum number of simultaneous worker calls.
 * @param {Function} worker - Async callback invoked as `worker(item, index)`.
 * @returns {Promise<void>} Resolves once all items are processed; rejects with
 *   the first worker rejection.
 */
async function runWithConcurrency(items, concurrency, worker) {
  const requested = Number(concurrency);
  // NaN and values below 1 collapse to a single worker; Infinity stays unbounded.
  const limit = Number.isNaN(requested) || requested < 1 ? 1 : Math.floor(requested);
  const workerCount = Math.min(items.length || 1, limit);

  let nextIndex = 0;
  // Each runner claims the next unclaimed index until none remain.
  const drain = async () => {
    while (nextIndex < items.length) {
      const current = nextIndex;
      nextIndex += 1;
      await worker(items[current], current);
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => drain()));
}
|
|
907
|
+
|
|
908
|
+
/**
 * Create a limiter that allows at most `concurrency` operations to run at once.
 *
 * @param {number} concurrency - Requested limit; passed through
 *   `normalizeUploadConcurrency` and clamped to at least 1.
 * @returns {{concurrency: number, withToken: function(Function): Promise<*>}}
 *   `withToken(operation)` waits for a free slot, runs `operation`, and always
 *   gives the slot back, even when `operation` throws.
 */
function createUploadConcurrencyLimiter(concurrency) {
  const maxConcurrency = Math.max(1, normalizeUploadConcurrency(concurrency));
  let active = 0;
  const waiters = [];

  const acquire = () => {
    if (active < maxConcurrency) {
      active += 1;
      return Promise.resolve();
    }
    // The slot is handed over by `release`, so the waiter does not touch `active`.
    return new Promise((resolve) => {
      waiters.push(resolve);
    });
  };

  const release = () => {
    const next = waiters.shift();
    if (next) {
      // Transfer the slot directly to the oldest waiter. Decrementing first and
      // letting the waiter re-increment later would allow another caller to grab
      // the slot in between and push `active` above the limit.
      next();
      return;
    }
    active = Math.max(0, active - 1);
  };

  return {
    concurrency: maxConcurrency,
    async withToken(operation) {
      await acquire();
      try {
        return await operation();
      } finally {
        release();
      }
    },
  };
}
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
}
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
942
|
+
|
|
943
|
+
/**
 * Serialize uploaded parts into a `<CompleteMultipartUpload>` XML document.
 *
 * Parts are emitted in ascending `partNumber` order. The input array is left
 * untouched (it is copied before sorting) and text content is XML-escaped.
 *
 * @param {Array<{partNumber: number, etag: string}>} parts - Uploaded parts.
 * @returns {string} The XML request body.
 */
function multipartCompleteXml(parts) {
  // Escape the characters that are not allowed verbatim in XML text content.
  const escapeXmlText = (value) => String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");

  const rows = [...parts]
    .sort((a, b) => a.partNumber - b.partNumber)
    .map((part) => (
      `<Part><PartNumber>${escapeXmlText(part.partNumber)}</PartNumber><ETag>${escapeXmlText(part.etag)}</ETag></Part>`
    ))
    .join("");
  return `<CompleteMultipartUpload>${rows}</CompleteMultipartUpload>`;
}
|
|
950
|
+
|
|
951
|
+
/**
 * Upload `file` in parts and then complete the multipart upload.
 *
 * A pool of `uploadConcurrency` workers pulls parts from a shared queue. When
 * a part fails with a retryable error the target concurrency is halved (never
 * below 1), the part is re-queued and the worker backs off before continuing.
 * A non-retryable failure, or exhausting `MAX_MULTIPART_ADAPTIVE_ATTEMPTS`
 * for one part, stops all workers and is thrown after they have drained.
 *
 * @param {object} upload - Multipart instructions: `parts` (each with
 *   `part_number` and `url`), `part_size` and `complete_url`.
 * @param {object} file - Source file; `file.path` selects streamed reads,
 *   otherwise `file.slice(start, end)` is used.
 * @param {object} [options]
 * @param {Function} [options.onProgress] - Called after each completed part.
 * @param {number} [options.uploadConcurrency=DEFAULT_UPLOAD_CONCURRENCY] - Initial worker count.
 * @param {Function|null} [options.onConcurrencyChange=null] - Observer notified
 *   when the target concurrency is reduced; its errors are ignored.
 * @param {object|null} [options.uploadLimiter=null] - Limiter forwarded to each part PUT.
 * @returns {Promise<object>} The response of the completion request.
 * @throws {TranscribeAPIError} When a part fails permanently, a part response
 *   carries no ETag, or the completion request fails.
 */
async function uploadMultipart(upload, file, {
  onProgress,
  uploadConcurrency = DEFAULT_UPLOAD_CONCURRENCY,
  onConcurrencyChange = null,
  uploadLimiter = null,
} = {}) {
  const completed = [];
  const completedPartNumbers = new Set();
  const pendingParts = [...upload.parts];
  const partAttempts = new Map();
  const totalParts = upload.parts.length;
  const concurrency = normalizeUploadConcurrency(uploadConcurrency);
  let targetConcurrency = concurrency;
  let activeWorkers = 0;
  let completedBytes = 0;
  let fatalError = null;

  const workers = Array.from({ length: concurrency }, async (_, workerIndex) => {
    while (true) {
      if (fatalError || completedPartNumbers.size >= totalParts) {
        return;
      }
      // Workers above the (possibly reduced) target idle instead of exiting.
      if (workerIndex >= targetConcurrency) {
        await sleep(MULTIPART_IDLE_WAIT_MS);
        continue;
      }
      const part = pendingParts.shift();
      if (!part) {
        if (!activeWorkers) {
          return;
        }
        // Another worker is still uploading and may re-queue its part on failure.
        await sleep(MULTIPART_IDLE_WAIT_MS);
        continue;
      }

      activeWorkers += 1;
      const start = (part.part_number - 1) * upload.part_size;
      const end = Math.min(file.size, start + upload.part_size);
      const chunkSize = end - start;
      let retryDelayMs = 0;
      try {
        const response = await putObjectWithRetry(
          { url: part.url },
          file.path ? (() => openFileBody(file, start, end)) : file.slice(start, end),
          {
            onProgress: null,
            loadedOffset: 0,
            totalBytes: file.size,
            contentLength: chunkSize,
            progressMeta: {
              uploadType: "multipart",
              partNumber: part.part_number,
              totalParts,
              chunkBytes: chunkSize,
            },
            debugContext: {
              file_name: file?.name || null,
              file_size: file?.size || null,
              range_start: start,
              range_end_exclusive: end,
            },
            uploadLimiter,
          },
        );
        const etag = response.headers.get("ETag") || response.headers.get("etag");
        if (!etag) {
          // Without an ETag the completion request cannot reference this part,
          // so fail here with a precise cause instead of at completion time.
          fatalError = new TranscribeAPIError(
            `Multipart upload part ${part.part_number}/${totalParts} returned no ETag header.`,
            {
              status: response.status ?? null,
              code: "multipart_missing_etag",
              extra: {
                upload_type: "multipart",
                part_number: part.part_number,
                total_parts: totalParts,
                chunk_bytes: chunkSize,
                completed_bytes_before_failure: completedBytes,
                file_name: file?.name || null,
                file_size: file?.size || null,
                range_start: start,
                range_end_exclusive: end,
              },
            },
          );
          return;
        }
        completed.push({
          partNumber: part.part_number,
          etag,
        });
        completedPartNumbers.add(part.part_number);
        completedBytes += chunkSize;
        if (onProgress) {
          onProgress({
            loaded: completedBytes,
            total: file.size,
            uploadType: "multipart",
            partNumber: part.part_number,
            totalParts,
            chunkBytes: chunkSize,
            multipartConcurrency: targetConcurrency,
          });
        }
      } catch (error) {
        const attempts = (partAttempts.get(part.part_number) || 0) + 1;
        partAttempts.set(part.part_number, attempts);
        if (isRetryableError(error) && attempts < MAX_MULTIPART_ADAPTIVE_ATTEMPTS) {
          const previousConcurrency = targetConcurrency;
          targetConcurrency = Math.max(1, Math.floor(targetConcurrency / 2));
          pendingParts.push(part);
          if (onConcurrencyChange && targetConcurrency !== previousConcurrency) {
            try {
              onConcurrencyChange({
                previousConcurrency,
                nextConcurrency: targetConcurrency,
                partNumber: part.part_number,
                attempts,
                error: extractErrorInfo(error),
              });
            } catch {
              // Ignore observer failures.
            }
          }
          // Exponential backoff: 1s, 2s, 4s, then 8s for every later attempt.
          retryDelayMs = Math.min(10000, 1000 * (2 ** Math.min(attempts - 1, 3)));
        } else {
          fatalError = new TranscribeAPIError(
            `Multipart upload failed for part ${part.part_number}/${totalParts}.`,
            {
              status: error?.status || null,
              code: error?.code || "multipart_part_upload_failed",
              extra: {
                cause_error: error?.error || error?.message || String(error),
                cause_code: error?.code || error?.cause?.code || null,
                cause_status: error?.status || null,
                upload_type: "multipart",
                part_number: part.part_number,
                total_parts: totalParts,
                chunk_bytes: chunkSize,
                completed_bytes_before_failure: completedBytes,
                multipart_concurrency: targetConcurrency,
                file_name: file?.name || null,
                file_size: file?.size || null,
                range_start: start,
                range_end_exclusive: end,
                attempts,
              },
            },
          );
        }
      } finally {
        // Released before any backoff sleep so idle workers see an accurate count.
        activeWorkers -= 1;
      }
      if (retryDelayMs > 0) {
        await sleep(retryDelayMs);
      }
    }
  });

  await Promise.all(workers);
  if (fatalError) {
    throw fatalError;
  }

  const completeResponse = await retry(async () => {
    const response = await fetch(upload.complete_url, {
      method: "POST",
      headers: { "Content-Type": "application/xml" },
      body: multipartCompleteXml(completed),
    });
    if (!response.ok) {
      throw new TranscribeAPIError(`R2 multipart complete failed with HTTP ${response.status}.`, {
        status: response.status,
        code: "multipart_complete_failed",
      });
    }
    return response;
  });
  return completeResponse;
}
|
|
1111
|
+
|
|
1112
|
+
/**
 * Upload `file` according to server-provided instructions.
 *
 * Instructions with `type === "multipart"` are delegated to `uploadMultipart`;
 * anything else is sent as one PUT through `putObjectWithRetry`.
 *
 * @param {object} upload - Upload instructions.
 * @param {object} file - File to upload; `file.path` selects a lazily opened stream body.
 * @param {object} [options={}] - `onProgress` and `uploadLimiter` are used for
 *   the single-PUT path; the whole object is forwarded for multipart uploads.
 * @returns {Promise<object>} The final upload response.
 */
async function uploadUsingInstructions(upload, file, options = {}) {
  if (upload.type === "multipart") {
    return uploadMultipart(upload, file, options);
  }

  // Path-backed files are opened per attempt; other bodies are sent as-is.
  const body = file.path ? (() => openFileBody(file)) : file;
  const putOptions = {
    onProgress: options.onProgress,
    loadedOffset: 0,
    totalBytes: file.size,
    contentLength: file.size,
    progressMeta: {
      uploadType: "single_put",
      chunkBytes: file.size,
    },
    debugContext: {
      file_name: file?.name || null,
      file_size: file?.size || null,
    },
    uploadLimiter: options.uploadLimiter || null,
  };
  return putObjectWithRetry(upload, body, putOptions);
}
|
|
1133
|
+
|
|
1134
|
+
/**
 * Validate a batch against the file-count and total-size limits.
 *
 * @param {number} totalFiles - Number of files in the batch.
 * @param {number} totalSizeBytes - Combined size of the batch in bytes.
 * @throws {TranscribeAPIError} `too_many_files` when the count exceeds
 *   `MAX_BATCH_FILES`; `batch_too_large` when the size exceeds
 *   `MAX_BATCH_TOTAL_SIZE_BYTES`. The count is checked first.
 */
function assertBatchLimits(totalFiles, totalSizeBytes) {
  const hasTooManyFiles = totalFiles > MAX_BATCH_FILES;
  if (hasTooManyFiles) {
    throw new TranscribeAPIError(`Batch jobs support up to ${MAX_BATCH_FILES} files.`, {
      code: "too_many_files",
    });
  }

  const isTooLarge = totalSizeBytes > MAX_BATCH_TOTAL_SIZE_BYTES;
  if (isTooLarge) {
    throw new TranscribeAPIError("Batch jobs support up to 10GB total.", {
      code: "batch_too_large",
    });
  }
}
|
|
1146
|
+
|
|
1147
|
+
class BatchJob {
|
|
1148
|
+
constructor(client, files, createResponse, options) {
|
|
1149
|
+
this.client = client;
|
|
1150
|
+
this.files = files;
|
|
1151
|
+
this.createResponse = createResponse;
|
|
1152
|
+
this.jobId = createResponse.job_id;
|
|
1153
|
+
this.jobStatus = createResponse.job_status;
|
|
1154
|
+
this.model = createResponse.model || options.model || null;
|
|
1155
|
+
this.uploadsByReferenceId = new Map(
|
|
1156
|
+
normalizeResponseUploads(createResponse).map((entry) => [entry.reference_id, entry.upload]),
|
|
1157
|
+
);
|
|
1158
|
+
this.options = options;
|
|
1159
|
+
}
|
|
1160
|
+
|
|
1341
1161
|
async upload({ onProgress } = {}) {
|
|
1342
1162
|
if (!this.uploadsByReferenceId.size) {
|
|
1343
1163
|
return this.createResponse;
|
|
@@ -1347,6 +1167,7 @@ class BatchJob {
|
|
|
1347
1167
|
const totalBytes = this.files.reduce((sum, item) => sum + Number(item?.file?.size || 0), 0);
|
|
1348
1168
|
const loadedByReferenceId = new Map();
|
|
1349
1169
|
const uploadableFiles = this.files.filter((item) => item.file);
|
|
1170
|
+
const uploadLimiter = this.options.uploadLimiter || createUploadConcurrencyLimiter(this.options.uploadConcurrency);
|
|
1350
1171
|
|
|
1351
1172
|
emitProgress(progress, {
|
|
1352
1173
|
event: "upload_started",
|
|
@@ -1356,17 +1177,16 @@ class BatchJob {
|
|
|
1356
1177
|
totalBytes,
|
|
1357
1178
|
});
|
|
1358
1179
|
|
|
1359
|
-
|
|
1360
|
-
if (!item.file) {
|
|
1361
|
-
continue;
|
|
1362
|
-
}
|
|
1180
|
+
await runWithConcurrency(uploadableFiles, this.options.uploadConcurrency, async (item) => {
|
|
1363
1181
|
const upload = this.uploadsByReferenceId.get(item.referenceId);
|
|
1364
|
-
if (!upload) {
|
|
1365
|
-
throw new TranscribeAPIError(`Missing upload instructions for \`${item.referenceId}\`.`, {
|
|
1366
|
-
code: "missing_upload_instructions",
|
|
1367
|
-
});
|
|
1182
|
+
if (!upload) {
|
|
1183
|
+
throw new TranscribeAPIError(`Missing upload instructions for \`${item.referenceId}\`.`, {
|
|
1184
|
+
code: "missing_upload_instructions",
|
|
1185
|
+
});
|
|
1368
1186
|
}
|
|
1369
1187
|
await uploadUsingInstructions(upload, item.file, {
|
|
1188
|
+
uploadConcurrency: this.options.uploadConcurrency,
|
|
1189
|
+
uploadLimiter,
|
|
1370
1190
|
onProgress: (event) => {
|
|
1371
1191
|
const fileLoaded = Math.max(0, Math.min(Number(event?.loaded || 0), Number(item.file.size || 0)));
|
|
1372
1192
|
loadedByReferenceId.set(item.referenceId, fileLoaded);
|
|
@@ -1385,7 +1205,7 @@ class BatchJob {
|
|
|
1385
1205
|
});
|
|
1386
1206
|
},
|
|
1387
1207
|
});
|
|
1388
|
-
}
|
|
1208
|
+
});
|
|
1389
1209
|
const completion = await this.client.jobs.complete(this.jobId);
|
|
1390
1210
|
emitProgress(progress, {
|
|
1391
1211
|
event: "upload_completed",
|
|
@@ -1395,17 +1215,17 @@ class BatchJob {
|
|
|
1395
1215
|
batchTotal: totalBytes,
|
|
1396
1216
|
totalFiles: this.files.length,
|
|
1397
1217
|
uploadFiles: uploadableFiles.length,
|
|
1398
|
-
suppressLog: Boolean(this.client.polling),
|
|
1218
|
+
suppressLog: Boolean(this.client.polling) && !TERMINAL_JOB_STATUSES.has(String(completion?.job_status || "")),
|
|
1399
1219
|
});
|
|
1400
1220
|
return completion;
|
|
1401
1221
|
}
|
|
1402
1222
|
}
|
|
1403
|
-
|
|
1223
|
+
|
|
1404
1224
|
export class TranscribeAPI {
|
|
1405
1225
|
constructor({
|
|
1406
1226
|
apiKey,
|
|
1407
1227
|
baseUrl = DEFAULT_BASE_URL,
|
|
1408
|
-
|
|
1228
|
+
uploadConcurrency = DEFAULT_UPLOAD_CONCURRENCY,
|
|
1409
1229
|
showLogs = false,
|
|
1410
1230
|
logger = console,
|
|
1411
1231
|
polling = null,
|
|
@@ -1416,13 +1236,10 @@ export class TranscribeAPI {
|
|
|
1416
1236
|
|
|
1417
1237
|
this.apiKey = apiKey;
|
|
1418
1238
|
this.baseUrl = baseUrl.replace(/\/+$/, "");
|
|
1419
|
-
this.
|
|
1239
|
+
this.uploadConcurrency = normalizeUploadConcurrency(uploadConcurrency);
|
|
1420
1240
|
this.showLogs = Boolean(showLogs);
|
|
1421
1241
|
this.logger = logger || console;
|
|
1422
1242
|
this.polling = normalizePollingConfig(polling);
|
|
1423
|
-
this.batch = {
|
|
1424
|
-
transcribe: (options) => this.transcribeMany(options),
|
|
1425
|
-
};
|
|
1426
1243
|
this.jobs = {
|
|
1427
1244
|
createBigFile: (options) => this.createBigFileJob(options),
|
|
1428
1245
|
createBatch: (options) => this.createBatchJob(options),
|
|
@@ -1432,29 +1249,20 @@ export class TranscribeAPI {
|
|
|
1432
1249
|
result: (jobId) => this.requestJson(`/transcribe/${jobId}`),
|
|
1433
1250
|
};
|
|
1434
1251
|
}
|
|
1435
|
-
|
|
1436
|
-
async requestJson(path, { method = "GET", body = null, retryable = false } = {}) {
|
|
1437
|
-
const run = async () => {
|
|
1438
|
-
const response = await fetch(`${this.baseUrl}${path}`, {
|
|
1439
|
-
method,
|
|
1440
|
-
headers: {
|
|
1441
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
1442
|
-
...(body ? { "Content-Type": "application/json" } : {}),
|
|
1443
|
-
},
|
|
1444
|
-
body: body ? JSON.stringify(body) : null,
|
|
1445
|
-
});
|
|
1446
|
-
return parseApiResponse(response);
|
|
1447
|
-
};
|
|
1448
|
-
return retryable ? retry(run) : run();
|
|
1449
|
-
}
|
|
1450
|
-
|
|
1451
|
-
async refreshBigFileUpload(jobId, { upload } = {}) {
|
|
1452
|
-
const response = await this.requestJson(`/jobs/${jobId}/resume-upload`, {
|
|
1453
|
-
method: "POST",
|
|
1454
|
-
body: { upload },
|
|
1455
|
-
retryable: true,
|
|
1456
|
-
});
|
|
1457
|
-
return response.upload;
|
|
1252
|
+
|
|
1253
|
+
async requestJson(path, { method = "GET", body = null, retryable = false } = {}) {
|
|
1254
|
+
const run = async () => {
|
|
1255
|
+
const response = await fetch(`${this.baseUrl}${path}`, {
|
|
1256
|
+
method,
|
|
1257
|
+
headers: {
|
|
1258
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
1259
|
+
...(body ? { "Content-Type": "application/json" } : {}),
|
|
1260
|
+
},
|
|
1261
|
+
body: body ? JSON.stringify(body) : null,
|
|
1262
|
+
});
|
|
1263
|
+
return parseApiResponse(response);
|
|
1264
|
+
};
|
|
1265
|
+
return retryable ? retry(run) : run();
|
|
1458
1266
|
}
|
|
1459
1267
|
|
|
1460
1268
|
async waitForJobCompletion(jobId, {
|
|
@@ -1506,120 +1314,104 @@ export class TranscribeAPI {
|
|
|
1506
1314
|
});
|
|
1507
1315
|
return lastJob;
|
|
1508
1316
|
}
|
|
1509
|
-
|
|
1317
|
+
|
|
1510
1318
|
async transcribe({
|
|
1511
|
-
|
|
1319
|
+
files,
|
|
1512
1320
|
webhookUrl,
|
|
1513
1321
|
onProgress,
|
|
1514
1322
|
showLogs,
|
|
1515
1323
|
logger,
|
|
1516
1324
|
language,
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
initialPrompt,
|
|
1520
|
-
vttGranularity,
|
|
1521
|
-
exclude,
|
|
1522
|
-
multipartConcurrency,
|
|
1325
|
+
exclude,
|
|
1326
|
+
uploadConcurrency,
|
|
1523
1327
|
} = {}) {
|
|
1524
1328
|
const progress = composeProgressHandler({
|
|
1525
1329
|
onProgress,
|
|
1526
1330
|
showLogs: showLogs ?? this.showLogs,
|
|
1527
1331
|
logger: logger ?? this.logger,
|
|
1528
1332
|
});
|
|
1529
|
-
if (
|
|
1530
|
-
|
|
1531
|
-
file,
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1333
|
+
if (isFilesInput(files)) {
|
|
1334
|
+
if (files.length === 0) {
|
|
1335
|
+
throw new TranscribeAPIError("Transcribe requires at least one file.", {
|
|
1336
|
+
code: "invalid_files",
|
|
1337
|
+
});
|
|
1338
|
+
}
|
|
1339
|
+
if (files.length > 1) {
|
|
1340
|
+
for (let index = 0; index < files.length; index += 1) {
|
|
1341
|
+
if (!String(files[index]?.reference_id || "").trim()) {
|
|
1342
|
+
throw new TranscribeAPIError(`files[${index}].reference_id is required when sending multiple files.`, {
|
|
1343
|
+
code: "missing_reference_id",
|
|
1344
|
+
});
|
|
1345
|
+
}
|
|
1346
|
+
}
|
|
1347
|
+
return this._transcribeAsync({
|
|
1348
|
+
files,
|
|
1349
|
+
webhookUrl,
|
|
1350
|
+
durationEstimateSec: undefined,
|
|
1351
|
+
onProgress,
|
|
1352
|
+
showLogs,
|
|
1542
1353
|
logger: logger ?? this.logger,
|
|
1543
|
-
|
|
1354
|
+
uploadConcurrency,
|
|
1355
|
+
language,
|
|
1356
|
+
exclude,
|
|
1544
1357
|
});
|
|
1545
1358
|
}
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
const
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1359
|
+
|
|
1360
|
+
const normalizedItem = normalizeBatchInputItem(files[0], 0);
|
|
1361
|
+
const normalizedSingleFile = normalizedItem.url ? null : await normalizeFile(normalizedItem.file);
|
|
1362
|
+
const estimatedDurationSec = normalizedItem.url
|
|
1363
|
+
? normalizedItem.durationEstimateSec
|
|
1364
|
+
: (normalizedItem.durationEstimateSec || await estimateDurationFromFile(normalizedSingleFile));
|
|
1365
|
+
const effectiveLanguage = normalizedItem.hasLanguage ? normalizedItem.language : normalizeLanguageCode(language);
|
|
1366
|
+
const isAsync = files.length > 1
|
|
1367
|
+
|| Boolean(normalizedItem.url)
|
|
1368
|
+
|| Boolean(webhookUrl)
|
|
1369
|
+
|| (
|
|
1370
|
+
normalizedSingleFile
|
|
1371
|
+
&& (
|
|
1372
|
+
normalizedSingleFile.size > MAX_SYNC_AUDIO_BYTES
|
|
1373
|
+
|| estimatedDurationSec > MAX_SYNC_AUDIO_SECONDS
|
|
1374
|
+
)
|
|
1375
|
+
);
|
|
1376
|
+
if (isAsync) {
|
|
1377
|
+
return this._transcribeAsync({
|
|
1378
|
+
files,
|
|
1379
|
+
webhookUrl,
|
|
1380
|
+
durationEstimateSec: estimatedDurationSec,
|
|
1381
|
+
onProgress,
|
|
1382
|
+
showLogs,
|
|
1565
1383
|
logger: logger ?? this.logger,
|
|
1566
|
-
|
|
1384
|
+
uploadConcurrency,
|
|
1385
|
+
language,
|
|
1386
|
+
exclude,
|
|
1567
1387
|
});
|
|
1568
1388
|
}
|
|
1569
|
-
|
|
1389
|
+
|
|
1390
|
+
return this.transcribeDirect({
|
|
1391
|
+
file: normalizedSingleFile,
|
|
1392
|
+
referenceId: normalizedItem.referenceId,
|
|
1393
|
+
language: effectiveLanguage,
|
|
1394
|
+
exclude,
|
|
1395
|
+
webhookUrl,
|
|
1396
|
+
showLogs: showLogs ?? this.showLogs,
|
|
1397
|
+
logger: logger ?? this.logger,
|
|
1398
|
+
});
|
|
1570
1399
|
}
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
initialPrompt,
|
|
1578
|
-
vttGranularity,
|
|
1579
|
-
exclude,
|
|
1580
|
-
});
|
|
1581
|
-
}
|
|
1582
|
-
|
|
1583
|
-
async transcribeDirect({
|
|
1584
|
-
file,
|
|
1585
|
-
language,
|
|
1586
|
-
task,
|
|
1587
|
-
vadFilter,
|
|
1588
|
-
initialPrompt,
|
|
1589
|
-
vttGranularity,
|
|
1590
|
-
exclude,
|
|
1591
|
-
} = {}) {
|
|
1592
|
-
const normalizedFile = await normalizeFile(file);
|
|
1593
|
-
const directFile = normalizedFile.path
|
|
1594
|
-
? makeFile([await normalizedFile.readSlice(0, normalizedFile.size)], normalizedFile.name, normalizedFile.type)
|
|
1595
|
-
: normalizedFile;
|
|
1596
|
-
const form = new FormData();
|
|
1597
|
-
form.set("file", directFile, directFile.name);
|
|
1598
|
-
if (language) form.set("language", language);
|
|
1599
|
-
if (task) form.set("task", task);
|
|
1600
|
-
if (vadFilter !== undefined) form.set("vad_filter", String(Boolean(vadFilter)));
|
|
1601
|
-
if (initialPrompt) form.set("initial_prompt", initialPrompt);
|
|
1602
|
-
if (vttGranularity) form.set("vtt_granularity", vttGranularity);
|
|
1603
|
-
if (exclude) form.set("exclude", Array.isArray(exclude) ? exclude.join(",") : exclude);
|
|
1604
|
-
|
|
1605
|
-
const response = await fetch(`${this.baseUrl}/transcribe`, {
|
|
1606
|
-
method: "POST",
|
|
1607
|
-
headers: {
|
|
1608
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
1609
|
-
},
|
|
1610
|
-
body: form,
|
|
1611
|
-
});
|
|
1612
|
-
return parseApiResponse(response);
|
|
1613
|
-
}
|
|
1614
|
-
|
|
1615
|
-
async transcribeMany({
|
|
1400
|
+
throw new TranscribeAPIError("`transcribe` requires a `files` array.", {
|
|
1401
|
+
code: "invalid_files",
|
|
1402
|
+
});
|
|
1403
|
+
}
|
|
1404
|
+
|
|
1405
|
+
async _transcribeAsync({
|
|
1616
1406
|
files,
|
|
1617
1407
|
webhookUrl,
|
|
1618
1408
|
durationEstimateSec,
|
|
1619
1409
|
onProgress,
|
|
1620
1410
|
showLogs,
|
|
1621
1411
|
logger,
|
|
1622
|
-
|
|
1412
|
+
uploadConcurrency,
|
|
1413
|
+
language,
|
|
1414
|
+
exclude,
|
|
1623
1415
|
} = {}) {
|
|
1624
1416
|
const progress = composeProgressHandler({
|
|
1625
1417
|
onProgress,
|
|
@@ -1632,7 +1424,9 @@ export class TranscribeAPI {
|
|
|
1632
1424
|
durationEstimateSec,
|
|
1633
1425
|
onProgress: progress,
|
|
1634
1426
|
showLogs: false,
|
|
1635
|
-
|
|
1427
|
+
uploadConcurrency,
|
|
1428
|
+
language,
|
|
1429
|
+
exclude,
|
|
1636
1430
|
});
|
|
1637
1431
|
const result = await job.upload({ onProgress: progress });
|
|
1638
1432
|
if (this.polling && !TERMINAL_JOB_STATUSES.has(String(result?.job_status || ""))) {
|
|
@@ -1643,101 +1437,102 @@ export class TranscribeAPI {
|
|
|
1643
1437
|
initialJob: result,
|
|
1644
1438
|
});
|
|
1645
1439
|
}
|
|
1440
|
+
if ((showLogs ?? this.showLogs) && TERMINAL_JOB_STATUSES.has(String(result?.job_status || ""))) {
|
|
1441
|
+
logTerminalAsyncResult(result, logger ?? this.logger);
|
|
1442
|
+
}
|
|
1646
1443
|
return result;
|
|
1647
1444
|
}
|
|
1648
1445
|
|
|
1649
|
-
async
|
|
1446
|
+
async transcribeDirect({
|
|
1650
1447
|
file,
|
|
1448
|
+
referenceId,
|
|
1449
|
+
language,
|
|
1450
|
+
exclude,
|
|
1451
|
+
webhookUrl,
|
|
1452
|
+
showLogs = false,
|
|
1453
|
+
logger = console,
|
|
1454
|
+
} = {}) {
|
|
1455
|
+
const normalizedFile = await normalizeFile(file);
|
|
1456
|
+
const directFile = normalizedFile.path
|
|
1457
|
+
? makeFile([await normalizedFile.readSlice(0, normalizedFile.size)], normalizedFile.name, normalizedFile.type)
|
|
1458
|
+
: normalizedFile;
|
|
1459
|
+
const form = new FormData();
|
|
1460
|
+
if (String(referenceId || "").trim()) form.set("reference_id", String(referenceId).trim());
|
|
1461
|
+
form.set("file", directFile, directFile.name);
|
|
1462
|
+
if (language) form.set("language", language);
|
|
1463
|
+
if (exclude) form.set("exclude", Array.isArray(exclude) ? exclude.join(",") : exclude);
|
|
1464
|
+
if (webhookUrl) form.set("webhook_url", webhookUrl);
|
|
1465
|
+
|
|
1466
|
+
const response = await fetch(`${this.baseUrl}/transcribe`, {
|
|
1467
|
+
method: "POST",
|
|
1468
|
+
headers: {
|
|
1469
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
1470
|
+
},
|
|
1471
|
+
body: form,
|
|
1472
|
+
});
|
|
1473
|
+
const result = await parseApiResponse(response);
|
|
1474
|
+
if (this.polling && result && typeof result === "object" && result.job_status && !TERMINAL_JOB_STATUSES.has(String(result.job_status))) {
|
|
1475
|
+
return this.waitForJobCompletion(result.job_id, {
|
|
1476
|
+
polling: this.polling,
|
|
1477
|
+
showLogs,
|
|
1478
|
+
logger,
|
|
1479
|
+
initialJob: result,
|
|
1480
|
+
});
|
|
1481
|
+
}
|
|
1482
|
+
if (showLogs && typeof logger?.log === "function") {
|
|
1483
|
+
logger.log(typeof result === "string" ? result : JSON.stringify(result, null, 2));
|
|
1484
|
+
}
|
|
1485
|
+
return result;
|
|
1486
|
+
}
|
|
1487
|
+
|
|
1488
|
+
async transcribeMany({
|
|
1489
|
+
files,
|
|
1651
1490
|
webhookUrl,
|
|
1652
1491
|
durationEstimateSec,
|
|
1653
1492
|
onProgress,
|
|
1654
1493
|
showLogs,
|
|
1655
1494
|
logger,
|
|
1656
|
-
|
|
1495
|
+
uploadConcurrency,
|
|
1496
|
+
language,
|
|
1497
|
+
exclude,
|
|
1657
1498
|
} = {}) {
|
|
1658
|
-
|
|
1499
|
+
return this._transcribeAsync({
|
|
1500
|
+
files,
|
|
1501
|
+
webhookUrl,
|
|
1502
|
+
durationEstimateSec,
|
|
1659
1503
|
onProgress,
|
|
1660
|
-
showLogs
|
|
1504
|
+
showLogs,
|
|
1661
1505
|
logger: logger ?? this.logger,
|
|
1506
|
+
uploadConcurrency,
|
|
1507
|
+
language,
|
|
1508
|
+
exclude,
|
|
1662
1509
|
});
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
const refreshedUpload = await this.refreshBigFileUpload(resumeState.job_id, {
|
|
1687
|
-
upload: resumeState.upload,
|
|
1688
|
-
});
|
|
1689
|
-
const existingJob = await this.requestJson(`/transcribe/${resumeState.job_id}`, { retryable: true });
|
|
1690
|
-
const currentStatus = existingJob?.job_status || "uploading";
|
|
1691
|
-
return new BigFileJob(this, normalizedFile, {
|
|
1692
|
-
job_id: resumeState.job_id,
|
|
1693
|
-
job_status: currentStatus,
|
|
1694
|
-
model: existingJob?.model || resumeState.model || null,
|
|
1695
|
-
upload: currentStatus === "uploading" ? refreshedUpload : null,
|
|
1696
|
-
}, {
|
|
1697
|
-
onProgress: progress,
|
|
1698
|
-
multipartConcurrency: normalizedConcurrency,
|
|
1699
|
-
referenceId: defaultReferenceId(0),
|
|
1700
|
-
resumeState,
|
|
1701
|
-
});
|
|
1702
|
-
} catch (error) {
|
|
1703
|
-
if (error?.status === 404 || error?.code === "not_found") {
|
|
1704
|
-
await deleteMultipartResumeState(normalizedFile);
|
|
1705
|
-
} else {
|
|
1706
|
-
throw error;
|
|
1707
|
-
}
|
|
1708
|
-
}
|
|
1709
|
-
}
|
|
1710
|
-
const estimate = durationEstimateSec || await estimateDurationFromFile(normalizedFile);
|
|
1711
|
-
const response = await this.requestJson("/transcribe", {
|
|
1712
|
-
method: "POST",
|
|
1713
|
-
body: {
|
|
1714
|
-
files: [
|
|
1715
|
-
estimate > MAX_SYNC_AUDIO_SECONDS || normalizedFile.size > MAX_SYNC_AUDIO_BYTES
|
|
1716
|
-
? uploadDescriptorForFile(defaultReferenceId(0), normalizedFile)
|
|
1717
|
-
: { reference_id: defaultReferenceId(0) },
|
|
1718
|
-
],
|
|
1719
|
-
...(webhookUrl ? { webhook_url: webhookUrl } : {}),
|
|
1720
|
-
},
|
|
1721
|
-
retryable: true,
|
|
1722
|
-
});
|
|
1723
|
-
emitProgress(progress, {
|
|
1724
|
-
event: "upload_urls_received",
|
|
1725
|
-
jobId: response.job_id,
|
|
1726
|
-
jobStatus: response.job_status,
|
|
1727
|
-
uploadCount: normalizeResponseUploads(response).length,
|
|
1728
|
-
totalFiles: 1,
|
|
1510
|
+
}
|
|
1511
|
+
|
|
1512
|
+
async createBigFileJob({
|
|
1513
|
+
file,
|
|
1514
|
+
webhookUrl,
|
|
1515
|
+
durationEstimateSec,
|
|
1516
|
+
onProgress,
|
|
1517
|
+
showLogs,
|
|
1518
|
+
logger,
|
|
1519
|
+
uploadConcurrency,
|
|
1520
|
+
language,
|
|
1521
|
+
exclude,
|
|
1522
|
+
} = {}) {
|
|
1523
|
+
return this.createBatchJob({
|
|
1524
|
+
files: [file],
|
|
1525
|
+
webhookUrl,
|
|
1526
|
+
durationEstimateSec,
|
|
1527
|
+
onProgress,
|
|
1528
|
+
showLogs,
|
|
1529
|
+
logger: logger ?? this.logger,
|
|
1530
|
+
uploadConcurrency,
|
|
1531
|
+
language,
|
|
1532
|
+
exclude,
|
|
1729
1533
|
});
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
multipartConcurrency: normalizedConcurrency,
|
|
1733
|
-
referenceId: defaultReferenceId(0),
|
|
1734
|
-
});
|
|
1735
|
-
if (uploadFromResponse(response, defaultReferenceId(0))?.type === "multipart" && isNodePathFile(normalizedFile)) {
|
|
1736
|
-
await job.persistResumeState([]);
|
|
1737
|
-
}
|
|
1738
|
-
return job;
|
|
1739
|
-
}
|
|
1740
|
-
|
|
1534
|
+
}
|
|
1535
|
+
|
|
1741
1536
|
async createBatchJob({
|
|
1742
1537
|
files,
|
|
1743
1538
|
webhookUrl,
|
|
@@ -1745,56 +1540,69 @@ export class TranscribeAPI {
|
|
|
1745
1540
|
onProgress,
|
|
1746
1541
|
showLogs,
|
|
1747
1542
|
logger,
|
|
1748
|
-
|
|
1543
|
+
uploadConcurrency,
|
|
1544
|
+
language,
|
|
1545
|
+
exclude,
|
|
1749
1546
|
} = {}) {
|
|
1750
1547
|
const progress = composeProgressHandler({
|
|
1751
1548
|
onProgress,
|
|
1752
1549
|
showLogs: showLogs ?? this.showLogs,
|
|
1753
1550
|
logger: logger ?? this.logger,
|
|
1754
1551
|
});
|
|
1552
|
+
const normalizedLanguage = normalizeLanguageCode(language);
|
|
1755
1553
|
if (!Array.isArray(files) || files.length === 0) {
|
|
1756
1554
|
throw new TranscribeAPIError("Batch upload requires at least one file.", { code: "invalid_files" });
|
|
1757
1555
|
}
|
|
1758
|
-
assertBatchLimits(files.length, 0);
|
|
1759
|
-
const normalizedItems = files.map((item, index) => normalizeBatchInputItem(item, index));
|
|
1760
|
-
|
|
1761
|
-
const normalizedBatchItems = [];
|
|
1762
|
-
for (let index = 0; index < normalizedItems.length; index += 1) {
|
|
1763
|
-
const item = normalizedItems[index];
|
|
1764
|
-
if (isRemoteBatchItem(item)) {
|
|
1765
|
-
normalizedBatchItems.push({
|
|
1766
|
-
file: null,
|
|
1767
|
-
referenceId: item.referenceId,
|
|
1768
|
-
url: item.url,
|
|
1769
|
-
durationEstimateSec: item.durationEstimateSec || durationEstimateSec || null,
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1556
|
+
assertBatchLimits(files.length, 0);
|
|
1557
|
+
const normalizedItems = files.map((item, index) => normalizeBatchInputItem(item, index));
|
|
1558
|
+
|
|
1559
|
+
const normalizedBatchItems = [];
|
|
1560
|
+
for (let index = 0; index < normalizedItems.length; index += 1) {
|
|
1561
|
+
const item = normalizedItems[index];
|
|
1562
|
+
if (isRemoteBatchItem(item)) {
|
|
1563
|
+
normalizedBatchItems.push({
|
|
1564
|
+
file: null,
|
|
1565
|
+
referenceId: item.referenceId,
|
|
1566
|
+
url: item.url,
|
|
1567
|
+
durationEstimateSec: item.durationEstimateSec || durationEstimateSec || null,
|
|
1568
|
+
hasLanguage: item.hasLanguage,
|
|
1569
|
+
language: item.language,
|
|
1570
|
+
});
|
|
1571
|
+
continue;
|
|
1572
|
+
}
|
|
1573
|
+
|
|
1574
|
+
const file = await normalizeFile(item.file, `file_${String(index + 1).padStart(6, "0")}.mp3`);
|
|
1575
|
+
assertSupportedBatchFormat(file);
|
|
1576
|
+
normalizedBatchItems.push({
|
|
1577
|
+
file,
|
|
1578
|
+
referenceId: item.referenceId || defaultReferenceId(index),
|
|
1579
|
+
url: null,
|
|
1580
|
+
durationEstimateSec: item.durationEstimateSec || await estimateDurationFromFile(file),
|
|
1581
|
+
hasLanguage: item.hasLanguage,
|
|
1582
|
+
language: item.language,
|
|
1583
|
+
});
|
|
1584
|
+
}
|
|
1585
|
+
|
|
1586
|
+
const totalSizeBytes = normalizedBatchItems.reduce((total, item) => total + Number(item.file?.size || 0), 0);
|
|
1587
|
+
assertBatchLimits(normalizedBatchItems.length, totalSizeBytes);
|
|
1786
1588
|
const response = await this.requestJson("/transcribe", {
|
|
1787
1589
|
method: "POST",
|
|
1788
1590
|
body: {
|
|
1789
|
-
files: normalizedBatchItems.map((item) => (
|
|
1790
|
-
item.url
|
|
1791
|
-
? {
|
|
1792
|
-
reference_id: item.referenceId,
|
|
1793
|
-
url: item.url,
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1591
|
+
files: normalizedBatchItems.map((item) => (
|
|
1592
|
+
item.url
|
|
1593
|
+
? {
|
|
1594
|
+
...(item.referenceId ? { reference_id: item.referenceId } : {}),
|
|
1595
|
+
url: item.url,
|
|
1596
|
+
...(item.hasLanguage ? { language: item.language } : {}),
|
|
1597
|
+
}
|
|
1598
|
+
: {
|
|
1599
|
+
...uploadDescriptorForFile(item.referenceId, item.file),
|
|
1600
|
+
...(item.hasLanguage ? { language: item.language } : {}),
|
|
1601
|
+
}
|
|
1602
|
+
)),
|
|
1603
|
+
...(normalizedLanguage ? { language: normalizedLanguage } : {}),
|
|
1604
|
+
...(webhookUrl ? { webhook_url: webhookUrl } : {}),
|
|
1605
|
+
...(exclude ? { exclude: Array.isArray(exclude) ? exclude.join(",") : exclude } : {}),
|
|
1798
1606
|
},
|
|
1799
1607
|
retryable: true,
|
|
1800
1608
|
});
|
|
@@ -1808,9 +1616,9 @@ export class TranscribeAPI {
|
|
|
1808
1616
|
return new BatchJob(this, normalizedBatchItems, response, {
|
|
1809
1617
|
onProgress: progress,
|
|
1810
1618
|
webhookUrl,
|
|
1811
|
-
|
|
1619
|
+
uploadConcurrency: normalizeUploadConcurrency(uploadConcurrency ?? this.uploadConcurrency),
|
|
1812
1620
|
});
|
|
1813
|
-
}
|
|
1814
|
-
}
|
|
1815
|
-
|
|
1816
|
-
export default TranscribeAPI;
|
|
1621
|
+
}
|
|
1622
|
+
}
|
|
1623
|
+
|
|
1624
|
+
export default TranscribeAPI;
|