@arke-institute/sdk 0.1.0 → 0.1.1

This diff shows the changes between publicly released versions of the package as they appear in the supported public registries, and is provided for informational purposes only.
@@ -0,0 +1,1645 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __esm = (fn, res) => function __init() {
9
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
+ };
11
+ var __export = (target, all) => {
12
+ for (var name in all)
13
+ __defProp(target, name, { get: all[name], enumerable: true });
14
+ };
15
+ var __copyProps = (to, from, except, desc) => {
16
+ if (from && typeof from === "object" || typeof from === "function") {
17
+ for (let key of __getOwnPropNames(from))
18
+ if (!__hasOwnProp.call(to, key) && key !== except)
19
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
20
+ }
21
+ return to;
22
+ };
23
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
24
+ // If the importer is in node compatibility mode or this is not an ESM
25
+ // file that has been converted to a CommonJS file using a Babel-
26
+ // compatible transform (i.e. "__esModule" has not been set), then set
27
+ // "default" to the CommonJS "module.exports" for node compatibility.
28
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
29
+ mod
30
+ ));
31
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
32
+
33
+ // src/upload/utils/errors.ts
34
+ function isRetryableError(error) {
35
+ if (error instanceof NetworkError) {
36
+ return true;
37
+ }
38
+ if (error instanceof WorkerAPIError) {
39
+ return error.statusCode ? error.statusCode >= 500 : false;
40
+ }
41
+ if (error instanceof UploadError) {
42
+ if (error.statusCode) {
43
+ return error.statusCode >= 500 || error.statusCode === 429;
44
+ }
45
+ return false;
46
+ }
47
+ if (error.code === "ECONNRESET" || error.code === "ETIMEDOUT" || error.code === "ENOTFOUND" || error.code === "ECONNREFUSED") {
48
+ return true;
49
+ }
50
+ return false;
51
+ }
52
+ var WorkerAPIError, UploadError, ValidationError, NetworkError, ScanError;
53
+ var init_errors = __esm({
54
+ "src/upload/utils/errors.ts"() {
55
+ "use strict";
56
+ WorkerAPIError = class extends Error {
57
+ constructor(message, statusCode, details) {
58
+ super(message);
59
+ this.statusCode = statusCode;
60
+ this.details = details;
61
+ this.name = "WorkerAPIError";
62
+ Error.captureStackTrace(this, this.constructor);
63
+ }
64
+ };
65
+ UploadError = class extends Error {
66
+ constructor(message, fileName, statusCode, cause) {
67
+ super(message);
68
+ this.fileName = fileName;
69
+ this.statusCode = statusCode;
70
+ this.cause = cause;
71
+ this.name = "UploadError";
72
+ Error.captureStackTrace(this, this.constructor);
73
+ }
74
+ };
75
+ ValidationError = class extends Error {
76
+ constructor(message, field) {
77
+ super(message);
78
+ this.field = field;
79
+ this.name = "ValidationError";
80
+ Error.captureStackTrace(this, this.constructor);
81
+ }
82
+ };
83
+ NetworkError = class extends Error {
84
+ constructor(message, cause) {
85
+ super(message);
86
+ this.cause = cause;
87
+ this.name = "NetworkError";
88
+ Error.captureStackTrace(this, this.constructor);
89
+ }
90
+ };
91
+ ScanError = class extends Error {
92
+ constructor(message, path2) {
93
+ super(message);
94
+ this.path = path2;
95
+ this.name = "ScanError";
96
+ Error.captureStackTrace(this, this.constructor);
97
+ }
98
+ };
99
+ }
100
+ });
101
+
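The classes above are the SDK's public error surface (they are re-exported at the bottom of this file), and isRetryableError encodes the retry policy used by the backoff helper further down: network failures and 5xx responses are retried, 429 is retried for uploads, everything else fails fast. A minimal consumer-side sketch, assuming the classes can be imported from the package root (the exact entry point is an assumption):

    // Hypothetical usage; the import subpath is an assumption.
    import { ArkeUploader, NetworkError, UploadError, ValidationError } from "@arke-institute/sdk";

    async function uploadWithReporting(uploader: ArkeUploader, dir: string) {
      try {
        return await uploader.uploadBatch(dir);
      } catch (error) {
        if (error instanceof ValidationError) {
          // Bad input (path, size, prompts, ...); retrying will not help.
          console.error(`Validation failed (${error.field ?? "no field"}): ${error.message}`);
        } else if (error instanceof UploadError || error instanceof NetworkError) {
          // Transport-level failure; the SDK has already retried internally.
          console.error(`Upload failed after retries: ${error.message}`);
        }
        throw error;
      }
    }
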
102
+ // src/upload/platforms/common.ts
103
+ function detectPlatform() {
104
+ if (typeof process !== "undefined" && process.versions != null && process.versions.node != null) {
105
+ return "node";
106
+ }
107
+ if (typeof window !== "undefined" && typeof document !== "undefined") {
108
+ return "browser";
109
+ }
110
+ return "unknown";
111
+ }
112
+ function normalizePath(p) {
113
+ return p.replace(/\\/g, "/");
114
+ }
115
+ function getExtension(filename) {
116
+ const lastDot = filename.lastIndexOf(".");
117
+ return lastDot === -1 ? "" : filename.slice(lastDot + 1).toLowerCase();
118
+ }
119
+ function getMimeType(filename) {
120
+ const ext = getExtension(filename);
121
+ const mimeTypes = {
122
+ // Images
123
+ "jpg": "image/jpeg",
124
+ "jpeg": "image/jpeg",
125
+ "png": "image/png",
126
+ "gif": "image/gif",
127
+ "webp": "image/webp",
128
+ "tif": "image/tiff",
129
+ "tiff": "image/tiff",
130
+ "bmp": "image/bmp",
131
+ "svg": "image/svg+xml",
132
+ // Documents
133
+ "pdf": "application/pdf",
134
+ "txt": "text/plain",
135
+ "json": "application/json",
136
+ "xml": "application/xml",
137
+ "html": "text/html",
138
+ "htm": "text/html",
139
+ "css": "text/css",
140
+ "js": "application/javascript",
141
+ // Archives
142
+ "zip": "application/zip",
143
+ "tar": "application/x-tar",
144
+ "gz": "application/gzip",
145
+ // Audio
146
+ "mp3": "audio/mpeg",
147
+ "wav": "audio/wav",
148
+ "ogg": "audio/ogg",
149
+ // Video
150
+ "mp4": "video/mp4",
151
+ "webm": "video/webm",
152
+ "mov": "video/quicktime"
153
+ };
154
+ return mimeTypes[ext] || "application/octet-stream";
155
+ }
156
+ var init_common = __esm({
157
+ "src/upload/platforms/common.ts"() {
158
+ "use strict";
159
+ }
160
+ });
161
+
162
+ // src/upload/lib/validation.ts
163
+ function validateFileSize(size) {
164
+ if (size <= 0) {
165
+ throw new ValidationError("File size must be greater than 0");
166
+ }
167
+ if (size > MAX_FILE_SIZE) {
168
+ throw new ValidationError(
169
+ `File size (${formatBytes(size)}) exceeds maximum allowed size (${formatBytes(MAX_FILE_SIZE)})`
170
+ );
171
+ }
172
+ }
173
+ function validateBatchSize(totalSize) {
174
+ if (totalSize > MAX_BATCH_SIZE) {
175
+ throw new ValidationError(
176
+ `Total batch size (${formatBytes(totalSize)}) exceeds maximum allowed size (${formatBytes(MAX_BATCH_SIZE)})`
177
+ );
178
+ }
179
+ }
180
+ function validateLogicalPath(path2) {
181
+ if (!path2.startsWith("/")) {
182
+ throw new ValidationError("Logical path must start with /", "path");
183
+ }
184
+ if (INVALID_PATH_CHARS.test(path2)) {
185
+ throw new ValidationError(
186
+ "Logical path contains invalid characters",
187
+ "path"
188
+ );
189
+ }
190
+ const segments = path2.split("/").filter((s) => s.length > 0);
191
+ if (segments.length === 0 && path2 !== "/") {
192
+ throw new ValidationError("Logical path cannot be empty", "path");
193
+ }
194
+ for (const segment of segments) {
195
+ if (segment === "." || segment === "..") {
196
+ throw new ValidationError(
197
+ "Logical path cannot contain . or .. segments",
198
+ "path"
199
+ );
200
+ }
201
+ }
202
+ }
203
+ function validateRefJson(content, fileName, logger) {
204
+ let parsed;
205
+ try {
206
+ parsed = JSON.parse(content);
207
+ } catch (error) {
208
+ throw new ValidationError(
209
+ `Invalid JSON in ${fileName}: ${error.message}`,
210
+ "ref"
211
+ );
212
+ }
213
+ if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
214
+ throw new ValidationError(
215
+ `${fileName} must contain a JSON object`,
216
+ "ref"
217
+ );
218
+ }
219
+ if (!parsed.url || typeof parsed.url !== "string") {
220
+ throw new ValidationError(
221
+ `${fileName} must contain a 'url' field with a string value`,
222
+ "ref"
223
+ );
224
+ }
225
+ try {
226
+ const url = new URL(parsed.url);
227
+ if (url.protocol !== "http:" && url.protocol !== "https:") {
228
+ throw new Error("URL must use HTTP or HTTPS protocol");
229
+ }
230
+ } catch (error) {
231
+ throw new ValidationError(
232
+ `Invalid URL in ${fileName}: ${error.message}`,
233
+ "ref"
234
+ );
235
+ }
236
+ if (!parsed.type) {
237
+ if (logger) {
238
+ logger.warn(`${fileName}: Missing 'type' field (optional but recommended)`);
239
+ }
240
+ }
241
+ if (parsed.type && OCR_PROCESSABLE_TYPES.includes(parsed.type)) {
242
+ const typeToExt = {
243
+ "image/jpeg": ".jpg",
244
+ "image/png": ".png",
245
+ "image/webp": ".webp"
246
+ };
247
+ const expectedExt = typeToExt[parsed.type];
248
+ if (expectedExt && !fileName.includes(`${expectedExt}.ref.json`)) {
249
+ if (logger) {
250
+ logger.warn(
251
+ `${fileName}: Type is '${parsed.type}' but filename doesn't include '${expectedExt}.ref.json' pattern. This file may not be processed by OCR. Consider renaming to include the extension (e.g., 'photo${expectedExt}.ref.json').`
252
+ );
253
+ }
254
+ }
255
+ }
256
+ }
257
+ function formatBytes(bytes) {
258
+ if (bytes === 0) return "0 B";
259
+ const k = 1024;
260
+ const sizes = ["B", "KB", "MB", "GB", "TB"];
261
+ const i = Math.floor(Math.log(bytes) / Math.log(k));
262
+ return `${(bytes / Math.pow(k, i)).toFixed(2)} ${sizes[i]}`;
263
+ }
264
+ function validateCustomPrompts(prompts) {
265
+ if (!prompts) return;
266
+ const MAX_LENGTH = 5e4;
267
+ const MAX_TOTAL_LENGTH = 75e3;
268
+ const fields = [
269
+ "general",
270
+ "reorganization",
271
+ "pinax",
272
+ "description",
273
+ "cheimarros"
274
+ ];
275
+ let totalLength = 0;
276
+ for (const field of fields) {
277
+ const value = prompts[field];
278
+ if (value) {
279
+ if (value.length > MAX_LENGTH) {
280
+ throw new ValidationError(
281
+ `Custom prompt '${field}' exceeds maximum length of ${MAX_LENGTH} characters (current: ${value.length})`,
282
+ "customPrompts"
283
+ );
284
+ }
285
+ totalLength += value.length;
286
+ }
287
+ }
288
+ if (totalLength > MAX_TOTAL_LENGTH) {
289
+ throw new ValidationError(
290
+ `Total custom prompts length (${totalLength}) exceeds maximum of ${MAX_TOTAL_LENGTH} characters`,
291
+ "customPrompts"
292
+ );
293
+ }
294
+ }
295
+ function validateCustomPromptsLocation(processingConfig) {
296
+ if (!processingConfig) return;
297
+ if ("customPrompts" in processingConfig) {
298
+ throw new ValidationError(
299
+ "customPrompts must be a top-level field in UploaderConfig, not inside the processing config. Use: new ArkeUploader({ customPrompts: {...}, processing: {...} }) NOT: new ArkeUploader({ processing: { customPrompts: {...} } })",
300
+ "processing"
301
+ );
302
+ }
303
+ }
304
+ var MAX_FILE_SIZE, MAX_BATCH_SIZE, INVALID_PATH_CHARS, OCR_PROCESSABLE_TYPES;
305
+ var init_validation = __esm({
306
+ "src/upload/lib/validation.ts"() {
307
+ "use strict";
308
+ init_errors();
309
+ MAX_FILE_SIZE = 5 * 1024 * 1024 * 1024;
310
+ MAX_BATCH_SIZE = 100 * 1024 * 1024 * 1024;
311
+ INVALID_PATH_CHARS = /[<>:"|?*\x00-\x1f]/;
312
+ OCR_PROCESSABLE_TYPES = [
313
+ "image/jpeg",
314
+ "image/png",
315
+ "image/webp"
316
+ ];
317
+ }
318
+ });
319
+
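validateRefJson defines the contract for *.ref.json reference files scanned below: a JSON object with a required http(s) 'url', an optional 'type', and, for the OCR-processable image types, a file name that embeds the matching extension before '.ref.json'. A hypothetical file that satisfies these checks (names and values are illustrative):

    // Contents of a hypothetical "scan-042.jpg.ref.json":
    const refJson = {
      url: "https://example.org/scans/scan-042.jpg", // required; must be HTTP or HTTPS
      type: "image/jpeg",                            // optional but recommended
    };
    // Because the type is OCR-processable, the file name should include ".jpg.ref.json";
    // a name like "scan-042.ref.json" only triggers a warning, not a rejection.
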
320
+ // src/upload/utils/hash.ts
321
+ async function computeFileCID(filePath) {
322
+ const fs2 = await import("fs/promises");
323
+ try {
324
+ const fileBuffer = await fs2.readFile(filePath);
325
+ const hash = await import_sha2.sha256.digest(fileBuffer);
326
+ const cid = import_cid.CID.create(1, raw.code, hash);
327
+ return cid.toString();
328
+ } catch (error) {
329
+ throw new Error(`CID computation failed: ${error.message}`);
330
+ }
331
+ }
332
+ async function computeCIDFromBuffer(data) {
333
+ const hash = await import_sha2.sha256.digest(data);
334
+ const cid = import_cid.CID.create(1, raw.code, hash);
335
+ return cid.toString();
336
+ }
337
+ var import_cid, raw, import_sha2;
338
+ var init_hash = __esm({
339
+ "src/upload/utils/hash.ts"() {
340
+ "use strict";
341
+ import_cid = require("multiformats/cid");
342
+ raw = __toESM(require("multiformats/codecs/raw"), 1);
343
+ import_sha2 = require("multiformats/hashes/sha2");
344
+ }
345
+ });
346
+
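Both helpers produce a CIDv1 over the raw file bytes using a SHA-256 multihash from the multiformats package. The same computation, as a self-contained sketch:

    import { CID } from "multiformats/cid";
    import * as raw from "multiformats/codecs/raw";
    import { sha256 } from "multiformats/hashes/sha2";

    // CIDv1 with the raw codec, matching computeCIDFromBuffer above.
    async function cidForBytes(data: Uint8Array): Promise<string> {
      const digest = await sha256.digest(data);
      return CID.create(1, raw.code, digest).toString();
    }
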
347
+ // src/upload/types/processing.ts
348
+ var DEFAULT_PROCESSING_CONFIG;
349
+ var init_processing = __esm({
350
+ "src/upload/types/processing.ts"() {
351
+ "use strict";
352
+ DEFAULT_PROCESSING_CONFIG = {
353
+ ocr: true,
354
+ describe: true,
355
+ pinax: true
356
+ };
357
+ }
358
+ });
359
+
360
+ // src/upload/platforms/node.ts
361
+ var node_exports = {};
362
+ __export(node_exports, {
363
+ NodeScanner: () => NodeScanner
364
+ });
365
+ var import_promises, import_path, NodeScanner;
366
+ var init_node = __esm({
367
+ "src/upload/platforms/node.ts"() {
368
+ "use strict";
369
+ import_promises = __toESM(require("fs/promises"), 1);
370
+ import_path = __toESM(require("path"), 1);
371
+ init_errors();
372
+ init_validation();
373
+ init_hash();
374
+ init_processing();
375
+ init_common();
376
+ NodeScanner = class {
377
+ /**
378
+ * Scan directory recursively and collect file metadata
379
+ */
380
+ async scanFiles(source, options) {
381
+ const dirPath = Array.isArray(source) ? source[0] : source;
382
+ if (!dirPath || typeof dirPath !== "string") {
383
+ throw new ScanError("Node.js scanner requires a directory path", "");
384
+ }
385
+ const files = [];
386
+ try {
387
+ const stats = await import_promises.default.stat(dirPath);
388
+ if (!stats.isDirectory()) {
389
+ throw new ScanError(`Path is not a directory: ${dirPath}`, dirPath);
390
+ }
391
+ } catch (error) {
392
+ if (error.code === "ENOENT") {
393
+ throw new ScanError(`Directory not found: ${dirPath}`, dirPath);
394
+ }
395
+ throw new ScanError(`Cannot access directory: ${error.message}`, dirPath);
396
+ }
397
+ validateLogicalPath(options.rootPath);
398
+ const globalProcessingConfig = options.defaultProcessingConfig || DEFAULT_PROCESSING_CONFIG;
399
+ async function loadDirectoryProcessingConfig(dirPath2) {
400
+ const configPath = import_path.default.join(dirPath2, ".arke-process.json");
401
+ try {
402
+ const content = await import_promises.default.readFile(configPath, "utf-8");
403
+ return JSON.parse(content);
404
+ } catch (error) {
405
+ if (error.code !== "ENOENT") {
406
+ console.warn(`Error reading processing config ${configPath}: ${error.message}`);
407
+ }
408
+ return null;
409
+ }
410
+ }
411
+ function mergeProcessingConfig(defaults, override) {
412
+ if (!override) return defaults;
413
+ return {
414
+ ocr: override.ocr ?? defaults.ocr,
415
+ describe: override.describe ?? defaults.describe,
416
+ pinax: override.pinax ?? defaults.pinax
417
+ };
418
+ }
419
+ async function walk(currentPath, relativePath = "") {
420
+ const dirConfigOverride = await loadDirectoryProcessingConfig(currentPath);
421
+ const currentProcessingConfig = mergeProcessingConfig(
422
+ globalProcessingConfig,
423
+ dirConfigOverride
424
+ );
425
+ let entries;
426
+ try {
427
+ entries = await import_promises.default.readdir(currentPath, { withFileTypes: true });
428
+ } catch (error) {
429
+ console.warn(`Cannot read directory: ${currentPath}`, error.message);
430
+ return;
431
+ }
432
+ for (const entry of entries) {
433
+ const fullPath = import_path.default.join(currentPath, entry.name);
434
+ const relPath = import_path.default.join(relativePath, entry.name);
435
+ try {
436
+ if (entry.isSymbolicLink()) {
437
+ if (!options.followSymlinks) {
438
+ continue;
439
+ }
440
+ const stats = await import_promises.default.stat(fullPath);
441
+ if (stats.isDirectory()) {
442
+ await walk(fullPath, relPath);
443
+ } else if (stats.isFile()) {
444
+ await processFile(fullPath, relPath, stats.size, currentProcessingConfig);
445
+ }
446
+ continue;
447
+ }
448
+ if (entry.isDirectory()) {
449
+ await walk(fullPath, relPath);
450
+ continue;
451
+ }
452
+ if (entry.isFile()) {
453
+ const stats = await import_promises.default.stat(fullPath);
454
+ await processFile(fullPath, relPath, stats.size, currentProcessingConfig);
455
+ }
456
+ } catch (error) {
457
+ if (error instanceof ScanError && error.message.includes(".ref.json")) {
458
+ throw error;
459
+ }
460
+ console.warn(`Error processing ${fullPath}: ${error.message}`);
461
+ continue;
462
+ }
463
+ }
464
+ }
465
+ async function processFile(fullPath, relativePath, size, processingConfig) {
466
+ const fileName = import_path.default.basename(fullPath);
467
+ if (fileName === ".arke-process.json") {
468
+ return;
469
+ }
470
+ if (fileName.endsWith(".ref.json")) {
471
+ try {
472
+ const content = await import_promises.default.readFile(fullPath, "utf-8");
473
+ validateRefJson(content, fileName, console);
474
+ } catch (error) {
475
+ throw new ScanError(
476
+ `Invalid .ref.json file: ${fileName} - ${error.message}`,
477
+ fullPath
478
+ );
479
+ }
480
+ }
481
+ try {
482
+ validateFileSize(size);
483
+ } catch (error) {
484
+ console.warn(`Skipping file that exceeds size limit: ${fileName}`, error.message);
485
+ return;
486
+ }
487
+ const normalizedRelPath = normalizePath(relativePath);
488
+ const logicalPath = import_path.default.posix.join(options.rootPath, normalizedRelPath);
489
+ try {
490
+ validateLogicalPath(logicalPath);
491
+ } catch (error) {
492
+ console.warn(`Skipping file with invalid logical path: ${logicalPath}`, error.message);
493
+ return;
494
+ }
495
+ const contentType = getMimeType(fileName);
496
+ try {
497
+ await import_promises.default.access(fullPath, import_promises.default.constants.R_OK);
498
+ } catch (error) {
499
+ console.warn(`Skipping unreadable file: ${fullPath}`);
500
+ return;
501
+ }
502
+ let cid;
503
+ try {
504
+ cid = await computeFileCID(fullPath);
505
+ } catch (error) {
506
+ console.warn(`Warning: CID computation failed for ${fullPath}, continuing without CID:`, error.message);
507
+ cid = void 0;
508
+ }
509
+ files.push({
510
+ localPath: fullPath,
511
+ logicalPath,
512
+ fileName,
513
+ size,
514
+ contentType,
515
+ cid,
516
+ processingConfig
517
+ });
518
+ }
519
+ await walk(dirPath);
520
+ files.sort((a, b) => a.size - b.size);
521
+ return files;
522
+ }
523
+ /**
524
+ * Read file contents as ArrayBuffer
525
+ */
526
+ async readFile(file) {
527
+ const buffer = await import_promises.default.readFile(file.localPath);
528
+ return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
529
+ }
530
+ };
531
+ }
532
+ });
533
+
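NodeScanner walks the directory tree, skips '.arke-process.json' control files (they configure processing but are never uploaded), validates any *.ref.json it meets, and merges a per-directory '.arke-process.json' override field-by-field into the batch-level processing config. A hypothetical layout showing the override behaviour (paths and values are illustrative):

    // archive/
    //   letters/page-001.jpg       <- uses the batch defaults
    //   maps/.arke-process.json    <- per-directory override (not uploaded)
    //   maps/map-001.tif

    // Contents of archive/maps/.arke-process.json; only the listed fields override the defaults.
    const override = { ocr: false };

    // With defaults { ocr: true, describe: true, pinax: true }, files under maps/
    // are scanned with { ocr: false, describe: true, pinax: true }.
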
534
+ // src/upload/platforms/browser.ts
535
+ var browser_exports = {};
536
+ __export(browser_exports, {
537
+ BrowserScanner: () => BrowserScanner
538
+ });
539
+ var BrowserScanner;
540
+ var init_browser = __esm({
541
+ "src/upload/platforms/browser.ts"() {
542
+ "use strict";
543
+ init_errors();
544
+ init_validation();
545
+ init_hash();
546
+ init_processing();
547
+ init_common();
548
+ BrowserScanner = class {
549
+ /**
550
+ * Scan files from File or FileList
551
+ */
552
+ async scanFiles(source, options) {
553
+ const fileList = Array.isArray(source) ? source : [source];
554
+ if (fileList.length === 0) {
555
+ throw new ScanError("No files provided", "");
556
+ }
557
+ validateLogicalPath(options.rootPath);
558
+ const globalProcessingConfig = options.defaultProcessingConfig || DEFAULT_PROCESSING_CONFIG;
559
+ const files = [];
560
+ for (const file of fileList) {
561
+ try {
562
+ const fileInfo = await this.processFile(file, options.rootPath, globalProcessingConfig);
563
+ if (fileInfo) {
564
+ files.push(fileInfo);
565
+ }
566
+ } catch (error) {
567
+ console.warn(`Error processing ${file.name}: ${error.message}`);
568
+ continue;
569
+ }
570
+ }
571
+ files.sort((a, b) => a.size - b.size);
572
+ return files;
573
+ }
574
+ /**
575
+ * Process a single File object
576
+ */
577
+ async processFile(file, rootPath, processingConfig) {
578
+ const fileName = file.name;
579
+ const size = file.size;
580
+ if (fileName.startsWith(".")) {
581
+ return null;
582
+ }
583
+ const skipFiles = ["Thumbs.db", "desktop.ini", "__MACOSX"];
584
+ if (skipFiles.includes(fileName)) {
585
+ return null;
586
+ }
587
+ if (fileName === ".arke-process.json") {
588
+ return null;
589
+ }
590
+ try {
591
+ validateFileSize(size);
592
+ } catch (error) {
593
+ console.warn(`Skipping file that exceeds size limit: ${fileName}`, error.message);
594
+ return null;
595
+ }
596
+ let relativePath = "";
597
+ if ("webkitRelativePath" in file && file.webkitRelativePath) {
598
+ const parts = file.webkitRelativePath.split("/");
599
+ if (parts.length > 1) {
600
+ relativePath = parts.slice(1).join("/");
601
+ } else {
602
+ relativePath = fileName;
603
+ }
604
+ } else {
605
+ relativePath = fileName;
606
+ }
607
+ const normalizedRelPath = normalizePath(relativePath);
608
+ const logicalPath = `${rootPath}/${normalizedRelPath}`.replace(/\/+/g, "/");
609
+ try {
610
+ validateLogicalPath(logicalPath);
611
+ } catch (error) {
612
+ console.warn(`Skipping file with invalid logical path: ${logicalPath}`, error.message);
613
+ return null;
614
+ }
615
+ const contentType = file.type || getMimeType(fileName);
616
+ let cid;
617
+ try {
618
+ const buffer = await file.arrayBuffer();
619
+ cid = await computeCIDFromBuffer(new Uint8Array(buffer));
620
+ } catch (error) {
621
+ console.warn(`Warning: CID computation failed for ${fileName}, continuing without CID:`, error.message);
622
+ cid = void 0;
623
+ }
624
+ return {
625
+ localPath: `__browser_file__${fileName}`,
626
+ // Special marker for browser files
627
+ logicalPath,
628
+ fileName,
629
+ size,
630
+ contentType,
631
+ cid,
632
+ processingConfig
633
+ };
634
+ }
635
+ /**
636
+ * Read file contents as ArrayBuffer
637
+ * Note: In browser context, the File object should be passed directly
638
+ */
639
+ async readFile(file) {
640
+ throw new Error("Browser scanner requires File objects to be provided directly during upload");
641
+ }
642
+ };
643
+ }
644
+ });
645
+
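BrowserScanner takes File objects instead of paths: hidden and OS metadata files are skipped, the logical path is derived from webkitRelativePath when a directory was picked, and CIDs are computed from the in-memory buffer. A browser-side sketch, assuming ArkeUploader is importable from the package root; the element ID, gateway URL and token are placeholders:

    import { ArkeUploader } from "@arke-institute/sdk";

    const input = document.querySelector<HTMLInputElement>("#folder-input")!;
    input.addEventListener("change", async () => {
      const files = Array.from(input.files ?? []);
      const uploader = new ArkeUploader({
        gatewayUrl: "https://gateway.example.org", // illustrative
        authToken: "<jwt>",                        // hypothetical token
        uploader: "user-123",
      });
      const result = await uploader.uploadBatch(files, {
        onProgress: (p) => console.log(`${p.phase}: ${p.percentComplete}%`),
      });
      console.log("root PI:", result.rootPi);
    });
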
646
+ // src/upload/index.ts
647
+ var upload_exports = {};
648
+ __export(upload_exports, {
649
+ ArkeUploader: () => ArkeUploader,
650
+ NetworkError: () => NetworkError,
651
+ ScanError: () => ScanError,
652
+ UploadClient: () => UploadClient,
653
+ UploadError: () => UploadError,
654
+ ValidationError: () => ValidationError,
655
+ WorkerAPIError: () => WorkerAPIError
656
+ });
657
+ module.exports = __toCommonJS(upload_exports);
658
+
659
+ // src/upload/lib/worker-client-fetch.ts
660
+ init_errors();
661
+
662
+ // src/upload/utils/retry.ts
663
+ init_errors();
664
+ var DEFAULT_OPTIONS = {
665
+ maxRetries: 3,
666
+ initialDelay: 1e3,
667
+ // 1 second
668
+ maxDelay: 3e4,
669
+ // 30 seconds
670
+ shouldRetry: isRetryableError,
671
+ jitter: true
672
+ };
673
+ async function retryWithBackoff(fn, options = {}) {
674
+ const opts = { ...DEFAULT_OPTIONS, ...options };
675
+ let lastError;
676
+ for (let attempt = 0; attempt <= opts.maxRetries; attempt++) {
677
+ try {
678
+ return await fn();
679
+ } catch (error) {
680
+ lastError = error;
681
+ if (attempt >= opts.maxRetries) {
682
+ throw error;
683
+ }
684
+ if (opts.shouldRetry && !opts.shouldRetry(error)) {
685
+ throw error;
686
+ }
687
+ let delay;
688
+ if (error.statusCode === 429 && error.retryAfter) {
689
+ delay = Math.min(error.retryAfter * 1e3, opts.maxDelay);
690
+ } else {
691
+ delay = Math.min(
692
+ opts.initialDelay * Math.pow(2, attempt),
693
+ opts.maxDelay
694
+ );
695
+ }
696
+ if (opts.jitter) {
697
+ const jitterAmount = delay * 0.25;
698
+ delay = delay + (Math.random() * jitterAmount * 2 - jitterAmount);
699
+ }
700
+ await sleep(Math.floor(delay));
701
+ }
702
+ }
703
+ throw lastError;
704
+ }
705
+ function sleep(ms) {
706
+ return new Promise((resolve) => setTimeout(resolve, ms));
707
+ }
708
+
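retryWithBackoff retries up to maxRetries times with capped exponential backoff (initialDelay · 2^attempt, capped at maxDelay), adds ±25% jitter by default, and honours a Retry-After hint on 429 errors. Under the defaults the nominal schedule looks like this:

    // maxRetries = 3, initialDelay = 1000 ms, maxDelay = 30000 ms:
    //   attempt 0 fails -> wait ~1 s (±25% jitter)
    //   attempt 1 fails -> wait ~2 s
    //   attempt 2 fails -> wait ~4 s
    //   attempt 3 fails -> the error is rethrown
    function nominalDelay(attempt: number, initialDelay = 1000, maxDelay = 30000): number {
      return Math.min(initialDelay * Math.pow(2, attempt), maxDelay);
    }
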
709
+ // src/upload/lib/worker-client-fetch.ts
710
+ var WorkerClient = class {
711
+ constructor(config) {
712
+ this.baseUrl = config.baseUrl.replace(/\/$/, "");
713
+ this.authToken = config.authToken;
714
+ this.timeout = config.timeout ?? 3e4;
715
+ this.maxRetries = config.maxRetries ?? 3;
716
+ this.retryInitialDelay = config.retryInitialDelay ?? 1e3;
717
+ this.retryMaxDelay = config.retryMaxDelay ?? 3e4;
718
+ this.retryJitter = config.retryJitter ?? true;
719
+ this.debug = config.debug ?? false;
720
+ }
721
+ setAuthToken(token) {
722
+ this.authToken = token;
723
+ }
724
+ /**
725
+ * Make HTTP request with fetch
726
+ */
727
+ async request(method, path2, body) {
728
+ const url = `${this.baseUrl}${path2}`;
729
+ if (this.debug) {
730
+ console.log(`HTTP Request: ${method} ${url}`, body);
731
+ }
732
+ try {
733
+ const controller = new AbortController();
734
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
735
+ const headers = {
736
+ "Content-Type": "application/json"
737
+ };
738
+ if (this.authToken) {
739
+ headers["Authorization"] = `Bearer ${this.authToken}`;
740
+ }
741
+ const response = await fetch(url, {
742
+ method,
743
+ headers,
744
+ body: body ? JSON.stringify(body) : void 0,
745
+ signal: controller.signal
746
+ });
747
+ clearTimeout(timeoutId);
748
+ const data = await response.json();
749
+ if (this.debug) {
750
+ console.log(`HTTP Response: ${response.status}`, data);
751
+ }
752
+ if (!response.ok) {
753
+ const errorData = data;
754
+ throw new WorkerAPIError(
755
+ errorData.error || "Request failed",
756
+ response.status,
757
+ errorData.details
758
+ );
759
+ }
760
+ return data;
761
+ } catch (error) {
762
+ if (error instanceof WorkerAPIError) {
763
+ throw error;
764
+ }
765
+ if (error.name === "AbortError") {
766
+ throw new NetworkError(`Request timeout after ${this.timeout}ms`);
767
+ }
768
+ throw new NetworkError(`Network request failed: ${error.message}`);
769
+ }
770
+ }
771
+ /**
772
+ * Initialize a new batch upload
773
+ */
774
+ async initBatch(params) {
775
+ return retryWithBackoff(
776
+ () => this.request("POST", "/ingest/batches/init", params),
777
+ {
778
+ maxRetries: this.maxRetries,
779
+ initialDelay: this.retryInitialDelay,
780
+ maxDelay: this.retryMaxDelay,
781
+ jitter: this.retryJitter
782
+ }
783
+ );
784
+ }
785
+ /**
786
+ * Request presigned URLs for a file upload
787
+ */
788
+ async startFileUpload(batchId, params) {
789
+ return retryWithBackoff(
790
+ () => this.request(
791
+ "POST",
792
+ `/ingest/batches/${batchId}/files/start`,
793
+ params
794
+ ),
795
+ {
796
+ maxRetries: this.maxRetries,
797
+ initialDelay: this.retryInitialDelay,
798
+ maxDelay: this.retryMaxDelay,
799
+ jitter: this.retryJitter
800
+ }
801
+ );
802
+ }
803
+ /**
804
+ * Mark a file upload as complete
805
+ */
806
+ async completeFileUpload(batchId, params) {
807
+ return retryWithBackoff(
808
+ () => this.request(
809
+ "POST",
810
+ `/ingest/batches/${batchId}/files/complete`,
811
+ params
812
+ ),
813
+ {
814
+ maxRetries: this.maxRetries,
815
+ initialDelay: this.retryInitialDelay,
816
+ maxDelay: this.retryMaxDelay,
817
+ jitter: this.retryJitter
818
+ }
819
+ );
820
+ }
821
+ /**
822
+ * Finalize the batch after all files are uploaded
823
+ * Returns root_pi immediately for small batches, or status='discovery' for large batches
824
+ */
825
+ async finalizeBatch(batchId) {
826
+ return retryWithBackoff(
827
+ () => this.request(
828
+ "POST",
829
+ `/ingest/batches/${batchId}/finalize`,
830
+ {}
831
+ ),
832
+ {
833
+ maxRetries: this.maxRetries,
834
+ initialDelay: this.retryInitialDelay,
835
+ maxDelay: this.retryMaxDelay,
836
+ jitter: this.retryJitter
837
+ }
838
+ );
839
+ }
840
+ /**
841
+ * Get current batch status (used for polling during async discovery)
842
+ */
843
+ async getBatchStatus(batchId) {
844
+ return retryWithBackoff(
845
+ () => this.request(
846
+ "GET",
847
+ `/ingest/batches/${batchId}/status`
848
+ ),
849
+ {
850
+ maxRetries: this.maxRetries,
851
+ initialDelay: this.retryInitialDelay,
852
+ maxDelay: this.retryMaxDelay,
853
+ jitter: this.retryJitter
854
+ }
855
+ );
856
+ }
857
+ };
858
+
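WorkerClient wraps the gateway's ingest endpoints, and every call goes through retryWithBackoff with the client's backoff settings. The request sequence that ArkeUploader (defined next) drives through it, summarised:

    // 1. initBatch           POST /ingest/batches/init                  -> { batch_id }
    // 2. startFileUpload     POST /ingest/batches/:id/files/start       -> presigned URL(s)
    // 3. upload bytes        PUT  to the presigned URL(s) (simple or multipart)
    // 4. completeFileUpload  POST /ingest/batches/:id/files/complete
    // 5. finalizeBatch       POST /ingest/batches/:id/finalize          -> root_pi, or status "discovery"
    // 6. getBatchStatus      GET  /ingest/batches/:id/status            -> polled until root_pi appears
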
859
+ // src/upload/uploader.ts
860
+ init_common();
861
+
862
+ // src/upload/lib/simple-fetch.ts
863
+ init_errors();
864
+ async function uploadSimple(fileData, presignedUrl, contentType, options = {}) {
865
+ const { maxRetries = 3, retryInitialDelay, retryMaxDelay, retryJitter } = options;
866
+ await retryWithBackoff(
867
+ async () => {
868
+ let response;
869
+ try {
870
+ response = await fetch(presignedUrl, {
871
+ method: "PUT",
872
+ body: fileData,
873
+ headers: {
874
+ ...contentType ? { "Content-Type": contentType } : {}
875
+ }
876
+ });
877
+ } catch (error) {
878
+ throw new UploadError(`Upload failed: ${error.message}`, void 0, void 0, error);
879
+ }
880
+ if (!response.ok) {
881
+ const retryAfter = response.headers.get("retry-after");
882
+ const error = new UploadError(
883
+ `Upload failed with status ${response.status}: ${response.statusText}`,
884
+ void 0,
885
+ response.status
886
+ );
887
+ if (retryAfter && response.status === 429) {
888
+ error.retryAfter = parseInt(retryAfter, 10);
889
+ }
890
+ throw error;
891
+ }
892
+ },
893
+ {
894
+ maxRetries,
895
+ initialDelay: retryInitialDelay,
896
+ maxDelay: retryMaxDelay,
897
+ jitter: retryJitter
898
+ }
899
+ );
900
+ }
901
+
902
+ // src/upload/lib/multipart-fetch.ts
903
+ init_errors();
904
+ var DEFAULT_PART_SIZE = 10 * 1024 * 1024;
905
+ async function uploadMultipart(fileData, presignedUrls, concurrency = 3, options = {}) {
906
+ const totalSize = fileData.byteLength;
907
+ const partSize = Math.ceil(totalSize / presignedUrls.length);
908
+ const parts = [];
909
+ const queue = [];
910
+ const { maxRetries = 3, retryInitialDelay, retryMaxDelay, retryJitter } = options;
911
+ for (let i = 0; i < presignedUrls.length; i++) {
912
+ const partNumber = i + 1;
913
+ const start = i * partSize;
914
+ const end = Math.min(start + partSize, totalSize);
915
+ const partData = fileData.slice(start, end);
916
+ const url = presignedUrls[i];
917
+ queue.push(async () => {
918
+ const etag = await uploadPart(partData, url, partNumber, maxRetries, {
919
+ initialDelay: retryInitialDelay,
920
+ maxDelay: retryMaxDelay,
921
+ jitter: retryJitter
922
+ });
923
+ parts.push({ part_number: partNumber, etag });
924
+ });
925
+ }
926
+ await executeWithConcurrency(queue, concurrency);
927
+ parts.sort((a, b) => a.part_number - b.part_number);
928
+ return parts;
929
+ }
930
+ async function uploadPart(partData, presignedUrl, partNumber, maxRetries = 3, retryOptions = {}) {
931
+ return retryWithBackoff(
932
+ async () => {
933
+ let response;
934
+ try {
935
+ response = await fetch(presignedUrl, {
936
+ method: "PUT",
937
+ body: partData
938
+ });
939
+ } catch (error) {
940
+ throw new UploadError(
941
+ `Part ${partNumber} upload failed: ${error.message}`,
942
+ void 0,
943
+ void 0,
944
+ error
945
+ );
946
+ }
947
+ if (!response.ok) {
948
+ const retryAfter = response.headers.get("retry-after");
949
+ const error = new UploadError(
950
+ `Part ${partNumber} upload failed with status ${response.status}: ${response.statusText}`,
951
+ void 0,
952
+ response.status
953
+ );
954
+ if (retryAfter && response.status === 429) {
955
+ error.retryAfter = parseInt(retryAfter, 10);
956
+ }
957
+ throw error;
958
+ }
959
+ const etag = response.headers.get("etag");
960
+ if (!etag) {
961
+ throw new UploadError(
962
+ `Part ${partNumber} upload succeeded but no ETag returned`,
963
+ void 0,
964
+ response.status
965
+ );
966
+ }
967
+ return etag.replace(/"/g, "");
968
+ },
969
+ {
970
+ maxRetries,
971
+ initialDelay: retryOptions.initialDelay,
972
+ maxDelay: retryOptions.maxDelay,
973
+ jitter: retryOptions.jitter
974
+ }
975
+ );
976
+ }
977
+ async function executeWithConcurrency(tasks, concurrency) {
978
+ const queue = [...tasks];
979
+ const workers = [];
980
+ const processNext = async () => {
981
+ while (queue.length > 0) {
982
+ const task = queue.shift();
983
+ await task();
984
+ }
985
+ };
986
+ for (let i = 0; i < Math.min(concurrency, tasks.length); i++) {
987
+ workers.push(processNext());
988
+ }
989
+ await Promise.all(workers);
990
+ }
991
+
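uploadMultipart slices the buffer evenly across the presigned part URLs, uploads the parts through a small worker pool, and returns { part_number, etag } pairs sorted for the completion call. The worker-pool pattern used by executeWithConcurrency, isolated as a generic helper:

    // Run async tasks with at most `concurrency` in flight at a time.
    async function runWithConcurrency(tasks: Array<() => Promise<void>>, concurrency: number): Promise<void> {
      const queue = [...tasks];
      const worker = async () => {
        while (queue.length > 0) {
          const task = queue.shift()!;
          await task();
        }
      };
      await Promise.all(Array.from({ length: Math.min(concurrency, tasks.length) }, worker));
    }
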
992
+ // src/upload/uploader.ts
993
+ init_errors();
994
+ init_validation();
995
+ var MULTIPART_THRESHOLD = 5 * 1024 * 1024;
996
+ var ArkeUploader = class {
997
+ constructor(config) {
998
+ this.scanner = null;
999
+ validateCustomPromptsLocation(config.processing);
1000
+ this.config = {
1001
+ rootPath: "/uploads",
1002
+ // Must have at least one segment (not just '/')
1003
+ parallelUploads: 5,
1004
+ parallelParts: 3,
1005
+ ...config
1006
+ };
1007
+ this.workerClient = new WorkerClient({
1008
+ baseUrl: config.gatewayUrl,
1009
+ authToken: config.authToken,
1010
+ timeout: config.timeout,
1011
+ maxRetries: config.maxRetries,
1012
+ retryInitialDelay: config.retryInitialDelay,
1013
+ retryMaxDelay: config.retryMaxDelay,
1014
+ retryJitter: config.retryJitter,
1015
+ debug: false
1016
+ });
1017
+ this.platform = detectPlatform();
1018
+ }
1019
+ /**
1020
+ * Get platform-specific scanner
1021
+ */
1022
+ async getScanner() {
1023
+ if (this.scanner) {
1024
+ return this.scanner;
1025
+ }
1026
+ if (this.platform === "node") {
1027
+ const { NodeScanner: NodeScanner2 } = await Promise.resolve().then(() => (init_node(), node_exports));
1028
+ this.scanner = new NodeScanner2();
1029
+ } else if (this.platform === "browser") {
1030
+ const { BrowserScanner: BrowserScanner2 } = await Promise.resolve().then(() => (init_browser(), browser_exports));
1031
+ this.scanner = new BrowserScanner2();
1032
+ } else {
1033
+ throw new ValidationError("Unsupported platform");
1034
+ }
1035
+ return this.scanner;
1036
+ }
1037
+ /**
1038
+ * Upload a batch of files
1039
+ * @param source - Directory path (Node.js) or File[]/FileList (browser)
1040
+ * @param options - Upload options
1041
+ */
1042
+ async uploadBatch(source, options = {}) {
1043
+ const startTime = Date.now();
1044
+ const { onProgress, dryRun = false } = options;
1045
+ this.reportProgress(onProgress, {
1046
+ phase: "scanning",
1047
+ filesTotal: 0,
1048
+ filesUploaded: 0,
1049
+ bytesTotal: 0,
1050
+ bytesUploaded: 0,
1051
+ percentComplete: 0
1052
+ });
1053
+ const scanner = await this.getScanner();
1054
+ const files = await scanner.scanFiles(source, {
1055
+ rootPath: this.config.rootPath || "/",
1056
+ followSymlinks: true,
1057
+ defaultProcessingConfig: this.config.processing
1058
+ });
1059
+ if (files.length === 0) {
1060
+ throw new ValidationError("No files found to upload");
1061
+ }
1062
+ const totalSize = files.reduce((sum, f) => sum + f.size, 0);
1063
+ validateBatchSize(totalSize);
1064
+ if (this.config.customPrompts) {
1065
+ validateCustomPrompts(this.config.customPrompts);
1066
+ const promptFields = Object.keys(this.config.customPrompts).filter(
1067
+ (key) => this.config.customPrompts[key]
1068
+ );
1069
+ console.log(`[Arke Upload SDK] Custom prompts configured: ${promptFields.join(", ")}`);
1070
+ }
1071
+ if (dryRun) {
1072
+ return {
1073
+ batchId: "dry-run",
1074
+ rootPi: "dry-run",
1075
+ filesUploaded: files.length,
1076
+ bytesUploaded: totalSize,
1077
+ durationMs: Date.now() - startTime
1078
+ };
1079
+ }
1080
+ const { batch_id } = await this.workerClient.initBatch({
1081
+ uploader: this.config.uploader,
1082
+ root_path: this.config.rootPath || "/",
1083
+ parent_pi: this.config.parentPi || "",
1084
+ metadata: this.config.metadata,
1085
+ file_count: files.length,
1086
+ total_size: totalSize,
1087
+ custom_prompts: this.config.customPrompts
1088
+ });
1089
+ if (this.config.customPrompts) {
1090
+ console.log(`[Arke Upload SDK] Custom prompts sent to worker for batch ${batch_id}`);
1091
+ }
1092
+ this.reportProgress(onProgress, {
1093
+ phase: "uploading",
1094
+ filesTotal: files.length,
1095
+ filesUploaded: 0,
1096
+ bytesTotal: totalSize,
1097
+ bytesUploaded: 0,
1098
+ percentComplete: 0
1099
+ });
1100
+ let filesUploaded = 0;
1101
+ let bytesUploaded = 0;
1102
+ const { failedFiles } = await this.uploadFilesWithConcurrency(
1103
+ batch_id,
1104
+ files,
1105
+ source,
1106
+ this.config.parallelUploads || 5,
1107
+ (file, bytes) => {
1108
+ filesUploaded++;
1109
+ bytesUploaded += bytes;
1110
+ this.reportProgress(onProgress, {
1111
+ phase: "uploading",
1112
+ filesTotal: files.length,
1113
+ filesUploaded,
1114
+ bytesTotal: totalSize,
1115
+ bytesUploaded,
1116
+ currentFile: file.fileName,
1117
+ percentComplete: Math.round(bytesUploaded / totalSize * 100)
1118
+ });
1119
+ }
1120
+ );
1121
+ if (failedFiles.length === files.length) {
1122
+ throw new ValidationError(
1123
+ `All ${files.length} files failed to upload. First error: ${failedFiles[0]?.error || "Unknown"}`
1124
+ );
1125
+ }
1126
+ if (failedFiles.length > 0) {
1127
+ console.warn(
1128
+ `Warning: ${failedFiles.length} of ${files.length} files failed to upload:`,
1129
+ failedFiles.map((f) => `${f.file.fileName}: ${f.error}`).join(", ")
1130
+ );
1131
+ }
1132
+ this.reportProgress(onProgress, {
1133
+ phase: "finalizing",
1134
+ filesTotal: files.length,
1135
+ filesUploaded,
1136
+ bytesTotal: totalSize,
1137
+ bytesUploaded,
1138
+ percentComplete: 95
1139
+ });
1140
+ const finalizeResult = await this.workerClient.finalizeBatch(batch_id);
1141
+ let rootPi;
1142
+ if (finalizeResult.root_pi) {
1143
+ rootPi = finalizeResult.root_pi;
1144
+ } else if (finalizeResult.status === "discovery") {
1145
+ this.reportProgress(onProgress, {
1146
+ phase: "discovery",
1147
+ filesTotal: files.length,
1148
+ filesUploaded,
1149
+ bytesTotal: totalSize,
1150
+ bytesUploaded,
1151
+ percentComplete: 97
1152
+ });
1153
+ rootPi = await this.pollForRootPi(batch_id, onProgress, files.length, totalSize, bytesUploaded);
1154
+ } else {
1155
+ throw new ValidationError(
1156
+ `Finalization returned unexpected status: ${finalizeResult.status} without root_pi`
1157
+ );
1158
+ }
1159
+ this.reportProgress(onProgress, {
1160
+ phase: "complete",
1161
+ filesTotal: files.length,
1162
+ filesUploaded,
1163
+ bytesTotal: totalSize,
1164
+ bytesUploaded,
1165
+ percentComplete: 100
1166
+ });
1167
+ return {
1168
+ batchId: batch_id,
1169
+ rootPi,
1170
+ filesUploaded,
1171
+ bytesUploaded,
1172
+ durationMs: Date.now() - startTime
1173
+ };
1174
+ }
1175
+ /**
1176
+ * Poll for root_pi during async discovery
1177
+ */
1178
+ async pollForRootPi(batchId, onProgress, filesTotal, bytesTotal, bytesUploaded) {
1179
+ const POLL_INTERVAL_MS = 2e3;
1180
+ const MAX_POLL_TIME_MS = 30 * 60 * 1e3;
1181
+ const startTime = Date.now();
1182
+ while (Date.now() - startTime < MAX_POLL_TIME_MS) {
1183
+ const status = await this.workerClient.getBatchStatus(batchId);
1184
+ if (status.root_pi) {
1185
+ return status.root_pi;
1186
+ }
1187
+ if (status.status === "failed") {
1188
+ throw new ValidationError(`Batch discovery failed`);
1189
+ }
1190
+ if (status.discovery_progress && onProgress) {
1191
+ const { total, published } = status.discovery_progress;
1192
+ const discoveryPercent = total > 0 ? Math.round(published / total * 100) : 0;
1193
+ this.reportProgress(onProgress, {
1194
+ phase: "discovery",
1195
+ filesTotal,
1196
+ filesUploaded: filesTotal,
1197
+ bytesTotal,
1198
+ bytesUploaded,
1199
+ percentComplete: 95 + Math.round(discoveryPercent * 0.04)
1200
+ // 95-99%
1201
+ });
1202
+ }
1203
+ await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
1204
+ }
1205
+ throw new ValidationError(`Discovery timed out after ${MAX_POLL_TIME_MS / 1e3} seconds`);
1206
+ }
1207
+ /**
1208
+ * Upload files with controlled concurrency
1209
+ */
1210
+ async uploadFilesWithConcurrency(batchId, files, source, concurrency, onFileComplete) {
1211
+ const queue = [...files];
1212
+ const workers = [];
1213
+ const failedFiles = [];
1214
+ const processNext = async () => {
1215
+ while (queue.length > 0) {
1216
+ const file = queue.shift();
1217
+ try {
1218
+ await this.uploadSingleFile(batchId, file, source);
1219
+ onFileComplete(file, file.size);
1220
+ } catch (error) {
1221
+ const errorMessage = error.message || "Unknown error";
1222
+ console.error(`Failed to upload ${file.fileName}: ${errorMessage}`);
1223
+ failedFiles.push({ file, error: errorMessage });
1224
+ }
1225
+ }
1226
+ };
1227
+ for (let i = 0; i < Math.min(concurrency, files.length); i++) {
1228
+ workers.push(processNext());
1229
+ }
1230
+ await Promise.all(workers);
1231
+ return { failedFiles };
1232
+ }
1233
+ /**
1234
+ * Upload a single file
1235
+ */
1236
+ async uploadSingleFile(batchId, file, source) {
1237
+ const uploadInfo = await this.workerClient.startFileUpload(batchId, {
1238
+ file_name: file.fileName,
1239
+ file_size: file.size,
1240
+ logical_path: file.logicalPath,
1241
+ content_type: file.contentType,
1242
+ cid: file.cid,
1243
+ processing_config: file.processingConfig
1244
+ });
1245
+ const fileData = await this.getFileData(file, source);
1246
+ const retryOptions = {
1247
+ maxRetries: this.config.maxRetries,
1248
+ retryInitialDelay: this.config.retryInitialDelay,
1249
+ retryMaxDelay: this.config.retryMaxDelay,
1250
+ retryJitter: this.config.retryJitter
1251
+ };
1252
+ if (uploadInfo.upload_type === "simple") {
1253
+ await uploadSimple(fileData, uploadInfo.presigned_url, file.contentType, retryOptions);
1254
+ } else {
1255
+ const partUrls = uploadInfo.presigned_urls.map((p) => p.url);
1256
+ const parts = await uploadMultipart(
1257
+ fileData,
1258
+ partUrls,
1259
+ this.config.parallelParts || 3,
1260
+ retryOptions
1261
+ );
1262
+ await this.workerClient.completeFileUpload(batchId, {
1263
+ r2_key: uploadInfo.r2_key,
1264
+ upload_id: uploadInfo.upload_id,
1265
+ parts
1266
+ });
1267
+ return;
1268
+ }
1269
+ await this.workerClient.completeFileUpload(batchId, {
1270
+ r2_key: uploadInfo.r2_key
1271
+ });
1272
+ }
1273
+ /**
1274
+ * Get file data based on platform
1275
+ */
1276
+ async getFileData(file, source) {
1277
+ if (this.platform === "node") {
1278
+ const fs2 = await import("fs/promises");
1279
+ const buffer = await fs2.readFile(file.localPath);
1280
+ return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
1281
+ } else if (this.platform === "browser") {
1282
+ const files = Array.isArray(source) ? source : [source];
1283
+ const browserFile = files.find(
1284
+ (f) => f instanceof File && f.name === file.fileName
1285
+ );
1286
+ if (!browserFile) {
1287
+ throw new Error(`Could not find browser File object for ${file.fileName}`);
1288
+ }
1289
+ return browserFile.arrayBuffer();
1290
+ }
1291
+ throw new Error("Unsupported platform for file reading");
1292
+ }
1293
+ /**
1294
+ * Report progress to callback
1295
+ */
1296
+ reportProgress(callback, progress) {
1297
+ if (callback) {
1298
+ callback(progress);
1299
+ }
1300
+ }
1301
+ };
1302
+
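ArkeUploader ties the pieces together: scan, per-file start/upload/complete with bounded concurrency, finalize, and, for large batches, polling until the root PI is published. A Node-side sketch, assuming the class is importable from the package root; the gateway URL, token, prompts and directory are illustrative:

    import { ArkeUploader } from "@arke-institute/sdk";

    const uploader = new ArkeUploader({
      gatewayUrl: "https://gateway.example.org",
      authToken: process.env.ARKE_TOKEN ?? "",
      uploader: "user-123",
      rootPath: "/uploads/field-notes",  // must start with "/" and have at least one segment
      processing: { ocr: true, describe: true, pinax: false },
      // customPrompts is a top-level field, not nested under `processing` (see validateCustomPromptsLocation).
      customPrompts: { general: "Transcribe handwritten marginalia verbatim." },
    });

    // In an ES module, top-level await is available.
    const result = await uploader.uploadBatch("./field-notes", {
      onProgress: (p) => console.log(`${p.phase} ${p.percentComplete}% (${p.filesUploaded}/${p.filesTotal} files)`),
    });
    console.log(result.batchId, result.rootPi, result.bytesUploaded);
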
1303
+ // src/collections/errors.ts
1304
+ var CollectionsError = class extends Error {
1305
+ constructor(message, code2 = "UNKNOWN_ERROR", details) {
1306
+ super(message);
1307
+ this.code = code2;
1308
+ this.details = details;
1309
+ this.name = "CollectionsError";
1310
+ }
1311
+ };
1312
+
1313
+ // src/collections/client.ts
1314
+ var CollectionsClient = class {
1315
+ constructor(config) {
1316
+ this.baseUrl = config.gatewayUrl.replace(/\/$/, "");
1317
+ this.authToken = config.authToken;
1318
+ this.fetchImpl = config.fetchImpl ?? fetch;
1319
+ }
1320
+ setAuthToken(token) {
1321
+ this.authToken = token;
1322
+ }
1323
+ // ---------------------------------------------------------------------------
1324
+ // Request helpers
1325
+ // ---------------------------------------------------------------------------
1326
+ buildUrl(path2, query) {
1327
+ const url = new URL(`${this.baseUrl}${path2}`);
1328
+ if (query) {
1329
+ Object.entries(query).forEach(([key, value]) => {
1330
+ if (value !== void 0 && value !== null) {
1331
+ url.searchParams.set(key, String(value));
1332
+ }
1333
+ });
1334
+ }
1335
+ return url.toString();
1336
+ }
1337
+ getHeaders(authRequired) {
1338
+ const headers = { "Content-Type": "application/json" };
1339
+ if (authRequired || this.authToken) {
1340
+ if (!this.authToken && authRequired) {
1341
+ throw new CollectionsError("Authentication required for this operation", "AUTH_REQUIRED");
1342
+ }
1343
+ if (this.authToken) {
1344
+ headers["Authorization"] = `Bearer ${this.authToken}`;
1345
+ }
1346
+ }
1347
+ return headers;
1348
+ }
1349
+ async request(path2, options = {}) {
1350
+ const authRequired = options.authRequired ?? false;
1351
+ const url = this.buildUrl(path2, options.query);
1352
+ const headers = new Headers(this.getHeaders(authRequired));
1353
+ if (options.headers) {
1354
+ Object.entries(options.headers).forEach(([k, v]) => {
1355
+ if (v !== void 0) headers.set(k, v);
1356
+ });
1357
+ }
1358
+ const response = await this.fetchImpl(url, { ...options, headers });
1359
+ if (response.ok) {
1360
+ if (response.status === 204) {
1361
+ return void 0;
1362
+ }
1363
+ const contentType = response.headers.get("content-type") || "";
1364
+ if (contentType.includes("application/json")) {
1365
+ return await response.json();
1366
+ }
1367
+ return await response.text();
1368
+ }
1369
+ let body;
1370
+ const text = await response.text();
1371
+ try {
1372
+ body = JSON.parse(text);
1373
+ } catch {
1374
+ body = text;
1375
+ }
1376
+ const message = body?.error && typeof body.error === "string" ? body.error : `Request failed with status ${response.status}`;
1377
+ throw new CollectionsError(message, "HTTP_ERROR", {
1378
+ status: response.status,
1379
+ body
1380
+ });
1381
+ }
1382
+ // ---------------------------------------------------------------------------
1383
+ // Collections
1384
+ // ---------------------------------------------------------------------------
1385
+ async listCollections(params) {
1386
+ return this.request("/collections", {
1387
+ method: "GET",
1388
+ query: { limit: params?.limit, offset: params?.offset }
1389
+ });
1390
+ }
1391
+ async getCollection(id) {
1392
+ return this.request(`/collections/${id}`, { method: "GET" });
1393
+ }
1394
+ async getCollectionRoot(id) {
1395
+ return this.request(`/collections/${id}/root`, { method: "GET" });
1396
+ }
1397
+ async getMyAccess(id) {
1398
+ return this.request(`/collections/${id}/my-access`, { method: "GET", authRequired: true });
1399
+ }
1400
+ async createCollection(payload) {
1401
+ return this.request("/collections", {
1402
+ method: "POST",
1403
+ authRequired: true,
1404
+ body: JSON.stringify(payload)
1405
+ });
1406
+ }
1407
+ async registerRoot(payload) {
1408
+ return this.request("/collections/register-root", {
1409
+ method: "POST",
1410
+ authRequired: true,
1411
+ body: JSON.stringify(payload)
1412
+ });
1413
+ }
1414
+ async updateCollection(id, payload) {
1415
+ return this.request(`/collections/${id}`, {
1416
+ method: "PATCH",
1417
+ authRequired: true,
1418
+ body: JSON.stringify(payload)
1419
+ });
1420
+ }
1421
+ async changeRoot(id, payload) {
1422
+ return this.request(`/collections/${id}/change-root`, {
1423
+ method: "PATCH",
1424
+ authRequired: true,
1425
+ body: JSON.stringify(payload)
1426
+ });
1427
+ }
1428
+ async deleteCollection(id) {
1429
+ return this.request(`/collections/${id}`, {
1430
+ method: "DELETE",
1431
+ authRequired: true
1432
+ });
1433
+ }
1434
+ // ---------------------------------------------------------------------------
1435
+ // Members
1436
+ // ---------------------------------------------------------------------------
1437
+ async listMembers(collectionId) {
1438
+ return this.request(`/collections/${collectionId}/members`, { method: "GET" });
1439
+ }
1440
+ async updateMemberRole(collectionId, userId, role) {
1441
+ return this.request(`/collections/${collectionId}/members/${userId}`, {
1442
+ method: "PATCH",
1443
+ authRequired: true,
1444
+ body: JSON.stringify({ role })
1445
+ });
1446
+ }
1447
+ async removeMember(collectionId, userId) {
1448
+ return this.request(`/collections/${collectionId}/members/${userId}`, {
1449
+ method: "DELETE",
1450
+ authRequired: true
1451
+ });
1452
+ }
1453
+ // ---------------------------------------------------------------------------
1454
+ // Invitations
1455
+ // ---------------------------------------------------------------------------
1456
+ async createInvitation(collectionId, email, role) {
1457
+ return this.request(`/collections/${collectionId}/invitations`, {
1458
+ method: "POST",
1459
+ authRequired: true,
1460
+ body: JSON.stringify({ email, role })
1461
+ });
1462
+ }
1463
+ async listInvitations(collectionId) {
1464
+ return this.request(`/collections/${collectionId}/invitations`, {
1465
+ method: "GET",
1466
+ authRequired: true
1467
+ });
1468
+ }
1469
+ async acceptInvitation(invitationId) {
1470
+ return this.request(`/invitations/${invitationId}/accept`, {
1471
+ method: "POST",
1472
+ authRequired: true
1473
+ });
1474
+ }
1475
+ async declineInvitation(invitationId) {
1476
+ return this.request(`/invitations/${invitationId}/decline`, {
1477
+ method: "POST",
1478
+ authRequired: true
1479
+ });
1480
+ }
1481
+ async revokeInvitation(invitationId) {
1482
+ return this.request(`/invitations/${invitationId}`, {
1483
+ method: "DELETE",
1484
+ authRequired: true
1485
+ });
1486
+ }
1487
+ // ---------------------------------------------------------------------------
1488
+ // Current user
1489
+ // ---------------------------------------------------------------------------
1490
+ async getMyCollections() {
1491
+ return this.request("/me/collections", { method: "GET", authRequired: true });
1492
+ }
1493
+ async getMyInvitations() {
1494
+ return this.request("/me/invitations", { method: "GET", authRequired: true });
1495
+ }
1496
+ // ---------------------------------------------------------------------------
1497
+ // PI permissions
1498
+ // ---------------------------------------------------------------------------
1499
+ async getPiPermissions(pi) {
1500
+ return this.request(`/pi/${pi}/permissions`, { method: "GET" });
1501
+ }
1502
+ };
1503
+
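CollectionsClient is a thin fetch wrapper over the collection, member, invitation and PI-permission endpoints: calls flagged authRequired throw a CollectionsError with code 'AUTH_REQUIRED' when no token is set, and non-2xx responses surface as a CollectionsError with code 'HTTP_ERROR'. It is not exported from this bundle directly; the UploadClient defined below exposes an instance through its `collections` getter. A sketch, with the caveat that the bundled output carries no type information, so response shapes here are assumptions:

    import type { UploadClient } from "@arke-institute/sdk"; // import subpath is an assumption

    async function inviteEditor(client: UploadClient, collectionId: string, email: string) {
      const access = await client.collections.getMyAccess(collectionId); // requires an auth token
      console.log("my access:", access);
      await client.collections.createInvitation(collectionId, email, "editor");
    }
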
1504
+ // src/upload/client.ts
1505
+ init_errors();
1506
+ function getUserIdFromToken(token) {
1507
+ try {
1508
+ const parts = token.split(".");
1509
+ if (parts.length !== 3) return null;
1510
+ const payload = parts[1].replace(/-/g, "+").replace(/_/g, "/");
1511
+ let decoded;
1512
+ if (typeof atob === "function") {
1513
+ decoded = atob(payload);
1514
+ } else {
1515
+ decoded = Buffer.from(payload, "base64").toString("utf-8");
1516
+ }
1517
+ const data = JSON.parse(decoded);
1518
+ return data.sub || null;
1519
+ } catch {
1520
+ return null;
1521
+ }
1522
+ }
1523
+ var UploadClient = class {
1524
+ constructor(config) {
1525
+ const uploader = config.uploader || getUserIdFromToken(config.authToken) || "unknown";
1526
+ this.config = { ...config, uploader };
1527
+ this.collectionsClient = new CollectionsClient({
1528
+ gatewayUrl: config.gatewayUrl,
1529
+ authToken: config.authToken,
1530
+ fetchImpl: config.fetchImpl
1531
+ });
1532
+ }
1533
+ /**
1534
+ * Update the auth token (e.g., after token refresh)
1535
+ */
1536
+ setAuthToken(token) {
1537
+ this.config = { ...this.config, authToken: token };
1538
+ this.collectionsClient.setAuthToken(token);
1539
+ }
1540
+ /**
1541
+ * Create a new collection and upload files to it
1542
+ *
1543
+ * Anyone authenticated can create a new collection.
1544
+ * The root PI of the uploaded files becomes the collection's root.
1545
+ */
1546
+ async createCollection(options) {
1547
+ const { files, collectionMetadata, customPrompts, processing, onProgress, dryRun } = options;
1548
+ const metadata = {
1549
+ ...collectionMetadata,
1550
+ visibility: collectionMetadata.visibility || "public"
1551
+ };
1552
+ const uploader = new ArkeUploader({
1553
+ gatewayUrl: this.config.gatewayUrl,
1554
+ authToken: this.config.authToken,
1555
+ uploader: this.config.uploader,
1556
+ customPrompts,
1557
+ processing
1558
+ });
1559
+ const batchResult = await uploader.uploadBatch(files, {
1560
+ onProgress,
1561
+ dryRun
1562
+ });
1563
+ if (dryRun) {
1564
+ return {
1565
+ ...batchResult,
1566
+ collection: {
1567
+ id: "dry-run",
1568
+ title: metadata.title,
1569
+ slug: metadata.slug,
1570
+ description: metadata.description,
1571
+ visibility: metadata.visibility,
1572
+ rootPi: "dry-run"
1573
+ }
1574
+ };
1575
+ }
1576
+ const collection = await this.collectionsClient.registerRoot({
1577
+ ...metadata,
1578
+ rootPi: batchResult.rootPi
1579
+ });
1580
+ return {
1581
+ ...batchResult,
1582
+ collection
1583
+ };
1584
+ }
1585
+ /**
1586
+ * Add files to an existing collection
1587
+ *
1588
+ * Requires owner or editor role on the collection containing the parent PI.
1589
+ * Use this to add a folder or files to an existing collection hierarchy.
1590
+ */
1591
+ async addToCollection(options) {
1592
+ const { files, parentPi, customPrompts, processing, onProgress, dryRun } = options;
1593
+ if (!dryRun) {
1594
+ const permissions = await this.collectionsClient.getPiPermissions(parentPi);
1595
+ if (!permissions.canEdit) {
1596
+ if (!permissions.collection) {
1597
+ throw new ValidationError(
1598
+ `Cannot add files: PI "${parentPi}" is not part of any collection`
1599
+ );
1600
+ }
1601
+ throw new ValidationError(
1602
+ `Cannot add files to collection "${permissions.collection.title}": you need editor or owner role (current role: ${permissions.collection.role || "none"})`
1603
+ );
1604
+ }
1605
+ }
1606
+ const uploader = new ArkeUploader({
1607
+ gatewayUrl: this.config.gatewayUrl,
1608
+ authToken: this.config.authToken,
1609
+ uploader: this.config.uploader,
1610
+ parentPi,
1611
+ customPrompts,
1612
+ processing
1613
+ });
1614
+ return uploader.uploadBatch(files, {
1615
+ onProgress,
1616
+ dryRun
1617
+ });
1618
+ }
1619
+ /**
1620
+ * Check if you can edit a specific PI (i.e., add files to its collection)
1621
+ */
1622
+ async canEdit(pi) {
1623
+ return this.collectionsClient.getPiPermissions(pi);
1624
+ }
1625
+ /**
1626
+ * Get access to the underlying collections client for other operations
1627
+ */
1628
+ get collections() {
1629
+ return this.collectionsClient;
1630
+ }
1631
+ };
1632
+
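UploadClient is the high-level entry point: createCollection uploads a batch and registers its root PI as a new collection, addToCollection first checks edit permission on the parent PI, and the uploader identity falls back to the JWT's `sub` claim when not supplied. A sketch with illustrative URLs, token, metadata and paths; the import subpath is an assumption:

    import { UploadClient } from "@arke-institute/sdk";

    const client = new UploadClient({
      gatewayUrl: "https://gateway.example.org",
      authToken: process.env.ARKE_TOKEN ?? "",
    });

    // Create a new collection from a local directory (Node) or a File[] (browser).
    const created = await client.createCollection({
      files: "./field-notes",
      collectionMetadata: { title: "Field notes 2024", slug: "field-notes-2024", visibility: "public" },
      onProgress: (p) => console.log(`${p.phase}: ${p.percentComplete}%`),
    });
    console.log("collection:", created.collection, "root PI:", created.rootPi);

    // Add more files under an existing PI (requires editor or owner on its collection).
    await client.addToCollection({ files: "./field-notes-addendum", parentPi: created.rootPi });
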
1633
+ // src/upload/index.ts
1634
+ init_errors();
1635
+ // Annotate the CommonJS export names for ESM import in node:
1636
+ 0 && (module.exports = {
1637
+ ArkeUploader,
1638
+ NetworkError,
1639
+ ScanError,
1640
+ UploadClient,
1641
+ UploadError,
1642
+ ValidationError,
1643
+ WorkerAPIError
1644
+ });
1645
+ //# sourceMappingURL=index.cjs.map