@arke-institute/sdk 0.1.0 → 0.1.1

This diff reflects the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
package/dist/index.cjs ADDED
@@ -0,0 +1,1649 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __esm = (fn, res) => function __init() {
9
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
+ };
11
+ var __export = (target, all) => {
12
+ for (var name in all)
13
+ __defProp(target, name, { get: all[name], enumerable: true });
14
+ };
15
+ var __copyProps = (to, from, except, desc) => {
16
+ if (from && typeof from === "object" || typeof from === "function") {
17
+ for (let key of __getOwnPropNames(from))
18
+ if (!__hasOwnProp.call(to, key) && key !== except)
19
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
20
+ }
21
+ return to;
22
+ };
23
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
24
+ // If the importer is in node compatibility mode or this is not an ESM
25
+ // file that has been converted to a CommonJS file using a Babel-
26
+ // compatible transform (i.e. "__esModule" has not been set), then set
27
+ // "default" to the CommonJS "module.exports" for node compatibility.
28
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
29
+ mod
30
+ ));
31
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
32
+
33
+ // src/upload/utils/errors.ts
34
+ function isRetryableError(error) {
35
+ if (error instanceof NetworkError) {
36
+ return true;
37
+ }
38
+ if (error instanceof WorkerAPIError) {
39
+ return error.statusCode ? error.statusCode >= 500 : false;
40
+ }
41
+ if (error instanceof UploadError) {
42
+ if (error.statusCode) {
43
+ return error.statusCode >= 500 || error.statusCode === 429;
44
+ }
45
+ return false;
46
+ }
47
+ if (error.code === "ECONNRESET" || error.code === "ETIMEDOUT" || error.code === "ENOTFOUND" || error.code === "ECONNREFUSED") {
48
+ return true;
49
+ }
50
+ return false;
51
+ }
52
+ var WorkerAPIError, UploadError, ValidationError, NetworkError, ScanError;
53
+ var init_errors = __esm({
54
+ "src/upload/utils/errors.ts"() {
55
+ "use strict";
56
+ WorkerAPIError = class extends Error {
57
+ constructor(message, statusCode, details) {
58
+ super(message);
59
+ this.statusCode = statusCode;
60
+ this.details = details;
61
+ this.name = "WorkerAPIError";
62
+ Error.captureStackTrace(this, this.constructor);
63
+ }
64
+ };
65
+ UploadError = class extends Error {
66
+ constructor(message, fileName, statusCode, cause) {
67
+ super(message);
68
+ this.fileName = fileName;
69
+ this.statusCode = statusCode;
70
+ this.cause = cause;
71
+ this.name = "UploadError";
72
+ Error.captureStackTrace(this, this.constructor);
73
+ }
74
+ };
75
+ ValidationError = class extends Error {
76
+ constructor(message, field) {
77
+ super(message);
78
+ this.field = field;
79
+ this.name = "ValidationError";
80
+ Error.captureStackTrace(this, this.constructor);
81
+ }
82
+ };
83
+ NetworkError = class extends Error {
84
+ constructor(message, cause) {
85
+ super(message);
86
+ this.cause = cause;
87
+ this.name = "NetworkError";
88
+ Error.captureStackTrace(this, this.constructor);
89
+ }
90
+ };
91
+ ScanError = class extends Error {
92
+ constructor(message, path2) {
93
+ super(message);
94
+ this.path = path2;
95
+ this.name = "ScanError";
96
+ Error.captureStackTrace(this, this.constructor);
97
+ }
98
+ };
99
+ }
100
+ });
101
+
102
+ // src/upload/platforms/common.ts
103
+ function detectPlatform() {
104
+ if (typeof process !== "undefined" && process.versions != null && process.versions.node != null) {
105
+ return "node";
106
+ }
107
+ if (typeof window !== "undefined" && typeof document !== "undefined") {
108
+ return "browser";
109
+ }
110
+ return "unknown";
111
+ }
112
+ function normalizePath(p) {
113
+ return p.replace(/\\/g, "/");
114
+ }
115
+ function getExtension(filename) {
116
+ const lastDot = filename.lastIndexOf(".");
117
+ return lastDot === -1 ? "" : filename.slice(lastDot + 1).toLowerCase();
118
+ }
119
+ function getMimeType(filename) {
120
+ const ext = getExtension(filename);
121
+ const mimeTypes = {
122
+ // Images
123
+ "jpg": "image/jpeg",
124
+ "jpeg": "image/jpeg",
125
+ "png": "image/png",
126
+ "gif": "image/gif",
127
+ "webp": "image/webp",
128
+ "tif": "image/tiff",
129
+ "tiff": "image/tiff",
130
+ "bmp": "image/bmp",
131
+ "svg": "image/svg+xml",
132
+ // Documents
133
+ "pdf": "application/pdf",
134
+ "txt": "text/plain",
135
+ "json": "application/json",
136
+ "xml": "application/xml",
137
+ "html": "text/html",
138
+ "htm": "text/html",
139
+ "css": "text/css",
140
+ "js": "application/javascript",
141
+ // Archives
142
+ "zip": "application/zip",
143
+ "tar": "application/x-tar",
144
+ "gz": "application/gzip",
145
+ // Audio
146
+ "mp3": "audio/mpeg",
147
+ "wav": "audio/wav",
148
+ "ogg": "audio/ogg",
149
+ // Video
150
+ "mp4": "video/mp4",
151
+ "webm": "video/webm",
152
+ "mov": "video/quicktime"
153
+ };
154
+ return mimeTypes[ext] || "application/octet-stream";
155
+ }
156
+ var init_common = __esm({
157
+ "src/upload/platforms/common.ts"() {
158
+ "use strict";
159
+ }
160
+ });
161
+
162
+ // src/upload/lib/validation.ts
163
+ function validateFileSize(size) {
164
+ if (size <= 0) {
165
+ throw new ValidationError("File size must be greater than 0");
166
+ }
167
+ if (size > MAX_FILE_SIZE) {
168
+ throw new ValidationError(
169
+ `File size (${formatBytes(size)}) exceeds maximum allowed size (${formatBytes(MAX_FILE_SIZE)})`
170
+ );
171
+ }
172
+ }
173
+ function validateBatchSize(totalSize) {
174
+ if (totalSize > MAX_BATCH_SIZE) {
175
+ throw new ValidationError(
176
+ `Total batch size (${formatBytes(totalSize)}) exceeds maximum allowed size (${formatBytes(MAX_BATCH_SIZE)})`
177
+ );
178
+ }
179
+ }
180
+ function validateLogicalPath(path2) {
181
+ if (!path2.startsWith("/")) {
182
+ throw new ValidationError("Logical path must start with /", "path");
183
+ }
184
+ if (INVALID_PATH_CHARS.test(path2)) {
185
+ throw new ValidationError(
186
+ "Logical path contains invalid characters",
187
+ "path"
188
+ );
189
+ }
190
+ const segments = path2.split("/").filter((s) => s.length > 0);
191
+ if (segments.length === 0 && path2 !== "/") {
192
+ throw new ValidationError("Logical path cannot be empty", "path");
193
+ }
194
+ for (const segment of segments) {
195
+ if (segment === "." || segment === "..") {
196
+ throw new ValidationError(
197
+ "Logical path cannot contain . or .. segments",
198
+ "path"
199
+ );
200
+ }
201
+ }
202
+ }
203
+ function validateRefJson(content, fileName, logger) {
204
+ let parsed;
205
+ try {
206
+ parsed = JSON.parse(content);
207
+ } catch (error) {
208
+ throw new ValidationError(
209
+ `Invalid JSON in ${fileName}: ${error.message}`,
210
+ "ref"
211
+ );
212
+ }
213
+ if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
214
+ throw new ValidationError(
215
+ `${fileName} must contain a JSON object`,
216
+ "ref"
217
+ );
218
+ }
219
+ if (!parsed.url || typeof parsed.url !== "string") {
220
+ throw new ValidationError(
221
+ `${fileName} must contain a 'url' field with a string value`,
222
+ "ref"
223
+ );
224
+ }
225
+ try {
226
+ const url = new URL(parsed.url);
227
+ if (url.protocol !== "http:" && url.protocol !== "https:") {
228
+ throw new Error("URL must use HTTP or HTTPS protocol");
229
+ }
230
+ } catch (error) {
231
+ throw new ValidationError(
232
+ `Invalid URL in ${fileName}: ${error.message}`,
233
+ "ref"
234
+ );
235
+ }
236
+ if (!parsed.type) {
237
+ if (logger) {
238
+ logger.warn(`${fileName}: Missing 'type' field (optional but recommended)`);
239
+ }
240
+ }
241
+ if (parsed.type && OCR_PROCESSABLE_TYPES.includes(parsed.type)) {
242
+ const typeToExt = {
243
+ "image/jpeg": ".jpg",
244
+ "image/png": ".png",
245
+ "image/webp": ".webp"
246
+ };
247
+ const expectedExt = typeToExt[parsed.type];
248
+ if (expectedExt && !fileName.includes(`${expectedExt}.ref.json`)) {
249
+ if (logger) {
250
+ logger.warn(
251
+ `${fileName}: Type is '${parsed.type}' but filename doesn't include '${expectedExt}.ref.json' pattern. This file may not be processed by OCR. Consider renaming to include the extension (e.g., 'photo${expectedExt}.ref.json').`
252
+ );
253
+ }
254
+ }
255
+ }
256
+ }
257
+ function formatBytes(bytes) {
258
+ if (bytes === 0) return "0 B";
259
+ const k = 1024;
260
+ const sizes = ["B", "KB", "MB", "GB", "TB"];
261
+ const i = Math.floor(Math.log(bytes) / Math.log(k));
262
+ return `${(bytes / Math.pow(k, i)).toFixed(2)} ${sizes[i]}`;
263
+ }
264
+ function validateCustomPrompts(prompts) {
265
+ if (!prompts) return;
266
+ const MAX_LENGTH = 5e4;
267
+ const MAX_TOTAL_LENGTH = 75e3;
268
+ const fields = [
269
+ "general",
270
+ "reorganization",
271
+ "pinax",
272
+ "description",
273
+ "cheimarros"
274
+ ];
275
+ let totalLength = 0;
276
+ for (const field of fields) {
277
+ const value = prompts[field];
278
+ if (value) {
279
+ if (value.length > MAX_LENGTH) {
280
+ throw new ValidationError(
281
+ `Custom prompt '${field}' exceeds maximum length of ${MAX_LENGTH} characters (current: ${value.length})`,
282
+ "customPrompts"
283
+ );
284
+ }
285
+ totalLength += value.length;
286
+ }
287
+ }
288
+ if (totalLength > MAX_TOTAL_LENGTH) {
289
+ throw new ValidationError(
290
+ `Total custom prompts length (${totalLength}) exceeds maximum of ${MAX_TOTAL_LENGTH} characters`,
291
+ "customPrompts"
292
+ );
293
+ }
294
+ }
295
+ function validateCustomPromptsLocation(processingConfig) {
296
+ if (!processingConfig) return;
297
+ if ("customPrompts" in processingConfig) {
298
+ throw new ValidationError(
299
+ "customPrompts must be a top-level field in UploaderConfig, not inside the processing config. Use: new ArkeUploader({ customPrompts: {...}, processing: {...} }) NOT: new ArkeUploader({ processing: { customPrompts: {...} } })",
300
+ "processing"
301
+ );
302
+ }
303
+ }
304
+ var MAX_FILE_SIZE, MAX_BATCH_SIZE, INVALID_PATH_CHARS, OCR_PROCESSABLE_TYPES;
305
+ var init_validation = __esm({
306
+ "src/upload/lib/validation.ts"() {
307
+ "use strict";
308
+ init_errors();
309
+ MAX_FILE_SIZE = 5 * 1024 * 1024 * 1024;
310
+ MAX_BATCH_SIZE = 100 * 1024 * 1024 * 1024;
311
+ INVALID_PATH_CHARS = /[<>:"|?*\x00-\x1f]/;
312
+ OCR_PROCESSABLE_TYPES = [
313
+ "image/jpeg",
314
+ "image/png",
315
+ "image/webp"
316
+ ];
317
+ }
318
+ });
319
+
320
+ // src/upload/utils/hash.ts
321
+ async function computeFileCID(filePath) {
322
+ const fs2 = await import("fs/promises");
323
+ try {
324
+ const fileBuffer = await fs2.readFile(filePath);
325
+ const hash = await import_sha2.sha256.digest(fileBuffer);
326
+ const cid = import_cid.CID.create(1, raw.code, hash);
327
+ return cid.toString();
328
+ } catch (error) {
329
+ throw new Error(`CID computation failed: ${error.message}`);
330
+ }
331
+ }
332
+ async function computeCIDFromBuffer(data) {
333
+ const hash = await import_sha2.sha256.digest(data);
334
+ const cid = import_cid.CID.create(1, raw.code, hash);
335
+ return cid.toString();
336
+ }
337
+ var import_cid, raw, import_sha2;
338
+ var init_hash = __esm({
339
+ "src/upload/utils/hash.ts"() {
340
+ "use strict";
341
+ import_cid = require("multiformats/cid");
342
+ raw = __toESM(require("multiformats/codecs/raw"), 1);
343
+ import_sha2 = require("multiformats/hashes/sha2");
344
+ }
345
+ });
346
+
347
+ // src/upload/types/processing.ts
348
+ var DEFAULT_PROCESSING_CONFIG;
349
+ var init_processing = __esm({
350
+ "src/upload/types/processing.ts"() {
351
+ "use strict";
352
+ DEFAULT_PROCESSING_CONFIG = {
353
+ ocr: true,
354
+ describe: true,
355
+ pinax: true
356
+ };
357
+ }
358
+ });
359
+
360
+ // src/upload/platforms/node.ts
361
+ var node_exports = {};
362
+ __export(node_exports, {
363
+ NodeScanner: () => NodeScanner
364
+ });
365
+ var import_promises, import_path, NodeScanner;
366
+ var init_node = __esm({
367
+ "src/upload/platforms/node.ts"() {
368
+ "use strict";
369
+ import_promises = __toESM(require("fs/promises"), 1);
370
+ import_path = __toESM(require("path"), 1);
371
+ init_errors();
372
+ init_validation();
373
+ init_hash();
374
+ init_processing();
375
+ init_common();
376
+ NodeScanner = class {
377
+ /**
378
+ * Scan directory recursively and collect file metadata
379
+ */
380
+ async scanFiles(source, options) {
381
+ const dirPath = Array.isArray(source) ? source[0] : source;
382
+ if (!dirPath || typeof dirPath !== "string") {
383
+ throw new ScanError("Node.js scanner requires a directory path", "");
384
+ }
385
+ const files = [];
386
+ try {
387
+ const stats = await import_promises.default.stat(dirPath);
388
+ if (!stats.isDirectory()) {
389
+ throw new ScanError(`Path is not a directory: ${dirPath}`, dirPath);
390
+ }
391
+ } catch (error) {
392
+ if (error.code === "ENOENT") {
393
+ throw new ScanError(`Directory not found: ${dirPath}`, dirPath);
394
+ }
395
+ throw new ScanError(`Cannot access directory: ${error.message}`, dirPath);
396
+ }
397
+ validateLogicalPath(options.rootPath);
398
+ const globalProcessingConfig = options.defaultProcessingConfig || DEFAULT_PROCESSING_CONFIG;
399
+ async function loadDirectoryProcessingConfig(dirPath2) {
400
+ const configPath = import_path.default.join(dirPath2, ".arke-process.json");
401
+ try {
402
+ const content = await import_promises.default.readFile(configPath, "utf-8");
403
+ return JSON.parse(content);
404
+ } catch (error) {
405
+ if (error.code !== "ENOENT") {
406
+ console.warn(`Error reading processing config ${configPath}: ${error.message}`);
407
+ }
408
+ return null;
409
+ }
410
+ }
411
+ function mergeProcessingConfig(defaults, override) {
412
+ if (!override) return defaults;
413
+ return {
414
+ ocr: override.ocr ?? defaults.ocr,
415
+ describe: override.describe ?? defaults.describe,
416
+ pinax: override.pinax ?? defaults.pinax
417
+ };
418
+ }
419
+ async function walk(currentPath, relativePath = "") {
420
+ const dirConfigOverride = await loadDirectoryProcessingConfig(currentPath);
421
+ const currentProcessingConfig = mergeProcessingConfig(
422
+ globalProcessingConfig,
423
+ dirConfigOverride
424
+ );
425
+ let entries;
426
+ try {
427
+ entries = await import_promises.default.readdir(currentPath, { withFileTypes: true });
428
+ } catch (error) {
429
+ console.warn(`Cannot read directory: ${currentPath}`, error.message);
430
+ return;
431
+ }
432
+ for (const entry of entries) {
433
+ const fullPath = import_path.default.join(currentPath, entry.name);
434
+ const relPath = import_path.default.join(relativePath, entry.name);
435
+ try {
436
+ if (entry.isSymbolicLink()) {
437
+ if (!options.followSymlinks) {
438
+ continue;
439
+ }
440
+ const stats = await import_promises.default.stat(fullPath);
441
+ if (stats.isDirectory()) {
442
+ await walk(fullPath, relPath);
443
+ } else if (stats.isFile()) {
444
+ await processFile(fullPath, relPath, stats.size, currentProcessingConfig);
445
+ }
446
+ continue;
447
+ }
448
+ if (entry.isDirectory()) {
449
+ await walk(fullPath, relPath);
450
+ continue;
451
+ }
452
+ if (entry.isFile()) {
453
+ const stats = await import_promises.default.stat(fullPath);
454
+ await processFile(fullPath, relPath, stats.size, currentProcessingConfig);
455
+ }
456
+ } catch (error) {
457
+ if (error instanceof ScanError && error.message.includes(".ref.json")) {
458
+ throw error;
459
+ }
460
+ console.warn(`Error processing ${fullPath}: ${error.message}`);
461
+ continue;
462
+ }
463
+ }
464
+ }
465
+ async function processFile(fullPath, relativePath, size, processingConfig) {
466
+ const fileName = import_path.default.basename(fullPath);
467
+ if (fileName === ".arke-process.json") {
468
+ return;
469
+ }
470
+ if (fileName.endsWith(".ref.json")) {
471
+ try {
472
+ const content = await import_promises.default.readFile(fullPath, "utf-8");
473
+ validateRefJson(content, fileName, console);
474
+ } catch (error) {
475
+ throw new ScanError(
476
+ `Invalid .ref.json file: ${fileName} - ${error.message}`,
477
+ fullPath
478
+ );
479
+ }
480
+ }
481
+ try {
482
+ validateFileSize(size);
483
+ } catch (error) {
484
+ console.warn(`Skipping file that exceeds size limit: ${fileName}`, error.message);
485
+ return;
486
+ }
487
+ const normalizedRelPath = normalizePath(relativePath);
488
+ const logicalPath = import_path.default.posix.join(options.rootPath, normalizedRelPath);
489
+ try {
490
+ validateLogicalPath(logicalPath);
491
+ } catch (error) {
492
+ console.warn(`Skipping file with invalid logical path: ${logicalPath}`, error.message);
493
+ return;
494
+ }
495
+ const contentType = getMimeType(fileName);
496
+ try {
497
+ await import_promises.default.access(fullPath, import_promises.default.constants.R_OK);
498
+ } catch (error) {
499
+ console.warn(`Skipping unreadable file: ${fullPath}`);
500
+ return;
501
+ }
502
+ let cid;
503
+ try {
504
+ cid = await computeFileCID(fullPath);
505
+ } catch (error) {
506
+ console.warn(`Warning: CID computation failed for ${fullPath}, continuing without CID:`, error.message);
507
+ cid = void 0;
508
+ }
509
+ files.push({
510
+ localPath: fullPath,
511
+ logicalPath,
512
+ fileName,
513
+ size,
514
+ contentType,
515
+ cid,
516
+ processingConfig
517
+ });
518
+ }
519
+ await walk(dirPath);
520
+ files.sort((a, b) => a.size - b.size);
521
+ return files;
522
+ }
523
+ /**
524
+ * Read file contents as ArrayBuffer
525
+ */
526
+ async readFile(file) {
527
+ const buffer = await import_promises.default.readFile(file.localPath);
528
+ return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
529
+ }
530
+ };
531
+ }
532
+ });
533
+
534
+ // src/upload/platforms/browser.ts
535
+ var browser_exports = {};
536
+ __export(browser_exports, {
537
+ BrowserScanner: () => BrowserScanner
538
+ });
539
+ var BrowserScanner;
540
+ var init_browser = __esm({
541
+ "src/upload/platforms/browser.ts"() {
542
+ "use strict";
543
+ init_errors();
544
+ init_validation();
545
+ init_hash();
546
+ init_processing();
547
+ init_common();
548
+ BrowserScanner = class {
549
+ /**
550
+ * Scan files from File or FileList
551
+ */
552
+ async scanFiles(source, options) {
553
+ const fileList = Array.isArray(source) ? source : [source];
554
+ if (fileList.length === 0) {
555
+ throw new ScanError("No files provided", "");
556
+ }
557
+ validateLogicalPath(options.rootPath);
558
+ const globalProcessingConfig = options.defaultProcessingConfig || DEFAULT_PROCESSING_CONFIG;
559
+ const files = [];
560
+ for (const file of fileList) {
561
+ try {
562
+ const fileInfo = await this.processFile(file, options.rootPath, globalProcessingConfig);
563
+ if (fileInfo) {
564
+ files.push(fileInfo);
565
+ }
566
+ } catch (error) {
567
+ console.warn(`Error processing ${file.name}: ${error.message}`);
568
+ continue;
569
+ }
570
+ }
571
+ files.sort((a, b) => a.size - b.size);
572
+ return files;
573
+ }
574
+ /**
575
+ * Process a single File object
576
+ */
577
+ async processFile(file, rootPath, processingConfig) {
578
+ const fileName = file.name;
579
+ const size = file.size;
580
+ if (fileName.startsWith(".")) {
581
+ return null;
582
+ }
583
+ const skipFiles = ["Thumbs.db", "desktop.ini", "__MACOSX"];
584
+ if (skipFiles.includes(fileName)) {
585
+ return null;
586
+ }
587
+ if (fileName === ".arke-process.json") {
588
+ return null;
589
+ }
590
+ try {
591
+ validateFileSize(size);
592
+ } catch (error) {
593
+ console.warn(`Skipping file that exceeds size limit: ${fileName}`, error.message);
594
+ return null;
595
+ }
596
+ let relativePath = "";
597
+ if ("webkitRelativePath" in file && file.webkitRelativePath) {
598
+ const parts = file.webkitRelativePath.split("/");
599
+ if (parts.length > 1) {
600
+ relativePath = parts.slice(1).join("/");
601
+ } else {
602
+ relativePath = fileName;
603
+ }
604
+ } else {
605
+ relativePath = fileName;
606
+ }
607
+ const normalizedRelPath = normalizePath(relativePath);
608
+ const logicalPath = `${rootPath}/${normalizedRelPath}`.replace(/\/+/g, "/");
609
+ try {
610
+ validateLogicalPath(logicalPath);
611
+ } catch (error) {
612
+ console.warn(`Skipping file with invalid logical path: ${logicalPath}`, error.message);
613
+ return null;
614
+ }
615
+ const contentType = file.type || getMimeType(fileName);
616
+ let cid;
617
+ try {
618
+ const buffer = await file.arrayBuffer();
619
+ cid = await computeCIDFromBuffer(new Uint8Array(buffer));
620
+ } catch (error) {
621
+ console.warn(`Warning: CID computation failed for ${fileName}, continuing without CID:`, error.message);
622
+ cid = void 0;
623
+ }
624
+ return {
625
+ localPath: `__browser_file__${fileName}`,
626
+ // Special marker for browser files
627
+ logicalPath,
628
+ fileName,
629
+ size,
630
+ contentType,
631
+ cid,
632
+ processingConfig
633
+ };
634
+ }
635
+ /**
636
+ * Read file contents as ArrayBuffer
637
+ * Note: In browser context, the File object should be passed directly
638
+ */
639
+ async readFile(file) {
640
+ throw new Error("Browser scanner requires File objects to be provided directly during upload");
641
+ }
642
+ };
643
+ }
644
+ });
645
+
646
+ // src/index.ts
647
+ var src_exports = {};
648
+ __export(src_exports, {
649
+ ArkeUploader: () => ArkeUploader,
650
+ CollectionsClient: () => CollectionsClient,
651
+ CollectionsError: () => CollectionsError,
652
+ NetworkError: () => NetworkError,
653
+ ScanError: () => ScanError,
654
+ UploadClient: () => UploadClient,
655
+ UploadError: () => UploadError,
656
+ ValidationError: () => ValidationError,
657
+ WorkerAPIError: () => WorkerAPIError
658
+ });
659
+ module.exports = __toCommonJS(src_exports);
660
+
661
+ // src/collections/errors.ts
662
+ var CollectionsError = class extends Error {
663
+ constructor(message, code2 = "UNKNOWN_ERROR", details) {
664
+ super(message);
665
+ this.code = code2;
666
+ this.details = details;
667
+ this.name = "CollectionsError";
668
+ }
669
+ };
670
+
671
+ // src/collections/client.ts
672
+ var CollectionsClient = class {
673
+ constructor(config) {
674
+ this.baseUrl = config.gatewayUrl.replace(/\/$/, "");
675
+ this.authToken = config.authToken;
676
+ this.fetchImpl = config.fetchImpl ?? fetch;
677
+ }
678
+ setAuthToken(token) {
679
+ this.authToken = token;
680
+ }
681
+ // ---------------------------------------------------------------------------
682
+ // Request helpers
683
+ // ---------------------------------------------------------------------------
684
+ buildUrl(path2, query) {
685
+ const url = new URL(`${this.baseUrl}${path2}`);
686
+ if (query) {
687
+ Object.entries(query).forEach(([key, value]) => {
688
+ if (value !== void 0 && value !== null) {
689
+ url.searchParams.set(key, String(value));
690
+ }
691
+ });
692
+ }
693
+ return url.toString();
694
+ }
695
+ getHeaders(authRequired) {
696
+ const headers = { "Content-Type": "application/json" };
697
+ if (authRequired || this.authToken) {
698
+ if (!this.authToken && authRequired) {
699
+ throw new CollectionsError("Authentication required for this operation", "AUTH_REQUIRED");
700
+ }
701
+ if (this.authToken) {
702
+ headers["Authorization"] = `Bearer ${this.authToken}`;
703
+ }
704
+ }
705
+ return headers;
706
+ }
707
+ async request(path2, options = {}) {
708
+ const authRequired = options.authRequired ?? false;
709
+ const url = this.buildUrl(path2, options.query);
710
+ const headers = new Headers(this.getHeaders(authRequired));
711
+ if (options.headers) {
712
+ Object.entries(options.headers).forEach(([k, v]) => {
713
+ if (v !== void 0) headers.set(k, v);
714
+ });
715
+ }
716
+ const response = await this.fetchImpl(url, { ...options, headers });
717
+ if (response.ok) {
718
+ if (response.status === 204) {
719
+ return void 0;
720
+ }
721
+ const contentType = response.headers.get("content-type") || "";
722
+ if (contentType.includes("application/json")) {
723
+ return await response.json();
724
+ }
725
+ return await response.text();
726
+ }
727
+ let body;
728
+ const text = await response.text();
729
+ try {
730
+ body = JSON.parse(text);
731
+ } catch {
732
+ body = text;
733
+ }
734
+ const message = body?.error && typeof body.error === "string" ? body.error : `Request failed with status ${response.status}`;
735
+ throw new CollectionsError(message, "HTTP_ERROR", {
736
+ status: response.status,
737
+ body
738
+ });
739
+ }
740
+ // ---------------------------------------------------------------------------
741
+ // Collections
742
+ // ---------------------------------------------------------------------------
743
+ async listCollections(params) {
744
+ return this.request("/collections", {
745
+ method: "GET",
746
+ query: { limit: params?.limit, offset: params?.offset }
747
+ });
748
+ }
749
+ async getCollection(id) {
750
+ return this.request(`/collections/${id}`, { method: "GET" });
751
+ }
752
+ async getCollectionRoot(id) {
753
+ return this.request(`/collections/${id}/root`, { method: "GET" });
754
+ }
755
+ async getMyAccess(id) {
756
+ return this.request(`/collections/${id}/my-access`, { method: "GET", authRequired: true });
757
+ }
758
+ async createCollection(payload) {
759
+ return this.request("/collections", {
760
+ method: "POST",
761
+ authRequired: true,
762
+ body: JSON.stringify(payload)
763
+ });
764
+ }
765
+ async registerRoot(payload) {
766
+ return this.request("/collections/register-root", {
767
+ method: "POST",
768
+ authRequired: true,
769
+ body: JSON.stringify(payload)
770
+ });
771
+ }
772
+ async updateCollection(id, payload) {
773
+ return this.request(`/collections/${id}`, {
774
+ method: "PATCH",
775
+ authRequired: true,
776
+ body: JSON.stringify(payload)
777
+ });
778
+ }
779
+ async changeRoot(id, payload) {
780
+ return this.request(`/collections/${id}/change-root`, {
781
+ method: "PATCH",
782
+ authRequired: true,
783
+ body: JSON.stringify(payload)
784
+ });
785
+ }
786
+ async deleteCollection(id) {
787
+ return this.request(`/collections/${id}`, {
788
+ method: "DELETE",
789
+ authRequired: true
790
+ });
791
+ }
792
+ // ---------------------------------------------------------------------------
793
+ // Members
794
+ // ---------------------------------------------------------------------------
795
+ async listMembers(collectionId) {
796
+ return this.request(`/collections/${collectionId}/members`, { method: "GET" });
797
+ }
798
+ async updateMemberRole(collectionId, userId, role) {
799
+ return this.request(`/collections/${collectionId}/members/${userId}`, {
800
+ method: "PATCH",
801
+ authRequired: true,
802
+ body: JSON.stringify({ role })
803
+ });
804
+ }
805
+ async removeMember(collectionId, userId) {
806
+ return this.request(`/collections/${collectionId}/members/${userId}`, {
807
+ method: "DELETE",
808
+ authRequired: true
809
+ });
810
+ }
811
+ // ---------------------------------------------------------------------------
812
+ // Invitations
813
+ // ---------------------------------------------------------------------------
814
+ async createInvitation(collectionId, email, role) {
815
+ return this.request(`/collections/${collectionId}/invitations`, {
816
+ method: "POST",
817
+ authRequired: true,
818
+ body: JSON.stringify({ email, role })
819
+ });
820
+ }
821
+ async listInvitations(collectionId) {
822
+ return this.request(`/collections/${collectionId}/invitations`, {
823
+ method: "GET",
824
+ authRequired: true
825
+ });
826
+ }
827
+ async acceptInvitation(invitationId) {
828
+ return this.request(`/invitations/${invitationId}/accept`, {
829
+ method: "POST",
830
+ authRequired: true
831
+ });
832
+ }
833
+ async declineInvitation(invitationId) {
834
+ return this.request(`/invitations/${invitationId}/decline`, {
835
+ method: "POST",
836
+ authRequired: true
837
+ });
838
+ }
839
+ async revokeInvitation(invitationId) {
840
+ return this.request(`/invitations/${invitationId}`, {
841
+ method: "DELETE",
842
+ authRequired: true
843
+ });
844
+ }
845
+ // ---------------------------------------------------------------------------
846
+ // Current user
847
+ // ---------------------------------------------------------------------------
848
+ async getMyCollections() {
849
+ return this.request("/me/collections", { method: "GET", authRequired: true });
850
+ }
851
+ async getMyInvitations() {
852
+ return this.request("/me/invitations", { method: "GET", authRequired: true });
853
+ }
854
+ // ---------------------------------------------------------------------------
855
+ // PI permissions
856
+ // ---------------------------------------------------------------------------
857
+ async getPiPermissions(pi) {
858
+ return this.request(`/pi/${pi}/permissions`, { method: "GET" });
859
+ }
860
+ };
861
+
862
+ // src/upload/lib/worker-client-fetch.ts
863
+ init_errors();
864
+
865
+ // src/upload/utils/retry.ts
866
+ init_errors();
867
+ var DEFAULT_OPTIONS = {
868
+ maxRetries: 3,
869
+ initialDelay: 1e3,
870
+ // 1 second
871
+ maxDelay: 3e4,
872
+ // 30 seconds
873
+ shouldRetry: isRetryableError,
874
+ jitter: true
875
+ };
876
+ async function retryWithBackoff(fn, options = {}) {
877
+ const opts = { ...DEFAULT_OPTIONS, ...options };
878
+ let lastError;
879
+ for (let attempt = 0; attempt <= opts.maxRetries; attempt++) {
880
+ try {
881
+ return await fn();
882
+ } catch (error) {
883
+ lastError = error;
884
+ if (attempt >= opts.maxRetries) {
885
+ throw error;
886
+ }
887
+ if (opts.shouldRetry && !opts.shouldRetry(error)) {
888
+ throw error;
889
+ }
890
+ let delay;
891
+ if (error.statusCode === 429 && error.retryAfter) {
892
+ delay = Math.min(error.retryAfter * 1e3, opts.maxDelay);
893
+ } else {
894
+ delay = Math.min(
895
+ opts.initialDelay * Math.pow(2, attempt),
896
+ opts.maxDelay
897
+ );
898
+ }
899
+ if (opts.jitter) {
900
+ const jitterAmount = delay * 0.25;
901
+ delay = delay + (Math.random() * jitterAmount * 2 - jitterAmount);
902
+ }
903
+ await sleep(Math.floor(delay));
904
+ }
905
+ }
906
+ throw lastError;
907
+ }
908
+ function sleep(ms) {
909
+ return new Promise((resolve) => setTimeout(resolve, ms));
910
+ }
911
+
912
+ // src/upload/lib/worker-client-fetch.ts
913
+ var WorkerClient = class {
914
+ constructor(config) {
915
+ this.baseUrl = config.baseUrl.replace(/\/$/, "");
916
+ this.authToken = config.authToken;
917
+ this.timeout = config.timeout ?? 3e4;
918
+ this.maxRetries = config.maxRetries ?? 3;
919
+ this.retryInitialDelay = config.retryInitialDelay ?? 1e3;
920
+ this.retryMaxDelay = config.retryMaxDelay ?? 3e4;
921
+ this.retryJitter = config.retryJitter ?? true;
922
+ this.debug = config.debug ?? false;
923
+ }
924
+ setAuthToken(token) {
925
+ this.authToken = token;
926
+ }
927
+ /**
928
+ * Make HTTP request with fetch
929
+ */
930
+ async request(method, path2, body) {
931
+ const url = `${this.baseUrl}${path2}`;
932
+ if (this.debug) {
933
+ console.log(`HTTP Request: ${method} ${url}`, body);
934
+ }
935
+ try {
936
+ const controller = new AbortController();
937
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
938
+ const headers = {
939
+ "Content-Type": "application/json"
940
+ };
941
+ if (this.authToken) {
942
+ headers["Authorization"] = `Bearer ${this.authToken}`;
943
+ }
944
+ const response = await fetch(url, {
945
+ method,
946
+ headers,
947
+ body: body ? JSON.stringify(body) : void 0,
948
+ signal: controller.signal
949
+ });
950
+ clearTimeout(timeoutId);
951
+ const data = await response.json();
952
+ if (this.debug) {
953
+ console.log(`HTTP Response: ${response.status}`, data);
954
+ }
955
+ if (!response.ok) {
956
+ const errorData = data;
957
+ throw new WorkerAPIError(
958
+ errorData.error || "Request failed",
959
+ response.status,
960
+ errorData.details
961
+ );
962
+ }
963
+ return data;
964
+ } catch (error) {
965
+ if (error instanceof WorkerAPIError) {
966
+ throw error;
967
+ }
968
+ if (error.name === "AbortError") {
969
+ throw new NetworkError(`Request timeout after ${this.timeout}ms`);
970
+ }
971
+ throw new NetworkError(`Network request failed: ${error.message}`);
972
+ }
973
+ }
974
+ /**
975
+ * Initialize a new batch upload
976
+ */
977
+ async initBatch(params) {
978
+ return retryWithBackoff(
979
+ () => this.request("POST", "/ingest/batches/init", params),
980
+ {
981
+ maxRetries: this.maxRetries,
982
+ initialDelay: this.retryInitialDelay,
983
+ maxDelay: this.retryMaxDelay,
984
+ jitter: this.retryJitter
985
+ }
986
+ );
987
+ }
988
+ /**
989
+ * Request presigned URLs for a file upload
990
+ */
991
+ async startFileUpload(batchId, params) {
992
+ return retryWithBackoff(
993
+ () => this.request(
994
+ "POST",
995
+ `/ingest/batches/${batchId}/files/start`,
996
+ params
997
+ ),
998
+ {
999
+ maxRetries: this.maxRetries,
1000
+ initialDelay: this.retryInitialDelay,
1001
+ maxDelay: this.retryMaxDelay,
1002
+ jitter: this.retryJitter
1003
+ }
1004
+ );
1005
+ }
1006
+ /**
1007
+ * Mark a file upload as complete
1008
+ */
1009
+ async completeFileUpload(batchId, params) {
1010
+ return retryWithBackoff(
1011
+ () => this.request(
1012
+ "POST",
1013
+ `/ingest/batches/${batchId}/files/complete`,
1014
+ params
1015
+ ),
1016
+ {
1017
+ maxRetries: this.maxRetries,
1018
+ initialDelay: this.retryInitialDelay,
1019
+ maxDelay: this.retryMaxDelay,
1020
+ jitter: this.retryJitter
1021
+ }
1022
+ );
1023
+ }
1024
+ /**
1025
+ * Finalize the batch after all files are uploaded
1026
+ * Returns root_pi immediately for small batches, or status='discovery' for large batches
1027
+ */
1028
+ async finalizeBatch(batchId) {
1029
+ return retryWithBackoff(
1030
+ () => this.request(
1031
+ "POST",
1032
+ `/ingest/batches/${batchId}/finalize`,
1033
+ {}
1034
+ ),
1035
+ {
1036
+ maxRetries: this.maxRetries,
1037
+ initialDelay: this.retryInitialDelay,
1038
+ maxDelay: this.retryMaxDelay,
1039
+ jitter: this.retryJitter
1040
+ }
1041
+ );
1042
+ }
1043
+ /**
1044
+ * Get current batch status (used for polling during async discovery)
1045
+ */
1046
+ async getBatchStatus(batchId) {
1047
+ return retryWithBackoff(
1048
+ () => this.request(
1049
+ "GET",
1050
+ `/ingest/batches/${batchId}/status`
1051
+ ),
1052
+ {
1053
+ maxRetries: this.maxRetries,
1054
+ initialDelay: this.retryInitialDelay,
1055
+ maxDelay: this.retryMaxDelay,
1056
+ jitter: this.retryJitter
1057
+ }
1058
+ );
1059
+ }
1060
+ };
1061
+
1062
+ // src/upload/uploader.ts
1063
+ init_common();
1064
+
1065
+ // src/upload/lib/simple-fetch.ts
1066
+ init_errors();
1067
+ async function uploadSimple(fileData, presignedUrl, contentType, options = {}) {
1068
+ const { maxRetries = 3, retryInitialDelay, retryMaxDelay, retryJitter } = options;
1069
+ await retryWithBackoff(
1070
+ async () => {
1071
+ let response;
1072
+ try {
1073
+ response = await fetch(presignedUrl, {
1074
+ method: "PUT",
1075
+ body: fileData,
1076
+ headers: {
1077
+ ...contentType ? { "Content-Type": contentType } : {}
1078
+ }
1079
+ });
1080
+ } catch (error) {
1081
+ throw new UploadError(`Upload failed: ${error.message}`, void 0, void 0, error);
1082
+ }
1083
+ if (!response.ok) {
1084
+ const retryAfter = response.headers.get("retry-after");
1085
+ const error = new UploadError(
1086
+ `Upload failed with status ${response.status}: ${response.statusText}`,
1087
+ void 0,
1088
+ response.status
1089
+ );
1090
+ if (retryAfter && response.status === 429) {
1091
+ error.retryAfter = parseInt(retryAfter, 10);
1092
+ }
1093
+ throw error;
1094
+ }
1095
+ },
1096
+ {
1097
+ maxRetries,
1098
+ initialDelay: retryInitialDelay,
1099
+ maxDelay: retryMaxDelay,
1100
+ jitter: retryJitter
1101
+ }
1102
+ );
1103
+ }
1104
+
1105
+ // src/upload/lib/multipart-fetch.ts
1106
+ init_errors();
1107
+ var DEFAULT_PART_SIZE = 10 * 1024 * 1024;
1108
+ async function uploadMultipart(fileData, presignedUrls, concurrency = 3, options = {}) {
1109
+ const totalSize = fileData.byteLength;
1110
+ const partSize = Math.ceil(totalSize / presignedUrls.length);
1111
+ const parts = [];
1112
+ const queue = [];
1113
+ const { maxRetries = 3, retryInitialDelay, retryMaxDelay, retryJitter } = options;
1114
+ for (let i = 0; i < presignedUrls.length; i++) {
1115
+ const partNumber = i + 1;
1116
+ const start = i * partSize;
1117
+ const end = Math.min(start + partSize, totalSize);
1118
+ const partData = fileData.slice(start, end);
1119
+ const url = presignedUrls[i];
1120
+ queue.push(async () => {
1121
+ const etag = await uploadPart(partData, url, partNumber, maxRetries, {
1122
+ initialDelay: retryInitialDelay,
1123
+ maxDelay: retryMaxDelay,
1124
+ jitter: retryJitter
1125
+ });
1126
+ parts.push({ part_number: partNumber, etag });
1127
+ });
1128
+ }
1129
+ await executeWithConcurrency(queue, concurrency);
1130
+ parts.sort((a, b) => a.part_number - b.part_number);
1131
+ return parts;
1132
+ }
1133
+ async function uploadPart(partData, presignedUrl, partNumber, maxRetries = 3, retryOptions = {}) {
1134
+ return retryWithBackoff(
1135
+ async () => {
1136
+ let response;
1137
+ try {
1138
+ response = await fetch(presignedUrl, {
1139
+ method: "PUT",
1140
+ body: partData
1141
+ });
1142
+ } catch (error) {
1143
+ throw new UploadError(
1144
+ `Part ${partNumber} upload failed: ${error.message}`,
1145
+ void 0,
1146
+ void 0,
1147
+ error
1148
+ );
1149
+ }
1150
+ if (!response.ok) {
1151
+ const retryAfter = response.headers.get("retry-after");
1152
+ const error = new UploadError(
1153
+ `Part ${partNumber} upload failed with status ${response.status}: ${response.statusText}`,
1154
+ void 0,
1155
+ response.status
1156
+ );
1157
+ if (retryAfter && response.status === 429) {
1158
+ error.retryAfter = parseInt(retryAfter, 10);
1159
+ }
1160
+ throw error;
1161
+ }
1162
+ const etag = response.headers.get("etag");
1163
+ if (!etag) {
1164
+ throw new UploadError(
1165
+ `Part ${partNumber} upload succeeded but no ETag returned`,
1166
+ void 0,
1167
+ response.status
1168
+ );
1169
+ }
1170
+ return etag.replace(/"/g, "");
1171
+ },
1172
+ {
1173
+ maxRetries,
1174
+ initialDelay: retryOptions.initialDelay,
1175
+ maxDelay: retryOptions.maxDelay,
1176
+ jitter: retryOptions.jitter
1177
+ }
1178
+ );
1179
+ }
1180
+ async function executeWithConcurrency(tasks, concurrency) {
1181
+ const queue = [...tasks];
1182
+ const workers = [];
1183
+ const processNext = async () => {
1184
+ while (queue.length > 0) {
1185
+ const task = queue.shift();
1186
+ await task();
1187
+ }
1188
+ };
1189
+ for (let i = 0; i < Math.min(concurrency, tasks.length); i++) {
1190
+ workers.push(processNext());
1191
+ }
1192
+ await Promise.all(workers);
1193
+ }
1194
+
1195
+ // src/upload/uploader.ts
1196
+ init_errors();
1197
+ init_validation();
1198
+ var MULTIPART_THRESHOLD = 5 * 1024 * 1024;
1199
+ var ArkeUploader = class {
1200
+ constructor(config) {
1201
+ this.scanner = null;
1202
+ validateCustomPromptsLocation(config.processing);
1203
+ this.config = {
1204
+ rootPath: "/uploads",
1205
+ // Must have at least one segment (not just '/')
1206
+ parallelUploads: 5,
1207
+ parallelParts: 3,
1208
+ ...config
1209
+ };
1210
+ this.workerClient = new WorkerClient({
1211
+ baseUrl: config.gatewayUrl,
1212
+ authToken: config.authToken,
1213
+ timeout: config.timeout,
1214
+ maxRetries: config.maxRetries,
1215
+ retryInitialDelay: config.retryInitialDelay,
1216
+ retryMaxDelay: config.retryMaxDelay,
1217
+ retryJitter: config.retryJitter,
1218
+ debug: false
1219
+ });
1220
+ this.platform = detectPlatform();
1221
+ }
1222
+ /**
1223
+ * Get platform-specific scanner
1224
+ */
1225
+ async getScanner() {
1226
+ if (this.scanner) {
1227
+ return this.scanner;
1228
+ }
1229
+ if (this.platform === "node") {
1230
+ const { NodeScanner: NodeScanner2 } = await Promise.resolve().then(() => (init_node(), node_exports));
1231
+ this.scanner = new NodeScanner2();
1232
+ } else if (this.platform === "browser") {
1233
+ const { BrowserScanner: BrowserScanner2 } = await Promise.resolve().then(() => (init_browser(), browser_exports));
1234
+ this.scanner = new BrowserScanner2();
1235
+ } else {
1236
+ throw new ValidationError("Unsupported platform");
1237
+ }
1238
+ return this.scanner;
1239
+ }
1240
+ /**
1241
+ * Upload a batch of files
1242
+ * @param source - Directory path (Node.js) or File[]/FileList (browser)
1243
+ * @param options - Upload options
1244
+ */
1245
+ async uploadBatch(source, options = {}) {
1246
+ const startTime = Date.now();
1247
+ const { onProgress, dryRun = false } = options;
1248
+ this.reportProgress(onProgress, {
1249
+ phase: "scanning",
1250
+ filesTotal: 0,
1251
+ filesUploaded: 0,
1252
+ bytesTotal: 0,
1253
+ bytesUploaded: 0,
1254
+ percentComplete: 0
1255
+ });
1256
+ const scanner = await this.getScanner();
1257
+ const files = await scanner.scanFiles(source, {
1258
+ rootPath: this.config.rootPath || "/",
1259
+ followSymlinks: true,
1260
+ defaultProcessingConfig: this.config.processing
1261
+ });
1262
+ if (files.length === 0) {
1263
+ throw new ValidationError("No files found to upload");
1264
+ }
1265
+ const totalSize = files.reduce((sum, f) => sum + f.size, 0);
1266
+ validateBatchSize(totalSize);
1267
+ if (this.config.customPrompts) {
1268
+ validateCustomPrompts(this.config.customPrompts);
1269
+ const promptFields = Object.keys(this.config.customPrompts).filter(
1270
+ (key) => this.config.customPrompts[key]
1271
+ );
1272
+ console.log(`[Arke Upload SDK] Custom prompts configured: ${promptFields.join(", ")}`);
1273
+ }
1274
+ if (dryRun) {
1275
+ return {
1276
+ batchId: "dry-run",
1277
+ rootPi: "dry-run",
1278
+ filesUploaded: files.length,
1279
+ bytesUploaded: totalSize,
1280
+ durationMs: Date.now() - startTime
1281
+ };
1282
+ }
1283
+ const { batch_id } = await this.workerClient.initBatch({
1284
+ uploader: this.config.uploader,
1285
+ root_path: this.config.rootPath || "/",
1286
+ parent_pi: this.config.parentPi || "",
1287
+ metadata: this.config.metadata,
1288
+ file_count: files.length,
1289
+ total_size: totalSize,
1290
+ custom_prompts: this.config.customPrompts
1291
+ });
1292
+ if (this.config.customPrompts) {
1293
+ console.log(`[Arke Upload SDK] Custom prompts sent to worker for batch ${batch_id}`);
1294
+ }
1295
+ this.reportProgress(onProgress, {
1296
+ phase: "uploading",
1297
+ filesTotal: files.length,
1298
+ filesUploaded: 0,
1299
+ bytesTotal: totalSize,
1300
+ bytesUploaded: 0,
1301
+ percentComplete: 0
1302
+ });
1303
+ let filesUploaded = 0;
1304
+ let bytesUploaded = 0;
1305
+ const { failedFiles } = await this.uploadFilesWithConcurrency(
1306
+ batch_id,
1307
+ files,
1308
+ source,
1309
+ this.config.parallelUploads || 5,
1310
+ (file, bytes) => {
1311
+ filesUploaded++;
1312
+ bytesUploaded += bytes;
1313
+ this.reportProgress(onProgress, {
1314
+ phase: "uploading",
1315
+ filesTotal: files.length,
1316
+ filesUploaded,
1317
+ bytesTotal: totalSize,
1318
+ bytesUploaded,
1319
+ currentFile: file.fileName,
1320
+ percentComplete: Math.round(bytesUploaded / totalSize * 100)
1321
+ });
1322
+ }
1323
+ );
1324
+ if (failedFiles.length === files.length) {
1325
+ throw new ValidationError(
1326
+ `All ${files.length} files failed to upload. First error: ${failedFiles[0]?.error || "Unknown"}`
1327
+ );
1328
+ }
1329
+ if (failedFiles.length > 0) {
1330
+ console.warn(
1331
+ `Warning: ${failedFiles.length} of ${files.length} files failed to upload:`,
1332
+ failedFiles.map((f) => `${f.file.fileName}: ${f.error}`).join(", ")
1333
+ );
1334
+ }
1335
+ this.reportProgress(onProgress, {
1336
+ phase: "finalizing",
1337
+ filesTotal: files.length,
1338
+ filesUploaded,
1339
+ bytesTotal: totalSize,
1340
+ bytesUploaded,
1341
+ percentComplete: 95
1342
+ });
1343
+ const finalizeResult = await this.workerClient.finalizeBatch(batch_id);
1344
+ let rootPi;
1345
+ if (finalizeResult.root_pi) {
1346
+ rootPi = finalizeResult.root_pi;
1347
+ } else if (finalizeResult.status === "discovery") {
1348
+ this.reportProgress(onProgress, {
1349
+ phase: "discovery",
1350
+ filesTotal: files.length,
1351
+ filesUploaded,
1352
+ bytesTotal: totalSize,
1353
+ bytesUploaded,
1354
+ percentComplete: 97
1355
+ });
1356
+ rootPi = await this.pollForRootPi(batch_id, onProgress, files.length, totalSize, bytesUploaded);
1357
+ } else {
1358
+ throw new ValidationError(
1359
+ `Finalization returned unexpected status: ${finalizeResult.status} without root_pi`
1360
+ );
1361
+ }
1362
+ this.reportProgress(onProgress, {
1363
+ phase: "complete",
1364
+ filesTotal: files.length,
1365
+ filesUploaded,
1366
+ bytesTotal: totalSize,
1367
+ bytesUploaded,
1368
+ percentComplete: 100
1369
+ });
1370
+ return {
1371
+ batchId: batch_id,
1372
+ rootPi,
1373
+ filesUploaded,
1374
+ bytesUploaded,
1375
+ durationMs: Date.now() - startTime
1376
+ };
1377
+ }
1378
+ /**
1379
+ * Poll for root_pi during async discovery
1380
+ */
1381
+ async pollForRootPi(batchId, onProgress, filesTotal, bytesTotal, bytesUploaded) {
1382
+ const POLL_INTERVAL_MS = 2e3;
1383
+ const MAX_POLL_TIME_MS = 30 * 60 * 1e3;
1384
+ const startTime = Date.now();
1385
+ while (Date.now() - startTime < MAX_POLL_TIME_MS) {
1386
+ const status = await this.workerClient.getBatchStatus(batchId);
1387
+ if (status.root_pi) {
1388
+ return status.root_pi;
1389
+ }
1390
+ if (status.status === "failed") {
1391
+ throw new ValidationError(`Batch discovery failed`);
1392
+ }
1393
+ if (status.discovery_progress && onProgress) {
1394
+ const { total, published } = status.discovery_progress;
1395
+ const discoveryPercent = total > 0 ? Math.round(published / total * 100) : 0;
1396
+ this.reportProgress(onProgress, {
1397
+ phase: "discovery",
1398
+ filesTotal,
1399
+ filesUploaded: filesTotal,
1400
+ bytesTotal,
1401
+ bytesUploaded,
1402
+ percentComplete: 95 + Math.round(discoveryPercent * 0.04)
1403
+ // 95-99%
1404
+ });
1405
+ }
1406
+ await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
1407
+ }
1408
+ throw new ValidationError(`Discovery timed out after ${MAX_POLL_TIME_MS / 1e3} seconds`);
1409
+ }
1410
+ /**
1411
+ * Upload files with controlled concurrency
1412
+ */
1413
+ async uploadFilesWithConcurrency(batchId, files, source, concurrency, onFileComplete) {
1414
+ const queue = [...files];
1415
+ const workers = [];
1416
+ const failedFiles = [];
1417
+ const processNext = async () => {
1418
+ while (queue.length > 0) {
1419
+ const file = queue.shift();
1420
+ try {
1421
+ await this.uploadSingleFile(batchId, file, source);
1422
+ onFileComplete(file, file.size);
1423
+ } catch (error) {
1424
+ const errorMessage = error.message || "Unknown error";
1425
+ console.error(`Failed to upload ${file.fileName}: ${errorMessage}`);
1426
+ failedFiles.push({ file, error: errorMessage });
1427
+ }
1428
+ }
1429
+ };
1430
+ for (let i = 0; i < Math.min(concurrency, files.length); i++) {
1431
+ workers.push(processNext());
1432
+ }
1433
+ await Promise.all(workers);
1434
+ return { failedFiles };
1435
+ }
1436
+ /**
1437
+ * Upload a single file
1438
+ */
1439
+ async uploadSingleFile(batchId, file, source) {
1440
+ const uploadInfo = await this.workerClient.startFileUpload(batchId, {
1441
+ file_name: file.fileName,
1442
+ file_size: file.size,
1443
+ logical_path: file.logicalPath,
1444
+ content_type: file.contentType,
1445
+ cid: file.cid,
1446
+ processing_config: file.processingConfig
1447
+ });
1448
+ const fileData = await this.getFileData(file, source);
1449
+ const retryOptions = {
1450
+ maxRetries: this.config.maxRetries,
1451
+ retryInitialDelay: this.config.retryInitialDelay,
1452
+ retryMaxDelay: this.config.retryMaxDelay,
1453
+ retryJitter: this.config.retryJitter
1454
+ };
1455
+ if (uploadInfo.upload_type === "simple") {
1456
+ await uploadSimple(fileData, uploadInfo.presigned_url, file.contentType, retryOptions);
1457
+ } else {
1458
+ const partUrls = uploadInfo.presigned_urls.map((p) => p.url);
1459
+ const parts = await uploadMultipart(
1460
+ fileData,
1461
+ partUrls,
1462
+ this.config.parallelParts || 3,
1463
+ retryOptions
1464
+ );
1465
+ await this.workerClient.completeFileUpload(batchId, {
1466
+ r2_key: uploadInfo.r2_key,
1467
+ upload_id: uploadInfo.upload_id,
1468
+ parts
1469
+ });
1470
+ return;
1471
+ }
1472
+ await this.workerClient.completeFileUpload(batchId, {
1473
+ r2_key: uploadInfo.r2_key
1474
+ });
1475
+ }
1476
+ /**
1477
+ * Get file data based on platform
1478
+ */
1479
+ async getFileData(file, source) {
1480
+ if (this.platform === "node") {
1481
+ const fs2 = await import("fs/promises");
1482
+ const buffer = await fs2.readFile(file.localPath);
1483
+ return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
1484
+ } else if (this.platform === "browser") {
1485
+ const files = Array.isArray(source) ? source : [source];
1486
+ const browserFile = files.find(
1487
+ (f) => f instanceof File && f.name === file.fileName
1488
+ );
1489
+ if (!browserFile) {
1490
+ throw new Error(`Could not find browser File object for ${file.fileName}`);
1491
+ }
1492
+ return browserFile.arrayBuffer();
1493
+ }
1494
+ throw new Error("Unsupported platform for file reading");
1495
+ }
1496
+ /**
1497
+ * Report progress to callback
1498
+ */
1499
+ reportProgress(callback, progress) {
1500
+ if (callback) {
1501
+ callback(progress);
1502
+ }
1503
+ }
1504
+ };
1505
+
1506
+ // src/upload/client.ts
1507
+ init_errors();
1508
+ function getUserIdFromToken(token) {
1509
+ try {
1510
+ const parts = token.split(".");
1511
+ if (parts.length !== 3) return null;
1512
+ const payload = parts[1].replace(/-/g, "+").replace(/_/g, "/");
1513
+ let decoded;
1514
+ if (typeof atob === "function") {
1515
+ decoded = atob(payload);
1516
+ } else {
1517
+ decoded = Buffer.from(payload, "base64").toString("utf-8");
1518
+ }
1519
+ const data = JSON.parse(decoded);
1520
+ return data.sub || null;
1521
+ } catch {
1522
+ return null;
1523
+ }
1524
+ }
1525
+ var UploadClient = class {
1526
+ constructor(config) {
1527
+ const uploader = config.uploader || getUserIdFromToken(config.authToken) || "unknown";
1528
+ this.config = { ...config, uploader };
1529
+ this.collectionsClient = new CollectionsClient({
1530
+ gatewayUrl: config.gatewayUrl,
1531
+ authToken: config.authToken,
1532
+ fetchImpl: config.fetchImpl
1533
+ });
1534
+ }
1535
+ /**
1536
+ * Update the auth token (e.g., after token refresh)
1537
+ */
1538
+ setAuthToken(token) {
1539
+ this.config = { ...this.config, authToken: token };
1540
+ this.collectionsClient.setAuthToken(token);
1541
+ }
1542
+ /**
1543
+ * Create a new collection and upload files to it
1544
+ *
1545
+ * Anyone authenticated can create a new collection.
1546
+ * The root PI of the uploaded files becomes the collection's root.
1547
+ */
1548
+ async createCollection(options) {
1549
+ const { files, collectionMetadata, customPrompts, processing, onProgress, dryRun } = options;
1550
+ const metadata = {
1551
+ ...collectionMetadata,
1552
+ visibility: collectionMetadata.visibility || "public"
1553
+ };
1554
+ const uploader = new ArkeUploader({
1555
+ gatewayUrl: this.config.gatewayUrl,
1556
+ authToken: this.config.authToken,
1557
+ uploader: this.config.uploader,
1558
+ customPrompts,
1559
+ processing
1560
+ });
1561
+ const batchResult = await uploader.uploadBatch(files, {
1562
+ onProgress,
1563
+ dryRun
1564
+ });
1565
+ if (dryRun) {
1566
+ return {
1567
+ ...batchResult,
1568
+ collection: {
1569
+ id: "dry-run",
1570
+ title: metadata.title,
1571
+ slug: metadata.slug,
1572
+ description: metadata.description,
1573
+ visibility: metadata.visibility,
1574
+ rootPi: "dry-run"
1575
+ }
1576
+ };
1577
+ }
1578
+ const collection = await this.collectionsClient.registerRoot({
1579
+ ...metadata,
1580
+ rootPi: batchResult.rootPi
1581
+ });
1582
+ return {
1583
+ ...batchResult,
1584
+ collection
1585
+ };
1586
+ }
1587
+ /**
1588
+ * Add files to an existing collection
1589
+ *
1590
+ * Requires owner or editor role on the collection containing the parent PI.
1591
+ * Use this to add a folder or files to an existing collection hierarchy.
1592
+ */
1593
+ async addToCollection(options) {
1594
+ const { files, parentPi, customPrompts, processing, onProgress, dryRun } = options;
1595
+ if (!dryRun) {
1596
+ const permissions = await this.collectionsClient.getPiPermissions(parentPi);
1597
+ if (!permissions.canEdit) {
1598
+ if (!permissions.collection) {
1599
+ throw new ValidationError(
1600
+ `Cannot add files: PI "${parentPi}" is not part of any collection`
1601
+ );
1602
+ }
1603
+ throw new ValidationError(
1604
+ `Cannot add files to collection "${permissions.collection.title}": you need editor or owner role (current role: ${permissions.collection.role || "none"})`
1605
+ );
1606
+ }
1607
+ }
1608
+ const uploader = new ArkeUploader({
1609
+ gatewayUrl: this.config.gatewayUrl,
1610
+ authToken: this.config.authToken,
1611
+ uploader: this.config.uploader,
1612
+ parentPi,
1613
+ customPrompts,
1614
+ processing
1615
+ });
1616
+ return uploader.uploadBatch(files, {
1617
+ onProgress,
1618
+ dryRun
1619
+ });
1620
+ }
1621
+ /**
1622
+ * Check if you can edit a specific PI (i.e., add files to its collection)
1623
+ */
1624
+ async canEdit(pi) {
1625
+ return this.collectionsClient.getPiPermissions(pi);
1626
+ }
1627
+ /**
1628
+ * Get access to the underlying collections client for other operations
1629
+ */
1630
+ get collections() {
1631
+ return this.collectionsClient;
1632
+ }
1633
+ };
1634
+
1635
+ // src/index.ts
1636
+ init_errors();
1637
+ // Annotate the CommonJS export names for ESM import in node:
1638
+ 0 && (module.exports = {
1639
+ ArkeUploader,
1640
+ CollectionsClient,
1641
+ CollectionsError,
1642
+ NetworkError,
1643
+ ScanError,
1644
+ UploadClient,
1645
+ UploadError,
1646
+ ValidationError,
1647
+ WorkerAPIError
1648
+ });
1649
+ //# sourceMappingURL=index.cjs.map
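
For orientation, here is a minimal usage sketch inferred from the API exported by the bundle above. It is not taken from the package's documentation; the gateway URL, auth token, directory path, and collection title are placeholder assumptions.

const { UploadClient } = require("@arke-institute/sdk");

// Placeholder configuration — substitute a real gateway URL and auth token.
const client = new UploadClient({
  gatewayUrl: "https://gateway.example.org",
  authToken: process.env.ARKE_TOKEN,
});

async function main() {
  // In Node.js, `files` is a directory path; in the browser, a File[] or FileList.
  const result = await client.createCollection({
    files: "./my-archive",
    collectionMetadata: { title: "My Archive", visibility: "public" },
    onProgress: (p) => console.log(`${p.phase}: ${p.percentComplete}%`),
    dryRun: true, // scan and validate only; nothing is uploaded
  });
  console.log(result.batchId, result.collection.id);
}

main().catch(console.error);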