@oh-my-pi/pi-coding-agent 16.0.7 → 16.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/CHANGELOG.md +31 -0
  2. package/dist/cli.js +4752 -12462
  3. package/dist/types/cli/update-cli.d.ts +11 -0
  4. package/dist/types/debug/remote-debugger.d.ts +45 -0
  5. package/dist/types/internal-urls/docs-index.d.ts +19 -0
  6. package/dist/types/markit/converters/docx.d.ts +6 -0
  7. package/dist/types/markit/converters/epub.d.ts +15 -0
  8. package/dist/types/markit/converters/pdf/columns.d.ts +35 -0
  9. package/dist/types/markit/converters/pdf/extract.d.ts +10 -0
  10. package/dist/types/markit/converters/pdf/grid.d.ts +25 -0
  11. package/dist/types/markit/converters/pdf/headers.d.ts +24 -0
  12. package/dist/types/markit/converters/pdf/index.d.ts +6 -0
  13. package/dist/types/markit/converters/pdf/render.d.ts +24 -0
  14. package/dist/types/markit/converters/pdf/types.d.ts +75 -0
  15. package/dist/types/markit/converters/pptx.d.ts +57 -0
  16. package/dist/types/markit/converters/xlsx.d.ts +25 -0
  17. package/dist/types/markit/index.d.ts +2 -0
  18. package/dist/types/markit/registry.d.ts +16 -0
  19. package/dist/types/markit/types.d.ts +30 -0
  20. package/dist/types/session/agent-session.d.ts +7 -8
  21. package/dist/types/session/auth-storage.d.ts +3 -2
  22. package/dist/types/session/yield-queue.d.ts +3 -1
  23. package/dist/types/tools/browser/attach.d.ts +1 -1
  24. package/dist/types/utils/markit.d.ts +0 -8
  25. package/dist/types/utils/mupdf-wasm-embed.d.ts +1 -0
  26. package/dist/types/utils/turndown.d.ts +15 -0
  27. package/dist/types/utils/zip.d.ts +119 -0
  28. package/package.json +20 -18
  29. package/scripts/build-binary.ts +7 -3
  30. package/scripts/bundle-dist.ts +28 -12
  31. package/scripts/embed-mupdf-wasm.ts +67 -0
  32. package/scripts/generate-docs-index.ts +48 -32
  33. package/scripts/omp +1 -1
  34. package/src/advisor/__tests__/advisor.test.ts +83 -0
  35. package/src/advisor/runtime.ts +16 -1
  36. package/src/cli/auth-broker-cli.ts +1 -3
  37. package/src/cli/auth-gateway-cli.ts +2 -5
  38. package/src/cli/update-cli.ts +63 -3
  39. package/src/config/model-discovery.ts +20 -8
  40. package/src/config/models-config-schema.ts +8 -1
  41. package/src/debug/index.ts +44 -0
  42. package/src/debug/remote-debugger.ts +151 -0
  43. package/src/debug/report-bundle.ts +2 -1
  44. package/src/internal-urls/docs-index.generated.txt +2 -0
  45. package/src/internal-urls/docs-index.ts +102 -0
  46. package/src/internal-urls/omp-protocol.ts +10 -9
  47. package/src/markit/NOTICE +32 -0
  48. package/src/markit/converters/docx.ts +56 -0
  49. package/src/markit/converters/epub.ts +136 -0
  50. package/src/markit/converters/mammoth.d.ts +24 -0
  51. package/src/markit/converters/pdf/columns.ts +103 -0
  52. package/src/markit/converters/pdf/extract.ts +574 -0
  53. package/src/markit/converters/pdf/grid.ts +780 -0
  54. package/src/markit/converters/pdf/headers.ts +106 -0
  55. package/src/markit/converters/pdf/index.ts +146 -0
  56. package/src/markit/converters/pdf/render.ts +501 -0
  57. package/src/markit/converters/pdf/types.ts +84 -0
  58. package/src/markit/converters/pptx.ts +325 -0
  59. package/src/markit/converters/xlsx.ts +173 -0
  60. package/src/markit/index.ts +2 -0
  61. package/src/markit/registry.ts +59 -0
  62. package/src/markit/types.ts +35 -0
  63. package/src/modes/components/snapcompact-shape-preview-doc.md +14 -7
  64. package/src/modes/components/snapcompact-shape-preview.ts +2 -2
  65. package/src/modes/controllers/input-controller.ts +29 -8
  66. package/src/modes/interactive-mode.ts +26 -9
  67. package/src/prompts/advisor/system.md +1 -0
  68. package/src/sdk.ts +5 -9
  69. package/src/session/agent-session.ts +62 -40
  70. package/src/session/auth-storage.ts +2 -11
  71. package/src/session/yield-queue.ts +7 -1
  72. package/src/tools/browser/attach.ts +2 -2
  73. package/src/tools/fetch.ts +25 -60
  74. package/src/tools/read.ts +1 -1
  75. package/src/tools/search.ts +1 -6
  76. package/src/tools/write.ts +25 -65
  77. package/src/utils/markit.ts +25 -9
  78. package/src/utils/mupdf-wasm-embed.ts +12 -0
  79. package/src/utils/tools-manager.ts +2 -11
  80. package/src/utils/turndown.ts +83 -0
  81. package/src/{tools/archive-reader.ts → utils/zip.ts} +453 -83
  82. package/src/web/scrapers/types.ts +3 -46
  83. package/dist/types/internal-urls/docs-index.generated.d.ts +0 -2
  84. package/dist/types/tools/archive-reader.d.ts +0 -49
  85. package/src/internal-urls/docs-index.generated.ts +0 -120
@@ -1,10 +1,47 @@
1
- import * as fs from "node:fs/promises";
2
- import * as os from "node:os";
1
+ // The single archive boundary for the codebase: ZIP (framed here, over the raw
2
+ // DEFLATE codec in `node:zlib`) and tar / tar.gz (via `Bun.Archive`). This is
3
+ // the ONLY module that frames ZIP containers or touches `Bun.Archive`; the
4
+ // markit document converters, the read/search/write tools, the URL fetcher, the
5
+ // debug report bundler, and the tool-binary installer all go through here so
6
+ // there is exactly one archive implementation to reason about. Do not parse or
7
+ // build ZIP/tar, or call `Bun.Archive`, anywhere else.
3
8
  import * as path from "node:path";
4
- import { inflateSync, strFromU8 } from "fflate";
9
+ import * as zlib from "node:zlib";
10
+ import { formatBytes } from "@oh-my-pi/pi-utils";
11
+ import { ToolError } from "../tools/tool-errors";
12
+
13
+ /** A ZIP archive decoded to a `path → bytes` map of its file members. */
14
+ export type Unzipped = Record<string, Uint8Array>;
15
+
16
+ const ENCODER = new TextEncoder();
17
+ // `node:zlib` is only the DEFLATE codec; ZIP container framing is ours (see
18
+ // `unzip` / `zip` below). Entry names use the platform text decoders.
19
+ const UTF8_DECODER = new TextDecoder();
20
+ // ZIP central-directory names without the UTF-8 flag carry no reliable encoding;
21
+ // decode them as windows-1252 as a stable best effort (the ZIP spec's nominal default is CP437, but real-world writers vary).
22
+ const LEGACY_NAME_DECODER = new TextDecoder("windows-1252");
23
+
24
/**
 * Read a single ZIP entry as UTF-8 text.
 *
 * @param entries Decoded archive members keyed by path.
 * @param entryPath Exact member path to look up.
 * @returns The decoded text, or `undefined` when the entry is absent.
 */
export function unzipText(entries: Unzipped, entryPath: string): string | undefined {
	// Only own properties are archive members. A bare index lookup on a plain
	// object also finds inherited members such as `constructor` or `toString`,
	// which would then be handed to the decoder and throw.
	if (!Object.prototype.hasOwnProperty.call(entries, entryPath)) return undefined;
	const data = entries[entryPath];
	return data ? UTF8_DECODER.decode(data) : undefined;
}
5
29
 
6
- import { formatBytes } from "./render-utils";
7
- import { ToolError } from "./tool-errors";
30
/**
 * Decode an in-memory ZIP archive into a `path → bytes` map of its file
 * members. Directory entries and entries without ZIP storage are skipped;
 * member paths are whatever the shared central-directory parser yields.
 *
 * @param bytes The complete archive.
 * @returns One own, enumerable property per file member.
 */
export function unzip(bytes: Uint8Array): Unzipped {
	const info = readCentralDirectoryInfoSync(bytes);
	const centralDirectory = readMemoryRange(bytes, info.offset, info.offset + info.size);
	const out: Unzipped = {};
	for (const entry of parseZipCentralDirectory(memoryByteSource(bytes), centralDirectory, info.entries)) {
		if (entry.isDirectory || entry.storage?.type !== "zip") continue;
		// Define instead of assign: `out["__proto__"] = bytes` would invoke the
		// inherited `__proto__` setter and swap the map's prototype rather than
		// store the member, so a crafted entry name could corrupt the result.
		Object.defineProperty(out, entry.path, {
			value: extractZipMember(bytes, entry.storage, entry.size),
			enumerable: true,
			writable: true,
			configurable: true,
		});
	}
	return out;
}
8
45
 
9
46
  /**
10
47
  * Cap on the on-disk size of tar/tar.gz archives, which are loaded fully into
@@ -19,8 +56,23 @@ const MAX_TAR_ARCHIVE_BYTES = 256 * 1024 * 1024;
19
56
  */
20
57
  const MAX_ARCHIVE_MEMBER_BYTES = 64 * 1024 * 1024;
21
58
 
59
/**
 * Inflate a single raw DEFLATE stream. Output is capped at the declared
 * uncompressed size (minimum one byte), so an oversized stream fails instead
 * of growing without bound.
 */
function inflateRaw(bytes: Uint8Array, declaredSize: number): Uint8Array {
	const outputCap = Math.max(declaredSize, 1);
	const options = { maxOutputLength: outputCap };
	return zlib.inflateRawSync(bytes, options);
}
63
+
22
64
  export type ArchiveFormat = "zip" | "tar" | "tar.gz";
23
65
 
66
+ /**
67
+ * Where to read an archive from: a filesystem path (format inferred from the
68
+ * extension; ZIP is read lazily via ranged central-directory access) or
69
+ * in-memory bytes with an explicit format.
70
+ */
71
+ export type ArchiveSource = string | { bytes: Uint8Array; format: ArchiveFormat };
72
+
73
+ /** Content for a member when packing or extracting an archive. */
74
+ export type ArchiveMemberContent = string | Uint8Array | Blob;
75
+
24
76
  export interface ArchivePathCandidate {
25
77
  archivePath: string;
26
78
  subPath: string;
@@ -41,6 +93,55 @@ export interface ExtractedArchiveFile extends ArchiveNode {
41
93
  bytes: Uint8Array;
42
94
  }
43
95
 
96
+ /** A byte window into an archive — file-backed (lazy) or in-memory. */
97
+ interface ByteSource {
98
+ readonly size: number;
99
+ read(start: number, end: number): Promise<Uint8Array>;
100
+ }
101
+
102
/**
 * Guard for a half-open byte range: both bounds must be safe integers with
 * `0 <= start <= end`. Throws a `ToolError` otherwise.
 */
function assertValidRange(start: number, end: number): void {
	const boundsAreSafe = Number.isSafeInteger(start) && Number.isSafeInteger(end);
	if (boundsAreSafe && start >= 0 && end >= start) return;
	throw new ToolError("Invalid ZIP archive range");
}
107
+
108
/**
 * Return the exact `[start, end)` window of `buffer` as a view (no copy).
 * A range that extends beyond the buffer is an error, never a shorter slice.
 */
function readMemoryRange(buffer: Uint8Array, start: number, end: number): Uint8Array {
	assertValidRange(start, end);
	const available = buffer.byteLength;
	if (end <= available) {
		return buffer.subarray(start, end);
	}
	throw new ToolError("Invalid ZIP archive: truncated data");
}
116
+
117
/**
 * Wrap a file on disk as a `ByteSource`. The size is captured once up front;
 * each `read` slices the requested window from the file and rejects when the
 * slice comes back shorter than asked for.
 */
function fileByteSource(filePath: string): ByteSource {
	const handle = Bun.file(filePath);
	const totalBytes = handle.size;
	if (!Number.isSafeInteger(totalBytes)) {
		throw new ToolError("ZIP archive is too large to read safely");
	}

	const read = async (start: number, end: number): Promise<Uint8Array> => {
		assertValidRange(start, end);
		const chunk = await handle.slice(start, end).bytes();
		if (chunk.byteLength === end - start) {
			return chunk;
		}
		throw new ToolError("Invalid ZIP archive: truncated data");
	};

	return { size: totalBytes, read };
}
135
+
136
/**
 * Wrap an in-memory buffer as a `ByteSource`. Reads resolve to exact windows
 * of the buffer and reject when the range is invalid or runs past its end.
 */
function memoryByteSource(buffer: Uint8Array): ByteSource {
	const read = async (start: number, end: number): Promise<Uint8Array> => readMemoryRange(buffer, start, end);
	return { size: buffer.byteLength, read };
}
144
+
44
145
  interface TarStorage {
45
146
  type: "tar";
46
147
  file: File;
@@ -48,7 +149,7 @@ interface TarStorage {
48
149
 
49
150
  interface ZipStorage {
50
151
  type: "zip";
51
- archivePath: string;
152
+ source: ByteSource;
52
153
  compressedSize: number;
53
154
  compression: number;
54
155
  flags: number;
@@ -132,7 +233,8 @@ function ensureParentDirectories(map: Map<string, ArchiveIndexEntry>): void {
132
233
  }
133
234
  }
134
235
 
135
- function getArchiveFormatFromPath(filePath: string): ArchiveFormat | undefined {
236
+ /** Infer an archive format from a filesystem path's extension. */
237
+ export function archiveFormatFromPath(filePath: string): ArchiveFormat | undefined {
136
238
  const normalized = filePath.toLowerCase();
137
239
  if (normalized.endsWith(".tar.gz") || normalized.endsWith(".tgz")) return "tar.gz";
138
240
  if (normalized.endsWith(".tar")) return "tar";
@@ -233,18 +335,6 @@ function readUInt64LEAsNumber(bytes: Uint8Array, offset: number): number {
233
335
  return value;
234
336
  }
235
337
 
236
- async function readZipRange(filePath: string, start: number, end: number): Promise<Uint8Array> {
237
- if (!Number.isSafeInteger(start) || !Number.isSafeInteger(end) || start < 0 || end < start) {
238
- throw new ToolError("Invalid ZIP archive range");
239
- }
240
-
241
- const bytes = await Bun.file(filePath).slice(start, end).bytes();
242
- if (bytes.byteLength !== end - start) {
243
- throw new ToolError("Invalid ZIP archive: truncated data");
244
- }
245
- return bytes;
246
- }
247
-
248
338
  function findEndOfCentralDirectory(tail: Uint8Array): number {
249
339
  for (let offset = tail.byteLength - ZIP_EOCD_MIN_LENGTH; offset >= 0; offset--) {
250
340
  if (readUInt32LE(tail, offset) !== ZIP_EOCD_SIGNATURE) continue;
@@ -256,7 +346,7 @@ function findEndOfCentralDirectory(tail: Uint8Array): number {
256
346
  }
257
347
 
258
348
  async function readZip64CentralDirectoryInfo(
259
- filePath: string,
349
+ source: ByteSource,
260
350
  tail: Uint8Array,
261
351
  tailStart: number,
262
352
  eocdOffset: number,
@@ -267,7 +357,7 @@ async function readZip64CentralDirectoryInfo(
267
357
  const locator =
268
358
  locatorOffset >= tailStart
269
359
  ? tail.subarray(locatorOffset - tailStart, locatorOffset - tailStart + ZIP64_EOCD_LOCATOR_LENGTH)
270
- : await readZipRange(filePath, locatorOffset, eocdOffset);
360
+ : await source.read(locatorOffset, eocdOffset);
271
361
  if (readUInt32LE(locator, 0) !== ZIP64_EOCD_LOCATOR_SIGNATURE) return undefined;
272
362
 
273
363
  const zip64EocdDisk = readUInt32LE(locator, 4);
@@ -277,7 +367,7 @@ async function readZip64CentralDirectoryInfo(
277
367
  throw new ToolError("Multi-disk ZIP archives are not supported");
278
368
  }
279
369
 
280
- const record = await readZipRange(filePath, zip64EocdOffset, zip64EocdOffset + 56);
370
+ const record = await source.read(zip64EocdOffset, zip64EocdOffset + 56);
281
371
  if (readUInt32LE(record, 0) !== ZIP64_EOCD_SIGNATURE) {
282
372
  throw new ToolError("Invalid ZIP archive: missing ZIP64 end of central directory");
283
373
  }
@@ -292,14 +382,15 @@ async function readZip64CentralDirectoryInfo(
292
382
  };
293
383
  }
294
384
 
295
- async function readZipCentralDirectoryInfo(filePath: string, fileSize: number): Promise<ZipCentralDirectoryInfo> {
385
+ async function readZipCentralDirectoryInfo(source: ByteSource): Promise<ZipCentralDirectoryInfo> {
386
+ const fileSize = source.size;
296
387
  if (fileSize < ZIP_EOCD_MIN_LENGTH) {
297
388
  throw new ToolError("Invalid ZIP archive: missing end of central directory");
298
389
  }
299
390
 
300
391
  const tailLength = Math.min(fileSize, ZIP_EOCD_MIN_LENGTH + ZIP_EOCD_MAX_COMMENT_LENGTH);
301
392
  const tailStart = fileSize - tailLength;
302
- const tail = await readZipRange(filePath, tailStart, fileSize);
393
+ const tail = await source.read(tailStart, fileSize);
303
394
  const eocdIndex = findEndOfCentralDirectory(tail);
304
395
  const eocdOffset = tailStart + eocdIndex;
305
396
 
@@ -311,7 +402,7 @@ async function readZipCentralDirectoryInfo(filePath: string, fileSize: number):
311
402
  let size = readUInt32LE(tail, eocdIndex + 12);
312
403
  let offset = readUInt32LE(tail, eocdIndex + 16);
313
404
  const needsZip64 = entries === ZIP_UINT16_MAX || size === ZIP_UINT32_MAX || offset === ZIP_UINT32_MAX;
314
- const zip64Info = await readZip64CentralDirectoryInfo(filePath, tail, tailStart, eocdOffset);
405
+ const zip64Info = await readZip64CentralDirectoryInfo(source, tail, tailStart, eocdOffset);
315
406
  if (zip64Info) {
316
407
  ({ entries, size, offset } = zip64Info);
317
408
  } else if (needsZip64) {
@@ -386,7 +477,7 @@ function readZip64EntryValues(
386
477
  }
387
478
 
388
479
  function parseZipCentralDirectory(
389
- filePath: string,
480
+ source: ByteSource,
390
481
  centralDirectory: Uint8Array,
391
482
  expectedEntries: number,
392
483
  ): ArchiveIndexEntry[] {
@@ -417,7 +508,10 @@ function parseZipCentralDirectory(
417
508
  throw new ToolError("Invalid ZIP archive: truncated central directory entry");
418
509
  }
419
510
 
420
- const rawPath = strFromU8(centralDirectory.subarray(nameStart, extraStart), (flags & ZIP_UTF8_FLAG) === 0);
511
+ const useLegacyEncoding = (flags & ZIP_UTF8_FLAG) === 0;
512
+ const rawPath = (useLegacyEncoding ? LEGACY_NAME_DECODER : UTF8_DECODER).decode(
513
+ centralDirectory.subarray(nameStart, extraStart),
514
+ );
421
515
  const normalizedPath = normalizeArchiveEntryPath(rawPath);
422
516
  if (normalizedPath) {
423
517
  const values = readZip64EntryValues(
@@ -448,7 +542,7 @@ function parseZipCentralDirectory(
448
542
  ? undefined
449
543
  : {
450
544
  type: "zip",
451
- archivePath: filePath,
545
+ source,
452
546
  compressedSize: values.compressedSize,
453
547
  compression,
454
548
  flags,
@@ -463,16 +557,27 @@ function parseZipCentralDirectory(
463
557
  return entries;
464
558
  }
465
559
 
560
/**
 * Turn a ZIP member's raw payload into its file bytes. Stored members are
 * returned untouched, deflated members are inflated with output bounded by
 * `declaredSize`, and any other compression method is rejected. Inflate
 * failures surface as a `ToolError` carrying the underlying message.
 */
function decodeZipMember(compressed: Uint8Array, compression: number, declaredSize: number): Uint8Array {
	switch (compression) {
		case ZIP_STORED_COMPRESSION:
			return compressed;
		case ZIP_DEFLATE_COMPRESSION:
			break;
		default:
			throw new ToolError(`Unsupported ZIP compression method: ${compression}`);
	}

	try {
		return inflateRaw(compressed, declaredSize);
	} catch (cause) {
		const message = cause instanceof Error ? cause.message : String(cause);
		throw new ToolError(message);
	}
}
574
+
466
575
  async function readZipFileBytes(storage: ZipStorage, uncompressedSize: number): Promise<Uint8Array> {
467
576
  if ((storage.flags & ZIP_ENCRYPTED_FLAG) !== 0) {
468
577
  throw new ToolError("Encrypted ZIP entries are not supported");
469
578
  }
470
579
 
471
- const localHeader = await readZipRange(
472
- storage.archivePath,
473
- storage.localHeaderOffset,
474
- storage.localHeaderOffset + 30,
475
- );
580
+ const localHeader = await storage.source.read(storage.localHeaderOffset, storage.localHeaderOffset + 30);
476
581
  if (readUInt32LE(localHeader, 0) !== ZIP_LOCAL_FILE_HEADER_SIGNATURE) {
477
582
  throw new ToolError("Invalid ZIP archive: malformed local file header");
478
583
  }
@@ -480,20 +585,8 @@ async function readZipFileBytes(storage: ZipStorage, uncompressedSize: number):
480
585
  const fileNameLength = readUInt16LE(localHeader, 26);
481
586
  const extraLength = readUInt16LE(localHeader, 28);
482
587
  const dataStart = storage.localHeaderOffset + 30 + fileNameLength + extraLength;
483
- const compressedBytes = await readZipRange(storage.archivePath, dataStart, dataStart + storage.compressedSize);
484
-
485
- if (storage.compression === ZIP_STORED_COMPRESSION) {
486
- return compressedBytes;
487
- }
488
- if (storage.compression !== ZIP_DEFLATE_COMPRESSION) {
489
- throw new ToolError(`Unsupported ZIP compression method: ${storage.compression}`);
490
- }
491
-
492
- try {
493
- return inflateSync(compressedBytes, { out: new Uint8Array(uncompressedSize) });
494
- } catch (error) {
495
- throw new ToolError(error instanceof Error ? error.message : String(error));
496
- }
588
+ const compressedBytes = await storage.source.read(dataStart, dataStart + storage.compressedSize);
589
+ return decodeZipMember(compressedBytes, storage.compression, uncompressedSize);
497
590
  }
498
591
 
499
592
  async function readTarEntries(bytes: Uint8Array): Promise<ArchiveIndexEntry[]> {
@@ -528,21 +621,18 @@ async function readTarEntries(bytes: Uint8Array): Promise<ArchiveIndexEntry[]> {
528
621
  return entries;
529
622
  }
530
623
 
531
- async function readZipEntries(filePath: string): Promise<ArchiveIndexEntry[]> {
532
- const fileSize = Bun.file(filePath).size;
533
- if (!Number.isSafeInteger(fileSize)) {
534
- throw new ToolError("ZIP archive is too large to read safely");
535
- }
536
-
537
- const directoryInfo = await readZipCentralDirectoryInfo(filePath, fileSize);
538
- const centralDirectory = await readZipRange(
539
- filePath,
540
- directoryInfo.offset,
541
- directoryInfo.offset + directoryInfo.size,
542
- );
543
- return parseZipCentralDirectory(filePath, centralDirectory, directoryInfo.entries);
624
+ async function readZipEntries(source: ByteSource): Promise<ArchiveIndexEntry[]> {
625
+ const directoryInfo = await readZipCentralDirectoryInfo(source);
626
+ const centralDirectory = await source.read(directoryInfo.offset, directoryInfo.offset + directoryInfo.size);
627
+ return parseZipCentralDirectory(source, centralDirectory, directoryInfo.entries);
544
628
  }
545
629
 
630
+ /**
631
+ * Split an `archive.ext:inner/path` reference into every plausible
632
+ * `{ archivePath, subPath }` pair, longest archive prefix first. A path may
633
+ * contain more than one archive extension, so each candidate is a guess at
634
+ * where the archive ends and the member portion begins.
635
+ */
546
636
  export function parseArchivePathCandidates(filePath: string): ArchivePathCandidate[] {
547
637
  const normalized = filePath.replace(/\\/g, "/");
548
638
  const pattern = /\.(?:tar\.gz|tgz|zip|tar)(?=(?::|$))/gi;
@@ -567,6 +657,11 @@ export function parseArchivePathCandidates(filePath: string): ArchivePathCandida
567
657
  return candidates.sort((left, right) => right.archivePath.length - left.archivePath.length);
568
658
  }
569
659
 
660
+ /**
661
+ * An indexed, read-only view over a single archive. ZIP archives are indexed
662
+ * from the central directory and members are inflated on demand; tar archives
663
+ * are fully materialized by `Bun.Archive` up front.
664
+ */
570
665
  export class ArchiveReader {
571
666
  readonly format: ArchiveFormat;
572
667
  #entries = new Map<string, ArchiveIndexEntry>();
@@ -680,42 +775,317 @@ export class ArchiveReader {
680
775
  }
681
776
  }
682
777
 
683
- export async function openArchive(filePath: string): Promise<ArchiveReader> {
684
- const format = getArchiveFormatFromPath(filePath);
685
- if (!format) {
686
- throw new ToolError(`Unsupported archive format: ${filePath}`);
778
+ /**
779
+ * Open an archive for reading. ZIP archives opened from a path are indexed
780
+ * lazily via ranged central-directory reads (members inflate on demand); tar
781
+ * archives and in-memory ZIPs are read from a single buffer.
782
+ */
783
+ export async function openArchive(source: ArchiveSource): Promise<ArchiveReader> {
784
+ if (typeof source === "string") {
785
+ const format = archiveFormatFromPath(source);
786
+ if (!format) {
787
+ throw new ToolError(`Unsupported archive format: ${source}`);
788
+ }
789
+ if (format === "zip") {
790
+ return new ArchiveReader(format, await readZipEntries(fileByteSource(source)));
791
+ }
792
+
793
+ const file = Bun.file(source);
794
+ const archiveSize = file.size;
795
+ if (archiveSize > MAX_TAR_ARCHIVE_BYTES) {
796
+ throw new ToolError(
797
+ `Archive is too large to read in memory (${formatBytes(archiveSize)} > ${formatBytes(MAX_TAR_ARCHIVE_BYTES)} limit)`,
798
+ );
799
+ }
800
+ return new ArchiveReader(format, await readTarEntries(await file.bytes()));
687
801
  }
688
802
 
803
+ const { bytes, format } = source;
689
804
  if (format === "zip") {
690
- return new ArchiveReader(format, await readZipEntries(filePath));
805
+ return new ArchiveReader(format, await readZipEntries(memoryByteSource(bytes)));
691
806
  }
692
-
693
- const file = Bun.file(filePath);
694
- const archiveSize = file.size;
695
- if (archiveSize > MAX_TAR_ARCHIVE_BYTES) {
807
+ if (bytes.byteLength > MAX_TAR_ARCHIVE_BYTES) {
696
808
  throw new ToolError(
697
- `Archive is too large to read in memory (${formatBytes(archiveSize)} > ${formatBytes(MAX_TAR_ARCHIVE_BYTES)} limit)`,
809
+ `Archive is too large to read in memory (${formatBytes(bytes.byteLength)} > ${formatBytes(MAX_TAR_ARCHIVE_BYTES)} limit)`,
698
810
  );
699
811
  }
700
- const entries = await readTarEntries(await file.bytes());
701
- return new ArchiveReader(format, entries);
812
+ return new ArchiveReader(format, await readTarEntries(bytes));
702
813
  }
703
814
 
815
+ /** Render the top-level entries of an in-memory archive as one line each. */
704
816
  export async function listArchiveRoot(
705
817
  bytes: Uint8Array,
706
818
  format: ArchiveFormat,
707
819
  opts: { limit?: number } = {},
708
820
  ): Promise<string> {
709
- const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "omp-archive-"));
710
- const tempPath = path.join(tempDir, `payload.${format}`);
711
- try {
712
- await Bun.write(tempPath, bytes);
713
- const archive = await openArchive(tempPath);
714
- const entries = archive.listDirectory("");
715
- const limitedEntries = opts.limit !== undefined && opts.limit > 0 ? entries.slice(0, opts.limit) : entries;
716
- const lines = formatArchiveEntryLines(limitedEntries);
717
- return lines.length > 0 ? lines.join("\n") : "(empty archive directory)";
718
- } finally {
719
- await fs.rm(tempDir, { recursive: true, force: true });
821
+ const archive = await openArchive({ bytes, format });
822
+ const entries = archive.listDirectory("");
823
+ const limitedEntries = opts.limit !== undefined && opts.limit > 0 ? entries.slice(0, opts.limit) : entries;
824
+ const lines = formatArchiveEntryLines(limitedEntries);
825
+ return lines.length > 0 ? lines.join("\n") : "(empty archive directory)";
826
+ }
827
+
828
/**
 * Normalize an `ArchiveSource` to `{ bytes, format }`. In-memory sources are
 * returned as-is; a path has its format inferred from the extension and the
 * whole file read into memory. Throws a `ToolError` for an unrecognized
 * extension.
 */
async function resolveArchiveBytes(source: ArchiveSource): Promise<{ bytes: Uint8Array; format: ArchiveFormat }> {
	if (typeof source === "string") {
		const format = archiveFormatFromPath(source);
		if (format) {
			const bytes = await Bun.file(source).bytes();
			return { bytes, format };
		}
		throw new ToolError(`Unsupported archive format: ${source}`);
	}
	return source;
}
836
+
837
/**
 * Coerce member content to bytes: byte arrays pass through unchanged, strings
 * are UTF-8 encoded, and anything else is read through `arrayBuffer()`.
 */
async function memberToBytes(content: ArchiveMemberContent): Promise<Uint8Array> {
	if (typeof content !== "string") {
		if (content instanceof Uint8Array) return content;
		const buffer = await content.arrayBuffer();
		return new Uint8Array(buffer);
	}
	return ENCODER.encode(content);
}
842
+
843
/**
 * Load every file member of an archive into a `path → content` map. ZIP
 * members come back as inflated byte arrays; tar members are whatever
 * `Bun.Archive#files()` yields, keyed with backslashes normalized to `/`.
 * Reach for this when all entries are needed (rewrite, extract); for browsing
 * or single-member reads `openArchive` is the lazier choice for ZIP.
 */
export async function readArchiveEntries(source: ArchiveSource): Promise<Map<string, ArchiveMemberContent>> {
	const { bytes, format } = await resolveArchiveBytes(source);
	const members = new Map<string, ArchiveMemberContent>();

	if (format !== "zip") {
		const tarFiles = await new Bun.Archive(bytes).files();
		for (const [name, file] of tarFiles) {
			const posixName = name.replace(/\\/g, "/");
			members.set(posixName, file);
		}
		return members;
	}

	const inflated = unzip(bytes);
	for (const name in inflated) {
		members.set(name, inflated[name]!);
	}
	return members;
}
865
+
866
/**
 * Pack `entries` into an archive of the given `format` and write it to
 * `destPath`. Member names have backslashes normalized to `/`. ZIP output is
 * framed in memory (string members become UTF-8 bytes) and written in one
 * call; tar and tar.gz are delegated to `Bun.Archive.write`, with gzip
 * compression requested for tar.gz.
 */
export async function writeArchive(
	destPath: string,
	format: ArchiveFormat,
	entries: Iterable<readonly [string, ArchiveMemberContent]>,
): Promise<void> {
	const toPosix = (name: string): string => name.replace(/\\/g, "/");

	if (format !== "zip") {
		const tarMembers: Record<string, ArchiveMemberContent> = {};
		for (const [name, content] of entries) {
			tarMembers[toPosix(name)] = content;
		}
		await Bun.Archive.write(destPath, tarMembers, format === "tar.gz" ? { compress: "gzip" } : undefined);
		return;
	}

	const zipMembers: Record<string, Uint8Array> = {};
	for (const [name, content] of entries) {
		zipMembers[toPosix(name)] = await memberToBytes(content);
	}
	await Bun.write(destPath, zip(zipMembers));
}
891
+
892
/**
 * Extract every file member of an archive beneath `destDir`.
 *
 * Entries whose name ends in `/` are skipped. Each remaining entry is resolved
 * against the extraction root and must land strictly inside it; creating
 * parent directories is left to `Bun.write`.
 *
 * @param source Archive path or in-memory bytes with an explicit format.
 * @param destDir Directory to extract into.
 * @returns The number of files written.
 * @throws ToolError when an entry resolves to the root itself or outside it
 *   (via `..` or an absolute path). Files written before the offending entry
 *   are left on disk.
 */
export async function extractArchive(source: ArchiveSource, destDir: string): Promise<number> {
	const extractRoot = path.resolve(destDir);
	const entries = await readArchiveEntries(source);
	let count = 0;
	for (const [name, content] of entries) {
		if (name.endsWith("/")) continue;
		const outputPath = path.resolve(extractRoot, name);
		// Compare via the relative path rather than a `root + sep` string prefix:
		// the prefix form never matches when the root already ends in a separator
		// (`/`, `C:\`), which would reject every entry of a root-level extraction.
		const relative = path.relative(extractRoot, outputPath);
		const escapesRoot =
			relative === "" ||
			relative === ".." ||
			relative.startsWith(`..${path.sep}`) ||
			path.isAbsolute(relative);
		if (escapesRoot) {
			throw new ToolError(`Archive entry escapes extraction dir: ${name}`);
		}
		await Bun.write(outputPath, content);
		count++;
	}
	return count;
}
912
+
913
/** Store the low 16 bits of `value` at `offset`, least-significant byte first. */
function writeUInt16LE(buf: Uint8Array, offset: number, value: number): void {
	const lowByte = value & 0xff;
	const highByte = (value >>> 8) & 0xff;
	buf[offset] = lowByte;
	buf[offset + 1] = highByte;
}
917
+
918
/** Store the low 32 bits of `value` at `offset`, least-significant byte first. */
function writeUInt32LE(buf: Uint8Array, offset: number, value: number): void {
	for (let byteIndex = 0; byteIndex < 4; byteIndex++) {
		buf[offset + byteIndex] = (value >>> (8 * byteIndex)) & 0xff;
	}
}
924
+
925
/**
 * Build a ZIP archive in memory from a `path → bytes` map. Every member is
 * raw-DEFLATE compressed unless that fails to shrink it, in which case the
 * original bytes are stored. ZIP64 is never emitted: anything that would
 * overflow a 16/32-bit header field throws a `ToolError` instead of producing
 * a corrupt archive.
 */
export function zip(entries: Unzipped): Uint8Array {
	type Field = readonly [offset: number, value: number];
	const tooLarge = () => new ToolError("ZIP archive is too large to write (ZIP64 is not supported)");
	// Fill a header from (offset, value) tables; untouched bytes stay zero.
	const putFields = (target: Uint8Array, u16: readonly Field[], u32: readonly Field[]): void => {
		for (const [at, value] of u16) writeUInt16LE(target, at, value);
		for (const [at, value] of u32) writeUInt32LE(target, at, value);
	};

	const localChunks: Uint8Array[] = [];
	const directoryChunks: Uint8Array[] = [];
	let localBytes = 0;
	let directoryBytes = 0;
	let memberCount = 0;

	for (const name in entries) {
		const content = entries[name]!;
		const encodedName = ENCODER.encode(name);
		const checksum = zlib.crc32(content) >>> 0;
		const rawLength = content.byteLength;
		const deflated = zlib.deflateRawSync(content);
		const keepRaw = deflated.byteLength >= rawLength;
		const method = keepRaw ? ZIP_STORED_COMPRESSION : ZIP_DEFLATE_COMPRESSION;
		const body = keepRaw ? content : deflated;
		const nameLength = encodedName.byteLength;
		const bodyLength = body.byteLength;

		// Name length is a u16 and sizes/offsets are u32, with 0xffff and
		// 0xffffffff reserved as ZIP64 sentinels; refuse anything that would
		// wrap a field rather than emit an invalid archive.
		const fitsClassicZip =
			memberCount + 1 < ZIP_UINT16_MAX &&
			nameLength <= ZIP_UINT16_MAX &&
			rawLength < ZIP_UINT32_MAX &&
			localBytes + 30 + nameLength + bodyLength < ZIP_UINT32_MAX;
		if (!fitsClassicZip) {
			throw tooLarge();
		}

		// Local file header. The date word 0x21 is the fixed 1980-01-01 stamp
		// that keeps output deterministic.
		const localHeader = new Uint8Array(30 + nameLength);
		putFields(
			localHeader,
			[
				[4, 20],
				[6, ZIP_UTF8_FLAG],
				[8, method],
				[12, 0x21],
				[26, nameLength],
			],
			[
				[0, ZIP_LOCAL_FILE_HEADER_SIGNATURE],
				[14, checksum],
				[18, bodyLength],
				[22, rawLength],
			],
		);
		localHeader.set(encodedName, 30);
		localChunks.push(localHeader, body);

		// Matching central-directory record, pointing back at the local header.
		const directoryRecord = new Uint8Array(46 + nameLength);
		putFields(
			directoryRecord,
			[
				[4, 20],
				[6, 20],
				[8, ZIP_UTF8_FLAG],
				[10, method],
				[14, 0x21],
				[28, nameLength],
			],
			[
				[0, ZIP_CENTRAL_DIRECTORY_HEADER_SIGNATURE],
				[16, checksum],
				[20, bodyLength],
				[24, rawLength],
				[42, localBytes],
			],
		);
		directoryRecord.set(encodedName, 46);
		directoryChunks.push(directoryRecord);

		directoryBytes += directoryRecord.byteLength;
		localBytes += localHeader.byteLength + bodyLength;
		memberCount++;
	}

	const totalBytes = localBytes + directoryBytes + ZIP_EOCD_MIN_LENGTH;
	if (directoryBytes >= ZIP_UINT32_MAX || totalBytes >= ZIP_UINT32_MAX) {
		throw tooLarge();
	}

	// End-of-central-directory trailer.
	const trailer = new Uint8Array(ZIP_EOCD_MIN_LENGTH);
	putFields(
		trailer,
		[
			[8, memberCount],
			[10, memberCount],
		],
		[
			[0, ZIP_EOCD_SIGNATURE],
			[12, directoryBytes],
			[16, localBytes],
		],
	);

	// Lay out: local headers + payloads, then the central directory, then the trailer.
	const archive = new Uint8Array(totalBytes);
	let cursor = 0;
	for (const group of [localChunks, directoryChunks, [trailer]]) {
		for (const chunk of group) {
			archive.set(chunk, cursor);
			cursor += chunk.byteLength;
		}
	}
	return archive;
}
1015
+
1016
/**
 * Reads ZIP64 central directory metadata, if the archive carries any.
 *
 * The ZIP64 locator is expected in the `ZIP64_EOCD_LOCATOR_LENGTH` bytes that
 * end exactly at `eocdOffset`; it holds the offset of the ZIP64
 * end-of-central-directory record, from which the values are taken.
 *
 * @param bytes - The whole archive.
 * @param eocdOffset - Offset of the classic end-of-central-directory record within `bytes`.
 * @returns The entry count, size and offset of the central directory, or
 *   `undefined` when there is no room for a locator before `eocdOffset` or the
 *   locator signature is not present there.
 * @throws ToolError when the locator or the ZIP64 record fails the multi-disk
 *   checks, or when the record the locator points at lacks the ZIP64 EOCD signature.
 */
function readZip64CentralDirectoryInfoSync(bytes: Uint8Array, eocdOffset: number): ZipCentralDirectoryInfo | undefined {
	const locatorStart = eocdOffset - ZIP64_EOCD_LOCATOR_LENGTH;
	if (locatorStart < 0) {
		return undefined;
	}

	const locatorBytes = readMemoryRange(bytes, locatorStart, locatorStart + ZIP64_EOCD_LOCATOR_LENGTH);
	const locatorPresent = readUInt32LE(locatorBytes, 0) === ZIP64_EOCD_LOCATOR_SIGNATURE;
	if (!locatorPresent) {
		return undefined;
	}

	// Locator disk fields: the field at +4 must be 0 and the field at +16 must not exceed 1.
	const locatorSpansDisks = readUInt32LE(locatorBytes, 4) !== 0 || readUInt32LE(locatorBytes, 16) > 1;
	if (locatorSpansDisks) {
		throw new ToolError("Multi-disk ZIP archives are not supported");
	}

	// The locator stores the ZIP64 EOCD offset at +8; 56 bytes are read from there.
	const recordStart = readUInt64LEAsNumber(locatorBytes, 8);
	const zip64Record = readMemoryRange(bytes, recordStart, recordStart + 56);
	if (readUInt32LE(zip64Record, 0) !== ZIP64_EOCD_SIGNATURE) {
		throw new ToolError("Invalid ZIP archive: missing ZIP64 end of central directory");
	}

	const recordSpansDisks = readUInt32LE(zip64Record, 16) !== 0 || readUInt32LE(zip64Record, 20) !== 0;
	if (recordSpansDisks) {
		throw new ToolError("Multi-disk ZIP archives are not supported");
	}

	// Read in the same order as the fields are laid out: entries, size, offset.
	const entries = readUInt64LEAsNumber(zip64Record, 32);
	const size = readUInt64LEAsNumber(zip64Record, 40);
	const offset = readUInt64LEAsNumber(zip64Record, 48);
	return { entries, size, offset };
}
1041
+
1042
/**
 * Locates the end-of-central-directory (EOCD) record in an in-memory archive
 * and returns where the central directory lives.
 *
 * Only the trailing `ZIP_EOCD_MIN_LENGTH + ZIP_EOCD_MAX_COMMENT_LENGTH` bytes
 * (or the whole buffer, if shorter) are handed to `findEndOfCentralDirectory`.
 * When ZIP64 metadata is found in front of the EOCD it replaces the classic
 * 16/32-bit values.
 *
 * @param bytes - The whole archive.
 * @returns The entry count, byte offset and byte size of the central directory.
 * @throws ToolError when the buffer is shorter than an EOCD record, the EOCD
 *   disk fields are non-zero, a classic field holds its maximum value but no
 *   ZIP64 metadata was found, or the central directory would extend past the
 *   end of the buffer.
 */
function readCentralDirectoryInfoSync(bytes: Uint8Array): ZipCentralDirectoryInfo {
	const fileSize = bytes.byteLength;
	if (fileSize < ZIP_EOCD_MIN_LENGTH) {
		throw new ToolError("Invalid ZIP archive: missing end of central directory");
	}

	// The EOCD can only sit within its fixed length plus the longest allowed comment from the end.
	const searchSpan = ZIP_EOCD_MIN_LENGTH + ZIP_EOCD_MAX_COMMENT_LENGTH;
	const trailerStart = Math.max(0, fileSize - searchSpan);
	const trailer = readMemoryRange(bytes, trailerStart, fileSize);
	const eocdAt = findEndOfCentralDirectory(trailer);

	const spansDisks = readUInt16LE(trailer, eocdAt + 4) !== 0 || readUInt16LE(trailer, eocdAt + 6) !== 0;
	if (spansDisks) {
		throw new ToolError("Multi-disk ZIP archives are not supported");
	}

	const classic = {
		entries: readUInt16LE(trailer, eocdAt + 10),
		size: readUInt32LE(trailer, eocdAt + 12),
		offset: readUInt32LE(trailer, eocdAt + 16),
	};
	// A classic field holding its maximum value means the real value must come from ZIP64 metadata.
	const saturated =
		classic.entries === ZIP_UINT16_MAX || classic.size === ZIP_UINT32_MAX || classic.offset === ZIP_UINT32_MAX;

	// trailerStart + eocdAt converts the index within the trailer to an absolute offset.
	const zip64 = readZip64CentralDirectoryInfoSync(bytes, trailerStart + eocdAt);
	if (!zip64 && saturated) {
		throw new ToolError("Invalid ZIP archive: missing ZIP64 central directory metadata");
	}

	const chosen = zip64 ? zip64 : classic;
	const entries = chosen.entries;
	const size = chosen.size;
	const offset = chosen.offset;

	if (offset + size > fileSize) {
		throw new ToolError("Invalid ZIP archive: central directory exceeds file size");
	}

	return { entries, offset, size };
}
1074
+
1075
/**
 * Extracts one archive member from an in-memory ZIP.
 *
 * The member's local file header is read at `storage.localHeaderOffset`; the
 * payload starts after the 30-byte fixed header plus the file name and extra
 * field whose lengths that header declares. The payload length and compression
 * method come from `storage`, and decoding is delegated to `decodeZipMember`.
 *
 * @param bytes - The whole archive.
 * @param storage - Flags, local header offset, compressed size and compression method of the member.
 * @param uncompressedSize - Passed through to `decodeZipMember` unchanged.
 * @returns Whatever `decodeZipMember` returns for the member's payload.
 * @throws ToolError when the entry has the encrypted flag set or the local
 *   file header signature does not match.
 */
function extractZipMember(bytes: Uint8Array, storage: ZipStorage, uncompressedSize: number): Uint8Array {
	const encrypted = (storage.flags & ZIP_ENCRYPTED_FLAG) !== 0;
	if (encrypted) {
		throw new ToolError("Encrypted ZIP entries are not supported");
	}

	// Length of the fixed portion of a local file header.
	const fixedHeaderLength = 30;
	const headerAt = storage.localHeaderOffset;
	const fixedHeader = readMemoryRange(bytes, headerAt, headerAt + fixedHeaderLength);
	if (readUInt32LE(fixedHeader, 0) !== ZIP_LOCAL_FILE_HEADER_SIGNATURE) {
		throw new ToolError("Invalid ZIP archive: malformed local file header");
	}

	// The name and extra-field lengths are taken from the local header itself (+26 and +28).
	const nameLength = readUInt16LE(fixedHeader, 26);
	const extraFieldLength = readUInt16LE(fixedHeader, 28);
	const payloadStart = headerAt + fixedHeaderLength + nameLength + extraFieldLength;
	const payloadEnd = payloadStart + storage.compressedSize;

	return decodeZipMember(readMemoryRange(bytes, payloadStart, payloadEnd), storage.compression, uncompressedSize);
}