@asterisk-labs/cozip 2026.5.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # cozip
2
+
3
+ JavaScript reader for Cloud-Optimized ZIP archives. A single `read()` call gives you the manifest of a remote `.zip`, using only a few small HTTP range requests.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install @asterisk-labs/cozip
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```js
14
+ import { read } from "@asterisk-labs/cozip";
15
+
16
+ const manifest = await read("https://example.com/dataset.zip");
17
+ const train = manifest.filter((row) => row.split === "train");
18
+ ```
19
+
20
+ `manifest` is an array of row objects with `name`, `offset`, `size`, `cozip:gdal_vsi`, plus whatever extras the writer added. Pass `columns: [...]` to include only specific extras, and `gdalVsi: false` to omit the VSI path column.
21
+
22
+ ```js
23
+ const manifest = await read(url, {
24
+ columns: ["cloud_pct", "split"],
25
+ gdalVsi: false,
26
+ });
27
+ ```
28
+
29
+ Only `http://` and `https://` URLs are supported. Cloud schemes like `s3://` or `gcs://` are out of scope; use a presigned HTTP URL or a CORS-enabled proxy instead. The server must support range requests (`Accept-Ranges: bytes`) and, for browser use, CORS with `Range` allowed.
30
+
31
+ ## Spec
32
+
33
+ See [SPEC.md](https://github.com/asterisk-labs/cozip/blob/main/SPEC.md) for the on-disk format.
34
+
35
+ ## License
36
+
37
+ MIT. See [LICENSE](../LICENSE).
package/package.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "name": "@asterisk-labs/cozip",
3
+ "version": "2026.5.17",
4
+ "description": "Pure-JS reader for Cloud-Optimized ZIP (cozip) archives over HTTP.",
5
+ "type": "module",
6
+ "exports": {
7
+ ".": {
8
+ "types": "./types/index.d.ts",
9
+ "default": "./src/index.js"
10
+ },
11
+ "./package.json": "./package.json"
12
+ },
13
+ "types": "./types/index.d.ts",
14
+ "files": ["src", "types", "README.md", "LICENSE"],
15
+ "engines": { "node": ">=20" },
16
+ "publishConfig": { "access": "public" },
17
+ "scripts": {
18
+ "types": "tsc",
19
+ "test": "node --test test/*.test.js",
20
+ "prepublishOnly": "npm run types && npm test"
21
+ },
22
+ "devDependencies": { "typescript": "^5.6.0" },
23
+ "dependencies": { "hyparquet": "^1.25.0", "hyparquet-compressors": "^1.1.1"},
24
+ "keywords": ["cozip", "zip", "cloud-optimized", "geospatial", "range-requests"],
25
+ "repository": {
26
+ "type": "git",
27
+ "url": "git+https://github.com/asterisk-labs/cozip.git",
28
+ "directory": "javascript"
29
+ },
30
+ "homepage": "https://github.com/asterisk-labs/cozip/tree/main/javascript#readme",
31
+ "bugs": "https://github.com/asterisk-labs/cozip/issues",
32
+ "license": "MIT"
33
+ }
package/src/index.js ADDED
@@ -0,0 +1,168 @@
1
+ // Pure-JS reader for cozip archives over HTTP.
2
+
3
+ import { parquetReadObjects } from "hyparquet";
4
+ import { compressors } from "hyparquet-compressors";
5
+
6
// Byte length of the leading __cozip__ local file header: 30 fixed bytes,
// a 9-byte name and a 12-byte extra field. The index payload starts here.
const LFH_SIZE = 51;
// Bytes between the start of the index payload and the first name length.
const INDEX_HEADER_SIZE = 11;
// Size of the first range request (64 KiB).
const BOOTSTRAP_SIZE = 65536;
// ZIP local file header signature ("PK\x03\x04" read as little-endian uint32).
const LFH_MAGIC = 0x04034b50;
// Index payload signature ('CZIP' read as little-endian uint32).
const INDEX_MAGIC = 0x50495a43;
// Header id of the extra field expected right after the __cozip__ name.
const EXTRA_HEADER_ID = 0xca0c;
// Name of the first archive entry, which carries the index.
const COZIP_NAME = "__cozip__";
// Name of the entry holding the manifest (a Parquet file).
const METADATA_NAME = "__metadata__";
14
+
15
+ /**
16
+ * @typedef {object} ReadOptions
17
+ * @property {string[]} [columns] Subset of extra columns from __metadata__ to include.
18
+ * @property {boolean} [gdalVsi] Include the cozip:gdal_vsi column (default true).
19
+ */
20
+
21
+ /**
22
+ * Read a cozip archive's manifest over HTTP.
23
+ *
24
+ * @param {string} url
25
+ * @param {ReadOptions} [opts]
26
+ * @returns {Promise<object[]>} Rows with { name, offset, size, ...extras, "cozip:gdal_vsi"? }.
27
+ */
28
+ export async function read(url, opts = {}) {
29
+ if (!/^https?:\/\//i.test(url)) {
30
+ throw new Error(`cozip: only http(s) URLs are supported, got: ${url}`);
31
+ }
32
+ const { columns, gdalVsi = true } = opts;
33
+
34
+ // 1. Bootstrap the index from the first 64 KiB, extend if needed.
35
+ const head = await fetchRange(url, 0, BOOTSTRAP_SIZE - 1);
36
+ const indexEnd = LFH_SIZE + readIndexPayloadSize(head);
37
+ const indexBuf =
38
+ indexEnd > head.length
39
+ ? concat(head, await fetchRange(url, head.length, indexEnd - 1))
40
+ : head;
41
+
42
+ // 2. Locate __metadata__ inside the index.
43
+ const entries = parseIndex(indexBuf);
44
+ const meta = entries.get(METADATA_NAME);
45
+ if (!meta) {
46
+ throw new Error("cozip: archive has no __metadata__ entry");
47
+ }
48
+
49
+ // 3. Fetch and parse the __metadata__ Parquet.
50
+ const metaBytes = await fetchRange(url, meta.offset, meta.offset + meta.size - 1);
51
+ const parquetCols = columns
52
+ ? Array.from(new Set(["name", "offset", "size", ...columns]))
53
+ : undefined;
54
+
55
+ const file = /** @type {ArrayBuffer} */ (
56
+ metaBytes.buffer.slice(
57
+ metaBytes.byteOffset,
58
+ metaBytes.byteOffset + metaBytes.byteLength,
59
+ )
60
+ );
61
+ const rows = await parquetReadObjects({ file, columns: parquetCols, compressors });
62
+
63
+ // 4. Inject the VSI path for GDAL consumers.
64
+ if (gdalVsi) {
65
+ for (const row of rows) {
66
+ row["cozip:gdal_vsi"] = `/vsisubfile/${row.offset}_${row.size},/vsicurl/${url}`;
67
+ }
68
+ }
69
+
70
+ return rows;
71
+ }
72
+
73
+ // internals :)
74
+
75
+ /**
76
+ * @param {string} url
77
+ * @param {number} start
78
+ * @param {number} end
79
+ * @returns {Promise<Uint8Array>}
80
+ */
81
+ async function fetchRange(url, start, end) {
82
+ const res = await fetch(url, { headers: { Range: `bytes=${start}-${end}` } });
83
+ if (res.status !== 206 && res.status !== 200) {
84
+ throw new Error(`cozip: HTTP ${res.status} ${res.statusText} for ${url}`);
85
+ }
86
+ return new Uint8Array(await res.arrayBuffer());
87
+ }
88
+
89
/**
 * Join two byte arrays into a newly allocated one (`a` followed by `b`).
 *
 * @param {Uint8Array} a
 * @param {Uint8Array} b
 * @returns {Uint8Array} A fresh array of length `a.length + b.length`.
 */
function concat(a, b) {
  const joined = new Uint8Array(a.length + b.length);
  joined.set(a);
  joined.set(b, a.length);
  return joined;
}
100
+
101
+ /**
102
+ * @param {Uint8Array} buf
103
+ * @returns {number}
104
+ */
105
+ function readIndexPayloadSize(buf) {
106
+ const v = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
107
+ if (v.getUint32(0, true) !== LFH_MAGIC) {
108
+ throw new Error("cozip: byte 0 is not a ZIP local file header");
109
+ }
110
+ if (v.getUint16(26, true) !== 9 || v.getUint16(28, true) !== 12) {
111
+ throw new Error("cozip: LFH does not match cozip layout");
112
+ }
113
+ const size = v.getUint32(18, true);
114
+ if (size === 0) throw new Error("cozip: index payload size is zero");
115
+ return size;
116
+ }
117
+
118
+ /**
119
+ * @param {Uint8Array} buf
120
+ * @returns {Map<string, { offset: number, size: number }>}
121
+ */
122
+ function parseIndex(buf) {
123
+ const v = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
124
+ const td = new TextDecoder();
125
+
126
+ if (td.decode(buf.subarray(30, 39)) !== COZIP_NAME) {
127
+ throw new Error("cozip: first ZIP entry is not __cozip__");
128
+ }
129
+ if (v.getUint16(39, true) !== EXTRA_HEADER_ID) {
130
+ throw new Error("cozip: integrity extra field (0xCA0C) missing");
131
+ }
132
+ if (v.getUint32(LFH_SIZE, true) !== INDEX_MAGIC) {
133
+ throw new Error("cozip: index magic is not CZIP");
134
+ }
135
+
136
+ const n = v.getUint32(LFH_SIZE + 7, true);
137
+ let cur = LFH_SIZE + INDEX_HEADER_SIZE;
138
+
139
+ const nameLens = new Array(n);
140
+ for (let i = 0; i < n; i++) {
141
+ nameLens[i] = v.getUint16(cur, true);
142
+ cur += 2;
143
+ }
144
+
145
+ const names = new Array(n);
146
+ for (let i = 0; i < n; i++) {
147
+ names[i] = td.decode(buf.subarray(cur, cur + nameLens[i]));
148
+ cur += nameLens[i];
149
+ }
150
+
151
+ const offsets = new Array(n);
152
+ for (let i = 0; i < n; i++) {
153
+ offsets[i] = Number(v.getBigUint64(cur, true));
154
+ cur += 8;
155
+ }
156
+
157
+ const sizes = new Array(n);
158
+ for (let i = 0; i < n; i++) {
159
+ sizes[i] = Number(v.getBigUint64(cur, true));
160
+ cur += 8;
161
+ }
162
+
163
+ const entries = new Map();
164
+ for (let i = 0; i < n; i++) {
165
+ entries.set(names[i], { offset: offsets[i], size: sizes[i] });
166
+ }
167
+ return entries;
168
+ }
@@ -0,0 +1,23 @@
1
/**
 * @typedef {object} ReadOptions
 * @property {string[]} [columns] Subset of extra columns from __metadata__ to include.
 * @property {boolean} [gdalVsi] Include the cozip:gdal_vsi column (default true).
 */
/**
 * Read a cozip archive's manifest over HTTP.
 *
 * Rows are typed as string-keyed records rather than `object` so that
 * property access such as `row.split` type-checks; the set of extra columns
 * is decided by whoever wrote the archive and is not known statically.
 *
 * @param {string} url
 * @param {ReadOptions} [opts]
 * @returns {Promise<Array<Record<string, any>>>} Rows with { name, offset, size, ...extras, "cozip:gdal_vsi"? }.
 */
export function read(url: string, opts?: ReadOptions): Promise<Array<Record<string, any>>>;
export type ReadOptions = {
  /**
   * Subset of extra columns from __metadata__ to include.
   */
  columns?: string[] | undefined;
  /**
   * Include the cozip:gdal_vsi column (default true).
   */
  gdalVsi?: boolean | undefined;
};