@gscdump/engine 0.27.2 → 0.28.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3708 @@
1
+ import { gunzip } from "./hyparquet-compressors.mjs";
2
+ import { asyncBufferFromUrl, cachedAsyncBuffer } from "hyparquet";
3
+ import { ByteWriter, parquetWrite } from "hyparquet-writer";
4
+ import { parseDecimal } from "hyparquet/src/convert.js";
5
/**
 * Decode one zigzag-encoded variable-length integer from the reader.
 *
 * Bytes are consumed starting at `reader.offset` (which is advanced) until a
 * byte without the continuation bit (0x80) is seen. Arithmetic is done with
 * 32-bit bitwise operators.
 *
 * @param {{view: DataView, offset: number}} reader
 * @returns {number} the decoded signed integer
 */
function readZigZag(reader) {
  let accumulated = 0;
  for (let bitOffset = 0; ; bitOffset += 7) {
    const current = reader.view.getUint8(reader.offset++);
    accumulated |= (current & 0x7f) << bitOffset;
    const hasMore = (current & 0x80) !== 0;
    if (!hasMore) {
      // Undo the zigzag mapping: even values are non-negative, odd are negative.
      return (accumulated >>> 1) ^ -(accumulated & 1);
    }
  }
}
15
/**
 * Decode one zigzag-encoded variable-length integer as a BigInt.
 *
 * Same wire format as readZigZag, but accumulates in BigInt so values wider
 * than 32 bits are preserved. Advances `reader.offset` past the consumed bytes.
 *
 * @param {{view: DataView, offset: number}} reader
 * @returns {bigint} the decoded signed integer
 */
function readZigZagBigInt(reader) {
  let accumulated = 0n;
  for (let bitOffset = 0n; ; bitOffset += 7n) {
    const current = reader.view.getUint8(reader.offset++);
    accumulated |= BigInt(current & 0x7f) << bitOffset;
    const hasMore = (current & 0x80) !== 0;
    if (!hasMore) {
      return (accumulated >> 1n) ^ -(accumulated & 1n);
    }
  }
}
25
/**
 * Read a length-prefixed string: a zigzag varint byte count followed by that
 * many bytes, decoded with TextDecoder (UTF-8 by default).
 *
 * @param {{view: DataView, offset: number}} reader advanced past the string
 * @returns {string}
 */
function readAvroString(reader) {
  const byteCount = readZigZag(reader);
  const start = reader.view.byteOffset + reader.offset;
  const encoded = new Uint8Array(reader.view.buffer, start, byteCount);
  reader.offset += byteCount;
  return new TextDecoder().decode(encoded);
}
31
/**
 * Parse the header of an Avro object container file.
 *
 * Checks the 4 magic bytes, reads the metadata map (a sequence of blocks of
 * string key/value pairs terminated by a zero count), JSON-parses the
 * "avro.schema" entry and, when present, the "schema" and "iceberg.schema"
 * entries, then captures the 16-byte sync marker.
 *
 * @param {{view: DataView, offset: number}} reader advanced past the header
 * @returns {{metadata: object, syncMarker: Uint8Array}}
 * @throws {Error} when the magic bytes do not match
 */
function avroMetadata(reader) {
  const AVRO_MAGIC = 1331849729; // bytes "Obj" followed by 0x01, read big-endian
  if (reader.view.getUint32(reader.offset) !== AVRO_MAGIC) throw new Error("avro invalid magic bytes");
  reader.offset += 4;

  const metadata = {};
  for (let pairCount = readZigZag(reader); pairCount !== 0; pairCount = readZigZag(reader)) {
    if (pairCount < 0) {
      // A negative count is followed by a block byte size, which is read and ignored.
      pairCount = -pairCount;
      readZigZag(reader);
    }
    for (let read = 0; read < pairCount; read++) {
      const key = readAvroString(reader);
      metadata[key] = readAvroString(reader);
    }
  }

  metadata["avro.schema"] = JSON.parse(metadata["avro.schema"]);
  for (const optionalKey of ["schema", "iceberg.schema"]) {
    if (metadata[optionalKey]) metadata[optionalKey] = JSON.parse(metadata[optionalKey]);
  }

  const markerStart = reader.view.byteOffset + reader.offset;
  const syncMarker = new Uint8Array(reader.view.buffer, markerStart, 16);
  reader.offset += 16;
  return { metadata, syncMarker };
}
57
/**
 * Decode every data block that follows an Avro container header.
 *
 * Each block is: record count, byte size, payload, 16-byte sync marker. The
 * payload is passed through `gunzip` when the codec is "deflate" and decoded
 * record by record using the top-level fields of `metadata["avro.schema"]`.
 *
 * A missing "avro.codec" entry is treated as "null" (uncompressed), as the
 * Avro specification prescribes, instead of being rejected as unsupported.
 *
 * @param {object} options
 * @param {{view: DataView, offset: number}} options.reader positioned after the header
 * @param {object} options.metadata parsed header metadata
 * @param {Uint8Array} options.syncMarker 16-byte marker from the header
 * @returns {object[]} decoded records, in file order
 * @throws {Error} on a sync marker mismatch or an unsupported codec
 */
function avroRead({ reader, metadata, syncMarker }) {
  const codec = metadata["avro.codec"] ?? "null";
  const blocks = [];
  while (reader.offset < reader.view.byteLength) {
    let recordCount = readZigZag(reader);
    if (recordCount === 0) break;
    if (recordCount < 0) recordCount = -recordCount;
    const blockSize = readZigZag(reader);
    let data = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, blockSize);
    reader.offset += blockSize;
    const blockSync = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, 16);
    reader.offset += 16;
    for (let i = 0; i < 16; i++) {
      if (blockSync[i] !== syncMarker[i]) throw new Error("sync marker does not match");
    }
    if (codec === "deflate") data = gunzip(data);
    else if (codec !== "null") throw new Error(`unsupported codec: ${codec}`);
    const { fields } = metadata["avro.schema"];
    const dataReader = {
      view: new DataView(data.buffer, data.byteOffset, data.byteLength),
      offset: 0
    };
    for (let i = 0; i < recordCount; i++) {
      const obj = {};
      for (const field of fields) {
        obj[field.name] = readType(dataReader, field.type);
      }
      blocks.push(obj);
    }
  }
  return blocks;
}
88
/**
 * Decode one value of the given Avro schema type from the reader.
 *
 * Handles "null", unions (arrays of types, selected by a zigzag index),
 * records, arrays, a set of logical types, fixed, and the primitive types
 * boolean/int/long/float/double/bytes/string.
 *
 * Object schemas that merely wrap another type, e.g. `{"type": "string"}`,
 * are unwrapped and decoded as the inner type; previously they fell through
 * to the "unsupported type" error. Object schemas are now printed as JSON in
 * that error instead of "[object Object]".
 *
 * @param {{view: DataView, offset: number}} reader advanced past the value
 * @param {string|object|Array} type Avro schema node
 * @returns {*} the decoded value (undefined for "null")
 * @throws {Error} when the type is not supported
 */
function readType(reader, type) {
  if (type === "null") return;
  if (Array.isArray(type)) return readType(reader, type[readZigZag(reader)]);

  if (typeof type === "object") {
    const base = type.type;
    if (base === "record") {
      const obj = {};
      for (const subField of type.fields) obj[subField.name] = readType(reader, subField.type);
      return obj;
    }
    if (base === "array") {
      const arr = [];
      while (true) {
        let count = readZigZag(reader);
        if (count === 0) break;
        if (count < 0) {
          // A negative count is followed by the block's byte size, which is skipped.
          count = -count;
          readZigZag(reader);
        }
        for (let i = 0; i < count; i++) arr.push(readType(reader, type.items));
      }
      return arr;
    }
    if (type.logicalType) {
      const { logicalType } = type;
      if (logicalType === "date" && base === "int") {
        // Value is multiplied by the number of milliseconds in a day.
        return new Date(readZigZag(reader) * 864e5);
      }
      if (logicalType === "time-millis" && base === "int") return readZigZag(reader);
      if (logicalType === "time-micros" && base === "long") return readZigZagBigInt(reader);
      if (logicalType === "timestamp-millis" && base === "long") {
        return new Date(Number(readZigZagBigInt(reader)));
      }
      if (logicalType === "timestamp-micros" && base === "long") {
        return new Date(Number(readZigZagBigInt(reader) / 1000n));
      }
      if (logicalType === "timestamp-nanos" && base === "long") {
        return new Date(Number(readZigZagBigInt(reader) / 1000000n));
      }
      if (logicalType === "decimal" && "precision" in type) {
        const bytes = base === "fixed" ? readFixed(reader, type.size) : readType(reader, base);
        const factor = 10 ** -(type.scale || 0);
        return parseDecimal(bytes) * factor;
      }
      if (logicalType === "uuid" && base === "fixed" && type.size === 16) {
        return bytesToUuid(readFixed(reader, 16));
      }
      console.warn(`unknown logical type: ${logicalType}`);
      return base === "fixed" ? readFixed(reader, type.size) : readType(reader, base);
    }
    if (base === "fixed") return readFixed(reader, type.size);
    // Plain wrapper such as {"type": "long"}: decode as the wrapped type.
    if (base !== undefined) return readType(reader, base);
  }

  switch (type) {
    case "boolean": {
      const value = reader.view.getUint8(reader.offset) === 1;
      reader.offset++;
      return value;
    }
    case "int":
      return readZigZag(reader);
    case "long":
      return readZigZagBigInt(reader);
    case "float": {
      const value = reader.view.getFloat32(reader.offset, true);
      reader.offset += 4;
      return value;
    }
    case "double": {
      const value = reader.view.getFloat64(reader.offset, true);
      reader.offset += 8;
      return value;
    }
    case "bytes": {
      const length = readZigZag(reader);
      const bytes = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, length);
      reader.offset += length;
      return bytes;
    }
    case "string": {
      const length = readZigZag(reader);
      const bytes = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, length);
      const text = new TextDecoder().decode(bytes);
      reader.offset += length;
      return text;
    }
    default: {
      const shown = typeof type === "string" ? type : JSON.stringify(type);
      throw new Error(`unsupported type: ${shown}`);
    }
  }
}
158
/**
 * Return a view over the next `size` bytes and advance the reader past them.
 * The result shares the reader's underlying buffer (no copy is made).
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {number} size number of bytes to take
 * @returns {Uint8Array}
 */
function readFixed(reader, size) {
  const { buffer, byteOffset } = reader.view;
  const chunk = new Uint8Array(buffer, byteOffset + reader.offset, size);
  reader.offset += size;
  return chunk;
}
163
/**
 * Format the first 16 bytes as a lowercase hex UUID string with dashes after
 * bytes 4, 6, 8 and 10 (8-4-4-4-12 layout).
 *
 * @param {Uint8Array|number[]} bytes at least 16 byte values
 * @returns {string}
 */
function bytesToUuid(bytes) {
  const groupBounds = [[0, 4], [4, 6], [6, 8], [8, 10], [10, 16]];
  const groups = groupBounds.map(([from, to]) => {
    let part = "";
    for (let idx = from; idx < to; idx++) part += bytes[idx].toString(16).padStart(2, "0");
    return part;
  });
  return groups.join("-");
}
171
/**
 * Rewrite a name so it only contains ASCII letters, digits and underscores
 * and does not start with a digit.
 *
 * A leading digit is prefixed with "_"; any other disallowed UTF-16 code unit
 * is replaced by "_x" followed by its uppercase hex code.
 *
 * @param {string} name
 * @returns {string}
 */
function sanitize(name) {
  const escapeUnit = (unit) => "_x" + unit.charCodeAt(0).toString(16).toUpperCase();
  const pieces = [];
  for (let idx = 0; idx < name.length; idx++) {
    const unit = name.charAt(idx);
    const letter = /^[A-Za-z]$/.test(unit);
    const digit = /^[0-9]$/.test(unit);
    if (letter || unit === "_" || (digit && idx > 0)) {
      pieces.push(unit);
    } else if (digit) {
      // Only reachable for a digit in first position.
      pieces.push("_" + unit);
    } else {
      pieces.push(escapeUnit(unit));
    }
  }
  return pieces.join("");
}
184
/**
 * Render a byte sequence as a lowercase hex string, two characters per byte.
 *
 * @param {Uint8Array|number[]} bytes
 * @returns {string}
 */
function bytesToHex$1(bytes) {
  const pairs = Array.from(bytes, (value) => value.toString(16).padStart(2, "0"));
  return pairs.join("");
}
189
/**
 * Produce a version-4 style UUID string.
 *
 * Uses `globalThis.crypto.randomUUID()` when it exists; otherwise fills a
 * template using `Math.random()`.
 *
 * @returns {string}
 */
function uuid4() {
  if (globalThis.crypto?.randomUUID) {
    return globalThis.crypto.randomUUID();
  }
  const template = "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";
  const fill = (placeholder) => {
    const nibble = Math.random() * 16 | 0;
    // "y" positions are forced into the range 8..b.
    const value = placeholder === "x" ? nibble : nibble & 3 | 8;
    return value.toString(16);
  };
  return template.replace(/[xy]/g, fill);
}
196
/**
 * Convert an `s3://` or `s3a://` URL into a virtual-hosted-style
 * `https://<bucket>.s3.amazonaws.com/<key>` URL. Other URLs pass through
 * unchanged.
 *
 * @param {string} url
 * @returns {string}
 * @throws {Error} when an S3 URL has no "/" after the bucket name
 */
function translateS3Url(url) {
  const isS3 = url.startsWith("s3a://") || url.startsWith("s3://");
  if (!isS3) return url;

  const afterScheme = url.slice(url.indexOf("://") + 3);
  const firstSlash = afterScheme.indexOf("/");
  if (firstSlash === -1) throw new Error("Invalid S3 URL, missing \"/\" after bucket");

  const bucket = afterScheme.slice(0, firstSlash);
  const keyPath = afterScheme.slice(firstSlash);
  return `https://${bucket}.s3.amazonaws.com${keyPath}`;
}
205
/**
 * Build a resolver that reads, writes and deletes objects over HTTP(S).
 * `s3://` / `s3a://` URLs are translated with translateS3Url first.
 *
 * - `reader(url, byteLength)` delegates to `asyncBufferFromUrl`.
 * - `writer(url, options)` returns a ByteWriter whose `finish()` PUTs the
 *   buffered bytes; `options.ifNoneMatch` is sent as an `If-None-Match` header.
 *   A non-ok response throws an Error carrying `status`.
 * - `deleter(url)` issues a DELETE and treats 404 as success.
 *
 * @param {object} [options]
 * @param {RequestInit} [options.requestInit] merged into every request
 * @returns {{reader: Function, writer: Function, deleter: Function}}
 */
function urlResolver({ requestInit } = {}) {
  const reader = (url, byteLength) => asyncBufferFromUrl({
    url: translateS3Url(url),
    byteLength,
    requestInit
  });

  const writer = (url, options) => {
    const w = new ByteWriter();
    w.finish = async function() {
      const target = translateS3Url(url);
      const body = w.getBytes().slice();
      // Flatten caller headers into a plain object so extra headers can be added.
      const headers = {};
      if (requestInit?.headers) {
        new Headers(requestInit.headers).forEach((value, key) => {
          headers[key] = value;
        });
      }
      if (options?.ifNoneMatch) headers["If-None-Match"] = options.ifNoneMatch;
      const res = await fetch(target, { ...requestInit, method: "PUT", headers, body });
      if (res.ok) return;
      const err = new Error(`PUT ${url}: ${res.status} ${res.statusText}`);
      err.status = res.status;
      throw err;
    };
    return w;
  };

  const deleter = async (url) => {
    const res = await fetch(translateS3Url(url), { ...requestInit, method: "DELETE" });
    const acceptable = res.ok || res.status === 404;
    if (!acceptable) throw new Error(`DELETE ${url}: ${res.status} ${res.statusText}`);
  };

  return { reader, writer, deleter };
}
247
/**
 * Wrap a resolver so that `reader(path)` results are memoised per path.
 *
 * The cached value is the promise of `cachedAsyncBuffer(await base.reader(...))`;
 * a rejected promise is evicted so a later call retries. When the base
 * resolver has `writer` / `deleter`, the wrappers evict the path after a
 * successful `finish()` / delete.
 *
 * @param {{reader: Function, writer?: Function, deleter?: Function}} base
 * @returns {{reader: Function, writer?: Function, deleter?: Function}}
 */
function cachingResolver(base) {
  const cache = new Map();

  const load = async (path, byteLength) => {
    const raw = await base.reader(path, byteLength);
    return cachedAsyncBuffer(raw);
  };

  const out = {
    reader(path, byteLength) {
      const hit = cache.get(path);
      if (hit) return hit;
      const pending = load(path, byteLength);
      cache.set(path, pending);
      pending.catch(() => {
        // Only evict if the entry was not replaced in the meantime.
        if (cache.get(path) === pending) cache.delete(path);
      });
      return pending;
    }
  };

  const { writer: baseWriter, deleter: baseDeleter } = base;
  if (baseWriter) {
    out.writer = (path, options) => {
      const w = baseWriter(path, options);
      const finishBase = w.finish.bind(w);
      w.finish = async function() {
        await finishBase();
        cache.delete(path);
      };
      return w;
    };
  }
  if (baseDeleter) {
    out.deleter = async (path) => {
      await baseDeleter(path);
      cache.delete(path);
    };
  }
  return out;
}
281
/**
 * Build a function that lists the file names directly under an S3 "directory"
 * using the ListObjectsV2 REST API (`list-type=2`, `delimiter=/`).
 *
 * The listing follows `NextContinuationToken` while the response reports
 * `<IsTruncated>true</IsTruncated>`, so directories with more entries than fit
 * in a single response page are listed completely. Previously only the first
 * page was returned.
 *
 * @param {object} [options]
 * @param {RequestInit} [options.requestInit] passed to every fetch call
 * @returns {(url: string) => Promise<string[]>} resolves to the last path
 *   segment of each listed key; empty names are dropped
 */
function s3Lister({ requestInit } = {}) {
  return async function list(url) {
    const s3parts = s3ParseUrl(url);
    if (!s3parts) throw new Error(`not an S3 URL: ${url}`);
    const { bucket, prefix } = s3parts;
    const baseUrl = `https://${bucket}.s3.amazonaws.com/?list-type=2&prefix=${prefix.replace(/\/$/, "")}/&delimiter=/`;

    const names = [];
    let continuationToken;
    do {
      // Tokens may contain "+", "/" and "=", so they must be percent-encoded.
      const pageUrl = continuationToken
        ? `${baseUrl}&continuation-token=${encodeURIComponent(continuationToken)}`
        : baseUrl;
      const res = await fetch(pageUrl, requestInit);
      if (!res.ok) throw new Error(`${res.status} ${res.statusText}`);
      const xml = await res.text();

      for (const entry of xml.match(/<Contents>(.*?)<\/Contents>/gs) || []) {
        const keyMatch = entry.match(/<Key>(.*?)<\/Key>/);
        if (!keyMatch) throw new Error("failed to parse S3 list response");
        const name = keyMatch[1].split("/").pop() ?? "";
        if (name) names.push(name);
      }

      const truncated = /<IsTruncated>\s*true\s*<\/IsTruncated>/.test(xml);
      continuationToken = truncated
        ? xml.match(/<NextContinuationToken>(.*?)<\/NextContinuationToken>/)?.[1]
        : undefined;
    } while (continuationToken);
    return names;
  };
}
296
/**
 * Split an S3 URL into bucket and key prefix.
 *
 * Recognises `s3://bucket/...`, `s3a://bucket/...`, path-style
 * `https://s3.amazonaws.com/bucket/...`, and virtual-hosted
 * `https://bucket.s3[.-region].amazonaws.com/...` URLs.
 *
 * @param {string} url
 * @returns {{bucket: string, prefix: string}|undefined} undefined when the URL
 *   matches none of the recognised forms
 */
function s3ParseUrl(url) {
  // Take the bucket from slash-separated segment `bucketIndex`; the rest is the prefix.
  const splitAt = (bucketIndex) => {
    const segments = url.split("/");
    return {
      bucket: segments[bucketIndex],
      prefix: segments.slice(bucketIndex + 1).join("/")
    };
  };

  if (url.startsWith("s3://") || url.startsWith("s3a://")) return splitAt(2);
  if (url.startsWith("https://s3.amazonaws.com/")) return splitAt(3);

  const hosted = url.match(/^https:\/\/([a-z0-9][a-z0-9-]*)\.s3(?:[.-][a-z0-9-]+)?\.amazonaws\.com\/(.*)$/);
  if (hosted) {
    const [, bucket, prefix] = hosted;
    return { bucket, prefix };
  }
}
317
/**
 * Read a whole object through the resolver and decode it as text.
 * Content that starts with the gzip magic bytes is decompressed first.
 *
 * @param {{reader: Function}} resolver
 * @param {string} path
 * @returns {Promise<string>}
 */
async function resolveText(resolver, path) {
  const source = await resolver.reader(path);
  const raw = await source.slice(0, source.byteLength);
  const bytes = isGzip(raw) ? await decompressGzip(raw) : raw;
  return new TextDecoder().decode(bytes);
}
323
/**
 * Report whether a buffer begins with the two gzip magic bytes 0x1f 0x8b.
 *
 * @param {ArrayBuffer} buf
 * @returns {boolean} false for buffers shorter than two bytes
 */
function isGzip(buf) {
  if (buf.byteLength < 2) return false;
  const [first, second] = new Uint8Array(buf, 0, 2);
  return first === 0x1f && second === 0x8b;
}
328
/**
 * Inflate gzip data using the platform's DecompressionStream.
 *
 * @param {ArrayBuffer} buf gzip-compressed bytes
 * @returns {Promise<ArrayBuffer>} the decompressed bytes
 * @throws {Error} when `globalThis.DecompressionStream` is unavailable
 */
async function decompressGzip(buf) {
  if (!globalThis.DecompressionStream) {
    throw new Error("gzip decompression is not supported in this environment");
  }
  const compressed = new Blob([buf]).stream();
  const inflated = compressed.pipeThrough(new DecompressionStream("gzip"));
  return await new Response(inflated).arrayBuffer();
}
333
/**
 * Load an Avro container file through the resolver and decode all records.
 *
 * @param {string} url
 * @param {{reader: Function}} resolver
 * @param {number} [byteLength] size hint; only forwarded when it is a finite number
 * @returns {Promise<object[]>} records produced by avroRead
 */
async function fetchAvroRecords(url, resolver, byteLength) {
  const hasUsableLength = byteLength !== void 0 && Number.isFinite(byteLength);
  const source = await resolver.reader(url, hasUsableLength ? byteLength : void 0);
  const contents = await source.slice(0, source.byteLength);
  const reader = { view: new DataView(contents), offset: 0 };
  const header = await avroMetadata(reader);
  return await avroRead({
    reader,
    metadata: header.metadata,
    syncMarker: header.syncMarker
  });
}
348
+ const MAX_SAFE = BigInt(Number.MAX_SAFE_INTEGER);
349
/**
 * JSON.stringify variant that writes BigInt values as bare integer literals.
 *
 * Mirrors JSON.stringify for values that have no JSON representation:
 * `undefined`, functions and symbols become `null` inside arrays (including
 * array holes) and are omitted as object properties, and objects exposing a
 * `toJSON()` method are serialised through it. Previously such array elements
 * produced invalid output like `[1,,2]`, and function-valued properties
 * produced `"k":undefined`.
 *
 * @param {*} value value to serialise
 * @param {number} [indent] spaces per nesting level; falsy means compact output
 * @returns {string|undefined} JSON text (undefined when the top-level value is
 *   itself not serialisable, like JSON.stringify)
 */
function stringifyIcebergJson(value, indent) {
  const sp = indent ? " ".repeat(indent) : "";

  // Join already-serialised parts between brackets, honouring the indent setting.
  function wrap(open, close, parts, depth) {
    if (!sp) return open + parts.join(",") + close;
    const pad = sp.repeat(depth + 1);
    return open + "\n" + pad + parts.join(",\n" + pad) + "\n" + sp.repeat(depth) + close;
  }

  // Returns JSON text, or undefined when `v` has no JSON representation.
  function emit(v, depth) {
    if (typeof v === "bigint") return v.toString();
    if (v === null) return "null";
    if (typeof v !== "object") return JSON.stringify(v);
    if (typeof v.toJSON === "function") return emit(v.toJSON(), depth);
    if (Array.isArray(v)) {
      if (v.length === 0) return "[]";
      // Array.from visits holes too, so they serialise as null.
      const items = Array.from(v, (x) => emit(x, depth + 1) ?? "null");
      return wrap("[", "]", items, depth);
    }
    const members = [];
    for (const key of Object.keys(v)) {
      const text = emit(v[key], depth + 1);
      if (text !== void 0) members.push(JSON.stringify(key) + (sp ? ": " : ":") + text);
    }
    if (members.length === 0) return "{}";
    return wrap("{", "}", members, depth);
  }

  return emit(value, 0);
}
375
/**
 * Parse JSON text, returning integers that do not fit in a safe JavaScript
 * number as BigInt instead of losing precision.
 *
 * Changes from the previous implementation:
 * - the `\t` escape decodes to a tab character (it produced a space);
 * - a value position with no valid token (empty input, `[,1]`, `{"a":}`, a
 *   lone `-`) throws instead of silently yielding 0 or NaN;
 * - `\u` escapes must be followed by exactly four hex digits.
 *
 * @param {string} text JSON document
 * @returns {*} parsed value; integers outside the safe range are BigInt
 * @throws {Error} on malformed input or trailing characters
 */
function parseIcebergJson(text) {
  let i = 0;
  const isDigit = (c) => c >= "0" && c <= "9";

  function skipWs() {
    while (i < text.length) {
      const c = text.charCodeAt(i);
      if (c !== 32 && c !== 9 && c !== 10 && c !== 13) break;
      i++;
    }
  }

  function parseString() {
    if (text[i] !== "\"") throw new Error(`expected " at ${i}`);
    i++;
    let s = "";
    while (i < text.length) {
      const c = text[i++];
      if (c === "\"") return s;
      if (c !== "\\") {
        s += c;
        continue;
      }
      const e = text[i++];
      if (e === "u") {
        const hex = text.slice(i, i + 4);
        if (!/^[0-9a-fA-F]{4}$/.test(hex)) throw new Error(`bad unicode escape at ${i}`);
        s += String.fromCharCode(parseInt(hex, 16));
        i += 4;
      } else if (e === "n") s += "\n";
      else if (e === "t") s += "\t";
      else if (e === "r") s += "\r";
      else if (e === "b") s += "\b";
      else if (e === "f") s += "\f";
      else s += e; // covers \" \\ \/ which map to themselves
    }
    throw new Error("unterminated string");
  }

  function parseNumber() {
    const start = i;
    if (text[i] === "-") i++;
    const digitsStart = i;
    while (isDigit(text[i])) i++;
    // Without this check an empty or garbage token would be parsed as 0.
    if (i === digitsStart) throw new Error(`unexpected token at ${start}`);
    const intEnd = i;
    let isFloat = false;
    if (text[i] === ".") {
      isFloat = true;
      i++;
      while (isDigit(text[i])) i++;
    }
    if (text[i] === "e" || text[i] === "E") {
      isFloat = true;
      i++;
      if (text[i] === "+" || text[i] === "-") i++;
      while (isDigit(text[i])) i++;
    }
    if (isFloat) {
      const parsed = Number(text.slice(start, i));
      if (Number.isNaN(parsed)) throw new Error(`invalid number at ${start}`);
      return parsed;
    }
    const intStr = text.slice(start, intEnd);
    const asNumber = Number(intStr);
    // Anything beyond +/-(2^53 - 1) cannot be represented exactly as a number.
    return Number.isSafeInteger(asNumber) ? asNumber : BigInt(intStr);
  }

  function parseLiteral(lit, val) {
    if (text.slice(i, i + lit.length) !== lit) throw new Error(`bad literal at ${i}`);
    i += lit.length;
    return val;
  }

  function parseValue() {
    skipWs();
    const ch = text[i];
    if (ch === "\"") return parseString();
    if (ch === "{") return parseObject();
    if (ch === "[") return parseArray();
    if (ch === "t") return parseLiteral("true", true);
    if (ch === "f") return parseLiteral("false", false);
    if (ch === "n") return parseLiteral("null", null);
    return parseNumber();
  }

  function parseObject() {
    i++;
    skipWs();
    const obj = {};
    if (text[i] === "}") {
      i++;
      return obj;
    }
    while (true) {
      skipWs();
      const key = parseString();
      skipWs();
      if (text[i] !== ":") throw new Error(`expected : at ${i}`);
      i++;
      obj[key] = parseValue();
      skipWs();
      if (text[i] === ",") {
        i++;
        continue;
      }
      if (text[i] === "}") {
        i++;
        return obj;
      }
      throw new Error(`expected , or } at ${i}`);
    }
  }

  function parseArray() {
    i++;
    skipWs();
    const arr = [];
    if (text[i] === "]") {
      i++;
      return arr;
    }
    while (true) {
      arr.push(parseValue());
      skipWs();
      if (text[i] === ",") {
        i++;
        continue;
      }
      if (text[i] === "]") {
        i++;
        return arr;
      }
      throw new Error(`expected , or ] at ${i}`);
    }
  }

  const value = parseValue();
  skipWs();
  if (i !== text.length) throw new Error(`unexpected trailing input at ${i}`);
  return value;
}
503
/**
 * Extract the numeric version from a metadata file name.
 *
 * Accepts `v<N>` or `<N>-<anything>` followed by `.metadata.json`,
 * `.gz.metadata.json` or `.metadata.json.gz`.
 *
 * @param {string} file bare file name
 * @returns {number|undefined} the version, or undefined for other names
 */
function metadataFileVersionNumber(file) {
  const pattern = /^(?:v(\d+)|(\d+)-.+)(?:\.metadata\.json|\.gz\.metadata\.json|\.metadata\.json\.gz)$/;
  const found = pattern.exec(file);
  if (found === null) return void 0;
  const [, hintStyle, prefixStyle] = found;
  return Number(hintStyle ?? prefixStyle);
}
508
/**
 * Strip the metadata suffix (`.metadata.json`, `.gz.metadata.json` or
 * `.metadata.json.gz`) from a metadata file name.
 *
 * @param {string} file bare file name
 * @returns {string|undefined} the name without suffix, or undefined when
 *   metadataFileVersionNumber does not recognise the file
 */
function metadataFileVersionName(file) {
  const recognised = metadataFileVersionNumber(file) !== void 0;
  if (!recognised) return void 0;
  const suffix = /(?:\.metadata\.json\.gz|\.gz\.metadata\.json|\.metadata\.json)$/;
  return file.replace(suffix, "");
}
512
/**
 * Reduce a directory listing to one metadata version name per version number,
 * sorted by ascending version.
 *
 * When several files share a version, the one with the lowest
 * metadataFilePreference rank wins. The rank of the current winner is taken
 * from its real file name; previously it was recomputed from
 * `${name}.metadata.json`, which discarded the file's actual suffix and could
 * keep a gzip variant over an uncompressed one.
 *
 * @param {string[]} files bare file names
 * @returns {string[]} version names (file names without the metadata suffix)
 */
function metadataVersions(files) {
  // version number -> { name, rank } of the best candidate seen so far
  const best = new Map();
  for (const file of files) {
    const version = metadataFileVersionNumber(file);
    const name = metadataFileVersionName(file);
    if (version === void 0 || name === void 0) continue;
    const rank = metadataFilePreference(file, String(version).padStart(5, "0"));
    const current = best.get(version);
    if (current === void 0 || rank < current.rank) best.set(version, { name, rank });
  }
  return [...best.entries()].sort(([a], [b]) => a - b).map(([, entry]) => entry.name);
}
524
/**
 * Determine the newest metadata version name of a table.
 *
 * First tries `<tableUrl>/metadata/version-hint.text` and returns `v<N>` for
 * the integer it contains. If that fails for any reason, lists the metadata
 * directory and returns the highest version name from metadataVersions.
 *
 * @param {object} options
 * @param {string} options.tableUrl
 * @param {object} [options.resolver] defaults to urlResolver()
 * @param {Function} [options.lister] defaults to s3Lister()
 * @returns {Promise<string>} version name such as "v3"
 * @throws {Error} (as rejection) when neither strategy yields a version
 */
function icebergLatestVersion({ tableUrl, resolver, lister }) {
  resolver ??= urlResolver();
  lister ??= s3Lister();
  const metadataDir = `${tableUrl}/metadata`;

  const fromHint = (text) => {
    const version = parseInt(text);
    if (isNaN(version)) throw new Error(`invalid version: ${text}`);
    return `v${version}`;
  };
  const fromListing = () => lister(metadataDir).then((files) => {
    const versions = metadataVersions(files);
    if (versions.length === 0) throw new Error("no metadata files found");
    return versions[versions.length - 1];
  });
  const rethrowWrapped = (err) => {
    throw new Error(`failed to determine latest iceberg version: ${err.message}`);
  };

  return resolveText(resolver, `${metadataDir}/version-hint.text`)
    .then(fromHint)
    .catch(fromListing)
    .catch(rethrowWrapped);
}
543
/**
 * Load and parse a table's metadata JSON.
 *
 * When no file name is given, the latest version is looked up with
 * icebergLatestVersion and `.metadata.json` is appended. If reading that file
 * fails, the metadata directory is listed and findMetadataFile is used to
 * locate an alternative file for the same version; errors during this
 * fallback are ignored and the original failure is reported.
 *
 * @param {object} options
 * @param {string} options.tableUrl
 * @param {string} [options.metadataFileName]
 * @param {object} [options.resolver] defaults to urlResolver()
 * @param {Function} [options.lister] defaults to s3Lister()
 * @returns {Promise<{metadata: object, metadataFileName: string}>}
 * @throws {Error} when the metadata cannot be read by either route
 */
async function resolveMetadata({ tableUrl, metadataFileName, resolver, lister }) {
  resolver ??= urlResolver();
  lister ??= s3Lister();
  const metadataDir = `${tableUrl}/metadata`;

  if (!metadataFileName) {
    const latest = await icebergLatestVersion({ tableUrl, resolver, lister });
    metadataFileName = `${latest}.metadata.json`;
  }

  const readMetadata = async (fileName) => parseIcebergJson(await resolveText(resolver, `${metadataDir}/${fileName}`));

  try {
    return {
      metadata: await readMetadata(metadataFileName),
      metadataFileName
    };
  } catch (err) {
    try {
      const match = findMetadataFile(await lister(metadataDir), metadataFileName);
      if (match) {
        return {
          metadata: await readMetadata(match),
          metadataFileName: match
        };
      }
    } catch {}
    throw new Error(`failed to get iceberg metadata: ${err.message}`);
  }
}
569
/**
 * Pick the file from a listing that corresponds to the requested metadata
 * file: the exact name if listed, otherwise the best-ranked file (per
 * metadataFilePreference) that has the same version number.
 *
 * @param {string[]} files bare file names from the metadata directory
 * @param {string} metadataFileName requested file name
 * @returns {string|undefined}
 */
function findMetadataFile(files, metadataFileName) {
  if (files.includes(metadataFileName)) return metadataFileName;

  const version = metadataFileVersionNumber(metadataFileName);
  if (version === void 0) return void 0;

  const padded = String(version).padStart(5, "0");
  const rank = (candidate) => metadataFilePreference(candidate, padded);
  const sameVersion = files.filter((candidate) => metadataFileVersionNumber(candidate) === version);
  sameVersion.sort((left, right) => rank(left) - rank(right));
  return sameVersion[0];
}
576
/**
 * Load the metadata file with the highest version number found by listing
 * `<tableUrl>/metadata`.
 *
 * If the listing itself fails, hintProbeFallback is tried; when that yields
 * nothing the listing error is rethrown.
 *
 * @param {object} options
 * @param {string} options.tableUrl
 * @param {object} [options.resolver] defaults to urlResolver()
 * @param {Function} [options.lister] defaults to s3Lister()
 * @param {number} [options.maxProbe=64] passed to hintProbeFallback
 * @returns {Promise<{version: number, metadata: object, metadataFileName: string, metadataLocation: string}>}
 * @throws {Error} when no metadata file is listed
 */
async function loadLatestFileCatalogMetadata({ tableUrl, resolver, lister, maxProbe = 64 }) {
  resolver ??= urlResolver();
  lister ??= s3Lister();
  const metadataDir = `${tableUrl}/metadata`;

  let files;
  try {
    files = await lister(metadataDir);
  } catch (err) {
    const fallback = await hintProbeFallback(resolver, tableUrl, maxProbe);
    if (fallback) return fallback;
    throw err;
  }

  // First file wins among equal versions because the comparison is strict.
  const newest = { version: -1, file: void 0 };
  for (const file of files) {
    const version = metadataFileVersionNumber(file);
    if (version !== void 0 && version > newest.version) {
      newest.version = version;
      newest.file = file;
    }
  }
  if (newest.version < 0 || !newest.file) throw new Error(`no metadata files found at ${tableUrl}/metadata`);

  const metadataLocation = `${metadataDir}/${newest.file}`;
  const text = await resolveText(resolver, metadataLocation);
  return {
    version: newest.version,
    metadata: parseIcebergJson(text),
    metadataFileName: newest.file,
    metadataLocation
  };
}
607
/**
 * Locate the latest metadata without a directory listing.
 *
 * Reads the version from `version-hint.text`, loads that version, then probes
 * successive versions until one is missing. Gives up (returns undefined) when
 * the hint is unreadable or negative, the hinted version cannot be read, or
 * all `maxProbe` probes succeed so the true latest version is unknown.
 *
 * @param {object} resolver
 * @param {string} tableUrl
 * @param {number} maxProbe maximum number of versions to probe past the hint
 * @returns {Promise<object|undefined>} result of tryReadVersion for the last
 *   version found
 */
async function hintProbeFallback(resolver, tableUrl, maxProbe) {
  let hintVersion;
  try {
    const text = await resolveText(resolver, `${tableUrl}/metadata/version-hint.text`);
    const parsed = parseInt(text);
    if (!isNaN(parsed)) hintVersion = parsed;
  } catch {}
  if (hintVersion === void 0 || hintVersion < 0) return void 0;

  let lastFound = await tryReadVersion(resolver, tableUrl, hintVersion);
  if (!lastFound) return void 0;

  const limit = hintVersion + maxProbe;
  for (let probe = hintVersion + 1; probe <= limit; probe++) {
    const next = await tryReadVersion(resolver, tableUrl, probe);
    // The first gap marks the end of the version chain.
    if (!next) return lastFound;
    lastFound = next;
  }
  return void 0;
}
628
/**
 * Try to load `<tableUrl>/metadata/v<version>.metadata.json`.
 *
 * @param {object} resolver
 * @param {string} tableUrl
 * @param {number} version
 * @returns {Promise<{version: number, metadata: object, metadataFileName: string, metadataLocation: string}|undefined>}
 *   undefined when reading or parsing fails
 */
async function tryReadVersion(resolver, tableUrl, version) {
  const metadataFileName = `v${version}.metadata.json`;
  const metadataLocation = `${tableUrl}/metadata/${metadataFileName}`;
  let metadata;
  try {
    metadata = parseIcebergJson(await resolveText(resolver, metadataLocation));
  } catch {
    return;
  }
  return { version, metadata, metadataFileName, metadataLocation };
}
642
/**
 * Rank a metadata file name for a given version; lower is preferred.
 *
 *   0  v<N>.metadata.json
 *   1  v<N>.gz.metadata.json
 *   2  v<N>.metadata.json.gz
 *   3  <padded>-*.metadata.json
 *   4  <padded>-*.gz.metadata.json
 *   5  <padded>-*.metadata.json.gz
 *   6  anything else
 *
 * The `.gz.metadata.json` suffix is tested before the plain `.metadata.json`
 * suffix: every `*.gz.metadata.json` name also ends with `.metadata.json`, so
 * with the previous ordering rank 4 could never be returned and such files
 * were ranked 3.
 *
 * @param {string} file bare file name
 * @param {string} paddedVersion zero-padded version string, e.g. "00003"
 * @returns {number} rank from 0 (best) to 6
 */
function metadataFilePreference(file, paddedVersion) {
  const hintStyle = `v${Number(paddedVersion)}`;
  if (file === `${hintStyle}.metadata.json`) return 0;
  if (file === `${hintStyle}.gz.metadata.json`) return 1;
  if (file === `${hintStyle}.metadata.json.gz`) return 2;
  if (file.startsWith(`${paddedVersion}-`)) {
    if (file.endsWith(".gz.metadata.json")) return 4;
    if (file.endsWith(".metadata.json.gz")) return 5;
    if (file.endsWith(".metadata.json")) return 3;
  }
  return 6;
}
651
/**
 * Fetch `<url>/v1/config` from a REST catalog and build a frozen catalog
 * context from the response.
 *
 * The prefix is taken from `overrides.prefix`, falling back to
 * `defaults.prefix`, and is replaced by "" unless it is a string.
 *
 * @param {object} options
 * @param {string} options.url catalog base URL (one trailing slash is removed)
 * @param {string} [options.warehouse] sent as the `warehouse` query parameter
 * @param {RequestInit} [options.requestInit]
 * @returns {Promise<Readonly<{type: "rest", url: string, prefix: string, defaults: object, overrides: object, requestInit: (RequestInit|undefined)}>>}
 */
async function restCatalogConnect({ url, warehouse, requestInit }) {
  const base = url.replace(/\/$/, "");
  let configUrl = `${base}/v1/config`;
  if (warehouse) configUrl += `?warehouse=${encodeURIComponent(warehouse)}`;

  const res = await fetch(configUrl, requestInit);
  if (!res.ok) await throwRestError(res);

  const config = parseIcebergJson(await res.text());
  const defaults = config.defaults ?? {};
  const overrides = config.overrides ?? {};
  const rawPrefix = overrides.prefix ?? defaults.prefix ?? "";
  const prefix = typeof rawPrefix === "string" ? rawPrefix : "";

  return Object.freeze({ type: "rest", url: base, prefix, defaults, overrides, requestInit });
}
669
/**
 * List all table identifiers in a namespace, following pagination.
 *
 * @param {object} ctx REST catalog context
 * @param {object} options
 * @param {string|string[]} options.namespace
 * @returns {Promise<object[]>} concatenated `identifiers` of every page
 */
function restCatalogListTables(ctx, { namespace }) {
  const tablesPath = `namespaces/${encodeNamespace(namespace)}/tables`;
  const fetchPage = async (query) => {
    const res = await restFetch(ctx, `${tablesPath}${query}`);
    const page = parseIcebergJson(await res.text());
    return {
      items: page.identifiers ?? [],
      nextPageToken: page["next-page-token"]
    };
  };
  return paginate({}, fetchPage);
}
679
/**
 * Load a table from a REST catalog.
 *
 * @param {object} ctx REST catalog context
 * @param {object} options
 * @param {string|string[]} options.namespace
 * @param {string} options.table
 * @returns {Promise<{metadataLocation: *, metadata: *, config: object}>} taken
 *   from the response's `metadata-location`, `metadata` and `config` fields
 *   (`config` defaults to `{}`)
 */
async function restCatalogLoadTable(ctx, { namespace, table }) {
  const tablePath = `namespaces/${encodeNamespace(namespace)}/tables/${encodeURIComponent(table)}`;
  const res = await restFetch(ctx, tablePath);
  const payload = parseIcebergJson(await res.text());
  const { metadata } = payload;
  return {
    metadataLocation: payload["metadata-location"],
    metadata,
    config: payload.config ?? {}
  };
}
687
/**
 * Create a table in a REST catalog by POSTing to `namespaces/<ns>/tables`.
 *
 * `name` and `schema` are always sent; `location`, `partition-spec`,
 * `write-order`, `stage-create` and `properties` are only included when the
 * corresponding option is not undefined.
 *
 * @param {object} ctx REST catalog context
 * @param {object} options
 * @returns {Promise<{metadataLocation: *, metadata: *, config: object}>}
 */
async function restCatalogCreateTable(ctx, { namespace, table, schema, location, partitionSpec, writeOrder, stageCreate, properties }) {
  const ns = encodeNamespace(namespace);

  const body = { name: table, schema };
  const optionalFields = [
    ["location", location],
    ["partition-spec", partitionSpec],
    ["write-order", writeOrder],
    ["stage-create", stageCreate],
    ["properties", properties]
  ];
  for (const [key, fieldValue] of optionalFields) {
    if (fieldValue !== void 0) body[key] = fieldValue;
  }

  const res = await restFetch(ctx, `namespaces/${ns}/tables`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: stringifyIcebergJson(body)
  });
  const responseBody = parseIcebergJson(await res.text());
  return {
    metadataLocation: responseBody["metadata-location"],
    metadata: responseBody.metadata,
    config: responseBody.config ?? {}
  };
}
709
/**
 * Commit changes to a table by POSTing `{requirements, updates}` to the
 * table's REST endpoint.
 *
 * @param {object} ctx REST catalog context
 * @param {object} options
 * @param {string|string[]} options.namespace
 * @param {string} options.table
 * @param {*} options.requirements
 * @param {*} options.updates
 * @returns {Promise<{metadataLocation: *, metadata: *, config: object}>}
 */
async function restCatalogUpdateTable(ctx, { namespace, table, requirements, updates }) {
  const tablePath = `namespaces/${encodeNamespace(namespace)}/tables/${encodeURIComponent(table)}`;
  const request = {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: stringifyIcebergJson({ requirements, updates })
  };
  const res = await restFetch(ctx, tablePath, request);
  const responseBody = parseIcebergJson(await res.text());
  return {
    metadataLocation: responseBody["metadata-location"],
    metadata: responseBody.metadata,
    config: responseBody.config ?? {}
  };
}
724
/**
 * Drop a table via DELETE on its REST endpoint.
 *
 * @param {object} ctx REST catalog context
 * @param {object} options
 * @param {string|string[]} options.namespace
 * @param {string} options.table
 * @param {boolean} [options.purgeRequested] adds `?purgeRequested=true` when truthy
 * @returns {Promise<void>}
 */
async function restCatalogDropTable(ctx, { namespace, table, purgeRequested }) {
  const tablePath = `namespaces/${encodeNamespace(namespace)}/tables/${encodeURIComponent(table)}`;
  const query = purgeRequested ? "?purgeRequested=true" : "";
  await restFetch(ctx, `${tablePath}${query}`, { method: "DELETE" });
}
727
/**
 * Create a namespace in a REST catalog.
 *
 * @param {object} ctx REST catalog context
 * @param {object} options
 * @param {string|string[]} options.namespace dot-separated string or array of levels
 * @param {object} [options.properties] defaults to `{}` in the request
 * @returns {Promise<{namespace: *, properties: object}>} values from the
 *   response, falling back to the requested levels and `{}`
 */
async function restCatalogCreateNamespace(ctx, { namespace, properties }) {
  const levels = Array.isArray(namespace) ? namespace : namespace.split(".");
  const request = {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: stringifyIcebergJson({
      namespace: levels,
      properties: properties ?? {}
    })
  };
  const res = await restFetch(ctx, "namespaces", request);
  const created = parseIcebergJson(await res.text());
  return {
    namespace: created.namespace ?? levels,
    properties: created.properties ?? {}
  };
}
742
/**
 * Encode a namespace for use in a REST path: each level is
 * percent-encoded and the levels are joined with "%1F".
 *
 * @param {string|string[]} namespace array of levels, or a string split on "."
 * @returns {string}
 */
function encodeNamespace(namespace) {
  const levels = Array.isArray(namespace) ? namespace : namespace.split(".");
  const encodedLevels = levels.map((level) => encodeURIComponent(level));
  return encodedLevels.join("%1F");
}
745
/**
 * Issue a request against `<ctx.url>/v1/[<prefix>/]<path>`.
 *
 * One leading and one trailing slash are stripped from `ctx.prefix`; request
 * options from the context and the call are combined with mergeRequestInit.
 *
 * @param {{url: string, prefix?: string, requestInit?: RequestInit}} ctx
 * @param {string} path path below the versioned (and prefixed) root
 * @param {RequestInit} [init]
 * @returns {Promise<Response>} the successful response
 * @throws {Error} via throwRestError when the response is not ok
 */
async function restFetch(ctx, path, init) {
  let prefixSegment = "";
  if (ctx.prefix) {
    const trimmed = ctx.prefix.replace(/^\/|\/$/g, "");
    prefixSegment = `${trimmed}/`;
  }
  const requestOptions = mergeRequestInit(ctx.requestInit, init);
  const res = await fetch(`${ctx.url}/v1/${prefixSegment}${path}`, requestOptions);
  if (!res.ok) await throwRestError(res);
  return res;
}
753
/**
 * Combine two fetch option objects. Properties of `b` override those of `a`;
 * headers are merged key by key with `b` winning. When either argument is
 * falsy the other one is returned as is.
 *
 * @param {RequestInit} [a]
 * @param {RequestInit} [b]
 * @returns {RequestInit|undefined}
 */
function mergeRequestInit(a, b) {
  if (a && b) {
    const headers = {
      ...headersToObject(a.headers),
      ...headersToObject(b.headers)
    };
    return { ...a, ...b, headers };
  }
  return a ? a : b;
}
765
/**
 * Normalise fetch headers to a plain object.
 *
 * @param {Headers|Array<[string, string]>|object|undefined} h
 * @returns {object} `{}` for a falsy input, a new object for Headers
 *   instances and entry arrays, and the input itself otherwise
 */
function headersToObject(h) {
  if (!h) return {};
  // Both Headers and entry arrays iterate as [name, value] pairs.
  if (h instanceof Headers || Array.isArray(h)) return Object.fromEntries(h);
  return h;
}
777
/**
 * Turn a failed REST response into a thrown Error.
 *
 * When the body parses as JSON with an `error` member, the message is
 * "<code> <type>: <message>" (code falls back to the HTTP status); otherwise
 * it is "<status> <statusText>". The HTTP status is attached as `err.status`.
 *
 * @param {Response} res
 * @returns {Promise<never>}
 * @throws {Error} always
 */
async function throwRestError(res) {
  let detail = "";
  try {
    const payload = parseIcebergJson(await res.text());
    const info = payload?.error;
    if (info) {
      detail = `${info.code ?? res.status} ${info.type ?? ""}: ${info.message ?? ""}`.trim();
    }
  } catch {}
  const failure = new Error(detail || `${res.status} ${res.statusText}`);
  failure.status = res.status;
  throw failure;
}
790
/**
 * Collect the items of every page of a paginated endpoint.
 *
 * `fetchPage` receives a query string ("" or "?k=v&...") built from
 * `baseParams` plus, after the first page, `pageToken` set to the
 * percent-encoded token returned by the previous page.
 *
 * @param {object} baseParams query parameters sent with every page (values are
 *   inserted as given)
 * @param {(query: string) => Promise<{items: Array, nextPageToken?: string}>} fetchPage
 * @returns {Promise<Array>} items of all pages, in order
 */
async function paginate(baseParams, fetchPage) {
  const collected = [];
  let pageToken;
  do {
    const params = pageToken ? { ...baseParams, pageToken } : { ...baseParams };
    const pairs = Object.keys(params).map((key) => `${key}=${params[key]}`);
    const query = pairs.length ? "?" + pairs.join("&") : "";
    const { items, nextPageToken } = await fetchPage(query);
    collected.push(...items);
    pageToken = nextPageToken ? encodeURIComponent(nextPageToken) : void 0;
  } while (pageToken);
  return collected;
}
803
/**
 * Load table metadata through either a REST catalog or a file catalog.
 *
 * - REST: requires `namespace` and `table`; the table URL is taken from
 *   `metadata.location`.
 * - File: requires `tableUrl`; uses `resolver` or else `catalog.resolver`.
 *   With `catalog.conditionalCommits` the latest version is found via
 *   loadLatestFileCatalogMetadata (result includes `version`), otherwise via
 *   resolveMetadata.
 *
 * The catalog type is read with optional chaining everywhere, so a missing
 * catalog is reported as "unknown catalog type: undefined" (which the final
 * error already anticipated) rather than failing with a TypeError on the
 * first property access.
 *
 * @param {object} options
 * @param {object} options.catalog catalog context with a `type` of "rest" or "file"
 * @param {string|string[]} [options.namespace]
 * @param {string} [options.table]
 * @param {string} [options.tableUrl]
 * @param {object} [options.resolver]
 * @returns {Promise<object>} metadata, metadataFileName, tableUrl, resolver
 *   and, for conditional-commit file catalogs, version
 * @throws {Error} on missing required options or an unknown catalog type
 */
async function loadTable({ catalog, namespace, table, tableUrl, resolver }) {
  const catalogType = catalog?.type;

  if (catalogType === "rest") {
    if (!namespace || !table) throw new Error("namespace and table are required for rest catalogs");
    const { metadata } = await restCatalogLoadTable(catalog, { namespace, table });
    return {
      metadata,
      metadataFileName: void 0,
      tableUrl: metadata.location,
      resolver
    };
  }

  if (catalogType === "file") {
    if (!tableUrl) throw new Error("tableUrl is required for file catalogs");
    const eff = resolver ?? catalog.resolver;
    if (catalog.conditionalCommits) {
      const { metadata, metadataFileName, version } = await loadLatestFileCatalogMetadata({
        tableUrl,
        resolver: eff,
        lister: catalog.lister
      });
      return { metadata, metadataFileName, version, tableUrl, resolver: eff };
    }
    const { metadata, metadataFileName } = await resolveMetadata({
      tableUrl,
      resolver: eff,
      lister: catalog.lister
    });
    return { metadata, metadataFileName, tableUrl, resolver: eff };
  }

  throw new Error(`unknown catalog type: ${catalogType}`);
}
848
/**
 * Validates every top-level field of a schema against a table format version.
 *
 * @param {{ fields: Array<object> }} schema
 * @param {number} formatVersion
 * @throws {Error} propagated from validateFieldForVersion for the first offending field.
 */
function validateSchemaForVersion(schema, formatVersion) {
  for (const topLevelField of schema.fields) {
    // The field's own name is the root of the path reported in error messages.
    validateFieldForVersion(topLevelField, formatVersion, topLevelField.name);
  }
}
851
/**
 * Returns the largest field id found in a list of fields, including ids nested in their types.
 *
 * @param {Array<{ id: number, type: unknown }>} [fields]
 * @returns {number} the maximum id, or 0 when there are no fields.
 */
function maxFieldId(fields = []) {
  let highest = 0;
  for (const field of fields) {
    if (field.id > highest) highest = field.id;
    const nestedHighest = maxNestedFieldId(field.type);
    if (nestedHighest > highest) highest = nestedHighest;
  }
  return highest;
}
860
/**
 * Returns the largest id used inside a (possibly nested) type.
 *
 * @param {string | object} type - a primitive type name or a list/map/struct type object.
 * @returns {number} the maximum element/key/value/field id, or 0 for primitives and unknown shapes.
 */
function maxNestedFieldId(type) {
  if (typeof type === "string") return 0;
  switch (type.type) {
    case "list":
      return Math.max(type["element-id"] ?? 0, maxNestedFieldId(type.element));
    case "map":
      return Math.max(
        type["key-id"] ?? 0,
        type["value-id"] ?? 0,
        maxNestedFieldId(type.key),
        maxNestedFieldId(type.value)
      );
    case "struct":
      return maxFieldId(type.fields);
    default:
      return 0;
  }
}
874
// Largest field id accepted by the reserved-range checks in this file; larger ids are rejected.
const MAX_USER_FIELD_ID = 2147483447;
/**
 * Validates one field (and, recursively, its nested types) for a format version.
 *
 * @param {object} field - schema field with `id`, `type` and optional default values.
 * @param {number} formatVersion
 * @param {string} path - dotted path used in error messages.
 * @throws {Error} on reserved field ids, v3-only types/defaults in older versions,
 *   or defaults rejected by checkV3Default.
 */
function validateFieldForVersion(field, formatVersion, path) {
  const { id } = field;
  if (typeof id === "number" && id > MAX_USER_FIELD_ID) {
    throw new Error(`field id ${id} is in the reserved range (> ${MAX_USER_FIELD_ID}) (field: ${path})`);
  }
  if (formatVersion < 3) {
    checkTypeForV2(field.type, path);
    // Column defaults are only allowed from format-version 3 onwards.
    for (const key of ["initial-default", "write-default"]) {
      if (field[key] !== void 0) throw new Error(`${key} requires format-version 3 (field: ${path})`);
    }
  } else {
    checkV3Default(field, path);
  }
  checkNestedFieldsForVersion(field.type, formatVersion, path);
}
884
/**
 * Rejects types that require format-version 3, walking nested struct/list/map types.
 *
 * @param {string | object} type
 * @param {string} path - dotted path used in error messages.
 * @throws {Error} when a v3-only primitive type is found.
 */
function checkTypeForV2(type, path) {
  if (typeof type === "string") {
    const v3Only = type === "unknown"
      || type === "variant"
      || type === "timestamp_ns"
      || type === "timestamptz_ns"
      || type === "geometry"
      || type.startsWith("geometry(")
      || type === "geography"
      || type.startsWith("geography(");
    if (v3Only) throw new Error(`type ${type} requires format-version 3 (field: ${path})`);
    return;
  }
  switch (type.type) {
    case "struct":
      for (const child of type.fields) checkTypeForV2(child.type, `${path}.${child.name}`);
      break;
    case "list":
      checkTypeForV2(type.element, `${path}.element`);
      break;
    case "map":
      checkTypeForV2(type.key, `${path}.key`);
      checkTypeForV2(type.value, `${path}.value`);
      break;
  }
}
896
/**
 * Ensures that fields whose type only permits a null default do not carry a non-null default.
 *
 * @param {object} field
 * @param {string} path - dotted path used in error messages.
 * @throws {Error} when `initial-default` or `write-default` is non-null for such a type.
 */
function checkV3Default(field, path) {
  const kind = typeName(field.type);
  if (requiresNullDefault(kind)) {
    for (const key of ["initial-default", "write-default"]) {
      if (field[key] != null) {
        throw new Error(`${key} for field ${path} of type ${kind} must default to null`);
      }
    }
  }
}
901
/**
 * Recurses into a nested type, validating struct fields and the ids of list/map members.
 *
 * @param {string | object} type
 * @param {number} formatVersion
 * @param {string} path - dotted path used in error messages.
 */
function checkNestedFieldsForVersion(type, formatVersion, path) {
  if (typeof type === "string") return;
  switch (type.type) {
    case "struct":
      for (const child of type.fields) {
        validateFieldForVersion(child, formatVersion, `${path}.${child.name}`);
      }
      break;
    case "list": {
      const elementPath = `${path}.element`;
      checkReservedFieldId(type["element-id"], elementPath);
      checkNestedFieldsForVersion(type.element, formatVersion, elementPath);
      break;
    }
    case "map": {
      const keyPath = `${path}.key`;
      const valuePath = `${path}.value`;
      // Both ids are checked before descending, matching the order errors are reported in.
      checkReservedFieldId(type["key-id"], keyPath);
      checkReservedFieldId(type["value-id"], valuePath);
      checkNestedFieldsForVersion(type.key, formatVersion, keyPath);
      checkNestedFieldsForVersion(type.value, formatVersion, valuePath);
      break;
    }
  }
}
914
/**
 * Throws when a numeric id is greater than MAX_USER_FIELD_ID; non-numeric ids are ignored.
 *
 * @param {unknown} id
 * @param {string} path - dotted path used in the error message.
 */
function checkReservedFieldId(id, path) {
  const reserved = typeof id === "number" && id > MAX_USER_FIELD_ID;
  if (!reserved) return;
  throw new Error(`field id ${id} is in the reserved range (> ${MAX_USER_FIELD_ID}) (field: ${path})`);
}
917
/**
 * Returns the name of a type: the string itself for primitives, or the `type` property otherwise.
 *
 * @param {string | { type: string }} type
 * @returns {string}
 */
function typeName(type) {
  if (typeof type === "string") return type;
  return type.type;
}
920
/**
 * Tells whether a type name is one for which checkV3Default rejects non-null defaults.
 *
 * @param {string} type - a type name (including parameterized geometry/geography forms).
 * @returns {boolean}
 */
function requiresNullDefault(type) {
  if (type === "unknown" || type === "variant") return true;
  if (type === "geometry" || type.startsWith("geometry(")) return true;
  return type === "geography" || type.startsWith("geography(");
}
923
/**
 * Parses a `decimal(P, S)` type name.
 *
 * @param {string} type
 * @returns {{ precision: number, scale: number } | undefined} undefined when the name is not a decimal.
 */
function parseDecimalType(type) {
  const parts = /^decimal\((\d+),\s*(\d+)\)$/.exec(type);
  if (parts === null) return void 0;
  const [, precisionText, scaleText] = parts;
  return {
    precision: parseInt(precisionText, 10),
    scale: parseInt(scaleText, 10)
  };
}
931
/**
 * Computes the smallest number of bytes whose signed two's-complement range covers
 * every decimal of the given precision.
 *
 * @param {number} precision - number of decimal digits.
 * @returns {number} byte count (at least 1).
 */
function decimalRequiredBytes(precision) {
  const limit = 10n ** BigInt(precision);
  let byteCount = 1;
  // `capacity` is 2^(8 * byteCount - 1): the magnitude bound of a signed value of that width.
  for (let capacity = 128n; limit > capacity; capacity <<= 8n) {
    byteCount += 1;
  }
  return byteCount;
}
941
/**
 * Converts a decimal value to fixed-width bytes sized by the precision.
 *
 * @param {Uint8Array | number | bigint} value - bytes are returned unchanged when already the
 *   right length; numbers and bigints are multiplied by 10^scale to obtain the unscaled integer.
 * @param {number} precision
 * @param {number} scale
 * @param {string} label - description used in error messages.
 * @returns {Uint8Array}
 * @throws {Error} on wrong input type, wrong byte length, or a value exceeding the precision.
 */
function decimalToFixedBytes(value, precision, scale, label) {
  const size = decimalRequiredBytes(precision);
  if (value instanceof Uint8Array) {
    if (value.length === size) return value;
    throw new Error(`expected ${label}`);
  }
  const isBig = typeof value === "bigint";
  if (!isBig && typeof value !== "number") throw new Error(`expected ${label}`);
  const factor = 10n ** BigInt(scale);
  const unscaled = isBig ? value * factor : BigInt(Math.round(value * Number(factor)));
  const limit = 10n ** BigInt(precision);
  const withinPrecision = unscaled < limit && unscaled > -limit;
  if (!withinPrecision) throw new Error(`${label} exceeds precision ${precision}`);
  return bigintToFixedBytes(unscaled, size, label);
}
954
/**
 * Returns the value itself when it is already a Uint8Array, otherwise wraps it in a new one.
 *
 * @param {Uint8Array | ArrayLike<number> | ArrayBuffer} value
 * @returns {Uint8Array}
 */
function toUint8Array(value) {
  if (value instanceof Uint8Array) return value;
  return new Uint8Array(value);
}
957
/**
 * Converts a UUID to its 16-byte form.
 *
 * @param {Uint8Array | string} value - 16 bytes (returned unchanged) or a hex string;
 *   hyphens are removed and case is ignored before validation.
 * @param {string} label - description used in the error message.
 * @returns {Uint8Array}
 * @throws {Error} `expected <label>` for any other input.
 */
function uuidToBytes(value, label) {
  const invalid = () => new Error(`expected ${label}`);
  if (value instanceof Uint8Array) {
    if (value.length !== 16) throw invalid();
    return value;
  }
  if (typeof value !== "string") throw invalid();
  const digits = value.toLowerCase().replace(/-/g, "");
  if (!/^[0-9a-f]{32}$/.test(digits)) throw invalid();
  return Uint8Array.from({ length: 16 }, (_, index) => parseInt(digits.slice(index * 2, index * 2 + 2), 16));
}
969
/**
 * Encodes a bigint as big-endian two's-complement bytes of an exact width.
 *
 * @param {bigint} value
 * @param {number} size - number of bytes to produce.
 * @param {string} label - description used in the error message.
 * @returns {Uint8Array}
 * @throws {Error} when the value does not fit in `size` bytes.
 */
function bigintToFixedBytes(value, size, label) {
  const out = new Uint8Array(size);
  let rest = value;
  let index = size;
  while (index-- > 0) {
    out[index] = Number(rest & 255n);
    rest >>= 8n;
  }
  const highBitSet = (out[0] & 128) !== 0;
  // After shifting out `size` bytes only the sign extension may remain, and the top bit must agree.
  const fits = value < 0n ? rest === -1n && highBitSet : rest === 0n && !highBitSet;
  if (!fits) throw new Error(`${label} does not fit in ${size} bytes`);
  return out;
}
981
/**
 * Parses a partition transform string.
 *
 * @param {string} transform - "identity", "void", "year", "month", "day", "hour",
 *   "bucket[N]" or "truncate[W]" with a positive integer argument.
 * @returns {{ kind: string, n?: number, w?: number }}
 * @throws {Error} for anything else, including a zero bucket count or truncate width.
 */
function parseTransform(transform) {
  switch (transform) {
    case "identity":
    case "void":
    case "year":
    case "month":
    case "day":
    case "hour":
      return { kind: transform };
  }
  const bucket = /^bucket\[(\d+)\]$/.exec(transform);
  if (bucket) {
    const n = parseInt(bucket[1], 10);
    if (n > 0) return { kind: "bucket", n };
  }
  const truncate = /^truncate\[(\d+)\]$/.exec(transform);
  if (truncate) {
    const w = parseInt(truncate[1], 10);
    if (w > 0) return { kind: "truncate", w };
  }
  throw new Error(`unsupported partition transform: ${transform}`);
}
1001
/**
 * Returns the type produced by applying a partition transform to a source type.
 *
 * @param {string} transform - transform string accepted by parseTransform.
 * @param {string | object} sourceType
 * @returns {string | object | undefined} the source type for identity/truncate, "int" otherwise.
 * @throws {Error} when the transform is unsupported or not applicable to the source type.
 */
function transformResultType(transform, sourceType) {
  const parsed = parseTransform(transform);
  validateTransformSource(parsed, sourceType);
  const { kind } = parsed;
  if (kind === "identity" || kind === "truncate") return sourceType;
  switch (kind) {
    case "void":
    case "year":
    case "month":
    case "day":
    case "hour":
    case "bucket":
      return "int";
    default:
      return void 0;
  }
}
1015
/**
 * Applies a partition transform to a single value.
 *
 * @param {string} transform - transform string accepted by parseTransform.
 * @param {unknown} value - null/undefined always yields null (after the transform is validated).
 * @param {string | object} sourceType
 * @returns {unknown} the partition value.
 * @throws {Error} when the transform is unsupported or not applicable to the source type.
 */
function applyTransform(transform, value, sourceType) {
  const parsed = parseTransform(transform);
  validateTransformSource(parsed, sourceType);
  if (value == null) return null;
  const { kind } = parsed;
  if (kind === "identity") return value;
  if (kind === "void") return null;
  if (kind === "year") return yearTransform(value, sourceType);
  if (kind === "month") return monthTransform(value, sourceType);
  if (kind === "day") return dayTransform(value, sourceType);
  if (kind === "hour") return hourTransform(value, sourceType);
  if (kind === "bucket") return bucketTransform(value, sourceType, parsed.n);
  if (kind === "truncate") return truncateTransform(value, sourceType, parsed.w);
}
1030
/**
 * Converts a date/timestamp value to milliseconds since the Unix epoch.
 *
 * @param {Date | number | bigint | string} value - a Date, or a count in the unit implied by the
 *   source type: days for "date", microseconds for "timestamp"/"timestamptz", and the remaining
 *   (nanosecond) types otherwise.
 * @param {string | object} sourceType
 * @param {string} transform - transform kind, used only to validate the source type.
 * @returns {number} epoch milliseconds, rounded toward negative infinity so that instants
 *   before 1970 fall into the preceding millisecond rather than being pulled toward zero.
 * @throws {Error} when the transform does not apply to the source type.
 */
function dateAsMillis(value, sourceType, transform) {
  const t = typeName(sourceType);
  validateTransformSource({ kind: transform }, sourceType);
  if (value instanceof Date) return value.getTime();
  const n = typeof value === "bigint" ? value : BigInt(value);
  if (t === "date") return Number(n) * 864e5;
  const unitsPerMilli = t === "timestamp" || t === "timestamptz" ? 1000n : 1000000n;
  // BigInt division truncates toward zero; adjust to floor division for negative remainders,
  // otherwise e.g. -1 microsecond would map to millisecond 0 (1970) instead of -1 (1969).
  const quotient = n / unitsPerMilli;
  const floored = n % unitsPerMilli < 0n ? quotient - 1n : quotient;
  return Number(floored);
}
1042
/**
 * Year partition transform: whole UTC years since 1970.
 *
 * @param {unknown} v - value accepted by dateAsMillis.
 * @param {string | object} t - source type.
 * @returns {number}
 */
function yearTransform(v, t) {
  const epochMillis = dateAsMillis(v, t, "year");
  const utcYear = new Date(epochMillis).getUTCFullYear();
  return utcYear - 1970;
}
1045
/**
 * Month partition transform: whole UTC months since January 1970.
 *
 * @param {unknown} v - value accepted by dateAsMillis.
 * @param {string | object} t - source type.
 * @returns {number}
 */
function monthTransform(v, t) {
  const moment = new Date(dateAsMillis(v, t, "month"));
  const yearsSinceEpoch = moment.getUTCFullYear() - 1970;
  return yearsSinceEpoch * 12 + moment.getUTCMonth();
}
1049
/**
 * Day partition transform: whole days since the Unix epoch (floored).
 *
 * @param {unknown} v - value accepted by dateAsMillis.
 * @param {string | object} t - source type.
 * @returns {number}
 */
function dayTransform(v, t) {
  const millisPerDay = 864e5;
  const epochMillis = dateAsMillis(v, t, "day");
  return Math.floor(epochMillis / millisPerDay);
}
1052
/**
 * Hour partition transform: whole hours since the Unix epoch (floored).
 *
 * @param {unknown} v - value accepted by dateAsMillis.
 * @param {string | object} t - source type.
 * @returns {number}
 */
function hourTransform(v, t) {
  const millisPerHour = 36e5;
  const epochMillis = dateAsMillis(v, t, "hour");
  return Math.floor(epochMillis / millisPerHour);
}
1055
/**
 * Bucket partition transform: hashes the value's byte form and reduces it modulo `n`.
 *
 * @param {unknown} value
 * @param {string | object} sourceType
 * @param {number} n - number of buckets.
 * @returns {number} bucket index in [0, n).
 */
function bucketTransform(value, sourceType, n) {
  const hash = murmur3_32(bucketBytes(value, sourceType), 0);
  // Clear the sign bit so the modulo result is never negative.
  const nonNegative = hash & 2147483647;
  return nonNegative % n;
}
1058
/**
 * Produces the bytes that are hashed by the bucket transform for a value of the given type.
 *
 * Integer-like and temporal types are written as an 8-byte little-endian signed integer;
 * nanosecond timestamps are reduced to microseconds first. Strings are UTF-8 encoded,
 * decimals use their minimal unscaled two's-complement form, and uuid/fixed/binary use raw bytes.
 *
 * @param {unknown} value
 * @param {string | object} sourceType
 * @returns {Uint8Array}
 * @throws {Error} for source types the bucket transform does not support.
 */
function bucketBytes(value, sourceType) {
  const t = typeName(sourceType);
  if (t.startsWith("decimal(")) return decimalToUnscaledBytes(value, t);
  if (t === "uuid") return uuidToBytes(value, "uuid partition value");
  if (t.startsWith("fixed[") || t === "binary" || t === "fixed") return value instanceof Uint8Array ? value : new Uint8Array(value);
  switch (t) {
    case "int":
    case "long":
    case "date":
    case "time":
    case "timestamp":
    case "timestamptz":
    case "timestamp_ns":
    case "timestamptz_ns": {
      let v;
      if (t === "date") v = value instanceof Date ? BigInt(Math.floor(value.getTime() / 864e5)) : BigInt(value);
      else if (t === "timestamp" || t === "timestamptz") v = value instanceof Date ? BigInt(value.getTime()) * 1000n : BigInt(value);
      else if (t === "timestamp_ns" || t === "timestamptz_ns") {
        if (value instanceof Date) v = BigInt(value.getTime()) * 1000n;
        else {
          // Floor (not truncate) nanoseconds to microseconds so pre-epoch values round the
          // same way as the date branch above; BigInt division alone rounds toward zero.
          const nanos = BigInt(value);
          const micros = nanos / 1000n;
          v = nanos % 1000n < 0n ? micros - 1n : micros;
        }
      } else v = typeof value === "bigint" ? value : BigInt(value);
      const out = new Uint8Array(8);
      new DataView(out.buffer).setBigInt64(0, v, true);
      return out;
    }
    case "string": return new TextEncoder().encode(String(value));
    default: throw new Error(`bucket transform: unsupported source type ${t}`);
  }
}
1085
/**
 * Encodes a decimal as the minimal big-endian two's-complement bytes of its unscaled value.
 *
 * @param {number | bigint | string} value - multiplied by 10^scale to obtain the unscaled integer.
 * @param {string} decimalType - a `decimal(P, S)` type name.
 * @returns {Uint8Array}
 * @throws {Error} when `decimalType` is not a valid decimal type name.
 */
function decimalToUnscaledBytes(value, decimalType) {
  const parts = /^decimal\((\d+),\s*(\d+)\)$/.exec(decimalType);
  if (!parts) throw new Error(`bucket transform: invalid decimal type ${decimalType}`);
  const factor = 10n ** BigInt(parseInt(parts[2], 10));
  const unscaled = typeof value === "bigint" ? value * factor : BigInt(Math.round(Number(value) * Number(factor)));
  const littleEndian = [];
  let rest = unscaled;
  for (;;) {
    const low = Number(rest & 255n);
    littleEndian.push(low);
    rest >>= 8n;
    // Stop once the remaining bits are pure sign extension matching the last byte's top bit.
    const highBitSet = (low & 128) !== 0;
    if (highBitSet ? rest === -1n : rest === 0n) break;
  }
  return Uint8Array.from(littleEndian.reverse());
}
1102
/**
 * Truncate partition transform.
 *
 * Numbers are rounded down to a multiple of `w` (decimals on their unscaled value),
 * strings keep their first `w` code points, and binary keeps its first `w` bytes.
 *
 * @param {unknown} value
 * @param {string | object} sourceType
 * @param {number} w - truncation width.
 * @returns {number | bigint | string | Uint8Array}
 * @throws {Error} for unsupported source types or a malformed decimal type name.
 */
function truncateTransform(value, sourceType, w) {
  const t = typeName(sourceType);
  if (t.startsWith("decimal(")) {
    const parts = /^decimal\((\d+),\s*(\d+)\)$/.exec(t);
    if (!parts) throw new Error(`truncate transform: invalid decimal type ${t}`);
    const factor = 10n ** BigInt(parseInt(parts[2], 10));
    const unscaled = typeof value === "bigint" ? value * factor : BigInt(Math.round(Number(value) * Number(factor)));
    const width = BigInt(w);
    // Double modulo yields a non-negative remainder, so negatives round toward -infinity.
    const remainder = (unscaled % width + width) % width;
    return Number(unscaled - remainder) / Number(factor);
  }
  if (t === "binary") {
    const bytes = value instanceof Uint8Array ? value : new Uint8Array(value);
    return bytes.slice(0, w);
  }
  if (t === "int") {
    const num = Number(value);
    return num - (num % w + w) % w;
  }
  if (t === "long") {
    const width = BigInt(w);
    const big = typeof value === "bigint" ? value : BigInt(value);
    return big - (big % width + width) % width;
  }
  if (t === "string") {
    const text = String(value);
    let end = 0;
    for (let taken = 0; end < text.length && taken < w; taken++) {
      // Code points above U+FFFF occupy two UTF-16 code units.
      end += text.codePointAt(end) > 65535 ? 2 : 1;
    }
    return text.slice(0, end);
  }
  throw new Error(`truncate transform: unsupported source type ${t}`);
}
1139
/**
 * Verifies that a parsed transform can be applied to a source type.
 *
 * @param {{ kind: string }} parsed - result of parseTransform (only `kind` is read).
 * @param {string | object} sourceType
 * @throws {Error} `<kind> transform: unsupported source type <type>` when not applicable.
 */
function validateTransformSource(parsed, sourceType) {
  const t = typeName(sourceType);
  const { kind } = parsed;
  const unsupported = () => new Error(`${kind} transform: unsupported source type ${t}`);
  const isTimestamp = () => t === "timestamp" || t === "timestamptz" || t === "timestamp_ns" || t === "timestamptz_ns";
  switch (kind) {
    case "identity": {
      const rejected = t === "variant"
        || t === "geometry"
        || t.startsWith("geometry(")
        || t === "geography"
        || t.startsWith("geography(");
      if (rejected) throw unsupported();
      return;
    }
    case "void":
      return;
    case "bucket": {
      const accepted = t === "int"
        || t === "long"
        || t.startsWith("decimal(")
        || t === "date"
        || t === "time"
        || isTimestamp()
        || t === "string"
        || t === "uuid"
        || t.startsWith("fixed[")
        || t === "binary"
        || t === "fixed";
      if (!accepted) throw unsupported();
      return;
    }
    case "truncate": {
      const accepted = t === "int" || t === "long" || t.startsWith("decimal(") || t === "string" || t === "binary";
      if (!accepted) throw unsupported();
      return;
    }
    case "year":
    case "month":
    case "day":
      if (t === "date" || isTimestamp()) return;
      throw unsupported();
    case "hour":
      if (isTimestamp()) return;
      throw unsupported();
  }
}
1162
/**
 * 32-bit MurmurHash3 (x86 variant) over a byte sequence.
 *
 * @param {ArrayLike<number>} data - bytes to hash.
 * @param {number} seed
 * @returns {number} the hash as an unsigned 32-bit integer.
 */
function murmur3_32(data, seed) {
  const C1 = 3432918353;
  const C2 = 461845907;
  const rotl = (x, bits) => (x << bits) | (x >>> (32 - bits));
  // Per-word scramble shared by the block loop and the tail.
  const scramble = (word) => Math.imul(rotl(Math.imul(word, C1), 15), C2);

  const length = data.length;
  const tailStart = (length >>> 2) * 4;
  let hash = seed >>> 0;

  for (let offset = 0; offset < tailStart; offset += 4) {
    // Little-endian 32-bit word.
    const word = data[offset] | data[offset + 1] << 8 | data[offset + 2] << 16 | data[offset + 3] << 24;
    hash ^= scramble(word);
    hash = rotl(hash, 13);
    hash = Math.imul(hash, 5) + 3864292196 | 0;
  }

  const remaining = length & 3;
  if (remaining !== 0) {
    let tailWord = 0;
    if (remaining === 3) tailWord ^= data[tailStart + 2] << 16;
    if (remaining >= 2) tailWord ^= data[tailStart + 1] << 8;
    tailWord ^= data[tailStart];
    hash ^= scramble(tailWord);
  }

  // Finalization mix.
  hash ^= length;
  hash ^= hash >>> 16;
  hash = Math.imul(hash, 2246822507);
  hash ^= hash >>> 13;
  hash = Math.imul(hash, 3266489909);
  hash ^= hash >>> 16;
  return hash >>> 0;
}
1198
/**
 * Groups records by their partition tuple under a partition spec.
 *
 * @param {Iterable<object>} records
 * @param {{ fields: Array<object> }} schema
 * @param {{ fields: Array<object> }} partitionSpec
 * @returns {Array<{ partition: object, records: object[] }>} one entry per distinct partition,
 *   in first-seen order.
 * @throws {Error} when a partition field lacks `source-id` or references an unknown schema field.
 */
function groupByPartition(records, schema, partitionSpec) {
  // Resolve every partition field against the schema once, before touching any record.
  const plans = partitionSpec.fields.map((pf) => {
    const sourceId = pf["source-id"];
    if (sourceId === void 0) throw new Error(`partition field ${pf.name} is missing source-id`);
    const source = schema.fields.find((candidate) => candidate.id === sourceId);
    if (!source) throw new Error(`partition source field id ${sourceId} not found in schema`);
    return {
      partitionName: pf.name,
      sourceName: source.name,
      sourceType: source.type,
      sourceWriteDefault: source["write-default"],
      transform: pf.transform,
      resultType: transformResultType(pf.transform, source.type)
    };
  });
  const byKey = /* @__PURE__ */ new Map();
  for (const record of records) {
    const partition = {};
    const keyParts = [];
    for (const plan of plans) {
      let input = record[plan.sourceName];
      // A missing value falls back to the column's write-default when one is declared.
      if (input === void 0 && plan.sourceWriteDefault !== void 0) input = plan.sourceWriteDefault;
      partition[plan.partitionName] = applyTransform(plan.transform, input === void 0 ? null : input, plan.sourceType);
      keyParts.push(partitionKeyPart(partition[plan.partitionName], plan.resultType));
    }
    const key = JSON.stringify(keyParts);
    const existing = byKey.get(key);
    if (existing) {
      existing.records.push(record);
    } else {
      byKey.set(key, { partition, records: [record] });
    }
  }
  return Array.from(byKey.values());
}
1236
/**
 * Checks that every partition field can be written: its source exists and its result type
 * can be mapped to Avro.
 *
 * @param {{ fields: Array<object> }} schema
 * @param {{ fields: Array<object> }} partitionSpec
 * @param {string} [label] - prefix for error messages about missing sources.
 * @throws {Error} on a missing `source-id`, an unknown source field, or an unsupported type.
 */
function validatePartitionSpecForWrite(schema, partitionSpec, label = "partition spec") {
  for (const partitionField of partitionSpec.fields) {
    const sourceId = partitionField["source-id"];
    if (sourceId === void 0) {
      throw new Error(`${label}: partition field ${partitionField.name} is missing source-id`);
    }
    const source = schema.fields.find((candidate) => candidate.id === sourceId);
    if (!source) throw new Error(`${label}: partition source field id ${sourceId} not found in schema`);
    // Called only for its validation side effect; the Avro type itself is discarded.
    const resultType = transformResultType(partitionField.transform, source.type);
    icebergTypeToAvro(resultType, partitionField["field-id"]);
  }
}
1245
/**
 * Builds the Avro record schema describing a partition tuple.
 *
 * @param {{ fields: Array<object> }} schema
 * @param {{ fields: Array<object> }} partitionSpec
 * @returns {{ type: "record", name: string, fields: Array<object> }} every field is a nullable
 *   union with a null default.
 * @throws {Error} when a partition field references an unknown source field.
 */
function partitionAvroSchema(schema, partitionSpec) {
  const toAvroField = (pf) => {
    const sourceId = pf["source-id"];
    const source = schema.fields.find((candidate) => candidate.id === sourceId);
    if (!source) throw new Error(`partition source field id ${pf["source-id"]} not found`);
    const avroType = icebergTypeToAvro(transformResultType(pf.transform, source.type), pf["field-id"]);
    return {
      name: pf.name,
      "field-id": pf["field-id"],
      default: null,
      type: ["null", avroType]
    };
  };
  return {
    type: "record",
    name: "r102",
    fields: partitionSpec.fields.map((pf) => toAvroField(pf))
  };
}
1262
/**
 * Serializes the fields of a partition spec to JSON.
 *
 * @param {{ fields: Array<object> }} partitionSpec
 * @returns {string}
 */
function partitionSpecJson(partitionSpec) {
  const { fields } = partitionSpec;
  return JSON.stringify(fields);
}
1265
/**
 * Converts a partition tuple into the record shape expected by the Avro partition schema.
 *
 * @param {Record<string, unknown>} partition - partition values keyed by partition field name.
 * @param {{ fields: Array<object> }} schema
 * @param {{ fields: Array<object> }} partitionSpec
 * @returns {Record<string, unknown>} null/undefined values become null; others are coerced.
 * @throws {Error} when a partition field references an unknown source field.
 */
function partitionToAvroRecord(partition, schema, partitionSpec) {
  const record = {};
  for (const pf of partitionSpec.fields) {
    const sourceId = pf["source-id"];
    const source = schema.fields.find((candidate) => candidate.id === sourceId);
    if (!source) throw new Error(`partition source field id ${pf["source-id"]} not found`);
    const resultType = transformResultType(pf.transform, source.type);
    const raw = partition[pf.name];
    if (raw == null) {
      record[pf.name] = null;
    } else {
      record[pf.name] = coerceForAvro(raw, resultType);
    }
  }
  return record;
}
1276
/**
 * Renders one partition value as a string used to build the grouping key.
 *
 * @param {unknown} value
 * @param {string | { type: string }} type - result type of the partition field.
 * @returns {string} a type-tagged representation; null and undefined share "__null__".
 */
function partitionKeyPart(value, type) {
  if (value === null || value === void 0) return "__null__";
  const name = typeof type === "string" ? type : type.type;
  switch (name) {
    case "uuid":
      return `uuid:${bytesToHex$1(uuidToBytes(value, "uuid partition value"))}`;
    case "float":
    case "double":
      // Non-number values of a float type fall through to the generic cases below.
      if (typeof value === "number") return `${name}:${floatPartitionKey(value, name)}`;
      break;
    case "long":
      return `long:${BigInt(value)}`;
  }
  if (typeof value === "bigint") return `b:${value.toString()}`;
  if (value instanceof Date) return `d:${value.getTime()}`;
  if (value instanceof Uint8Array) return `x:${bytesToHex$1(value)}`;
  return `${typeof value}:${String(value)}`;
}
1287
/**
 * Renders a float/double as the hex of its big-endian IEEE-754 bytes; every NaN maps to "nan".
 *
 * @param {number} value
 * @param {"float" | "double" | string} type - "float" uses 4 bytes, anything else 8.
 * @returns {string}
 */
function floatPartitionKey(value, type) {
  if (Number.isNaN(value)) return "nan";
  const single = type === "float";
  const bytes = new Uint8Array(single ? 4 : 8);
  const view = new DataView(bytes.buffer);
  if (single) {
    view.setFloat32(0, value, false);
  } else {
    view.setFloat64(0, value, false);
  }
  return bytesToHex$1(bytes);
}
1295
/**
 * Maps a partition result type to its Avro type.
 *
 * @param {string | { type: string }} type
 * @param {number} fieldId - used to name Avro `fixed` types (`r102_<fieldId>`).
 * @returns {string | object} an Avro primitive name or a type object (fresh on every call).
 * @throws {Error} `unsupported partition source type: <name>` for anything unmapped.
 */
function icebergTypeToAvro(type, fieldId) {
  const name = typeof type === "string" ? type : type.type;
  const fixedName = () => `r102_${fieldId}`;
  const timestamp = (logicalType, adjustToUtc) => ({
    type: "long",
    logicalType,
    "adjust-to-utc": adjustToUtc
  });

  const decimal = parseDecimalType(name);
  if (decimal) {
    const { precision, scale } = decimal;
    return {
      type: "fixed",
      name: fixedName(),
      size: decimalRequiredBytes(precision),
      logicalType: "decimal",
      precision,
      scale
    };
  }

  const fixed = /^fixed\[(\d+)\]$/.exec(name);
  if (fixed) {
    return {
      type: "fixed",
      name: fixedName(),
      size: parseInt(fixed[1], 10)
    };
  }

  switch (name) {
    // These primitives share their name with the Avro type.
    case "boolean":
    case "int":
    case "long":
    case "float":
    case "double":
    case "string":
      return name;
    case "binary":
      return "bytes";
    case "uuid":
      return {
        type: "fixed",
        name: fixedName(),
        size: 16,
        logicalType: "uuid"
      };
    case "date":
      return { type: "int", logicalType: "date" };
    case "time":
      return { type: "long", logicalType: "time-micros" };
    case "timestamp":
      return timestamp("timestamp-micros", false);
    case "timestamptz":
      return timestamp("timestamp-micros", true);
    case "timestamp_ns":
      return timestamp("timestamp-nanos", false);
    case "timestamptz_ns":
      return timestamp("timestamp-nanos", true);
    default:
      throw new Error(`unsupported partition source type: ${name}`);
  }
}
1357
/**
 * Coerces a partition value into the JavaScript representation used when writing Avro.
 *
 * @param {unknown} value - a non-null partition value.
 * @param {string | { type: string }} type - result type of the partition field.
 * @returns {unknown} bigint for long, bytes for uuid/decimal/fixed, otherwise the value unchanged.
 * @throws {Error} when a fixed value has the wrong length (or a helper rejects the value).
 */
function coerceForAvro(value, type) {
  const name = typeof type === "string" ? type : type.type;
  if (name === "long") {
    if (typeof value === "bigint") return value;
    return BigInt(value);
  }
  if (name === "uuid") return uuidToBytes(value, "uuid partition value");
  const decimal = parseDecimalType(name);
  if (decimal) {
    const { precision, scale } = decimal;
    return decimalToFixedBytes(value, precision, scale, `decimal(${precision},${scale}) partition value`);
  }
  const fixedMatch = /^fixed\[(\d+)\]$/.exec(name);
  if (!fixedMatch) return value;
  const bytes = toUint8Array(value);
  const expected = parseInt(fixedMatch[1], 10);
  if (bytes.length !== expected) throw new Error(`expected fixed[${expected}] partition value`);
  return bytes;
}
1372
/**
 * Creates a new table by writing `v1.metadata.json` and a version hint under `<tableUrl>/metadata`.
 *
 * @param {object} options
 * @param {string} options.tableUrl - table location.
 * @param {{ writer: Function }} options.resolver - must provide `writer(url, options?)`.
 * @param {object} [options.schema] - defaults to an empty struct with schema-id 0.
 * @param {number} [options.formatVersion] - 2 or 3; defaults to `properties["format-version"]`, else 2.
 * @param {object} [options.partitionSpec] - defaults to an unpartitioned spec with spec-id 0.
 * @param {object} [options.sortOrder] - defaults to an unsorted order with order-id 0.
 * @param {Record<string, string>} [options.properties]
 * @param {boolean} [options.conditionalCommits] - when set, the metadata file is written with
 *   `ifNoneMatch: "*"` and a failed version-hint write is ignored.
 * @returns {Promise<object>} the table metadata that was written.
 * @throws {Error} on missing arguments, an unsupported format version, or invalid schema/spec.
 */
async function icebergCreate({ tableUrl, resolver, schema, formatVersion, partitionSpec, sortOrder, properties, conditionalCommits }) {
  if (!tableUrl) throw new Error("tableUrl is required");
  // Checked up front and null-safely (as fileCatalogCommit does) so a missing resolver yields
  // this descriptive error instead of a TypeError after all the metadata has been built.
  if (!resolver?.writer) throw new Error("resolver.writer is required");
  if (formatVersion === void 0) {
    const propVersion = properties?.["format-version"];
    formatVersion = propVersion !== void 0 ? Number(propVersion) : 2;
  }
  if (formatVersion !== 2 && formatVersion !== 3) throw new Error(`unsupported format-version: ${formatVersion}`);
  const metadataVersion = 1;
  const metadataUrl = `${tableUrl}/metadata/v${metadataVersion}.metadata.json`;
  const initialSchema = schema ?? {
    type: "struct",
    "schema-id": 0,
    fields: []
  };
  validateSchemaForVersion(initialSchema, formatVersion);
  const initialPartitionSpec = partitionSpec ?? {
    "spec-id": 0,
    fields: []
  };
  validatePartitionSpecForWrite(initialSchema, initialPartitionSpec);
  const initialSortOrder = sortOrder ?? {
    "order-id": 0,
    fields: []
  };
  const metadata = {
    "format-version": formatVersion,
    "table-uuid": uuid4(),
    location: tableUrl,
    "last-sequence-number": 0,
    "last-updated-ms": Date.now(),
    "last-column-id": maxFieldId(initialSchema.fields),
    "current-schema-id": initialSchema["schema-id"] ?? 0,
    schemas: [initialSchema],
    "default-spec-id": initialPartitionSpec["spec-id"],
    "partition-specs": [initialPartitionSpec],
    "last-partition-id": maxPartitionFieldId(initialPartitionSpec.fields),
    "sort-orders": [initialSortOrder],
    "default-sort-order-id": initialSortOrder["order-id"]
  };
  if (properties) metadata.properties = properties;
  if (formatVersion >= 3) metadata["next-row-id"] = 0;
  const metadataWriter = conditionalCommits ? resolver.writer(metadataUrl, { ifNoneMatch: "*" }) : resolver.writer(metadataUrl);
  const metadataBytes = new TextEncoder().encode(stringifyIcebergJson(metadata, 2));
  metadataWriter.appendBytes(metadataBytes);
  await metadataWriter.finish();
  const versionHintUrl = `${tableUrl}/metadata/version-hint.text`;
  try {
    const versionHintWriter = resolver.writer(versionHintUrl);
    const versionHintBytes = new TextEncoder().encode(String(metadataVersion));
    versionHintWriter.appendBytes(versionHintBytes);
    await versionHintWriter.finish();
  } catch (err) {
    // With conditional commits a failed hint write is deliberately ignored; otherwise it is fatal.
    if (!conditionalCommits) throw err;
  }
  return metadata;
}
1429
/**
 * Returns the largest `field-id` among partition fields.
 *
 * @param {Array<{ "field-id": number }>} [partitionFields]
 * @returns {number} the maximum id, or 0 when there are none.
 */
function maxPartitionFieldId(partitionFields = []) {
  let highest = 0;
  for (const partitionField of partitionFields) {
    const id = partitionField["field-id"];
    if (id > highest) highest = id;
  }
  return highest;
}
1434
/**
 * Commits staged changes to a file-based catalog by writing the next `v<N>.metadata.json`.
 *
 * @param {object} options
 * @param {string} options.tableUrl
 * @param {object} options.metadata - current table metadata.
 * @param {string} [options.metadataFileName] - file name of the current metadata, if known.
 * @param {number} [options.currentVersion] - current version; derived from the metadata log when absent.
 * @param {{ requirements: object[], updates: object[] }} options.staged
 * @param {{ writer: Function, deleter?: Function }} options.resolver
 * @param {boolean} [options.conditionalCommits] - write the new metadata with `ifNoneMatch: "*"`.
 * @returns {Promise<object>} the new table metadata.
 * @throws {Error} on missing arguments, failed requirements, or a failed metadata write.
 */
async function fileCatalogCommit({ tableUrl, metadata, metadataFileName, currentVersion, staged, resolver, conditionalCommits }) {
  if (!tableUrl) throw new Error("tableUrl is required");
  if (!resolver?.writer) throw new Error("resolver.writer is required");
  checkRequirements(metadata, staged.requirements);

  // Only stamp the commit time here when no add-snapshot update is staged.
  const addsSnapshot = staged.updates.some((up) => up.action === "add-snapshot");
  const base = addsSnapshot ? metadata : {
    ...metadata,
    "last-updated-ms": Date.now()
  };
  const updated = applyUpdates(base, staged.updates);

  const priorMetadataLog = metadata["metadata-log"] ?? [];
  const derivedVersion = currentVersion ?? deriveCurrentVersion(priorMetadataLog);
  const newVersion = derivedVersion + 1;
  const currentMetadataPath = metadataFileName
    ? `${tableUrl}/metadata/${metadataFileName}`
    : `${tableUrl}/metadata/v${derivedVersion}.metadata.json`;
  const newMetadataPath = `${tableUrl}/metadata/v${newVersion}.metadata.json`;

  const appendedLog = [...priorMetadataLog, {
    "timestamp-ms": metadata["last-updated-ms"],
    "metadata-file": currentMetadataPath
  }];
  const max = Number(updated.properties?.["write.metadata.previous-versions-max"] ?? 100);
  const overLimit = max > 0 && appendedLog.length > max;
  const droppedLog = overLimit ? appendedLog.slice(0, appendedLog.length - max) : [];
  const trimmedLog = droppedLog.length > 0 ? appendedLog.slice(-max) : appendedLog;
  const newMetadata = {
    ...updated,
    "metadata-log": trimmedLog
  };

  const encoder = new TextEncoder();
  const metaWriter = conditionalCommits
    ? resolver.writer(newMetadataPath, { ifNoneMatch: "*" })
    : resolver.writer(newMetadataPath);
  metaWriter.appendBytes(encoder.encode(stringifyIcebergJson(newMetadata, 2)));
  await metaWriter.finish();

  // The version hint is best-effort: a failure here is ignored once the metadata is written.
  try {
    const hintWriter = resolver.writer(`${tableUrl}/metadata/version-hint.text`);
    hintWriter.appendBytes(encoder.encode(String(newVersion)));
    await hintWriter.finish();
  } catch {}

  const deleteEnabled = updated.properties?.["write.metadata.delete-after-commit.enabled"] === "true";
  if (deleteEnabled && droppedLog.length > 0 && resolver.deleter) {
    const { deleter } = resolver;
    await Promise.allSettled(droppedLog.map((entry) => deleter(entry["metadata-file"])));
  }
  return newMetadata;
}
1472
/**
 * Derives the current metadata version from the metadata log.
 *
 * @param {Array<{ "metadata-file": string }>} priorMetadataLog - log of previous metadata files.
 * @returns {number} 1 for an empty log; otherwise one more than the version parsed from the last
 *   entry's file name (`v<N>.metadata.json` or `<N>-<hex>.metadata.json`), falling back to
 *   the log length plus one when the name is not recognized.
 */
function deriveCurrentVersion(priorMetadataLog) {
  const count = priorMetadataLog.length;
  if (count === 0) return 1;
  const lastPath = priorMetadataLog[count - 1]["metadata-file"];
  const fileName = lastPath.split("/").pop() ?? "";
  const parsed = /^(?:v(\d+)|0*(\d+)-[0-9a-f-]+)\.metadata\.json$/.exec(fileName);
  return parsed ? Number(parsed[1] ?? parsed[2]) + 1 : count + 1;
}
1478
/**
 * Verifies commit requirements against the current table metadata.
 *
 * @param {object} metadata - current table metadata.
 * @param {Iterable<object>} requirements - requirement objects discriminated by `type`.
 * @throws {Error} `requirement failed: ...` for the first unmet requirement, or
 *   `unknown requirement: ...` for an unrecognized type.
 */
function checkRequirements(metadata, requirements) {
  // Requirements that strictly compare one metadata property with one requirement property:
  // type -> [metadata key, requirement key]. The requirement key also labels the error.
  const simpleChecks = new Map([
    ["assert-current-schema-id", ["current-schema-id", "current-schema-id"]],
    ["assert-last-assigned-field-id", ["last-column-id", "last-assigned-field-id"]],
    ["assert-last-assigned-partition-id", ["last-partition-id", "last-assigned-partition-id"]],
    ["assert-default-spec-id", ["default-spec-id", "default-spec-id"]],
    ["assert-default-sort-order-id", ["default-sort-order-id", "default-sort-order-id"]]
  ]);
  for (const req of requirements) {
    switch (req.type) {
      case "assert-create":
        throw new Error("requirement failed: assert-create against an existing table");
      case "assert-table-uuid":
        if (metadata["table-uuid"] !== req.uuid) {
          throw new Error(`requirement failed: table-uuid expected ${req.uuid}, got ${metadata["table-uuid"]}`);
        }
        break;
      case "assert-ref-snapshot-id": {
        let current = (metadata.refs ?? {})[req.ref]?.["snapshot-id"] ?? null;
        // "main" falls back to current-snapshot-id when no explicit ref entry exists.
        if (current === null && req.ref === "main") current = metadata["current-snapshot-id"] ?? null;
        const expected = req["snapshot-id"];
        // Ids may arrive as number or bigint, so compare numerically when both are present.
        const sameId = current === expected
          || current != null && expected != null && BigInt(current) === BigInt(expected);
        if (!sameId) {
          throw new Error(`requirement failed: ref ${req.ref} expected snapshot ${expected}, got ${current}`);
        }
        break;
      }
      case "assert-next-row-id": {
        const current = Number(metadata["next-row-id"] ?? 0);
        if (current !== req["next-row-id"]) {
          throw new Error(`requirement failed: next-row-id expected ${req["next-row-id"]}, got ${current}`);
        }
        break;
      }
      default: {
        const simple = simpleChecks.get(req.type);
        if (!simple) throw new Error(`unknown requirement: ${JSON.stringify(req)}`);
        const [metadataKey, requirementKey] = simple;
        const current = metadata[metadataKey];
        if (current !== req[requirementKey]) {
          throw new Error(`requirement failed: ${requirementKey} expected ${req[requirementKey]}, got ${current}`);
        }
      }
    }
  }
}
1507
/**
 * Applies a list of table-metadata updates, in order, and returns the result.
 *
 * The input `metadata` object is never mutated: the function starts from a
 * shallow copy and every branch builds fresh objects/arrays for the parts it
 * changes.
 *
 * @param {object} metadata - current table metadata
 * @param {object[]} updates - update objects, discriminated by their `action` property
 * @returns {object} the metadata after every update has been applied
 * @throws {Error} on an unknown action, on a duplicate snapshot/schema/sort-order/spec id,
 *   on a reference to an id that does not exist, or when a validation helper rejects the update
 */
function applyUpdates(metadata, updates) {
  let next = { ...metadata };
  for (const up of updates) {
    switch (up.action) {
      case "add-snapshot": {
        const snap = up.snapshot;
        const priorSnapshots = next.snapshots ?? [];
        if (priorSnapshots.some((s) => s["snapshot-id"] === snap["snapshot-id"])) throw new Error(`add-snapshot: snapshot-id ${snap["snapshot-id"]} already exists`);
        next = {
          ...next,
          snapshots: [...priorSnapshots, snap],
          // A snapshot without a sequence number must not poison the counter:
          // Math.max(x, undefined) is NaN, so fall back to 0.
          "last-sequence-number": Math.max(next["last-sequence-number"] ?? 0, snap["sequence-number"] ?? 0),
          "last-updated-ms": snap["timestamp-ms"]
        };
        // Row-lineage bookkeeping only applies from format version 3 and only
        // when the snapshot carries both row-id fields.
        if (next["format-version"] >= 3 && snap["first-row-id"] !== void 0 && snap["added-rows"] !== void 0) {
          const nextRowId = snap["first-row-id"] + snap["added-rows"];
          next["next-row-id"] = Math.max(Number(next["next-row-id"] ?? 0), nextRowId);
        }
        break;
      }
      case "set-properties":
        next = {
          ...next,
          properties: {
            ...next.properties,
            ...up.updates
          }
        };
        break;
      case "remove-properties": {
        const properties = { ...next.properties };
        for (const key of up.removals) delete properties[key];
        next = {
          ...next,
          properties
        };
        break;
      }
      case "add-schema": {
        const schemas = next.schemas ?? [];
        let schemaId = up.schema["schema-id"];
        // -1 asks for the next free id; any other id must be unused.
        if (schemaId === -1) schemaId = schemas.reduce((m, s) => Math.max(m, s["schema-id"]), -1) + 1;
        else if (schemas.some((s) => s["schema-id"] === schemaId)) throw new Error(`add-schema: schema-id ${schemaId} already exists`);
        const newSchema = {
          ...up.schema,
          "schema-id": schemaId
        };
        validateSchemaForVersion(newSchema, next["format-version"]);
        const priorLastColumnId = next["last-column-id"] ?? 0;
        validateAssignedFieldIds(newSchema, currentAssignedIdIndex(schemas, next["current-schema-id"]), priorLastColumnId);
        validateSchemaEvolution(schemas, newSchema, priorLastColumnId, next["format-version"]);
        validateNewRequiredFields(newSchema, priorLastColumnId);
        next = {
          ...next,
          schemas: [...schemas, newSchema],
          "last-column-id": Math.max(priorLastColumnId, maxFieldId(newSchema.fields))
        };
        break;
      }
      case "set-current-schema": {
        let id = up["schema-id"];
        const schemas = next.schemas ?? [];
        // -1 selects the most recently added schema.
        if (id === -1) {
          if (schemas.length === 0) throw new Error("set-current-schema: table has no schemas");
          id = schemas[schemas.length - 1]["schema-id"];
        } else if (!schemas.some((s) => s["schema-id"] === id)) throw new Error(`set-current-schema: schema-id ${id} not found`);
        next = {
          ...next,
          "current-schema-id": id
        };
        break;
      }
      case "add-sort-order": {
        const orders = next["sort-orders"] ?? [];
        let orderId = up["sort-order"]["order-id"];
        if (orderId === -1) orderId = orders.reduce((m, o) => Math.max(m, o["order-id"]), -1) + 1;
        else if (orders.some((o) => o["order-id"] === orderId)) throw new Error(`add-sort-order: order-id ${orderId} already exists`);
        const newOrder = {
          ...up["sort-order"],
          "order-id": orderId
        };
        next = {
          ...next,
          "sort-orders": [...orders, newOrder]
        };
        break;
      }
      case "set-default-sort-order": {
        let id = up["sort-order-id"];
        const orders = next["sort-orders"] ?? [];
        if (id === -1) {
          if (orders.length === 0) throw new Error("set-default-sort-order: table has no sort orders");
          id = orders[orders.length - 1]["order-id"];
        } else if (!orders.some((o) => o["order-id"] === id)) throw new Error(`set-default-sort-order: sort-order-id ${id} not found`);
        next = {
          ...next,
          "default-sort-order-id": id
        };
        break;
      }
      case "add-spec": {
        const specs = next["partition-specs"] ?? [];
        let specId = up.spec["spec-id"];
        if (specId === -1) specId = specs.reduce((m, s) => Math.max(m, s["spec-id"]), -1) + 1;
        else if (specs.some((s) => s["spec-id"] === specId)) throw new Error(`add-spec: spec-id ${specId} already exists`);
        const newSpec = {
          ...up.spec,
          "spec-id": specId
        };
        validatePartitionSpecEvolution(specs, newSpec, currentSchemaForMetadata(next));
        // last-partition-id only ever grows.
        let nextLastPartitionId = next["last-partition-id"] ?? 0;
        for (const f of newSpec.fields) if (f["field-id"] > nextLastPartitionId) nextLastPartitionId = f["field-id"];
        next = {
          ...next,
          "partition-specs": [...specs, newSpec],
          "last-partition-id": nextLastPartitionId
        };
        break;
      }
      case "set-default-spec": {
        let id = up["spec-id"];
        const specs = next["partition-specs"] ?? [];
        if (id === -1) {
          if (specs.length === 0) throw new Error("set-default-spec: table has no partition specs");
          id = specs[specs.length - 1]["spec-id"];
        } else if (!specs.some((s) => s["spec-id"] === id)) throw new Error(`set-default-spec: spec-id ${id} not found`);
        next = {
          ...next,
          "default-spec-id": id
        };
        break;
      }
      case "remove-snapshots": {
        const removeIds = new Set(up["snapshot-ids"]);
        const snapshots = (next.snapshots ?? []).filter((s) => !removeIds.has(s["snapshot-id"]));
        const log = (next["snapshot-log"] ?? []).filter((e) => !removeIds.has(e["snapshot-id"]));
        next = {
          ...next,
          snapshots,
          "snapshot-log": log
        };
        break;
      }
      case "set-snapshot-ref": {
        const ref = {
          "snapshot-id": up["snapshot-id"],
          type: up.type
        };
        // Retention settings are copied only when the update supplies them.
        if (up["min-snapshots-to-keep"] !== void 0) ref["min-snapshots-to-keep"] = up["min-snapshots-to-keep"];
        if (up["max-snapshot-age-ms"] !== void 0) ref["max-snapshot-age-ms"] = up["max-snapshot-age-ms"];
        if (up["max-ref-age-ms"] !== void 0) ref["max-ref-age-ms"] = up["max-ref-age-ms"];
        next = {
          ...next,
          refs: {
            ...next.refs,
            [up["ref-name"]]: ref
          }
        };
        // Moving the "main" branch also moves the current snapshot and
        // appends an entry to the snapshot log.
        if (up["ref-name"] === "main" && up.type === "branch") {
          next["current-snapshot-id"] = up["snapshot-id"];
          next["snapshot-log"] = [...next["snapshot-log"] ?? [], {
            "timestamp-ms": next["last-updated-ms"],
            "snapshot-id": up["snapshot-id"]
          }];
        }
        break;
      }
      default:
        throw new Error(`unknown update: ${JSON.stringify(up)}`);
    }
  }
  return next;
}
1653
/**
 * Builds an index of the ids used by the current schema.
 *
 * The schema whose `schema-id` equals `currentSchemaId` is used; when none
 * matches, the last entry of `schemas` is used instead.
 *
 * @param {object[]} schemas - known schemas
 * @param {number} currentSchemaId - id of the schema to index
 * @returns {Map} map populated by `indexAssignedFieldIds`; empty when `schemas` is empty
 */
function currentAssignedIdIndex(schemas, currentSchemaId) {
  const index = /* @__PURE__ */ new Map();
  const matching = schemas.find((candidate) => candidate["schema-id"] === currentSchemaId);
  const chosen = matching ?? schemas[schemas.length - 1];
  if (!chosen) return index;
  indexAssignedFieldIds(chosen.fields, "", index);
  return index;
}
1659
/**
 * Records every field of `fields` in `assignedIds` as `{ kind: "field", path }`
 * keyed by field id, then indexes the ids nested inside each field's type.
 *
 * @param {object[]} fields - fields with `id`, `name` and `type`
 * @param {string} prefix - dotted path of the parent, or "" at the top level
 * @param {Map} assignedIds - index to populate (mutated in place)
 */
function indexAssignedFieldIds(fields, prefix, assignedIds) {
  for (const entry of fields) {
    const qualified = prefix ? `${prefix}.${entry.name}` : entry.name;
    const descriptor = { kind: "field", path: qualified };
    assignedIds.set(entry.id, descriptor);
    indexAssignedTypeIds(entry.type, qualified, assignedIds);
  }
}
1669
/**
 * Indexes the ids carried by a nested type: struct fields, the list element
 * id, and the map key/value ids. String (primitive) types carry no ids.
 *
 * @param {string|object} type - type name or nested type object
 * @param {string} path - dotted path of the field owning this type
 * @param {Map} assignedIds - index to populate (mutated in place)
 */
function indexAssignedTypeIds(type, path, assignedIds) {
  if (typeof type === "string") return;
  switch (type.type) {
    case "struct":
      indexAssignedFieldIds(type.fields, path, assignedIds);
      break;
    case "list": {
      const elementPath = `${path}.element`;
      assignedIds.set(type["element-id"], { kind: "list element", path: elementPath });
      indexAssignedTypeIds(type.element, elementPath, assignedIds);
      break;
    }
    case "map": {
      const keyPath = `${path}.key`;
      const valuePath = `${path}.value`;
      assignedIds.set(type["key-id"], { kind: "map key", path: keyPath });
      assignedIds.set(type["value-id"], { kind: "map value", path: valuePath });
      indexAssignedTypeIds(type.key, keyPath, assignedIds);
      indexAssignedTypeIds(type.value, valuePath, assignedIds);
      break;
    }
  }
}
1691
/**
 * Validates the ids used by every field of `schema`, starting at the top
 * level (empty path prefix).
 *
 * @param {object} schema - schema whose `fields` are checked
 * @param {Map} priorAssignedIds - index of previously assigned ids
 * @param {number} priorLastColumnId - highest column id assigned before this schema
 */
function validateAssignedFieldIds(schema, priorAssignedIds, priorLastColumnId) {
  const topLevelFields = schema.fields;
  validateAssignedFields(topLevelFields, "", priorAssignedIds, priorLastColumnId);
}
1694
/**
 * Validates the id of each field in `fields`, then the ids nested in its type.
 *
 * @param {object[]} fields - fields with `id`, `name` and `type`
 * @param {string} prefix - dotted path of the parent, or "" at the top level
 * @param {Map} priorAssignedIds - index of previously assigned ids
 * @param {number} priorLastColumnId - highest column id assigned before this schema
 */
function validateAssignedFields(fields, prefix, priorAssignedIds, priorLastColumnId) {
  for (const entry of fields) {
    const qualified = prefix ? `${prefix}.${entry.name}` : entry.name;
    validateAssignedId(entry.id, "field", qualified, priorAssignedIds, priorLastColumnId);
    validateAssignedTypeIds(entry.type, qualified, priorAssignedIds, priorLastColumnId);
  }
}
1701
/**
 * Validates the ids nested inside a type: struct fields, the list element
 * id, and the map key/value ids. String (primitive) types are skipped.
 *
 * @param {string|object} type - type name or nested type object
 * @param {string} path - dotted path of the field owning this type
 * @param {Map} priorAssignedIds - index of previously assigned ids
 * @param {number} priorLastColumnId - highest column id assigned before this schema
 */
function validateAssignedTypeIds(type, path, priorAssignedIds, priorLastColumnId) {
  if (typeof type === "string") return;
  switch (type.type) {
    case "struct":
      validateAssignedFields(type.fields, path, priorAssignedIds, priorLastColumnId);
      break;
    case "list": {
      const elementPath = `${path}.element`;
      validateAssignedId(type["element-id"], "list element", elementPath, priorAssignedIds, priorLastColumnId);
      validateAssignedTypeIds(type.element, elementPath, priorAssignedIds, priorLastColumnId);
      break;
    }
    case "map": {
      const keyPath = `${path}.key`;
      const valuePath = `${path}.value`;
      validateAssignedId(type["key-id"], "map key", keyPath, priorAssignedIds, priorLastColumnId);
      validateAssignedId(type["value-id"], "map value", valuePath, priorAssignedIds, priorLastColumnId);
      validateAssignedTypeIds(type.key, keyPath, priorAssignedIds, priorLastColumnId);
      validateAssignedTypeIds(type.value, valuePath, priorAssignedIds, priorLastColumnId);
      break;
    }
  }
}
1714
/**
 * Checks a single id against the previously assigned ids.
 *
 * Ids greater than `priorLastColumnId` are accepted without lookup. Any other
 * id must already be present in `priorAssignedIds` with the same `kind`.
 *
 * @param {number} id - id being used
 * @param {string} kind - "field", "list element", "map key" or "map value"
 * @param {string} path - dotted path, used in error messages
 * @param {Map} priorAssignedIds - index of previously assigned ids
 * @param {number} priorLastColumnId - highest column id assigned before this schema
 * @throws {Error} when the id is unassigned or was assigned to a different kind
 */
function validateAssignedId(id, kind, path, priorAssignedIds, priorLastColumnId) {
  const isFreshId = id > priorLastColumnId;
  if (!isFreshId) {
    const previous = priorAssignedIds.get(id);
    if (!previous) {
      throw new Error(`add-schema: ${kind} ${path} uses unassigned id ${id} (last-column-id ${priorLastColumnId})`);
    }
    if (previous.kind !== kind) {
      throw new Error(`add-schema: ${kind} ${path} uses id ${id} previously assigned to ${previous.kind} ${previous.path}`);
    }
  }
}
1720
/**
 * Ensures every newly added required top-level field carries non-null
 * `initial-default` and `write-default` values.
 *
 * A field counts as new when its id is greater than `priorLastColumnId`.
 *
 * @param {object} schema - schema whose top-level `fields` are checked
 * @param {number} priorLastColumnId - highest column id assigned before this schema
 * @throws {Error} for the first new required field missing either default
 */
function validateNewRequiredFields(schema, priorLastColumnId) {
  for (const candidate of schema.fields) {
    const isNewRequired = candidate.id > priorLastColumnId && candidate.required;
    if (!isNewRequired) continue;
    if (candidate["initial-default"] == null) {
      throw new Error(`add-schema: required field ${candidate.name} (id ${candidate.id}) needs a non-null initial-default`);
    }
    if (candidate["write-default"] == null) {
      throw new Error(`add-schema: required field ${candidate.name} (id ${candidate.id}) needs a non-null write-default`);
    }
  }
}
1726
/**
 * Checks that top-level fields reusing an existing id only change in allowed
 * ways: the type must be promotable from the latest prior definition, and
 * `initial-default` must stay the same.
 *
 * Fields with an id above `priorLastColumnId`, or with no prior definition,
 * are not checked.
 *
 * @param {object[]} schemas - existing schemas
 * @param {object} newSchema - schema being added
 * @param {number} priorLastColumnId - highest column id assigned before this schema
 * @param {number} formatVersion - table format version, forwarded to the promotion check
 * @throws {Error} on a disallowed type change or a changed initial-default
 */
function validateSchemaEvolution(schemas, newSchema, priorLastColumnId, formatVersion) {
  for (const candidate of newSchema.fields) {
    const previous = candidate.id > priorLastColumnId ? void 0 : latestFieldById(schemas, candidate.id);
    if (!previous) continue;
    const promotable = canPromoteType(previous.type, candidate.type, formatVersion);
    if (!promotable) {
      throw new Error(`add-schema: cannot promote field ${candidate.name} from ${typeToString(previous.type)} to ${typeToString(candidate.type)}`);
    }
    const sameDefault = defaultsEqual(previous["initial-default"], candidate["initial-default"]);
    if (!sameDefault) {
      throw new Error(`add-schema: initial-default for field ${candidate.name} cannot change`);
    }
  }
}
1735
/**
 * Finds the top-level field with the given id, searching schemas from the
 * last entry of `schemas` back to the first.
 *
 * @param {object[]} schemas - schemas to search
 * @param {number} id - field id to look for
 * @returns {object|undefined} the matching field, or undefined when no schema has it
 */
function latestFieldById(schemas, id) {
  let position = schemas.length;
  while (position-- > 0) {
    const match = schemas[position].fields.find((candidate) => candidate.id === id);
    if (match) return match;
  }
  return void 0;
}
1741
/**
 * Decides whether a field may change from type `from` to type `to`.
 *
 * Equal types are always allowed. Otherwise both must be primitive (string)
 * types and match one of: unknown -> anything (format version 3+),
 * int -> long, float -> double, date -> timestamp / timestamp_ns
 * (format version 3+), or a decimal change accepted by
 * `decimalPromotionAllowed`.
 *
 * @param {string|object} from - existing type
 * @param {string|object} to - requested type
 * @param {number} formatVersion - table format version
 * @returns {boolean} true when the change is allowed
 */
function canPromoteType(from, to, formatVersion) {
  if (typesEqual(from, to)) return true;
  const bothPrimitive = typeof from === "string" && typeof to === "string";
  if (!bothPrimitive) return false;
  const isV3OrLater = formatVersion >= 3;
  switch (from) {
    case "unknown":
      if (isV3OrLater) return true;
      break;
    case "int":
      if (to === "long") return true;
      break;
    case "float":
      if (to === "double") return true;
      break;
    case "date":
      if (isV3OrLater && (to === "timestamp" || to === "timestamp_ns")) return true;
      break;
  }
  return decimalPromotionAllowed(from, to);
}
1750
/**
 * Compares two types. If either is a string they are compared with `===`;
 * otherwise their `JSON.stringify` output is compared.
 *
 * @param {string|object} a - first type
 * @param {string|object} b - second type
 * @returns {boolean} true when the types are considered equal
 */
function typesEqual(a, b) {
  const bothNested = typeof a !== "string" && typeof b !== "string";
  return bothNested ? JSON.stringify(a) === JSON.stringify(b) : a === b;
}
1754
/**
 * Reports whether `from` -> `to` is an allowed decimal change: both must be
 * recognised by `parseDecimalType`, the precision must strictly grow and the
 * scale must stay the same.
 *
 * @param {string} from - existing type name
 * @param {string} to - requested type name
 * @returns {boolean} true when the decimal change is allowed
 */
function decimalPromotionAllowed(from, to) {
  const source = parseDecimalType(from);
  const target = parseDecimalType(to);
  if (!source || !target) return false;
  const widerPrecision = target.precision > source.precision;
  const sameScale = target.scale === source.scale;
  return widerPrecision && sameScale;
}
1759
/**
 * Renders a type for error messages: strings are returned unchanged, anything
 * else is serialised with `JSON.stringify`.
 *
 * @param {string|object} type - type to render
 * @returns {string} printable form of the type
 */
function typeToString(type) {
  if (typeof type === "string") return type;
  return JSON.stringify(type);
}
1762
/**
 * Deep-compares two default values.
 *
 * Primitives are compared with `Object.is` (so `NaN` equals `NaN`, and `0`
 * differs from `-0`). Arrays must have the same length and pairwise-equal
 * elements; plain objects must have the same own keys with equal values.
 *
 * `null` and `undefined` are treated as the same "no default" value, so a
 * field that omits a default compares equal to one that spells it out as
 * `null`. This matches the `== null` checks used when validating new
 * required fields.
 *
 * @param {*} a - first default value
 * @param {*} b - second default value
 * @returns {boolean} true when both values are equivalent
 */
function defaultsEqual(a, b) {
  if (Object.is(a, b)) return true;
  // An absent default and an explicit null mean the same thing.
  if (a == null && b == null) return true;
  if (!a || !b || typeof a !== "object" || typeof b !== "object") return false;
  if (Array.isArray(a) || Array.isArray(b)) {
    if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length) return false;
    for (let i = 0; i < a.length; i++) if (!defaultsEqual(a[i], b[i])) return false;
    return true;
  }
  const aKeys = Object.keys(a);
  const bKeys = Object.keys(b);
  if (aKeys.length !== bKeys.length) return false;
  for (const key of aKeys) {
    if (!Object.hasOwn(b, key)) return false;
    if (!defaultsEqual(a[key], b[key])) return false;
  }
  return true;
}
1779
/**
 * Validates a partition spec that is about to be added.
 *
 * The spec must be writable against `schema`, must not be equivalent to an
 * existing spec, and each of its fields that is equivalent to an existing
 * partition field must reuse that field's `field-id`.
 *
 * @param {object[]} specs - existing partition specs
 * @param {object} newSpec - spec being added
 * @param {object} schema - schema the spec is validated against
 * @throws {Error} when any of the checks fails
 */
function validatePartitionSpecEvolution(specs, newSpec, schema) {
  validateWritablePartitionSpec(newSpec, schema);
  const alreadyPresent = specs.some((existing) => partitionSpecsEquivalent(existing, newSpec));
  if (alreadyPresent) throw new Error("add-spec: equivalent partition spec already exists");
  for (const candidate of newSpec.fields) {
    const twin = equivalentPartitionField(specs, candidate);
    if (!twin) continue;
    if (twin["field-id"] !== candidate["field-id"]) {
      throw new Error(`add-spec: partition field ${candidate.name} must reuse field-id ${twin["field-id"]}`);
    }
  }
}
1787
/**
 * Runs `validatePartitionSpecForWrite` for an `add-spec` update.
 *
 * Errors whose message starts with "unsupported partition transform: " are
 * re-thrown with an "add-spec: " prefix; the original error is kept as
 * `cause` so its stack is not lost. Every other error is re-thrown as is.
 *
 * @param {object} spec - partition spec being added
 * @param {object} schema - schema the spec is validated against
 * @throws {Error} when the spec is rejected
 */
function validateWritablePartitionSpec(spec, schema) {
  try {
    validatePartitionSpecForWrite(schema, spec, "add-spec");
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    if (message.startsWith("unsupported partition transform: ")) throw new Error(`add-spec: ${message}`, { cause: err });
    throw err;
  }
}
1796
/**
 * Returns the schema whose `schema-id` equals the metadata's
 * `current-schema-id`.
 *
 * @param {object} metadata - table metadata
 * @returns {object} the current schema
 * @throws {Error} when `schemas` is missing or holds no matching schema
 */
function currentSchemaForMetadata(metadata) {
  const match = metadata.schemas?.find((candidate) => candidate["schema-id"] === metadata["current-schema-id"]);
  if (match) return match;
  throw new Error("add-spec: current schema not found in metadata");
}
1801
/**
 * Finds the first partition field, across `specs` in order, that is
 * equivalent to `field` according to `partitionFieldsEquivalent`.
 *
 * @param {object[]} specs - partition specs to search
 * @param {object} field - partition field to match
 * @returns {object|undefined} the equivalent existing field, or undefined
 */
function equivalentPartitionField(specs, field) {
  for (const { fields } of specs) {
    const match = fields.find((existing) => partitionFieldsEquivalent(existing, field));
    if (match) return match;
  }
  return void 0;
}
1807
/**
 * Two specs are equivalent when they have the same number of fields and the
 * fields are pairwise equivalent in order.
 *
 * @param {object} a - first partition spec
 * @param {object} b - second partition spec
 * @returns {boolean} true when the specs are equivalent
 */
function partitionSpecsEquivalent(a, b) {
  const left = a.fields;
  const right = b.fields;
  if (left.length !== right.length) return false;
  let position = 0;
  while (position < left.length) {
    if (!partitionFieldsEquivalent(left[position], right[position])) return false;
    position += 1;
  }
  return true;
}
1812
/**
 * Two partition fields are equivalent when `source-id`, `source-ids`,
 * `transform` and `name` all match. `field-id` is not compared.
 *
 * @param {object} a - first partition field
 * @param {object} b - second partition field
 * @returns {boolean} true when the fields are equivalent
 */
function partitionFieldsEquivalent(a, b) {
  if (a["source-id"] !== b["source-id"]) return false;
  if (!idsListEquivalent(a["source-ids"], b["source-ids"])) return false;
  if (a.transform !== b.transform) return false;
  return a.name === b.name;
}
1815
/**
 * Compares two optional id lists. If either is undefined they are equal only
 * when both are undefined; otherwise they must have the same length and the
 * same ids in the same order.
 *
 * @param {number[]|undefined} a - first list
 * @param {number[]|undefined} b - second list
 * @returns {boolean} true when the lists are equivalent
 */
function idsListEquivalent(a, b) {
  if (a === void 0 || b === void 0) return a === b;
  if (a.length !== b.length) return false;
  const firstMismatch = a.findIndex((id, position) => id !== b[position]);
  return firstMismatch === -1;
}
1821
/**
 * Writes `records` as an Avro object container file using the "null" codec.
 *
 * Layout written: a 4-byte magic number, the metadata map (user `metadata`
 * plus `avro.schema` and `avro.codec`), a 16-byte sync marker, then one block
 * per `blockSize` records. Each block holds the record count, the encoded
 * byte length, the encoded records and the sync marker again.
 *
 * @param {object} options
 * @param {object} options.writer - byte sink with appendUint32 / appendVarInt / appendBytes / finish
 * @param {object|string} options.schema - record schema, or its JSON text
 * @param {object[]} options.records - records to write
 * @param {number} [options.blockSize=512] - maximum number of records per block
 * @param {object} [options.metadata] - extra string-valued header entries
 * @returns {*} whatever `writer.finish()` returns
 */
function avroWrite({ writer, schema, records, blockSize = 512, metadata }) {
  // 23749199 is 0x016a624f, i.e. the bytes "O","b","j",1 if appendUint32
  // writes little-endian (assumed from the reader's magic check; confirm).
  writer.appendUint32(23749199);
  const meta = {
    ...metadata,
    "avro.schema": typeof schema === "string" ? schema : JSON.stringify(schema),
    "avro.codec": "null"
  };
  const encoder = new TextEncoder();
  appendZigZag(writer, Object.keys(meta).length);
  for (const [key, value] of Object.entries(meta)) {
    const kb = encoder.encode(key);
    appendZigZag(writer, kb.length);
    writer.appendBytes(kb);
    const vb = encoder.encode(value);
    appendZigZag(writer, vb.length);
    writer.appendBytes(vb);
  }
  // A zero count terminates the metadata map.
  writer.appendVarInt(0);
  const sync = new Uint8Array(16);
  for (let i = 0; i < 16; i++) sync[i] = Math.random() * 256 | 0;
  writer.appendBytes(sync);
  // A schema supplied as JSON text has no `.fields` to iterate; parse it, but
  // only when there are records to encode.
  const recordFields = records.length > 0 ? (typeof schema === "string" ? JSON.parse(schema) : schema).fields : [];
  for (let i = 0; i < records.length; i += blockSize) {
    const block = records.slice(i, i + blockSize);
    appendZigZag(writer, block.length);
    // Encode into a scratch buffer first: the block's byte length is written
    // before its payload.
    const blockWriter = new ByteWriter();
    for (const record of block) for (const { name, type } of recordFields) writeType(blockWriter, type, record[name]);
    appendZigZag(writer, blockWriter.offset);
    writer.appendBytes(blockWriter.getBytes());
    writer.appendBytes(sync);
  }
  return writer.finish();
}
1852
/**
 * Encodes `value` to `writer` according to an Avro `schema`.
 *
 * Supported schemas: unions (arrays of schemas, not nested), the primitive
 * names null / boolean / int / long / float / double / bytes / string,
 * records, arrays, fixed (including uuid strings), and the logical types
 * date, time-millis, time-micros, timestamp-millis / -micros / -nanos and
 * decimal.
 *
 * For `long`, bigint values are written as is; plain numbers are accepted
 * when they are safe integers, which is what union matching already allows
 * for a `long` branch.
 *
 * @param {object} writer - byte sink with the append* methods used below
 * @param {string|object|Array} schema - Avro schema for the value
 * @param {*} value - value to encode
 * @throws {Error} when the value does not fit the schema, no union branch
 *   matches, or the schema / logical type is not recognised
 */
function writeType(writer, schema, value) {
  if (Array.isArray(schema)) {
    // Union: pick the first branch whose tag accepts the runtime value.
    const unionIndex = schema.findIndex((s) => {
      if (Array.isArray(s)) throw new Error("nested unions not supported");
      const tag = typeof s === "string" ? s : s.type === "record" || s.type === "array" || s.type === "fixed" ? s.type : s.logicalType;
      if (value == null) return tag === "null";
      if (tag === "boolean") return typeof value === "boolean";
      if (tag === "int") return typeof value === "number" && Number.isInteger(value);
      if (tag === "long") return typeof value === "bigint" || typeof value === "number";
      if (tag === "float" || tag === "double") return typeof value === "number";
      if (tag === "string") return typeof value === "string";
      if (tag === "bytes") return value instanceof Uint8Array;
      if (tag === "date") return value instanceof Date || typeof value === "number";
      if (tag === "time-millis") return typeof value === "number";
      if (tag === "time-micros") return typeof value === "bigint" || typeof value === "number";
      if (tag === "timestamp-millis" || tag === "timestamp-micros" || tag === "timestamp-nanos") return value instanceof Date || typeof value === "bigint" || typeof value === "number";
      if (tag === "decimal") return typeof value === "number" || typeof value === "bigint";
      if (tag === "record") return typeof value === "object" && value !== null;
      if (tag === "array") return Array.isArray(value);
      if (tag === "fixed") {
        if (value instanceof Uint8Array) return true;
        return typeof s === "object" && "logicalType" in s && s.logicalType === "uuid" && typeof value === "string";
      }
      return false;
    });
    if (unionIndex === -1) throw new Error("union branch not found");
    appendZigZag(writer, unionIndex);
    writeType(writer, schema[unionIndex], value);
    return;
  }
  if (typeof schema === "string") {
    switch (schema) {
      case "null":
        // null occupies no bytes
        return;
      case "boolean":
        writer.appendUint8(value ? 1 : 0);
        return;
      case "int":
        if (typeof value !== "number" || !Number.isInteger(value)) throw new Error("expected integer value");
        appendZigZag(writer, value);
        return;
      case "long": {
        // Union matching lets a number select a "long" branch, so accept
        // safe integers here instead of rejecting everything but bigint.
        const wide = typeof value === "number" && Number.isSafeInteger(value) ? BigInt(value) : value;
        if (typeof wide !== "bigint") throw new Error("expected bigint value");
        appendZigZag64(writer, wide);
        return;
      }
      case "float":
        if (typeof value !== "number") throw new Error("expected number value");
        writer.appendFloat32(value);
        return;
      case "double":
        if (typeof value !== "number") throw new Error("expected number value");
        writer.appendFloat64(value);
        return;
      case "bytes":
        if (!(value instanceof Uint8Array)) throw new Error("expected Uint8Array value");
        appendZigZag(writer, value.length);
        writer.appendBytes(value);
        return;
      case "string": {
        if (typeof value !== "string") throw new Error("expected string value");
        const b = new TextEncoder().encode(value);
        appendZigZag(writer, b.length);
        writer.appendBytes(b);
        return;
      }
      default:
        // Unrecognised primitive names write nothing (unchanged behaviour).
        return;
    }
  }
  if (schema.type === "record") {
    for (const f of schema.fields) writeType(writer, f.type, value[f.name]);
    return;
  }
  if (schema.type === "array") {
    // One block holding every item (omitted when empty), then a zero count
    // that terminates the array.
    if (value.length) {
      appendZigZag(writer, value.length);
      for (const it of value) writeType(writer, schema.items, it);
    }
    writer.appendVarInt(0);
    return;
  }
  if (schema.type === "fixed") {
    const bytes = schema.logicalType === "uuid" && typeof value === "string" ? uuidStringToBytes$1(value) : value;
    if (!(bytes instanceof Uint8Array)) throw new Error("expected Uint8Array value");
    if (bytes.length !== schema.size) throw new Error(`expected fixed[${schema.size}] value`);
    writer.appendBytes(bytes);
    return;
  }
  if (!("logicalType" in schema)) throw new Error(`unknown schema type ${JSON.stringify(schema)}`);
  switch (schema.logicalType) {
    case "date":
      // 864e5 ms per day: a Date is converted to whole days since the epoch.
      appendZigZag(writer, value instanceof Date ? Math.floor(value.getTime() / 864e5) : value);
      return;
    case "time-millis":
      appendZigZag(writer, value);
      return;
    case "time-micros":
      appendZigZag64(writer, BigInt(value));
      return;
    case "timestamp-millis":
      appendZigZag64(writer, value instanceof Date ? BigInt(value.getTime()) : BigInt(value));
      return;
    case "timestamp-micros":
      appendZigZag64(writer, value instanceof Date ? BigInt(value.getTime()) * 1000n : BigInt(value));
      return;
    case "timestamp-nanos":
      appendZigZag64(writer, value instanceof Date ? BigInt(value.getTime()) * 1000000n : BigInt(value));
      return;
    case "decimal": {
      const scale = "scale" in schema ? schema.scale ?? 0 : 0;
      let u;
      // bigint is taken as the already-scaled unscaled value; numbers are scaled here.
      if (typeof value === "bigint") u = value;
      else if (typeof value === "number") u = BigInt(Math.round(value * 10 ** scale));
      else throw new Error("decimal value must be bigint or number");
      const b = bigIntToBytes(u);
      appendZigZag(writer, b.length);
      writer.appendBytes(b);
      return;
    }
    default:
      throw new Error(`unknown logical type ${schema.logicalType}`);
  }
}
1934
/**
 * Zig-zag encodes a 32-bit integer and appends it with `writer.appendVarInt`.
 *
 * @param {object} writer - byte sink exposing `appendVarInt`
 * @param {number} v - integer to encode
 */
function appendZigZag(writer, v) {
  const doubled = v << 1;
  const signMask = v >> 31;
  writer.appendVarInt(doubled ^ signMask);
}
1937
/**
 * Zig-zag encodes a 64-bit bigint and appends it with `writer.appendVarBigInt`.
 *
 * @param {object} writer - byte sink exposing `appendVarBigInt`
 * @param {bigint} v - integer to encode
 */
function appendZigZag64(writer, v) {
  const doubled = v << 1n;
  const signMask = v >> 63n;
  writer.appendVarBigInt(doubled ^ signMask);
}
1940
/**
 * Converts a UUID string to its 16 raw bytes. Dashes are ignored and hex
 * digits may be upper or lower case.
 *
 * @param {string} value - UUID text containing exactly 32 hex digits
 * @returns {Uint8Array} the 16 bytes, in the order written
 * @throws {Error} when the text is not 32 hex digits after removing dashes
 */
function uuidStringToBytes$1(value) {
  const digits = value.toLowerCase().replace(/-/g, "");
  if (!/^[0-9a-f]{32}$/.test(digits)) throw new Error("expected uuid string");
  return Uint8Array.from({ length: 16 }, (_, position) => {
    const start = position * 2;
    return parseInt(digits.slice(start, start + 2), 16);
  });
}
1947
/**
 * Encodes a bigint as big-endian two's-complement bytes, using the fewest
 * bytes that still keep the sign bit correct.
 *
 * @param {bigint} value - integer to encode
 * @returns {Uint8Array} big-endian two's-complement representation
 */
function bigIntToBytes(value) {
  const negative = value < 0n;
  let magnitude = negative ? -value : value;
  // Collect the magnitude least-significant byte first, then flip to big-endian.
  const collected = [];
  while (magnitude > 0n) {
    collected.push(Number(magnitude & 255n));
    magnitude >>= 8n;
  }
  if (collected.length === 0) collected.push(0);
  const bytes = collected.reverse();
  if (!negative) {
    // A set top bit would read as negative, so pad with a zero byte.
    if ((bytes[0] & 128) !== 0) bytes.unshift(0);
    return Uint8Array.from(bytes);
  }
  // Two's complement: invert every byte, then add one with carry.
  for (let i = 0; i < bytes.length; i++) bytes[i] ^= 255;
  for (let i = bytes.length - 1; i >= 0; i--) {
    bytes[i] = (bytes[i] + 1) & 255;
    if (bytes[i]) break;
  }
  // A clear top bit would read as positive, so pad with 0xff.
  if ((bytes[0] & 128) === 0) bytes.unshift(255);
  return Uint8Array.from(bytes);
}
1966
/**
 * Builds the Avro record schema for manifest entries.
 *
 * The nested `data_file` record always carries the base columns. When
 * `manifestContent` is 1 it also gets `equality_ids` and
 * `referenced_data_file` (plus `content_offset` / `content_size_in_bytes`
 * for format version 3+). Format version 3+ always adds `first_row_id`.
 *
 * Property order inside each field object is kept stable because callers
 * serialise the schema with `JSON.stringify`.
 *
 * @param {object} schema - table schema, forwarded to `partitionAvroSchema`
 * @param {object} partitionSpec - partition spec, forwarded to `partitionAvroSchema`
 * @param {number} formatVersion - table format version
 * @param {number} [manifestContent=0] - manifest content code; 1 adds the extra columns
 *   (presumably a delete manifest: the data manifest writer passes 0)
 * @returns {object} Avro schema of the `manifest_entry` record
 */
function manifestEntrySchema(schema, partitionSpec, formatVersion, manifestContent = 0) {
  const required = (name, type, fieldId) => ({ name, type, "field-id": fieldId });
  const optional = (name, type, fieldId) => ({
    name,
    type: ["null", type],
    default: null,
    "field-id": fieldId
  });
  const isV3OrLater = formatVersion >= 3;

  const dataFileFields = [
    required("content", "int", 134),
    required("file_path", "string", 100),
    required("file_format", "string", 101),
    {
      name: "partition",
      "field-id": 102,
      type: partitionAvroSchema(schema, partitionSpec)
    },
    required("record_count", "long", 103),
    required("file_size_in_bytes", "long", 104),
    mapField("column_sizes", 108, "k117_v118", 117, 118, "long"),
    mapField("value_counts", 109, "k119_v120", 119, 120, "long"),
    mapField("null_value_counts", 110, "k121_v122", 121, 122, "long"),
    mapField("nan_value_counts", 137, "k138_v139", 138, 139, "long"),
    mapField("lower_bounds", 125, "k126_v127", 126, 127, "bytes"),
    mapField("upper_bounds", 128, "k129_v130", 129, 130, "bytes"),
    optional("sort_order_id", "int", 140)
  ];

  if (manifestContent === 1) {
    dataFileFields.push(
      {
        name: "equality_ids",
        "field-id": 135,
        default: null,
        type: ["null", {
          type: "array",
          items: "int",
          "element-id": 136
        }]
      },
      optional("referenced_data_file", "string", 143)
    );
    if (isV3OrLater) {
      dataFileFields.push(
        optional("content_offset", "long", 144),
        optional("content_size_in_bytes", "long", 145)
      );
    }
  }
  if (isV3OrLater) dataFileFields.push(optional("first_row_id", "long", 142));

  const entryFields = [
    required("status", "int", 0),
    optional("snapshot_id", "long", 1),
    optional("sequence_number", "long", 3),
    optional("file_sequence_number", "long", 4),
    {
      name: "data_file",
      "field-id": 2,
      type: {
        type: "record",
        name: "r2",
        fields: dataFileFields
      }
    }
  ];
  return {
    type: "record",
    name: "manifest_entry",
    fields: entryFields
  };
}
2088
/**
 * Builds an optional Avro field that represents a map as an array of
 * key/value records (`logicalType: "map"`), with an int key.
 *
 * @param {string} name - field name
 * @param {number} fieldId - field id of the map field
 * @param {string} recName - name of the key/value record type
 * @param {number} keyId - field id of the key
 * @param {number} valueId - field id of the value
 * @param {string} valueType - Avro type of the value
 * @returns {object} Avro field definition defaulting to null
 */
function mapField(name, fieldId, recName, keyId, valueId, valueType) {
  const keyColumn = { name: "key", type: "int", "field-id": keyId };
  const valueColumn = { name: "value", type: valueType, "field-id": valueId };
  const entryRecord = {
    type: "record",
    name: recName,
    fields: [keyColumn, valueColumn]
  };
  const entryArray = {
    type: "array",
    logicalType: "map",
    items: entryRecord
  };
  return {
    name,
    "field-id": fieldId,
    default: null,
    type: ["null", entryArray]
  };
}
2112
/**
 * Serialises a schema to its JSON text.
 *
 * @param {object} schema - schema to serialise
 * @returns {string} `JSON.stringify` output for the schema
 */
function icebergSchemaJson(schema) {
  const serialized = JSON.stringify(schema);
  return serialized;
}
2115
/**
 * Converts an object keyed by id into an array of `{ key, value }` entries
 * with numeric keys.
 *
 * @param {object|null|undefined} m - map keyed by id
 * @returns {object[]|null} the entries, or null when `m` is falsy or has no entries
 */
function encodeMap(m) {
  const pairs = m ? Object.entries(m) : [];
  if (pairs.length === 0) return null;
  const encoded = [];
  for (const [rawKey, value] of pairs) encoded.push({ key: Number(rawKey), value });
  return encoded;
}
2124
/**
 * Writes a data manifest as an Avro file.
 *
 * Every entry of `dataFiles` must have `content === 0`. The Avro header
 * carries the format version, `content: "data"`, the table schema, the
 * partition spec and its id.
 *
 * @param {object} options
 * @param {object} options.writer - byte sink handed to `avroWrite`
 * @param {object} options.schema - table schema
 * @param {object} options.partitionSpec - partition spec used by the files
 * @param {*} options.snapshotId - snapshot id stored on every entry
 * @param {object[]} options.dataFiles - data files to list
 * @param {number} [options.formatVersion=2] - table format version
 * @returns {*} whatever `avroWrite` returns
 * @throws {Error} when a file has a content code other than 0
 */
function writeDataManifest({ writer, schema, partitionSpec, snapshotId, dataFiles, formatVersion = 2 }) {
  const records = [];
  for (const dataFile of dataFiles) {
    if (dataFile.content !== 0) {
      throw new Error(`writeDataManifest expects data files (content=0), got content=${dataFile.content}`);
    }
    records.push(manifestEntryRecord(dataFile, schema, partitionSpec, snapshotId, formatVersion, 0));
  }
  const entrySchema = manifestEntrySchema(schema, partitionSpec, formatVersion, 0);
  const header = {
    "format-version": String(formatVersion),
    content: "data",
    schema: icebergSchemaJson(schema),
    "partition-spec": partitionSpecJson(partitionSpec),
    "partition-spec-id": String(partitionSpec["spec-id"])
  };
  return avroWrite({
    writer,
    schema: entrySchema,
    records,
    metadata: header
  });
}
2142
/**
 * Builds the record written for one manifest entry.
 *
 * The entry always has `status` 1 and null sequence numbers. The nested
 * `data_file` record mirrors the columns produced by `manifestEntrySchema`
 * for the same `formatVersion` and `manifestContent`.
 *
 * @param {object} dataFile - file description
 * @param {object} schema - table schema, forwarded to `partitionToAvroRecord`
 * @param {object} partitionSpec - partition spec, forwarded to `partitionToAvroRecord`
 * @param {*} snapshotId - snapshot id stored on the entry
 * @param {number} formatVersion - table format version
 * @param {number} manifestContent - manifest content code; 1 adds the extra columns
 * @returns {object} manifest entry record
 */
function manifestEntryRecord(dataFile, schema, partitionSpec, snapshotId, formatVersion, manifestContent) {
  const isV3OrLater = formatVersion >= 3;
  // Files with content code 1 never carry a sort order; others default to 0.
  let sortOrderId = null;
  if (dataFile.content !== 1) sortOrderId = dataFile.sort_order_id ?? 0;

  const dataFileRecord = {
    content: dataFile.content,
    file_path: dataFile.file_path,
    file_format: dataFile.file_format.toUpperCase(),
    partition: partitionToAvroRecord(dataFile.partition ?? {}, schema, partitionSpec),
    record_count: dataFile.record_count,
    file_size_in_bytes: dataFile.file_size_in_bytes,
    column_sizes: encodeMap(dataFile.column_sizes),
    value_counts: encodeMap(dataFile.value_counts),
    null_value_counts: encodeMap(dataFile.null_value_counts),
    nan_value_counts: encodeMap(dataFile.nan_value_counts),
    lower_bounds: encodeMap(dataFile.lower_bounds),
    upper_bounds: encodeMap(dataFile.upper_bounds),
    sort_order_id: sortOrderId
  };

  if (manifestContent === 1) {
    const equalityIds = dataFile.equality_ids;
    // An empty or missing id list is stored as null.
    dataFileRecord.equality_ids = equalityIds?.length ? equalityIds : null;
    dataFileRecord.referenced_data_file = dataFile.referenced_data_file ?? null;
    if (isV3OrLater) {
      dataFileRecord.content_offset = dataFile.content_offset ?? null;
      dataFileRecord.content_size_in_bytes = dataFile.content_size_in_bytes ?? null;
    }
  }
  if (isV3OrLater) dataFileRecord.first_row_id = dataFile.first_row_id ?? null;

  return {
    status: 1,
    snapshot_id: snapshotId,
    sequence_number: null,
    file_sequence_number: null,
    data_file: dataFileRecord
  };
}
2175
/**
 * Builds the Avro record schema for manifest-list entries.
 *
 * Format version 3+ appends an optional `first_row_id` column. Property
 * order inside each field object is kept stable because the schema is
 * serialised with `JSON.stringify` by the Avro writer.
 *
 * @param {number} formatVersion - table format version
 * @returns {object} Avro schema of the `manifest_file` record
 */
function manifestFileSchema(formatVersion) {
  const required = (name, type, fieldId) => ({ name, type, "field-id": fieldId });
  const optional = (name, type, fieldId) => ({
    name,
    type: ["null", type],
    default: null,
    "field-id": fieldId
  });

  const fields = [
    ["manifest_path", "string", 500],
    ["manifest_length", "long", 501],
    ["partition_spec_id", "int", 502],
    ["content", "int", 517],
    ["sequence_number", "long", 515],
    ["min_sequence_number", "long", 516],
    ["added_snapshot_id", "long", 503],
    ["added_files_count", "int", 504],
    ["existing_files_count", "int", 505],
    ["deleted_files_count", "int", 506],
    ["added_rows_count", "long", 512],
    ["existing_rows_count", "long", 513],
    ["deleted_rows_count", "long", 514]
  ].map(([name, type, fieldId]) => required(name, type, fieldId));

  const partitionSummary = {
    type: "record",
    name: "r508",
    fields: [
      required("contains_null", "boolean", 509),
      optional("contains_nan", "boolean", 518),
      optional("lower_bound", "bytes", 510),
      optional("upper_bound", "bytes", 511)
    ]
  };
  fields.push({
    name: "partitions",
    type: ["null", {
      type: "array",
      "element-id": 508,
      items: partitionSummary
    }],
    default: null,
    "field-id": 507
  });

  if (formatVersion >= 3) fields.push(optional("first_row_id", "long", 520));

  return {
    type: "record",
    name: "manifest_file",
    fields
  };
}
2293
/**
 * Writes a manifest list as an Avro file.
 *
 * Each manifest's `sequence_number` and `min_sequence_number` fall back to
 * the list's `sequenceNumber`. For format version 3+ a `first_row_id` column
 * is written; it is only populated for manifests with `content === 0`.
 *
 * @param {object} options
 * @param {object} options.writer - byte sink handed to `avroWrite`
 * @param {*} options.snapshotId - snapshot id recorded in the Avro header
 * @param {*} options.sequenceNumber - sequence number recorded in the header and used as fallback
 * @param {object[]} options.manifests - manifest descriptions
 * @param {number} [options.formatVersion=2] - table format version
 * @returns {*} whatever `avroWrite` returns
 */
function writeManifestList({ writer, snapshotId, sequenceNumber, manifests, formatVersion = 2 }) {
  const includeRowIds = formatVersion >= 3;
  const records = [];
  for (const entry of manifests) {
    const row = {
      manifest_path: entry.manifest_path,
      manifest_length: entry.manifest_length,
      partition_spec_id: entry.partition_spec_id,
      content: entry.content,
      sequence_number: entry.sequence_number ?? sequenceNumber,
      min_sequence_number: entry.min_sequence_number ?? sequenceNumber,
      added_snapshot_id: entry.added_snapshot_id,
      added_files_count: entry.added_files_count,
      existing_files_count: entry.existing_files_count,
      deleted_files_count: entry.deleted_files_count,
      added_rows_count: entry.added_rows_count,
      existing_rows_count: entry.existing_rows_count,
      deleted_rows_count: entry.deleted_rows_count,
      partitions: entry.partitions ?? null
    };
    if (includeRowIds) {
      let firstRowId = null;
      if (entry.content === 0 && entry.first_row_id != null) firstRowId = entry.first_row_id;
      row.first_row_id = firstRowId;
    }
    records.push(row);
  }
  const header = {
    "format-version": String(formatVersion),
    "snapshot-id": String(snapshotId),
    "sequence-number": String(sequenceNumber)
  };
  return avroWrite({
    writer,
    schema: manifestFileSchema(formatVersion),
    records,
    metadata: header
  });
}
2325
/**
 * Tell whether a type name denotes a geospatial column.
 *
 * @param {string} name - type name to inspect
 * @returns {boolean} true when the name starts with "geometry" or "geography"
 */
function isGeoType(name) {
  return ["geometry", "geography"].some((prefix) => name.startsWith(prefix));
}
2328
/**
 * Collect value/null counts and an encoded bounding box for a geospatial column.
 *
 * A record value of `undefined` is replaced by the field's `write-default`
 * when one is defined; `null`/`undefined` values are counted as nulls.
 *
 * @param {object[]} records - rows to scan
 * @param {object} field - schema field (`name`, optional `write-default`)
 * @returns {{value_count: bigint, null_count: bigint, lower?: Uint8Array, upper?: Uint8Array}}
 *   `lower`/`upper` are present only when both x and y extents were observed
 * @throws {Error} when a non-null value is not an object
 */
function computeGeoBounds(records, field) {
  const fallback = field["write-default"];
  let bbox;
  let nullCount = 0n;

  for (const record of records) {
    const raw = record[field.name];
    const geometry = raw === void 0 && fallback !== void 0 ? fallback : raw;
    if (geometry == null) {
      nullCount += 1n;
      continue;
    }
    if (typeof geometry !== "object") throw new Error("geospatial column expects GeoJSON geometries");
    bbox = extendBoundsFromGeometry(bbox, geometry);
  }

  const summary = {
    value_count: BigInt(records.length),
    null_count: nullCount
  };

  const { xmin, ymin, xmax, ymax, zmin, zmax, mmin, mmax } = bbox ?? {};
  const planarIncomplete = [xmin, ymin, xmax, ymax].some((extent) => extent === void 0);
  if (planarIncomplete) return summary;

  // Z and M ordinates are encoded only when at least one value supplied them.
  const hasZ = zmin !== void 0;
  const hasM = mmin !== void 0;
  summary.lower = encodeGeoPoint(xmin, ymin, zmin, mmin, hasZ, hasM);
  summary.upper = encodeGeoPoint(xmax, ymax, zmax, mmax, hasZ, hasM);
  return summary;
}
2356
/**
 * Grow a bounding box so that it covers a GeoJSON-style geometry object.
 *
 * A "GeometryCollection" is expanded through its `geometries` members
 * (a missing list is treated as empty); any other geometry contributes
 * its `coordinates`.
 *
 * @param {object|undefined} bbox - box accumulated so far, if any
 * @param {object} geometry - geometry object with `type` and `coordinates`/`geometries`
 * @returns {object|undefined} the extended box
 */
function extendBoundsFromGeometry(bbox, geometry) {
  if (geometry.type !== "GeometryCollection") {
    return extendBoundsFromCoordinates(bbox, geometry.coordinates);
  }
  let acc = bbox;
  const members = geometry.geometries || [];
  for (const member of members) {
    acc = extendBoundsFromGeometry(acc, member);
  }
  return acc;
}
2363
/**
 * Grow a bounding box from an arbitrarily nested coordinate array.
 *
 * An array whose first entry is a number is treated as one position
 * (x, y, then optional third and fourth ordinates recorded as z and m);
 * anything else is walked recursively.
 *
 * @param {object|undefined} bbox - box accumulated so far, if any
 * @param {Array} coordinates - a position or nested arrays of positions
 * @returns {object|undefined} the extended box
 */
function extendBoundsFromCoordinates(bbox, coordinates) {
  let acc = bbox;
  const isPosition = typeof coordinates[0] === "number";
  if (!isPosition) {
    for (const nested of coordinates) {
      acc = extendBoundsFromCoordinates(acc, nested);
    }
    return acc;
  }

  const axes = [
    ["xmin", "xmax"],
    ["ymin", "ymax"],
    ["zmin", "zmax"],
    ["mmin", "mmax"]
  ];
  axes.forEach(([minKey, maxKey], index) => {
    // x and y are always offered; z and m only when the position is long enough.
    if (index < 2 || coordinates.length > index) {
      acc = updateAxis(acc, minKey, maxKey, coordinates[index]);
    }
  });
  return acc;
}
2374
/**
 * Fold one ordinate into the min/max pair of a bounding box.
 *
 * `undefined` and non-finite values are ignored. The box is created on
 * first use and otherwise mutated in place.
 *
 * @param {object|undefined} bbox - box to update, if one exists yet
 * @param {string} minKey - property holding the axis minimum
 * @param {string} maxKey - property holding the axis maximum
 * @param {number|undefined} value - ordinate to fold in
 * @returns {object|undefined} the (possibly newly created) box, or the input
 *   unchanged when the value was ignored
 */
function updateAxis(bbox, minKey, maxKey, value) {
  const usable = value !== void 0 && Number.isFinite(value);
  if (!usable) return bbox;

  const box = bbox ? bbox : {};
  // Read both extents before writing either one.
  const { [minKey]: lowest, [maxKey]: highest } = box;
  if (lowest === void 0 || value < lowest) {
    box[minKey] = value;
  }
  if (highest === void 0 || value > highest) {
    box[maxKey] = value;
  }
  return box;
}
2383
/**
 * Encode a point as consecutive little-endian float64 ordinates.
 *
 * Layout: x, y (16 bytes); x, y, z (24 bytes) when only Z is present;
 * x, y, z, m (32 bytes) whenever M is present, with NaN standing in for z
 * when Z is absent.
 *
 * @param {number} x
 * @param {number} y
 * @param {number|undefined} z
 * @param {number|undefined} m
 * @param {boolean} hasZ - whether a z ordinate is available
 * @param {boolean} hasM - whether an m ordinate is available
 * @returns {Uint8Array} the encoded point
 */
function encodeGeoPoint(x, y, z, m, hasZ, hasM) {
  const ordinates = [x, y];
  if (hasM) {
    ordinates.push(hasZ ? z : NaN, m);
  } else if (hasZ) {
    ordinates.push(z);
  }

  const bytes = new Uint8Array(new ArrayBuffer(ordinates.length * 8));
  const view = new DataView(bytes.buffer);
  ordinates.forEach((ordinate, index) => {
    view.setFloat64(index * 8, ordinate, true);
  });
  return bytes;
}
2396
/**
 * Serialize a single bound value to bytes according to its type name.
 *
 * Fixed-width numeric types are written little-endian; decimals are written
 * as the minimal big-endian two's-complement form of the unscaled integer;
 * strings are UTF-8 encoded; binary/fixed/uuid byte arrays pass through.
 *
 * Dates go through `dateToDays`, the same conversion `compare` uses to order
 * them, so a Date, number, bigint or parseable date string all serialize to
 * the day count they were compared by. (Previously a date string became NaN
 * and was silently written as day 0.)
 *
 * @param {*} value - value to serialize
 * @param {*} type - type descriptor understood by `typeName`
 * @returns {Uint8Array|undefined} the encoded bytes, or `undefined` when the
 *   value cannot be represented for this type (including unparseable dates)
 */
function serializeValue(value, type) {
  const name = typeName(type);

  // Allocate a fixed-width buffer, let `write` fill it, and expose it as bytes.
  const pack = (byteLength, write) => {
    const buf = new ArrayBuffer(byteLength);
    write(new DataView(buf));
    return new Uint8Array(buf);
  };
  const toBigInt = (v) => (typeof v === "bigint" ? v : BigInt(v));

  if (name.startsWith("decimal(")) {
    const m = /^decimal\((\d+),\s*(\d+)\)$/.exec(name);
    if (!m) return void 0;
    const scale = parseInt(m[2], 10);
    if (typeof value !== "number" && typeof value !== "bigint") return void 0;
    const factor = 10n ** BigInt(scale);
    // Scale to the unscaled integer before encoding.
    const unscaled = typeof value === "bigint" ? value * factor : BigInt(Math.round(value * Number(factor)));
    return twosComplementMinBigEndian(unscaled);
  }
  if (name.startsWith("fixed[")) return value instanceof Uint8Array ? value : void 0;

  switch (name) {
    case "boolean":
      return new Uint8Array([value ? 1 : 0]);
    case "int":
      return pack(4, (view) => view.setInt32(0, value, true));
    case "long":
    case "time":
      return pack(8, (view) => view.setBigInt64(0, toBigInt(value), true));
    case "float":
      return pack(4, (view) => view.setFloat32(0, value, true));
    case "double":
      return pack(8, (view) => view.setFloat64(0, value, true));
    case "date": {
      const days = dateToDays(value);
      // setInt32 would coerce NaN to 0; emit no bound instead of a wrong one.
      if (Number.isNaN(days)) return void 0;
      return pack(4, (view) => view.setInt32(0, days, true));
    }
    case "timestamp":
    case "timestamptz":
      return pack(8, (view) => view.setBigInt64(0, timestampToMicros(value), true));
    case "timestamp_ns":
    case "timestamptz_ns":
      return pack(8, (view) => view.setBigInt64(0, timestampToNanos(value), true));
    case "string":
      return new TextEncoder().encode(value);
    case "binary":
      return value instanceof Uint8Array ? value : void 0;
    case "uuid":
      if (value instanceof Uint8Array && value.length === 16) return value;
      if (typeof value === "string") return uuidStringToBytes(value);
      return void 0;
    default:
      return void 0;
  }
}
2461
/**
 * Three-way comparison of two values of the same type.
 *
 * @param {*} a - left operand
 * @param {*} b - right operand
 * @param {*} type - type descriptor understood by `typeName`
 * @returns {number} negative, zero or positive; `NaN` when either date
 *   operand cannot be converted to a day count
 */
function compare(a, b, type) {
  const name = typeName(type);
  const natural = (x, y) => (x < y ? -1 : x > y ? 1 : 0);
  const asBigInt = (x) => (typeof x === "bigint" ? x : BigInt(x));

  switch (name) {
    case "boolean":
      return Number(Boolean(a)) - Number(Boolean(b));
    case "int":
    case "string":
      return natural(a, b);
    case "float":
    case "double":
      return compareFloating(a, b);
    case "long":
      return natural(asBigInt(a), asBigInt(b));
    case "date": {
      const left = dateToDays(a);
      const right = dateToDays(b);
      if (Number.isNaN(left) || Number.isNaN(right)) return NaN;
      return natural(left, right);
    }
    case "timestamp":
    case "timestamptz":
      return compareBigInt(timestampToMicros(a), timestampToMicros(b));
    case "timestamp_ns":
    case "timestamptz_ns":
      return compareBigInt(timestampToNanos(a), timestampToNanos(b));
    case "binary":
    case "uuid":
      return compareBytes(a, b);
    default:
      // fixed[N] compares bytewise; every other type uses the native ordering.
      return name.startsWith("fixed[") ? compareBytes(a, b) : natural(a, b);
  }
}
2490
/**
 * Three-way comparison for floating-point numbers that orders -0 before +0.
 *
 * @param {number} a
 * @param {number} b
 * @returns {number} -1, 0 or 1 (0 also when the operands are unordered)
 */
function compareFloating(a, b) {
  if (Object.is(a, b)) return 0;
  const bothZero = a === 0 && b === 0;
  if (bothZero) {
    // The operands are +0 and -0 in some order: the negative zero sorts first.
    return Object.is(a, -0) ? -1 : 1;
  }
  if (a < b) return -1;
  if (a > b) return 1;
  return 0;
}
2495
/**
 * Three-way comparison using the native `<` / `>` ordering.
 *
 * @param {bigint} a
 * @param {bigint} b
 * @returns {number} -1, 0 or 1
 */
function compareBigInt(a, b) {
  if (a < b) return -1;
  return a > b ? 1 : 0;
}
2498
/**
 * Convert a date-like value to a day count (milliseconds / 86,400,000, floored).
 *
 * @param {Date|bigint|number|string|*} value - Date, day count, or a string
 *   accepted by `Date.parse`
 * @returns {number} the day count, or `NaN` when the value cannot be converted
 */
function dateToDays(value) {
  const msPerDay = 864e5;
  if (value instanceof Date) return Math.floor(value.getTime() / msPerDay);

  switch (typeof value) {
    case "bigint":
      return Number(value);
    case "number":
      return value;
    case "string": {
      const epochMs = Date.parse(value);
      return Number.isNaN(epochMs) ? NaN : Math.floor(epochMs / msPerDay);
    }
    default:
      return NaN;
  }
}
2508
/**
 * Normalize a timestamp to a bigint microsecond count.
 *
 * A bigint is returned unchanged, a Date contributes its millisecond time
 * multiplied by 1000, and anything else is passed to `BigInt()`.
 *
 * @param {bigint|Date|number|string} value
 * @returns {bigint}
 */
function timestampToMicros(value) {
  if (typeof value === "bigint") return value;
  if (value instanceof Date) return BigInt(value.getTime()) * 1000n;
  return BigInt(value);
}
2511
/**
 * Normalize a timestamp to a bigint nanosecond count.
 *
 * A bigint is returned unchanged, a Date contributes its millisecond time
 * multiplied by 1,000,000, and anything else is passed to `BigInt()`.
 *
 * @param {bigint|Date|number|string} value
 * @returns {bigint}
 */
function timestampToNanos(value) {
  if (typeof value === "bigint") return value;
  if (value instanceof Date) return BigInt(value.getTime()) * 1000000n;
  return BigInt(value);
}
2514
/**
 * Lexicographic comparison of two byte sequences; when one is a prefix of
 * the other, the shorter one sorts first.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} negative, zero or positive
 */
function compareBytes(a, b) {
  const shared = Math.min(a.length, b.length);
  let index = 0;
  while (index < shared && a[index] === b[index]) {
    index += 1;
  }
  if (index < shared) return a[index] - b[index];
  return a.length - b.length;
}
2519
/**
 * Encode a bigint as big-endian two's complement using the fewest bytes
 * that still preserve its sign.
 *
 * @param {bigint} value - integer to encode
 * @returns {Uint8Array} encoded bytes, most significant first
 */
function twosComplementMinBigEndian(value) {
  const out = [];
  let rest = value;
  let finished = false;
  do {
    const low = Number(rest & 255n);
    out.unshift(low);
    rest >>= 8n;
    // Stop once the remaining bits are pure sign extension of the top byte.
    const topBitSet = (low & 128) !== 0;
    finished = topBitSet ? rest === -1n : rest === 0n;
  } while (!finished);
  return new Uint8Array(out);
}
2531
/**
 * Parse a UUID string (hyphens optional) into its 16 raw bytes.
 *
 * The hyphen-stripped text must be exactly 32 hexadecimal digits. The digits
 * are validated up front because `parseInt` accepts any valid prefix, so
 * pairs such as "0g" or "+1" would otherwise be silently decoded as bytes.
 *
 * @param {string} s - UUID text, e.g. "00112233-4455-6677-8899-aabbccddeeff"
 * @returns {Uint8Array|undefined} the 16 bytes, or `undefined` when the text
 *   is not a well-formed UUID
 */
function uuidStringToBytes(s) {
  const hex = s.replace(/-/g, "");
  if (!/^[0-9a-fA-F]{32}$/.test(hex)) return void 0;
  const out = new Uint8Array(16);
  for (let i = 0; i < 16; i++) {
    out[i] = parseInt(hex.slice(i * 2, i * 2 + 2), 16);
  }
  return out;
}
2542
// Longest prefix kept by truncateLower/truncateUpper: counted in code points for strings and in bytes for binary/fixed values.
+ const TRUNCATE_LIMIT = 16;
2543
/**
 * Compute per-column statistics for a batch of records, keyed by field id.
 *
 * Fields typed unknown/list/map/struct are skipped. Geospatial fields are
 * delegated to `computeGeoBounds`. For every other field, `undefined` values
 * fall back to the field's `write-default`, null/undefined values are counted
 * as nulls, NaN is counted separately for float/double, and min/max are
 * tracked when `hasComparableBounds` allows it.
 *
 * @param {object[]} records - rows to scan
 * @param {{fields: object[]}} schema - schema whose top-level fields are scanned
 * @returns {{value_counts: object, null_value_counts: object, nan_value_counts: object,
 *   lower_bounds: object, upper_bounds: object}} statistics maps keyed by field id
 */
function computeColumnStats(records, schema) {
  const stats = {
    value_counts: {},
    null_value_counts: {},
    nan_value_counts: {},
    lower_bounds: {},
    upper_bounds: {}
  };
  const skipped = new Set(["unknown", "list", "map", "struct"]);

  for (const field of schema.fields) {
    const name = typeName(field.type);
    if (skipped.has(name)) continue;

    if (isGeoType(name)) {
      const geo = computeGeoBounds(records, field);
      stats.value_counts[field.id] = geo.value_count;
      stats.null_value_counts[field.id] = geo.null_count;
      if (geo.lower) stats.lower_bounds[field.id] = geo.lower;
      if (geo.upper) stats.upper_bounds[field.id] = geo.upper;
      continue;
    }

    const floating = name === "float" || name === "double";
    const bounded = hasComparableBounds(field.type);
    const fallback = field["write-default"];
    let nullCount = 0n;
    let nanCount = 0n;
    let lowest;
    let highest;

    for (const record of records) {
      const raw = record[field.name];
      const value = raw === void 0 && fallback !== void 0 ? fallback : raw;
      if (value == null) {
        nullCount += 1n;
        continue;
      }
      if (floating && Number.isNaN(value)) {
        nanCount += 1n;
        continue;
      }
      if (!bounded) continue;
      if (lowest === void 0 || compare(value, lowest, field.type) < 0) lowest = value;
      if (highest === void 0 || compare(value, highest, field.type) > 0) highest = value;
    }

    stats.value_counts[field.id] = BigInt(records.length);
    stats.null_value_counts[field.id] = nullCount;
    if (floating) stats.nan_value_counts[field.id] = nanCount;

    if (lowest !== void 0) {
      const encodedLower = serializeValue(truncateLower(lowest, field.type), field.type);
      if (encodedLower) stats.lower_bounds[field.id] = encodedLower;
    }
    if (highest !== void 0) {
      // truncateUpper yields undefined when no valid upper bound exists.
      const capped = truncateUpper(highest, field.type);
      if (capped !== void 0) {
        const encodedUpper = serializeValue(capped, field.type);
        if (encodedUpper) stats.upper_bounds[field.id] = encodedUpper;
      }
    }
  }

  return stats;
}
2607
/**
 * Decide whether min/max bounds are tracked for a type.
 *
 * @param {*} type - type descriptor understood by `typeName`
 * @returns {boolean} false for geospatial, "unknown" and "variant" types; true otherwise
 */
function hasComparableBounds(type) {
  const name = typeName(type);
  return !isGeoType(name) && !["unknown", "variant"].includes(name);
}
2612
/**
 * Summarize a list of values of one type: null/NaN presence plus encoded bounds.
 *
 * @param {Iterable<*>} values - values to summarize
 * @param {*} type - type descriptor understood by `typeName`
 * @returns {{contains_null: boolean, contains_nan?: boolean,
 *   lower_bound?: Uint8Array, upper_bound?: Uint8Array}} `contains_nan` is set
 *   only for float/double; bounds only when they could be computed and encoded
 */
function computeFieldSummary(values, type) {
  const name = typeName(type);
  const floating = name === "float" || name === "double";
  const bounded = hasComparableBounds(type);

  let sawNull = false;
  let sawNan = false;
  let lowest;
  let highest;

  for (const value of values) {
    if (value == null) {
      sawNull = true;
      continue;
    }
    if (floating && Number.isNaN(value)) {
      sawNan = true;
      continue;
    }
    if (!bounded) continue;
    if (lowest === void 0 || compare(value, lowest, type) < 0) lowest = value;
    if (highest === void 0 || compare(value, highest, type) > 0) highest = value;
  }

  const summary = { contains_null: sawNull };
  if (floating) summary.contains_nan = sawNan;

  if (lowest !== void 0) {
    const encodedLower = serializeValue(truncateLower(lowest, type), type);
    if (encodedLower) summary.lower_bound = encodedLower;
  }
  if (highest !== void 0) {
    // truncateUpper yields undefined when no valid upper bound exists.
    const capped = truncateUpper(highest, type);
    if (capped !== void 0) {
      const encodedUpper = serializeValue(capped, type);
      if (encodedUpper) summary.upper_bound = encodedUpper;
    }
  }
  return summary;
}
2649
/**
 * Shorten a lower bound to at most TRUNCATE_LIMIT units by taking a prefix.
 *
 * Strings are cut by code point; binary/fixed byte arrays are cut by byte.
 * Every other value is returned untouched.
 *
 * @param {*} value - bound to shorten
 * @param {*} type - type descriptor understood by `typeName`
 * @returns {*} the (possibly shortened) bound
 */
function truncateLower(value, type) {
  const name = typeName(type);

  if (name === "string" && typeof value === "string") {
    const codePoints = [...value];
    return codePoints.length > TRUNCATE_LIMIT
      ? codePoints.slice(0, TRUNCATE_LIMIT).join("")
      : value;
  }

  const isBytes = (name === "binary" || name.startsWith("fixed[")) && value instanceof Uint8Array;
  if (isBytes && value.length > TRUNCATE_LIMIT) {
    return value.slice(0, TRUNCATE_LIMIT);
  }
  return value;
}
2662
/**
 * Shorten an upper bound to at most TRUNCATE_LIMIT units while keeping it an
 * upper bound: the prefix is cut and its last incrementable unit is bumped.
 *
 * Strings work on code points (skipping the surrogate range when
 * incrementing); binary/fixed byte arrays work on bytes. Values within the
 * limit, and values of any other type, are returned untouched.
 *
 * @param {*} value - bound to shorten
 * @param {*} type - type descriptor understood by `typeName`
 * @returns {*} the shortened bound, or `undefined` when no unit of the prefix
 *   can be incremented
 */
function truncateUpper(value, type) {
  const name = typeName(type);

  if (name === "string" && typeof value === "string") {
    const codePoints = [...value];
    if (codePoints.length <= TRUNCATE_LIMIT) return value;
    const head = codePoints.slice(0, TRUNCATE_LIMIT);
    for (let last = head.length - 1; last >= 0; last--) {
      const bumped = head[last].codePointAt(0) + 1;
      // Jump over the surrogate block, which holds no valid scalar values.
      const successor = bumped === 0xd800 ? 0xe000 : bumped;
      if (successor <= 0x10ffff) {
        head[last] = String.fromCodePoint(successor);
        return head.slice(0, last + 1).join("");
      }
    }
    return void 0;
  }

  if ((name === "binary" || name.startsWith("fixed[")) && value instanceof Uint8Array) {
    if (value.length <= TRUNCATE_LIMIT) return value;
    const head = value.slice(0, TRUNCATE_LIMIT);
    let last = head.length - 1;
    // Trailing 0xff bytes cannot be incremented; drop them.
    while (last >= 0 && head[last] === 255) last--;
    if (last < 0) return void 0;
    const bumped = head.slice(0, last + 1);
    bumped[last] += 1;
    return bumped;
  }

  return value;
}
2691
/**
 * Look up the snapshot referenced by `current-snapshot-id`.
 *
 * @param {object} metadata - table metadata
 * @returns {object|undefined} the matching entry of `metadata.snapshots`, or
 *   `undefined` when there is no current id, no snapshot list, or no match
 */
function currentSnapshot(metadata) {
  const currentId = metadata["current-snapshot-id"];
  return currentId === void 0
    ? void 0
    : metadata.snapshots?.find((snapshot) => snapshot["snapshot-id"] === currentId);
}
2696
/**
 * Fetch the manifest entries listed by the table's current snapshot.
 *
 * @param {object} metadata - table metadata
 * @param {*} resolver - passed through to `fetchAvroRecords`
 * @returns {Promise<Array>} records read from the current snapshot's
 *   `manifest-list`, or an empty array when there is none
 */
async function loadPriorManifests(metadata, resolver) {
  const manifestList = currentSnapshot(metadata)?.["manifest-list"];
  if (!manifestList) return [];
  return await fetchAvroRecords(manifestList, resolver);
}
2701
/**
 * Write a manifest list for a new snapshot and build the matching commit payload.
 *
 * Prior manifests (given, or loaded from the current snapshot) are optionally
 * filtered by path, followed by the new manifests. For format version 3 and
 * up, first-row ids are assigned to the combined list before it is written.
 *
 * @param {object} options
 * @param {string} options.tableUrl - table root used to build the manifest list path
 * @param {object} options.metadata - current table metadata
 * @param {object} options.resolver - must provide a `writer(path)` factory
 * @param {*} options.snapshotId
 * @param {*} options.sequenceNumber
 * @param {string} options.manifestUuid - unique part of the manifest list file name
 * @param {number} options.timestampMs
 * @param {number} options.formatVersion
 * @param {object[]} options.newManifests
 * @param {object} options.summary - stored as the snapshot summary
 * @param {string[]} options.writtenFiles - paths written so far
 * @param {object[]} [options.priorManifests] - loaded from metadata when nullish
 * @param {Set<string>} [options.skipPriorManifestPaths] - prior manifest paths to drop
 * @returns {Promise<{snapshot: object, requirements: object[], updates: object[], writtenFiles: string[]}>}
 * @throws {Error} when `resolver.writer` is missing
 */
async function buildSnapshotUpdate({ tableUrl, metadata, resolver, snapshotId, sequenceNumber, manifestUuid, timestampMs, formatVersion, newManifests, summary, writtenFiles, priorManifests, skipPriorManifestPaths }) {
  const openWriter = resolver.writer;
  if (!openWriter) throw new Error("resolver.writer is required");

  const rowLineage = formatVersion >= 3;
  const firstRowId = rowLineage ? BigInt(metadata["next-row-id"] ?? 0) : 0n;

  let carried = priorManifests ?? await loadPriorManifests(metadata, resolver);
  if (skipPriorManifestPaths?.size) {
    carried = carried.filter((manifest) => !skipPriorManifestPaths.has(manifest.manifest_path));
  }
  const allManifests = [...carried, ...newManifests];
  // Must run before the list is written: it fills in first_row_id on the manifests.
  const addedRows = rowLineage ? assignFirstRowIds$1(allManifests, firstRowId) : 0n;

  const manifestListPath = `${tableUrl}/metadata/snap-${snapshotId}-1-${manifestUuid}.avro`;
  await writeManifestList({
    writer: openWriter(manifestListPath),
    snapshotId,
    sequenceNumber,
    manifests: allManifests,
    formatVersion
  });

  const snapshot = {
    "snapshot-id": Number(snapshotId),
    "sequence-number": Number(sequenceNumber),
    "timestamp-ms": timestampMs,
    "manifest-list": manifestListPath,
    summary,
    "schema-id": metadata["current-schema-id"]
  };
  if (rowLineage) {
    snapshot["first-row-id"] = toMetadataLong(firstRowId);
    snapshot["added-rows"] = toMetadataLong(addedRows);
  }

  // undefined, null and -1 all mean "no current snapshot".
  const rawCurrentId = metadata["current-snapshot-id"];
  const hasParent = rawCurrentId !== void 0 && rawCurrentId !== null && rawCurrentId !== -1;
  const currentSnapshotId = hasParent ? rawCurrentId : null;
  if (hasParent) snapshot["parent-snapshot-id"] = currentSnapshotId;

  const requirements = [
    { type: "assert-table-uuid", uuid: metadata["table-uuid"] },
    { type: "assert-ref-snapshot-id", ref: "main", "snapshot-id": currentSnapshotId }
  ];
  if (rowLineage) {
    requirements.push({
      type: "assert-next-row-id",
      "next-row-id": toMetadataLong(metadata["next-row-id"] ?? 0)
    });
  }

  const updates = [
    { action: "add-snapshot", snapshot },
    {
      action: "set-snapshot-ref",
      "ref-name": "main",
      type: "branch",
      "snapshot-id": snapshot["snapshot-id"]
    }
  ];

  return {
    snapshot,
    requirements,
    updates,
    writtenFiles: [...writtenFiles, manifestListPath]
  };
}
2760
/**
 * Build one field summary per partition field from a list of partition tuples.
 *
 * @param {object[]} partitions - partition tuples keyed by partition field name
 * @param {{fields: object[]}} schema - schema used to resolve source fields
 * @param {{fields: object[]}} partitionSpec - partition spec to summarize
 * @returns {object[]} summaries in partition-field order
 * @throws {Error} when a partition field's `source-id` is not in the schema
 */
function buildPartitionSummaries(partitions, schema, partitionSpec) {
  const summarize = (partitionField) => {
    const sourceId = partitionField["source-id"];
    const source = schema.fields.find((candidate) => candidate.id === sourceId);
    if (!source) throw new Error(`partition source field id ${partitionField["source-id"]} not found`);
    const resultType = transformResultType(partitionField.transform, source.type);
    const values = partitions.map((tuple) => tuple[partitionField.name]);
    return computeFieldSummary(values, resultType);
  };
  return partitionSpec.fields.map((partitionField) => summarize(partitionField));
}
2768
/**
 * Assign `first_row_id` to manifests that lack one, mutating them in place.
 *
 * Manifests whose `content` is not 0 get `first_row_id` cleared. A manifest
 * that already has an id only advances the running cursor past its range;
 * one without an id receives the cursor and advances it by its added plus
 * existing row counts.
 *
 * @param {object[]} manifests - manifests to update
 * @param {bigint} firstRowId - first id available for assignment
 * @returns {bigint} total number of row ids handed out by this call
 */
function assignFirstRowIds$1(manifests, firstRowId) {
  let cursor = firstRowId;
  let handedOut = 0n;

  for (const manifest of manifests) {
    if (manifest.content !== 0) {
      manifest.first_row_id = void 0;
      continue;
    }

    const span = BigInt(manifest.added_rows_count ?? 0) + BigInt(manifest.existing_rows_count ?? 0);

    if (manifest.first_row_id != null) {
      const end = BigInt(manifest.first_row_id) + span;
      if (end > cursor) cursor = end;
      continue;
    }

    manifest.first_row_id = cursor;
    cursor += span;
    handedOut += span;
  }

  return handedOut;
}
2788
/**
 * Convert a value to a JavaScript number for use in table metadata.
 *
 * @param {bigint|number|*} value - value to convert with `Number()`
 * @returns {number} the converted value
 * @throws {Error} when the result is not a safe integer
 */
function toMetadataLong(value) {
  const asNumber = Number(value);
  if (Number.isSafeInteger(asNumber)) return asNumber;
  throw new Error(`metadata long exceeds JavaScript safe integer range: ${value}`);
}
2793
/**
 * Write records to a Parquet file laid out from an Iceberg schema.
 *
 * Top-level fields that map to no Parquet elements are left out of both the
 * schema and the column data. The Iceberg schema is embedded as JSON under
 * the "iceberg.schema" key-value metadata entry.
 *
 * @param {object} options
 * @param {*} options.writer - destination writer handed to `parquetWrite`
 * @param {{fields: object[]}} options.schema - Iceberg schema
 * @param {object[]} options.records - rows to write
 * @param {*} options.codec - compression codec handed to `parquetWrite`
 * @returns {*} whatever `parquetWrite` returns
 */
function writeParquet({ writer, schema, records, codec }) {
  const columnData = [];
  const schemaElements = [];

  schema.fields.forEach((field) => {
    const columnName = sanitize(field.name);
    const elements = icebergTypeToParquetFields(columnName, field.type, field.required, field.id);
    if (elements.length === 0) return;
    columnData.push({ name: columnName, data: extractColumn(records, field) });
    schemaElements.push(...elements);
  });

  // One column-data entry is pushed per written root child.
  const root = { name: "root", num_children: columnData.length };
  return parquetWrite({
    writer,
    columnData,
    schema: [root, ...schemaElements],
    kvMetadata: [{ key: "iceberg.schema", value: JSON.stringify(schema) }],
    codec
  });
}
2822
/**
 * Pull one column out of row-oriented records, applying write defaults.
 *
 * @param {object[]} records - rows to read from
 * @param {object} field - schema field whose `name` selects the column
 * @returns {Array} one materialized value per record, in record order
 */
function extractColumn(records, field) {
  return Array.from(
    { length: records.length },
    (_, row) => materializeFieldValue(records[row][field.name], field)
  );
}
2827
/**
 * Resolve the value written for a field: the given value, else the field's
 * `write-default`, else null; nested defaults are then filled in.
 *
 * @param {*} value - raw record value (`undefined` means "absent")
 * @param {object} field - schema field (`type`, optional `write-default`)
 * @returns {*} the value to write
 */
function materializeFieldValue(value, field) {
  const fallback = field["write-default"];
  let effective = value;
  if (effective === void 0) {
    effective = fallback !== void 0 ? fallback : null;
  }
  return materializeNestedDefaults(effective, field.type);
}
2831
/**
 * Recursively apply write defaults inside struct, list and map values.
 *
 * Null/undefined values, primitive (non-object) types, and values whose
 * shape does not match the declared type are returned unchanged.
 *
 * @param {*} value - value to process
 * @param {*} type - Iceberg type: a primitive name or a nested type object
 * @returns {*} a copy with defaults applied, or the original value
 */
function materializeNestedDefaults(value, type) {
  if (value == null || typeof type !== "object") return value;

  switch (type.type) {
    case "struct": {
      if (typeof value !== "object" || Array.isArray(value)) return value;
      // Copy first so properties outside the schema survive untouched.
      const filled = { ...value };
      for (const child of type.fields) {
        filled[child.name] = materializeFieldValue(value[child.name], child);
      }
      return filled;
    }
    case "list":
      return Array.isArray(value)
        ? value.map((item) => materializeNestedDefaults(item, type.element))
        : value;
    case "map":
      return materializeMapDefaults(value, type);
    default:
      return value;
  }
}
2846
/**
 * Apply nested write defaults to the keys and values of a map value.
 *
 * When neither the key nor the value type is a nested type object, the value
 * is returned untouched. Otherwise: a `Map` and an array of `{key, value}`
 * objects or two-element arrays become an array of `{key, value}` objects
 * (unrecognized array entries are kept as-is), and a plain object is rebuilt
 * with processed values.
 *
 * @param {*} value - map value in any of the supported shapes
 * @param {{key: *, value: *}} type - map type with key and value types
 * @returns {*} the processed map value
 */
function materializeMapDefaults(value, type) {
  const keyNested = typeof type.key === "object";
  const valueNested = typeof type.value === "object";
  if (!keyNested && !valueNested) return value;

  const toEntry = (key, entryValue) => ({
    key: materializeNestedDefaults(key, type.key),
    value: materializeNestedDefaults(entryValue, type.value)
  });

  if (value instanceof Map) {
    return Array.from(value.entries(), ([key, entryValue]) => toEntry(key, entryValue));
  }

  if (Array.isArray(value)) {
    return value.map((entry) => {
      const isEntryObject = entry && typeof entry === "object" && "key" in entry && "value" in entry;
      if (isEntryObject) return toEntry(entry.key, entry.value);
      if (Array.isArray(entry) && entry.length === 2) return toEntry(entry[0], entry[1]);
      return entry;
    });
  }

  if (typeof value === "object") {
    // Plain-object maps keep their keys; only the values are processed.
    const pairs = Object.entries(value).map(([key, entryValue]) => [
      key,
      materializeNestedDefaults(entryValue, type.value)
    ]);
    return Object.fromEntries(pairs);
  }

  return value;
}
2866
/**
 * Translate one Iceberg field into a flat, depth-first list of Parquet schema
 * elements (the field's own element followed by its descendants).
 *
 * @param {string} name - Parquet element name
 * @param {*} type - Iceberg type: a primitive name or a nested type object
 * @param {boolean} required - selects REQUIRED vs OPTIONAL repetition
 * @param {number} fieldId - Iceberg field id stored as `field_id`
 * @returns {object[]} schema elements; empty for an optional "unknown" field
 * @throws {Error} for unsupported types, unsupported map keys, required
 *   "unknown" fields, lists or maps whose element/value has no writable
 *   representation, and structs without writable children
 */
function icebergTypeToParquetFields(name, type, required, fieldId) {
  const repetition_type = required ? "REQUIRED" : "OPTIONAL";

  if (typeof type === "object") {
    switch (type.type) {
      case "list": {
        const elementFields = icebergTypeToParquetFields("element", type.element, type["element-required"], type["element-id"]);
        if (elementFields.length === 0) throw new Error(`unsupported iceberg list element type: ${typeName(type.element)}`);
        const listGroup = {
          name,
          converted_type: "LIST",
          logical_type: { type: "LIST" },
          repetition_type,
          num_children: 1,
          field_id: fieldId
        };
        const repeatedGroup = { name: "list", repetition_type: "REPEATED", num_children: 1 };
        return [listGroup, repeatedGroup, ...elementFields];
      }
      case "struct": {
        const descendants = [];
        let childCount = 0;
        for (const child of type.fields) {
          const childFields = icebergTypeToParquetFields(child.name, child.type, child.required, child.id);
          // Children without a Parquet representation are left out.
          if (childFields.length > 0) {
            descendants.push(...childFields);
            childCount += 1;
          }
        }
        if (childCount === 0) throw new Error(`struct ${name} has no writable children`);
        const structGroup = { name, repetition_type, num_children: childCount, field_id: fieldId };
        return [structGroup, ...descendants];
      }
      case "map": {
        const keySupported = type.key === "string" || type.key === "int";
        if (!keySupported) throw new Error(`unsupported iceberg map key type: ${typeName(type.key)}`);
        const keyFields = icebergTypeToParquetFields("key", type.key, true, type["key-id"]);
        const valueFields = icebergTypeToParquetFields("value", type.value, type["value-required"], type["value-id"]);
        if (keyFields.length === 0) throw new Error(`unsupported iceberg map key type: ${typeName(type.key)}`);
        if (valueFields.length === 0) throw new Error(`unsupported iceberg map value type: ${typeName(type.value)}`);
        const mapGroup = {
          name,
          converted_type: "MAP",
          logical_type: { type: "MAP" },
          repetition_type,
          num_children: 1,
          field_id: fieldId
        };
        const entryGroup = { name: "key_value", repetition_type: "REPEATED", num_children: 2 };
        return [mapGroup, entryGroup, ...keyFields, ...valueFields];
      }
      default:
        throw new Error(`unsupported iceberg nested type: ${JSON.stringify(type)}`);
    }
  }

  // Single-element result for a primitive column: name, physical/logical
  // description, repetition and field id, in that property order.
  const leaf = (description) => [{ name, ...description, repetition_type, field_id: fieldId }];

  if (type.startsWith("geometry")) {
    return leaf({ type: "BYTE_ARRAY", logical_type: { type: "GEOMETRY" } });
  }
  if (type.startsWith("geography")) {
    return leaf({ type: "BYTE_ARRAY", logical_type: { type: "GEOGRAPHY" } });
  }

  const decimal = parseDecimalType(type);
  if (decimal) {
    const { precision, scale } = decimal;
    return leaf({
      type: "FIXED_LEN_BYTE_ARRAY",
      type_length: decimalRequiredBytes(precision),
      converted_type: "DECIMAL",
      logical_type: { type: "DECIMAL", precision, scale },
      precision,
      scale
    });
  }

  const fixedLength = parseFixedType(type);
  if (fixedLength !== void 0) {
    return leaf({ type: "FIXED_LEN_BYTE_ARRAY", type_length: fixedLength });
  }

  switch (type) {
    case "unknown":
      if (required) throw new Error("unsupported required iceberg type: unknown");
      return [];
    case "variant":
      return [
        {
          name,
          repetition_type,
          num_children: 2,
          logical_type: { type: "VARIANT" },
          field_id: fieldId
        },
        { name: "metadata", type: "BYTE_ARRAY", repetition_type: "REQUIRED" },
        { name: "value", type: "BYTE_ARRAY", repetition_type: "OPTIONAL" }
      ];
    case "boolean":
      return leaf({ type: "BOOLEAN" });
    case "int":
      return leaf({ type: "INT32" });
    case "long":
      return leaf({ type: "INT64" });
    case "float":
      return leaf({ type: "FLOAT" });
    case "double":
      return leaf({ type: "DOUBLE" });
    case "string":
      return leaf({ type: "BYTE_ARRAY", converted_type: "UTF8" });
    case "binary":
      return leaf({ type: "BYTE_ARRAY" });
    case "uuid":
      return leaf({ type: "FIXED_LEN_BYTE_ARRAY", type_length: 16, logical_type: { type: "UUID" } });
    case "date":
      return leaf({ type: "INT32", converted_type: "DATE", logical_type: { type: "DATE" } });
    case "time":
      return leaf({
        type: "INT64",
        converted_type: "TIME_MICROS",
        logical_type: { type: "TIME", isAdjustedToUTC: false, unit: "MICROS" }
      });
    case "timestamp":
      return [timestampField(name, repetition_type, false, "MICROS", fieldId)];
    case "timestamptz":
      return [timestampField(name, repetition_type, true, "MICROS", fieldId)];
    case "timestamp_ns":
      return [timestampField(name, repetition_type, false, "NANOS", fieldId)];
    case "timestamptz_ns":
      return [timestampField(name, repetition_type, true, "NANOS", fieldId)];
    default:
      throw new Error(`unsupported iceberg type: ${type}`);
  }
}
3074
/**
 * Extract the byte length from a `fixed[N]` type name.
 *
 * @param {string} type - type name to inspect
 * @returns {number|undefined} N, or `undefined` when the name is not `fixed[N]`
 */
function parseFixedType(type) {
  const match = /^fixed\[(\d+)\]$/.exec(type);
  return match ? parseInt(match[1], 10) : void 0;
}
3079
/**
 * Build the Parquet schema element for an INT64-backed timestamp column.
 *
 * @param {string} name - element name
 * @param {string} repetition_type - repetition stored on the element
 * @param {boolean} isAdjustedToUTC - stored on the TIMESTAMP logical type
 * @param {string} unit - time unit stored on the logical type (e.g. "MICROS")
 * @param {number} field_id - field id stored on the element
 * @returns {object} the schema element
 */
function timestampField(name, repetition_type, isAdjustedToUTC, unit, field_id) {
  const logical_type = { type: "TIMESTAMP", isAdjustedToUTC, unit };
  return { name, type: "INT64", logical_type, repetition_type, field_id };
}
3092
/**
 * Build a record comparator from a sort order definition.
 *
 * Each sort field is resolved against the schema once, up front. The
 * comparator then walks the sort fields in order and returns the first
 * non-zero result.
 *
 * @param {{fields?: object[]}|undefined} sortOrder - sort order definition
 * @param {{fields: object[]}} schema - schema used to resolve source fields
 * @returns {((a: object, b: object) => number)|undefined} the comparator, or
 *   `undefined` when the sort order has no fields
 * @throws {Error} when a sort field's source id is not in the schema
 */
function buildSortComparator(sortOrder, schema) {
  const sortFields = sortOrder?.fields;
  if (!sortFields?.length) return void 0;

  const plan = sortFields.map((sortField) => {
    // Prefer the single source id; otherwise use the first of `source-ids`.
    const sourceId = sortField["source-id"] ?? sortField["source-ids"]?.[0];
    const source = schema.fields.find((candidate) => candidate.id === sourceId);
    if (!source) throw new Error(`sort source field id ${sourceId} not found in schema`);
    return {
      name: source.name,
      transform: sortField.transform,
      sourceType: source.type,
      resultType: transformResultType(sortField.transform, source.type),
      desc: sortField.direction === "desc",
      nullsFirst: sortField["null-order"] === "nulls-first"
    };
  });

  return (a, b) => {
    for (const { name, transform, sourceType, resultType, desc, nullsFirst } of plan) {
      const left = sortKey(a[name], transform, sourceType);
      const right = sortKey(b[name], transform, sourceType);
      const order = compareKeys(left, right, resultType, desc, nullsFirst);
      if (order !== 0) return order;
    }
    return 0;
  };
}
3115
/**
 * Derive the sort key for one value under a sort transform.
 *
 * @param {*} value - raw record value
 * @param {string} transform - transform name; "identity" keeps the value as-is
 * @param {*} sourceType - source column type handed to `applyTransform`
 * @returns {*} `null` for null/undefined input, otherwise the (transformed) value
 */
function sortKey(value, transform, sourceType) {
  if (value == null) return null;
  return transform === "identity" ? value : applyTransform(transform, value, sourceType);
}
3120
/**
 * Compare two sort keys, honoring null placement and sort direction.
 *
 * Null placement follows `nullsFirst` only and is not flipped by `desc`.
 * NaN ranks above every other number before the direction is applied.
 *
 * @param {*} ka - left key
 * @param {*} kb - right key
 * @param {*} resultType - type used by `compare` for non-null, non-NaN keys
 * @param {boolean} desc - reverse the ordering of non-null keys
 * @param {boolean} nullsFirst - place null/undefined keys before all others
 * @returns {number} negative, zero or positive
 */
function compareKeys(ka, kb, resultType, desc, nullsFirst) {
  const missingA = ka == null;
  const missingB = kb == null;
  if (missingA || missingB) {
    if (missingA && missingB) return 0;
    const whenLeftMissing = nullsFirst ? -1 : 1;
    return missingA ? whenLeftMissing : -whenLeftMissing;
  }

  // Number.isNaN is already false for every non-number key.
  const nanA = Number.isNaN(ka);
  const nanB = Number.isNaN(kb);
  if (nanA && nanB) return 0;

  let order;
  if (nanA || nanB) {
    order = nanA ? 1 : -1;
  } else {
    order = compare(ka, kb, resultType);
  }
  return desc ? -order : order;
}
3136
/**
 * Write the Parquet data files and the Avro data manifest for an append,
 * without committing anything to the catalog.
 *
 * Records are grouped per partition when the default partition spec has
 * fields (one data file per group), otherwise a single file holds all records.
 * Each group is sorted when `buildSortComparator` returns a comparator.
 *
 * @param {object} options
 * @param {string} options.tableUrl - base location of the table
 * @param {object} options.metadata - table metadata; format-version must be 2 or 3
 * @param {object[]} options.records - rows to append
 * @param {object} options.resolver - must expose `writer(path)`
 * @param {number} [options.sortOrderId] - explicit sort order; falls back to the table default, then 0
 * @returns {Promise<object>} snapshot id, manifest uuid/path/length, partition summaries, counts and the list of written files
 * @throws {Error} on missing arguments, unsupported format-version, or missing spec/schema/sort order
 */
async function prepareAppend({ tableUrl, metadata, records, resolver, sortOrderId }) {
  if (!tableUrl) throw new Error("tableUrl is required");
  const createWriter = resolver?.writer;
  if (!createWriter) throw new Error("resolver.writer is required");

  const formatVersion = metadata["format-version"];
  if (formatVersion !== 2 && formatVersion !== 3) {
    throw new Error(`unsupported format-version: ${formatVersion}`);
  }

  const defaultSpecId = metadata["default-spec-id"];
  const partitionSpec = metadata["partition-specs"].find((spec) => spec["spec-id"] === defaultSpecId);
  if (!partitionSpec) throw new Error("default partition spec not found in metadata");

  const currentSchemaId = metadata["current-schema-id"];
  const schema = metadata.schemas.find((candidate) => candidate["schema-id"] === currentSchemaId);
  if (!schema) throw new Error("current schema not found in metadata");
  validateSchemaForVersion(schema, formatVersion);

  const snapshotId = newSnapshotId(metadata);
  const manifestUuid = uuid4();

  const tableProps = metadata.properties;
  checkWriteFormat(tableProps?.["write.format.default"]);
  const codec = resolveParquetCodec(tableProps?.["write.parquet.compression-codec"]);

  const orderId = sortOrderId ?? metadata["default-sort-order-id"] ?? 0;
  const sortOrder = (metadata["sort-orders"] ?? []).find((order) => order["order-id"] === orderId);
  // Only an explicitly requested sort order has to exist; a missing default is tolerated.
  if (sortOrderId !== undefined && !sortOrder) {
    throw new Error(`sort order ${sortOrderId} not found in metadata`);
  }
  const comparator = buildSortComparator(sortOrder, schema);
  const appliedSortOrderId = comparator ? orderId : 0;

  let groups;
  if (partitionSpec.fields.length) {
    groups = groupByPartition(records, schema, partitionSpec);
  } else {
    groups = [{ partition: {}, records }];
  }

  // Writes one group to its own Parquet file and describes the result.
  const writeGroup = async (group) => {
    // Sort a copy so the caller's array is left in its original order.
    const sortedRecords = comparator ? [...group.records].sort(comparator) : group.records;
    const dataPath = `${tableUrl}/data/${uuid4()}.parquet`;
    const dataWriter = createWriter(dataPath);
    await writeParquet({ writer: dataWriter, schema, records: sortedRecords, codec });
    const stats = computeColumnStats(sortedRecords, schema);
    const dataFile = {
      content: 0,
      file_path: dataPath,
      file_format: "parquet",
      partition: group.partition,
      record_count: BigInt(sortedRecords.length),
      file_size_in_bytes: BigInt(dataWriter.offset),
      value_counts: stats.value_counts,
      null_value_counts: stats.null_value_counts,
      nan_value_counts: stats.nan_value_counts,
      lower_bounds: stats.lower_bounds,
      upper_bounds: stats.upper_bounds,
      sort_order_id: appliedSortOrderId,
    };
    return { partition: group.partition, records: sortedRecords, dataFile, path: dataPath };
  };
  const writtenDataFiles = await Promise.all(groups.map((group) => writeGroup(group)));

  const manifestPath = `${tableUrl}/metadata/${manifestUuid}-m0.avro`;
  const manifestWriter = createWriter(manifestPath);
  await writeDataManifest({
    writer: manifestWriter,
    schema,
    partitionSpec,
    snapshotId,
    dataFiles: writtenDataFiles.map((written) => written.dataFile),
    formatVersion,
  });
  const manifestLength = BigInt(manifestWriter.offset);

  let addedRowCount = 0n;
  let addedFilesSize = 0n;
  for (const written of writtenDataFiles) {
    addedRowCount += BigInt(written.records.length);
    addedFilesSize += written.dataFile.file_size_in_bytes;
  }

  const partitions = buildPartitionSummaries(
    writtenDataFiles.map((written) => written.dataFile.partition),
    schema,
    partitionSpec,
  );

  return {
    snapshotId,
    manifestUuid,
    formatVersion,
    manifestPath,
    manifestLength,
    partitionSpecId: partitionSpec["spec-id"],
    partitions,
    addedDataFilesCount: writtenDataFiles.length,
    addedRowCount,
    addedFilesSize,
    recordsCount: records.length,
    writtenFiles: [...writtenDataFiles.map((written) => written.path), manifestPath],
  };
}
3220
/**
 * Build the snapshot update for an append whose files were already written
 * by `prepareAppend`.
 *
 * The new manifest entry and the snapshot summary are derived from `prepared`
 * and from the summary of the table's current snapshot; the result of
 * `buildSnapshotUpdate` is returned unchanged.
 *
 * @param {object} options
 * @param {string} options.tableUrl - base location of the table
 * @param {object} options.metadata - table metadata the snapshot is staged against
 * @param {object} options.prepared - result of `prepareAppend`
 * @param {object} options.resolver - must expose `writer`
 * @returns {Promise<object>} whatever `buildSnapshotUpdate` resolves to
 * @throws {Error} when `tableUrl` or `resolver.writer` is missing
 */
async function stageSnapshotForAppend({ tableUrl, metadata, prepared, resolver }) {
  if (!tableUrl) throw new Error("tableUrl is required");
  if (!resolver?.writer) throw new Error("resolver.writer is required");
  const sequenceNumber = BigInt(metadata["last-sequence-number"] ?? 0) + 1n;
  const timestampMs = Date.now();
  const newManifest = {
    manifest_path: prepared.manifestPath,
    manifest_length: prepared.manifestLength,
    partition_spec_id: prepared.partitionSpecId,
    content: 0,
    sequence_number: sequenceNumber,
    min_sequence_number: sequenceNumber,
    added_snapshot_id: prepared.snapshotId,
    added_files_count: prepared.addedDataFilesCount,
    existing_files_count: 0,
    deleted_files_count: 0,
    added_rows_count: prepared.addedRowCount,
    existing_rows_count: 0n,
    deleted_rows_count: 0n,
    partitions: prepared.partitions,
  };
  const prevSummary = currentSnapshot(metadata)?.summary;
  const prevTotals = {
    records: BigInt(prevSummary?.["total-records"] ?? "0"),
    size: BigInt(prevSummary?.["total-files-size"] ?? "0"),
    files: BigInt(prevSummary?.["total-data-files"] ?? "0"),
  };
  // An append neither adds nor removes delete files, so the delete totals of
  // the previous snapshot are carried forward instead of being reset to zero.
  const carriedTotal = (key) => String(prevSummary?.[key] ?? "0");
  const summary = {
    operation: "append",
    "added-data-files": String(prepared.addedDataFilesCount),
    "added-records": String(prepared.recordsCount),
    "added-files-size": String(prepared.addedFilesSize),
    "changed-partition-count": String(prepared.addedDataFilesCount),
    "total-records": String(prevTotals.records + BigInt(prepared.recordsCount)),
    "total-files-size": String(prevTotals.size + prepared.addedFilesSize),
    "total-data-files": String(prevTotals.files + BigInt(prepared.addedDataFilesCount)),
    "total-delete-files": carriedTotal("total-delete-files"),
    "total-position-deletes": carriedTotal("total-position-deletes"),
    "total-equality-deletes": carriedTotal("total-equality-deletes"),
  };
  return await buildSnapshotUpdate({
    tableUrl,
    metadata,
    resolver,
    snapshotId: prepared.snapshotId,
    sequenceNumber,
    manifestUuid: prepared.manifestUuid,
    timestampMs,
    formatVersion: prepared.formatVersion,
    newManifests: [newManifest],
    summary,
    writtenFiles: [],
  });
}
3274
/**
 * Reject any `write.format.default` other than parquet (case-insensitive).
 * An unset (`undefined`) value is accepted.
 *
 * @param {string|undefined} value - raw table property
 * @throws {Error} when the value names another format
 */
function checkWriteFormat(value) {
  const isAccepted = value === undefined || value.toLowerCase() === "parquet";
  if (!isAccepted) {
    throw new Error(`unsupported write.format.default: ${value}`);
  }
}
3278
/**
 * Translate the `write.parquet.compression-codec` table property into the
 * codec name handed to the Parquet writer.
 *
 * @param {string|undefined} value - raw table property (matched case-insensitively)
 * @returns {string|undefined} "SNAPPY", "UNCOMPRESSED", or undefined when the property is unset
 * @throws {Error} for any other codec name
 */
function resolveParquetCodec(value) {
  if (value === undefined) return undefined;
  const codecByName = new Map([
    ["snappy", "SNAPPY"],
    ["none", "UNCOMPRESSED"],
    ["uncompressed", "UNCOMPRESSED"],
  ]);
  const codec = codecByName.get(value.toLowerCase());
  if (codec === undefined) {
    throw new Error(`unsupported write.parquet.compression-codec: ${value}`);
  }
  return codec;
}
3287
+ function newSnapshotId(metadata) {
3288
+ const used = new Set((metadata?.snapshots ?? []).map((s) => BigInt(s["snapshot-id"])));
3289
+ const arr = new BigInt64Array(1);
3290
+ for (let attempt = 0; attempt < 32; attempt++) {
3291
+ globalThis.crypto.getRandomValues(arr);
3292
+ const masked = arr[0] & 9007199254740991n;
3293
+ const id = masked === 0n ? 1n : masked;
3294
+ if (!used.has(id)) return id;
3295
+ }
3296
+ throw new Error("newSnapshotId: failed to find an unused id after 32 attempts");
3297
+ }
3298
/**
 * Load the manifests (and their entries) of one snapshot.
 *
 * The snapshot is `snapshotId` when given, otherwise the table's
 * `current-snapshot-id`. Manifests come from the snapshot's manifest list file
 * or, failing that, from its inline `manifests` array.
 *
 * @param {object} options
 * @param {object} options.metadata - table metadata
 * @param {object} [options.resolver] - defaults to `urlResolver()`
 * @param {bigint|number|string} [options.snapshotId] - snapshot to read
 * @param {Function} [options.partitionFilter] - called with (partitions, specId, manifest); a manifest is dropped only when it returns exactly `false`
 * @returns {Promise<object[]>} result of `fetchManifests` for the kept manifests
 * @throws {Error} when no snapshot id is available, the snapshot is unknown, or it carries no manifest information
 */
async function icebergManifests({ metadata, resolver, snapshotId, partitionFilter }) {
  const activeResolver = resolver ?? urlResolver();
  const requestedId = snapshotId ?? metadata["current-snapshot-id"];
  if (requestedId == null || requestedId < 0) {
    throw new Error("No current snapshot id found in table metadata");
  }
  const wantedId = BigInt(requestedId);
  const snapshot = metadata.snapshots?.find((candidate) => BigInt(candidate["snapshot-id"]) === wantedId);
  if (!snapshot) throw new Error(`Snapshot ${requestedId} not found in metadata`);

  const manifestListUrl = snapshot["manifest-list"];
  let manifests;
  if (manifestListUrl) {
    manifests = await fetchAvroRecords(manifestListUrl, activeResolver);
  } else if (snapshot.manifests) {
    manifests = snapshot.manifests;
  } else {
    throw new Error("No manifest information found in snapshot");
  }

  if (partitionFilter) {
    manifests = manifests.filter((manifest) => {
      try {
        return partitionFilter(manifest.partitions, manifest.partition_spec_id ?? 0, manifest) !== false;
      } catch {
        // A failing filter must not hide data: keep the manifest.
        return true;
      }
    });
  }
  return await fetchManifests(manifests, activeResolver);
}
3322
/**
 * Read every manifest file in parallel and fill in the fields its entries
 * inherit from the manifest.
 *
 * Per entry: `partition_spec_id` is always taken from the manifest (0 when the
 * manifest has none); an undefined `sequence_number` becomes the manifest's
 * sequence number (0n when the manifest has none); an undefined
 * `file_sequence_number` is inherited only for entries with status 1 and is an
 * error for every other status.
 *
 * @param {object[]} manifests - manifest list records
 * @param {object} resolver - passed through to `fetchAvroRecords`
 * @returns {Promise<{url: string, entries: object[]}[]>} one result per manifest, in input order
 * @throws {Error} when an entry whose status is not 1 lacks a file sequence number
 */
async function fetchManifests(manifests, resolver) {
  const loadManifest = async (manifest) => {
    const { manifest_path: url, sequence_number: manifestSequence } = manifest;
    const entries = await fetchAvroRecords(url, resolver, Number(manifest.manifest_length));
    for (const entry of entries) {
      entry.partition_spec_id = manifest.partition_spec_id ?? 0;
      // After this default the sequence number can no longer be undefined,
      // so only the file sequence number still needs checking below.
      if (entry.sequence_number === undefined) entry.sequence_number = manifestSequence ?? 0n;
      if (entry.file_sequence_number === undefined) {
        if (entry.status !== 1) throw new Error("iceberg manifest entry missing sequence number");
        entry.file_sequence_number = manifestSequence;
      }
    }
    assignFirstRowIds(manifest, entries);
    return { url, entries };
  };
  return await Promise.all(manifests.map((manifest) => loadManifest(manifest)));
}
3341
/**
 * Give each data file without a `first_row_id` a consecutive row-id range
 * starting at the manifest's `first_row_id`.
 *
 * Nothing happens unless the manifest has content 0 and a `first_row_id`.
 * Files whose content is not 0, or that already have a `first_row_id`, are
 * left untouched and do not advance the counter.
 *
 * @param {object} manifest - manifest list record
 * @param {object[]} entries - manifest entries; their `data_file` objects are mutated in place
 */
function assignFirstRowIds(manifest, entries) {
  const inheritsRowIds = manifest.content === 0 && manifest.first_row_id != null;
  if (!inheritsRowIds) return;
  let cursor = BigInt(manifest.first_row_id);
  for (const { data_file: file } of entries) {
    const needsRowId = file.content === 0 && file.first_row_id == null;
    if (!needsRowId) continue;
    file.first_row_id = cursor;
    cursor += BigInt(file.record_count);
  }
}
3353
/**
 * Commit retry settings used when the table properties do not override them.
 * Durations are in milliseconds; `factor` is the per-attempt backoff multiplier.
 */
const DEFAULT_RETRY = Object.freeze({
  maxAttempts: 50,
  initialMs: 50,
  maxMs: 3000,
  factor: 2,
  // 30 minutes
  totalTimeoutMs: 30 * 60 * 1000,
});
3360
/**
 * Append records to a table: load it, write the data files and manifest once,
 * then commit through `commitWithRetry`.
 *
 * The files are written a single time; each commit attempt only re-stages the
 * snapshot against the context it is handed.
 *
 * @param {object} options
 * @param {object} options.catalog - catalog the commit goes through
 * @param {string} [options.namespace] - table namespace
 * @param {string} [options.table] - table name
 * @param {string} [options.tableUrl] - table location
 * @param {object} [options.resolver] - storage resolver
 * @param {object[]} options.records - rows to append
 * @param {number} [options.sortOrderId] - sort order forwarded to `prepareAppend`
 * @returns {Promise<object>} whatever `commitWithRetry` resolves to
 */
async function icebergAppend({ catalog, namespace, table, tableUrl, resolver, records, sortOrderId }) {
  const caller = "icebergAppend";
  const ctx = await loadTable({ catalog, namespace, table, tableUrl, resolver });
  const prepared = await prepareAppend({
    tableUrl: ctx.tableUrl,
    metadata: ctx.metadata,
    records,
    resolver: requireResolver(ctx.resolver, caller),
    sortOrderId,
  });
  const stage = (workingCtx) => stageSnapshotForAppend({
    tableUrl: workingCtx.tableUrl,
    metadata: workingCtx.metadata,
    prepared,
    resolver: requireResolver(workingCtx.resolver, caller),
  });
  const target = { namespace, table };
  return await commitWithRetry({ catalog, target, ctx, stage });
}
3390
/**
 * Create a table through a REST catalog or, for any other catalog type,
 * directly at `tableUrl` via `icebergCreate`.
 *
 * @param {object} options
 * @param {object} options.catalog - `type === "rest"` selects the REST path
 * @param {string} [options.namespace] - required for REST catalogs
 * @param {string} [options.table] - required for REST catalogs
 * @param {string} [options.tableUrl] - table location; required for non-REST catalogs
 * @param {object} [options.schema] - required for REST catalogs
 * @param {object} [options.partitionSpec]
 * @param {object} [options.sortOrder] - sent to REST catalogs as `writeOrder`
 * @param {object} [options.properties]
 * @param {number} [options.formatVersion] - only forwarded on the non-REST path
 * @param {boolean} [options.stageCreate] - only forwarded on the REST path
 * @returns {Promise<object>} REST response metadata, or the result of `icebergCreate`
 * @throws {Error} when an argument required by the chosen path is missing
 */
async function icebergCreateTable({ catalog, namespace, table, tableUrl, schema, partitionSpec, sortOrder, properties, formatVersion, stageCreate }) {
  if (catalog.type !== "rest") {
    if (!tableUrl) throw new Error("tableUrl is required for file catalogs");
    return await icebergCreate({
      tableUrl,
      resolver: catalog.resolver,
      schema,
      formatVersion,
      partitionSpec,
      sortOrder,
      properties,
      conditionalCommits: catalog.conditionalCommits,
    });
  }

  if (!namespace || !table) throw new Error("namespace and table are required for rest catalogs");
  if (!schema) throw new Error("schema is required for rest catalogs");
  const request = {
    namespace,
    table,
    schema,
    location: tableUrl,
    partitionSpec,
    writeOrder: sortOrder,
    stageCreate,
    properties,
  };
  const response = await restCatalogCreateTable(catalog, request);
  return response.metadata;
}
3418
/**
 * Drop a table. REST catalogs delegate to `restCatalogDropTable`; otherwise
 * the files listed under `metadata/` (and `data/` when `purgeRequested`) are
 * deleted one directory after the other.
 *
 * Deletion is best effort: a failing listing counts as an empty directory and
 * individual delete failures are ignored.
 *
 * @param {object} options
 * @param {object} options.catalog - `type === "rest"` selects the REST path; otherwise `resolver.deleter` is used
 * @param {string} [options.namespace] - required for REST catalogs
 * @param {string} [options.table] - required for REST catalogs
 * @param {string} [options.tableUrl] - required for non-REST catalogs
 * @param {Function} [options.lister] - resolves to the file names inside a directory URL
 * @param {boolean} [options.purgeRequested] - also remove data files
 * @returns {Promise<void>}
 * @throws {Error} when an argument required by the chosen path is missing
 */
async function icebergDropTable({ catalog, namespace, table, tableUrl, lister, purgeRequested }) {
  if (catalog.type === "rest") {
    if (!namespace || !table) throw new Error("namespace and table are required for rest catalogs");
    await restCatalogDropTable(catalog, { namespace, table, purgeRequested });
    return;
  }

  if (!tableUrl) throw new Error("tableUrl is required for file catalogs");
  if (!lister) throw new Error("lister is required to drop a file catalog table");
  const { deleter } = catalog.resolver;
  if (!deleter) throw new Error("resolver.deleter is required to drop a file catalog table");

  const directories = ["metadata"];
  if (purgeRequested) directories.push("data");
  for (const directory of directories) {
    const prefix = `${tableUrl}/${directory}`;
    const names = await lister(prefix).catch(() => []);
    const deletions = names.map((name) => deleter(`${prefix}/${name}`));
    await Promise.allSettled(deletions);
  }
}
3438
/**
 * Return `resolver` unchanged, or fail with a message naming the caller.
 *
 * @param {object|undefined} resolver - value to check
 * @param {string} caller - name used as the error message prefix
 * @returns {object} the same resolver
 * @throws {Error} when `resolver` is falsy
 */
function requireResolver(resolver, caller) {
  if (resolver) return resolver;
  throw new Error(`${caller}: resolver is required`);
}
3442
/**
 * Send one staged change to the catalog.
 *
 * REST catalogs receive the staged requirements and updates; any other catalog
 * type is committed through `fileCatalogCommit` using the context's resolver.
 *
 * @param {object} catalog - catalog descriptor (`type`, and `conditionalCommits` for file catalogs)
 * @param {{namespace?: string, table?: string}} target - table identity for REST catalogs
 * @param {object} ctx - table context (tableUrl, metadata, metadataFileName, version, resolver)
 * @param {object} staged - staged change (`requirements` and `updates` are read on the REST path)
 * @returns {Promise<object>} REST response metadata, or the result of `fileCatalogCommit`
 * @throws {Error} when a non-REST commit has no resolver
 */
async function commitStaged(catalog, target, ctx, staged) {
  if (catalog.type !== "rest") {
    const { resolver } = ctx;
    if (!resolver) throw new Error("resolver is required to commit to a file catalog");
    const conditionalCommits = catalog.type === "file" && catalog.conditionalCommits;
    return await fileCatalogCommit({
      tableUrl: ctx.tableUrl,
      metadata: ctx.metadata,
      metadataFileName: ctx.metadataFileName,
      currentVersion: ctx.version,
      staged,
      resolver,
      conditionalCommits,
    });
  }

  const request = {
    namespace: target.namespace,
    table: target.table,
    requirements: staged.requirements,
    updates: staged.updates,
  };
  const response = await restCatalogUpdateTable(catalog, request);
  return response.metadata;
}
3463
/**
 * Stage and commit a change, retrying on commit conflicts.
 *
 * Retries happen only for REST catalogs and for file catalogs with
 * `conditionalCommits === true`, and only for errors `isCommitConflict`
 * accepts. Between attempts the function backs off (jittered, capped by the
 * remaining time budget) and reloads the table context before re-staging.
 *
 * @param {object} options
 * @param {object} options.catalog - catalog descriptor
 * @param {{namespace?: string, table?: string}} options.target - table identity
 * @param {object} options.ctx - initial table context; its metadata supplies the retry policy
 * @param {(ctx: object) => Promise<object>} options.stage - builds the staged change for a context
 * @returns {Promise<object>} result of the first successful `commitStaged`
 * @throws {Error} the original error when it is not retryable; otherwise an
 *   error describing the exhausted attempts or time budget, with the last
 *   conflict attached as `cause`
 */
async function commitWithRetry({ catalog, target, ctx, stage }) {
  const retryEnabled = catalog.type === "rest" || (catalog.type === "file" && catalog.conditionalCommits === true);
  const policy = resolveRetryPolicy(ctx.metadata);
  const startedAt = Date.now();
  let workingCtx = ctx;
  for (let attempt = 1; attempt <= policy.maxAttempts; attempt++) {
    const staged = await stage(workingCtx);
    try {
      return await commitStaged(catalog, target, workingCtx, staged);
    } catch (err) {
      if (!retryEnabled || !isCommitConflict(err)) throw err;
      // Compare against the next attempt rather than using equality so that a
      // non-integer maxAttempts still ends here instead of falling out of the loop.
      if (attempt + 1 > policy.maxAttempts) {
        throw new Error(
          `${catalog.type} catalog commit failed after ${attempt} attempts due to concurrent commits`,
          { cause: err },
        );
      }
      const elapsed = Date.now() - startedAt;
      if (elapsed >= policy.totalTimeoutMs) {
        throw new Error(
          `${catalog.type} catalog commit retry budget exhausted after ${attempt} attempts and ${elapsed}ms (limit ${policy.totalTimeoutMs}ms)`,
          { cause: err },
        );
      }
      const remaining = policy.totalTimeoutMs - elapsed;
      await sleep(Math.min(jitteredBackoff(attempt, policy), remaining));
      workingCtx = await reloadCtx(catalog, target, workingCtx, err);
    }
  }
  throw new Error("unreachable");
}
3484
/**
 * Refresh the table context after a failed commit.
 *
 * REST catalogs reload only the metadata and keep the remaining context
 * fields; other catalogs reload metadata, metadata file name and version via
 * `loadLatestFileCatalogMetadata`.
 *
 * @param {object} catalog - catalog descriptor (`lister` is used on the non-REST path)
 * @param {{namespace?: string, table?: string}} target - table identity for REST catalogs
 * @param {object} workingCtx - context used by the failed attempt
 * @param {Error} lastErr - rethrown when the context cannot be reloaded
 * @returns {Promise<object>} fresh context (metadata, metadataFileName, version, tableUrl, resolver)
 * @throws {Error} `lastErr` when the REST target is incomplete or no resolver is available
 */
async function reloadCtx(catalog, target, workingCtx, lastErr) {
  const { tableUrl, resolver } = workingCtx;

  if (catalog.type === "rest") {
    const { namespace, table } = target;
    if (!namespace || !table) throw lastErr;
    const loaded = await restCatalogLoadTable(catalog, { namespace, table });
    return {
      metadata: loaded.metadata,
      metadataFileName: workingCtx.metadataFileName,
      version: workingCtx.version,
      tableUrl,
      resolver,
    };
  }

  if (!resolver) throw lastErr;
  const latest = await loadLatestFileCatalogMetadata({ tableUrl, resolver, lister: catalog.lister });
  return {
    metadata: latest.metadata,
    metadataFileName: latest.metadataFileName,
    version: latest.version,
    tableUrl,
    resolver,
  };
}
3513
/**
 * Build the commit retry policy from the table's `commit.retry.*` properties,
 * falling back to `DEFAULT_RETRY` for anything unset or unparsable.
 *
 * `commit.retry.num-retries` counts retries, so the attempt limit is that
 * value plus one. The backoff factor is not configurable.
 *
 * @param {object} metadata - table metadata; `properties` may be absent
 * @returns {{maxAttempts: number, initialMs: number, maxMs: number, factor: number, totalTimeoutMs: number}}
 */
function resolveRetryPolicy(metadata) {
  const props = metadata.properties ?? {};
  const numRetries = parseTableProp(props["commit.retry.num-retries"]);
  // Attempts are counted in whole numbers, so a fractional retry count is
  // rounded down to keep the attempt limit an integer.
  const maxAttempts = numRetries === undefined ? DEFAULT_RETRY.maxAttempts : Math.floor(numRetries) + 1;
  const initialMs = parseTableProp(props["commit.retry.min-wait-ms"]) ?? DEFAULT_RETRY.initialMs;
  const maxMs = parseTableProp(props["commit.retry.max-wait-ms"]) ?? DEFAULT_RETRY.maxMs;
  const totalTimeoutMs = parseTableProp(props["commit.retry.total-timeout-ms"]) ?? DEFAULT_RETRY.totalTimeoutMs;
  return {
    maxAttempts,
    initialMs,
    maxMs,
    factor: DEFAULT_RETRY.factor,
    totalTimeoutMs,
  };
}
3528
/**
 * Parse a table property as a non-negative finite number.
 *
 * @param {*} value - raw property value
 * @returns {number|undefined} the parsed number, or undefined when the value is
 *   missing, empty, not finite, or negative
 */
function parseTableProp(value) {
  const isMissing = value === undefined || value === null || value === "";
  if (isMissing) return undefined;
  const parsed = Number(value);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : undefined;
}
3534
/**
 * Pick a random delay for the given attempt: a whole number of milliseconds
 * in `[0, cap)`, where the cap grows by `factor` per attempt and never
 * exceeds `maxMs`.
 *
 * @param {number} attempt - 1-based attempt number
 * @param {{initialMs: number, maxMs: number, factor: number}} policy - retry policy
 * @returns {number} delay in milliseconds; always 0 when `initialMs` or `maxMs` is 0
 */
function jitteredBackoff(attempt, policy) {
  const { initialMs, maxMs, factor } = policy;
  if (initialMs === 0 || maxMs === 0) return 0;
  const uncapped = initialMs * factor ** (attempt - 1);
  const cap = Math.min(maxMs, uncapped);
  return Math.floor(Math.random() * cap);
}
3539
/**
 * Wait for `ms` milliseconds.
 *
 * @param {number} ms - delay; zero or negative values skip the timer
 * @returns {Promise<void>} resolves once the delay has passed
 */
function sleep(ms) {
  if (ms <= 0) {
    return Promise.resolve();
  }
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
3543
/**
 * Tell whether an error is a commit conflict, i.e. an object whose `status`
 * is 409 or 412.
 *
 * @param {*} err - caught value
 * @returns {boolean}
 */
function isCommitConflict(err) {
  const isObject = Boolean(err) && typeof err === "object";
  if (!isObject) return false;
  return [412, 409].includes(err.status);
}
3548
/** Shared UTF-8 encoder used by the hashing and HMAC helpers for string input. */
const enc = new TextEncoder();
3549
/**
 * Create a resolver that reads, writes and deletes S3 objects through `fetch`
 * requests signed with AWS Signature Version 4.
 *
 * `s3://` and `s3a://` URLs are rewritten to HTTP(S) URLs: against the custom
 * `endpoint` when given (path-style or virtual-hosted), otherwise against
 * `https://<bucket>.s3.amazonaws.com`. Any other URL is used as is.
 *
 * @param {object} options
 * @param {string} options.accessKeyId
 * @param {string} options.secretAccessKey
 * @param {string} [options.sessionToken] - sent as `x-amz-security-token` when set
 * @param {string} options.region - region used in the credential scope
 * @param {string} [options.endpoint] - custom S3-compatible endpoint
 * @param {boolean} [options.pathStyle=false] - put the bucket in the path instead of the host (custom endpoint only)
 * @returns {{reader: Function, writer: Function, deleter: Function}}
 */
function s3SignedResolver({ accessKeyId, secretAccessKey, sessionToken, region, endpoint, pathStyle = false }) {
  // Drop one trailing slash (if any) and append one so the pathname ends with "/".
  const ep = endpoint ? new URL(endpoint.replace(/\/$/, "") + "/") : undefined;

  /** Map an s3:// or s3a:// URL to the HTTP(S) URL to request. */
  function toHttps(url) {
    if (!url.startsWith("s3://") && !url.startsWith("s3a://")) return url;
    const rest = url.slice(url.indexOf("://") + 3);
    const slash = rest.indexOf("/");
    if (slash === -1) throw new Error(`invalid S3 URL: ${url}`);
    const bucket = rest.slice(0, slash);
    const key = rest.slice(slash + 1);
    if (ep) {
      if (pathStyle) return `${ep.origin}${ep.pathname}${bucket}/${key}`;
      return `${ep.protocol}//${bucket}.${ep.host}/${key}`;
    }
    return `https://${bucket}.s3.amazonaws.com/${key}`;
  }

  /**
   * Compute the signed headers for one request.
   * Returns every signed header except `host`, plus `Authorization`.
   */
  async function signRequest(method, url, body, extra = {}) {
    const u = new URL(url);
    // ISO timestamp without separators or milliseconds, e.g. 20240101T000000Z.
    const xAmzDate = new Date().toISOString().replace(/[-:]|\.\d{3}/g, "");
    const dStamp = xAmzDate.slice(0, 8);
    const payloadHash = body !== undefined ? await sha256hex(body) : await sha256hex("");
    const lc = {};
    for (const [k, v] of Object.entries(extra)) lc[k.toLowerCase()] = String(v);
    lc["host"] = u.host;
    lc["x-amz-date"] = xAmzDate;
    lc["x-amz-content-sha256"] = payloadHash;
    if (sessionToken) lc["x-amz-security-token"] = sessionToken;
    const sortedKeys = Object.keys(lc).sort();
    const canonicalHeaders = sortedKeys.map((k) => `${k}:${lc[k].trim().replace(/\s+/g, " ")}\n`).join("");
    const signedHeaders = sortedKeys.join(";");
    const canonicalPath = u.pathname
      .split("/")
      .map((seg) => encodeRfc3986(decodeURIComponent(seg)))
      .join("/");
    const canonicalQuery = [...u.searchParams.entries()]
      .sort((a, b) => {
        if (a[0] !== b[0]) return a[0] < b[0] ? -1 : 1;
        return a[1] < b[1] ? -1 : a[1] > b[1] ? 1 : 0;
      })
      .map(([k, v]) => `${encodeRfc3986(k)}=${encodeRfc3986(v)}`)
      .join("&");
    const canonicalRequest = [
      method,
      canonicalPath,
      canonicalQuery,
      canonicalHeaders,
      signedHeaders,
      payloadHash,
    ].join("\n");
    const credentialScope = `${dStamp}/${region}/s3/aws4_request`;
    const stringToSign = [
      "AWS4-HMAC-SHA256",
      xAmzDate,
      credentialScope,
      await sha256hex(canonicalRequest),
    ].join("\n");
    const signingKey = await deriveSigningKey(secretAccessKey, dStamp, region, "s3");
    const signature = bytesToHex(await hmac(signingKey, stringToSign));
    const out = {};
    for (const [k, v] of Object.entries(lc)) {
      // `host` is part of the signature but is not returned as a header.
      if (k === "host") continue;
      out[k] = v;
    }
    out["Authorization"] = `AWS4-HMAC-SHA256 Credential=${accessKeyId}/${credentialScope}, SignedHeaders=${signedHeaders}, Signature=${signature}`;
    return out;
  }

  return {
    /**
     * Open an object for ranged reads. When `byteLength` is not supplied the
     * size is taken from a signed HEAD request.
     */
    async reader(path, byteLength) {
      const url = toHttps(path);
      let len = byteLength;
      if (len === undefined) {
        const headers = await signRequest("HEAD", url);
        const res = await fetch(url, {
          method: "HEAD",
          headers,
        });
        if (!res.ok) throw new Error(`HEAD ${path}: ${res.status} ${res.statusText}`);
        const contentLength = res.headers.get("content-length");
        // Number(null) and Number("") are 0, so an absent header has to be
        // detected before converting; otherwise the file would look empty.
        len = contentLength === null || contentLength.trim() === "" ? Number.NaN : Number(contentLength);
        if (!Number.isFinite(len)) throw new Error(`HEAD ${path}: missing Content-Length`);
      }
      const fileLength = len;
      return {
        byteLength: fileLength,
        async slice(start, end) {
          // HTTP ranges are inclusive, slice ends are exclusive.
          const range = `bytes=${start}-${(end ?? fileLength) - 1}`;
          const headers = await signRequest("GET", url, undefined, { range });
          const res = await fetch(url, {
            method: "GET",
            headers,
          });
          if (!res.ok) throw new Error(`GET ${path} ${range}: ${res.status} ${res.statusText}`);
          return await res.arrayBuffer();
        },
      };
    },
    /**
     * Return a ByteWriter whose `finish()` uploads the buffered bytes with a
     * signed PUT. Upload failures carry the HTTP status on `err.status`.
     */
    writer(path, options) {
      const w = new ByteWriter();
      w.finish = async function() {
        const url = toHttps(path);
        const body = w.getBytes().slice();
        const extra = {};
        if (options?.ifNoneMatch) extra["if-none-match"] = options.ifNoneMatch;
        const headers = await signRequest("PUT", url, body, extra);
        const res = await fetch(url, {
          method: "PUT",
          headers,
          body,
        });
        if (!res.ok) {
          const err = new Error(`PUT ${path}: ${res.status} ${res.statusText}`);
          err.status = res.status;
          throw err;
        }
      };
      return w;
    },
    /** Delete an object with a signed DELETE; a 404 response is not an error. */
    async deleter(path) {
      const url = toHttps(path);
      const headers = await signRequest("DELETE", url);
      const res = await fetch(url, {
        method: "DELETE",
        headers,
      });
      if (!res.ok && res.status !== 404) throw new Error(`DELETE ${path}: ${res.status} ${res.statusText}`);
    },
  };
}
3666
+ async function sha256hex(data) {
3667
+ const bytes = typeof data === "string" ? enc.encode(data) : data;
3668
+ const hash = await crypto.subtle.digest("SHA-256", bytes);
3669
+ return bytesToHex(new Uint8Array(hash));
3670
+ }
3671
+ async function hmac(key, data) {
3672
+ const keyBytes = typeof key === "string" ? enc.encode(key) : key;
3673
+ const dataBytes = typeof data === "string" ? enc.encode(data) : data;
3674
+ const cryptoKey = await crypto.subtle.importKey("raw", keyBytes, {
3675
+ name: "HMAC",
3676
+ hash: "SHA-256"
3677
+ }, false, ["sign"]);
3678
+ const sig = await crypto.subtle.sign("HMAC", cryptoKey, dataBytes);
3679
+ return new Uint8Array(sig);
3680
+ }
3681
/**
 * Derive a Signature Version 4 signing key by chaining HMACs over the date
 * stamp, region, service and the literal "aws4_request", starting from
 * `"AWS4" + secret`.
 *
 * @param {string} secret - secret access key
 * @param {string} dateStamp - date part of the credential scope
 * @param {string} region
 * @param {string} service
 * @returns {Promise<Uint8Array>} signing key bytes
 */
async function deriveSigningKey(secret, dateStamp, region, service) {
  let key = `AWS4${secret}`;
  for (const part of [dateStamp, region, service, "aws4_request"]) {
    key = await hmac(key, part);
  }
  return key;
}
3684
/**
 * Percent-encode a string like `encodeURIComponent`, and additionally encode
 * the characters ! * ' ( ) which `encodeURIComponent` leaves untouched.
 *
 * @param {string} str - text to encode
 * @returns {string} encoded text with uppercase hex escapes
 */
function encodeRfc3986(str) {
  const toPercentHex = (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`;
  const encoded = encodeURIComponent(str);
  return encoded.replace(/[!*'()]/g, (ch) => toPercentHex(ch));
}
3687
/**
 * Render bytes as a lowercase hex string, two digits per byte.
 *
 * @param {Iterable<number>} bytes - byte values
 * @returns {string} hex text
 */
function bytesToHex(bytes) {
  return [...bytes].map((byte) => byte.toString(16).padStart(2, "0")).join("");
}
3692
// Builds a "yield to the event loop" function from the best available
// primitive: setImmediate, then MessageChannel, then setTimeout(0).
// NOTE(review): the returned function is not bound to any name here, so only
// the side effect of the selection (creating a MessageChannel when
// setImmediate is unavailable) remains — confirm whether this is still needed.
(() => {
  if (typeof setImmediate === "function") {
    return () => new Promise((resolve) => {
      setImmediate(resolve);
    });
  }
  if (typeof MessageChannel === "undefined") {
    return () => new Promise((resolve) => {
      setTimeout(resolve, 0);
    });
  }
  const channel = new MessageChannel();
  const waiting = [];
  // Each posted message releases the oldest waiter, if there is one.
  channel.port1.onmessage = () => {
    const next = waiting.shift();
    if (next) next();
  };
  return () => new Promise((resolve) => {
    waiting.push(resolve);
    channel.port2.postMessage(0);
  });
})();
3708
+ export { cachingResolver, icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver };