@gscdump/engine 0.28.1 → 0.28.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/iceberg/index.mjs +12 -1
- package/package.json +3 -3
package/dist/iceberg/index.mjs
CHANGED
|
@@ -343,6 +343,16 @@ function coerceJsonSafe(value) {
|
|
|
343
343
|
if (typeof value === "bigint") return Number(value);
|
|
344
344
|
return value;
|
|
345
345
|
}
|
|
346
|
+
function dedupeByIdentity(table, records) {
|
|
347
|
+
if (records.length < 2) return records;
|
|
348
|
+
const key = ICEBERG_SCHEMAS[table].identityColumns;
|
|
349
|
+
const seen = /* @__PURE__ */ new Map();
|
|
350
|
+
for (const rec of records) {
|
|
351
|
+
const k = key.map((col) => `${rec[col] ?? ""}`).join("\0");
|
|
352
|
+
seen.set(k, rec);
|
|
353
|
+
}
|
|
354
|
+
return seen.size === records.length ? records : [...seen.values()];
|
|
355
|
+
}
|
|
346
356
|
function toRecords(slice, rows) {
|
|
347
357
|
const siteId = slice.ctx.siteId ?? "";
|
|
348
358
|
return rows.map((row) => {
|
|
@@ -395,12 +405,13 @@ function createIcebergAppendSink(options) {
|
|
|
395
405
|
}
|
|
396
406
|
for (const [table, records] of buffers) {
|
|
397
407
|
if (records.length === 0) continue;
|
|
408
|
+
const deduped = dedupeByIdentity(table, records);
|
|
398
409
|
await icebergAppendRetrying({
|
|
399
410
|
catalog: conn.catalog,
|
|
400
411
|
namespace: conn.namespace,
|
|
401
412
|
table,
|
|
402
413
|
resolver: conn.resolver,
|
|
403
|
-
records
|
|
414
|
+
records: deduped
|
|
404
415
|
}, options.commitRetry).then(() => {
|
|
405
416
|
flushed.push(table);
|
|
406
417
|
}, (cause) => {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.28.1",
|
|
4
|
+
"version": "0.28.3",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -191,8 +191,8 @@
|
|
|
191
191
|
"hyparquet": "^1.26.0",
|
|
192
192
|
"hyparquet-writer": "^0.15.6",
|
|
193
193
|
"proper-lockfile": "^4.1.2",
|
|
194
|
-
"gscdump": "0.28.1",
|
|
195
|
-
"@gscdump/contracts": "0.28.1"
|
|
194
|
+
"@gscdump/contracts": "0.28.3",
|
|
195
|
+
"gscdump": "0.28.3"
|
|
196
196
|
},
|
|
197
197
|
"devDependencies": {
|
|
198
198
|
"@duckdb/duckdb-wasm": "^1.32.0",
|