@gscdump/engine 0.28.1 → 0.28.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -343,6 +343,16 @@ function coerceJsonSafe(value) {
343
343
  if (typeof value === "bigint") return Number(value);
344
344
  return value;
345
345
  }
346
/**
 * Collapses records that share the same identity-column values.
 *
 * The identity columns are read from `ICEBERG_SCHEMAS[table].identityColumns`.
 * When several records have equal identity values, the last one wins but it
 * is emitted at the position where that identity first appeared.
 *
 * @param {string} table - Table name used as the key into `ICEBERG_SCHEMAS`.
 * @param {Array<Object>} records - Records to de-duplicate; never mutated.
 * @returns {Array<Object>} `records` itself when nothing was dropped,
 *   otherwise a new array holding one record per distinct identity.
 * @throws {TypeError} If `ICEBERG_SCHEMAS` has no entry for `table`.
 */
function dedupeByIdentity(table, records) {
  if (records.length < 2) return records;
  const identityColumns = ICEBERG_SCHEMAS[table].identityColumns;
  // With no identity columns every record would map to the same empty key and
  // all but the last record would be silently dropped, so there is nothing to
  // de-duplicate on: hand the batch back untouched.
  if (!Array.isArray(identityColumns) || identityColumns.length === 0) return records;
  const latestByKey = new Map();
  for (const record of records) {
    const compositeKey = identityColumns
      .map((col) => {
        // null/undefined are keyed like the empty string.
        const text = `${record[col] ?? ""}`;
        // Length-prefix each part so a value that happens to contain the
        // separator cannot make two different tuples produce the same key.
        return `${text.length}:${text}`;
      })
      .join("\0");
    // Map#set on an existing key keeps the original insertion slot while
    // replacing the value, which yields "last write wins, first position".
    latestByKey.set(compositeKey, record);
  }
  return latestByKey.size === records.length ? records : [...latestByKey.values()];
}
346
356
  function toRecords(slice, rows) {
347
357
  const siteId = slice.ctx.siteId ?? "";
348
358
  return rows.map((row) => {
@@ -395,12 +405,13 @@ function createIcebergAppendSink(options) {
395
405
  }
396
406
  for (const [table, records] of buffers) {
397
407
  if (records.length === 0) continue;
408
+ const deduped = dedupeByIdentity(table, records);
398
409
  await icebergAppendRetrying({
399
410
  catalog: conn.catalog,
400
411
  namespace: conn.namespace,
401
412
  table,
402
413
  resolver: conn.resolver,
403
- records
414
+ records: deduped
404
415
  }, options.commitRetry).then(() => {
405
416
  flushed.push(table);
406
417
  }, (cause) => {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.28.1",
4
+ "version": "0.28.2",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -191,8 +191,8 @@
191
191
  "hyparquet": "^1.26.0",
192
192
  "hyparquet-writer": "^0.15.6",
193
193
  "proper-lockfile": "^4.1.2",
194
- "gscdump": "0.28.1",
195
- "@gscdump/contracts": "0.28.1"
194
+ "@gscdump/contracts": "0.28.2",
195
+ "gscdump": "0.28.2"
196
196
  },
197
197
  "devDependencies": {
198
198
  "@duckdb/duckdb-wasm": "^1.32.0",