duckrun 0.3.17.dev7__tar.gz → 0.3.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {duckrun-0.3.17.dev7/duckrun.egg-info → duckrun-0.3.19}/PKG-INFO +64 -9
  2. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/README.md +62 -7
  3. duckrun-0.3.19/dbt/adapters/duckrun/__version__.py +1 -0
  4. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/adapters/duckrun/delta_dml.py +47 -4
  5. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/adapters/duckrun/impl.py +4 -0
  6. duckrun-0.3.19/duckrun/_runtime.py +51 -0
  7. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/duckrun/session.py +103 -13
  8. {duckrun-0.3.17.dev7 → duckrun-0.3.19/duckrun.egg-info}/PKG-INFO +64 -9
  9. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/duckrun.egg-info/SOURCES.txt +1 -0
  10. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/duckrun.egg-info/requires.txt +1 -1
  11. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/pyproject.toml +13 -14
  12. duckrun-0.3.17.dev7/dbt/adapters/duckrun/__version__.py +0 -1
  13. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/LICENSE +0 -0
  14. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/MANIFEST.in +0 -0
  15. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/adapters/duckrun/__init__.py +0 -0
  16. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/adapters/duckrun/credentials.py +0 -0
  17. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/adapters/duckrun/delta_plugin.py +0 -0
  18. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/adapters/duckrun/engine.py +0 -0
  19. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/adapters/duckrun/environment.py +0 -0
  20. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/adapters/duckrun/remote.py +0 -0
  21. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/adapters/duckrun/secret.py +0 -0
  22. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/include/duckrun/__init__.py +0 -0
  23. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/include/duckrun/dbt_project.yml +0 -0
  24. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/include/duckrun/macros/catalog.sql +0 -0
  25. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/include/duckrun/macros/materializations/_delta_core.sql +0 -0
  26. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/include/duckrun/macros/materializations/delta.sql +0 -0
  27. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/include/duckrun/macros/materializations/incremental.sql +0 -0
  28. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/include/duckrun/macros/materializations/snapshot.sql +0 -0
  29. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/dbt/include/duckrun/macros/materializations/table.sql +0 -0
  30. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/duckrun/__init__.py +0 -0
  31. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/duckrun/auth.py +0 -0
  32. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/duckrun/delta_table.py +0 -0
  33. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/duckrun.egg-info/dependency_links.txt +0 -0
  34. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/duckrun.egg-info/top_level.txt +0 -0
  35. {duckrun-0.3.17.dev7 → duckrun-0.3.19}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.3.17.dev7
3
+ Version: 0.3.19
4
4
  Summary: A dbt adapter that runs SQL in DuckDB and materializes to Delta Lake (delta_rs).
5
5
  Author: mim
6
6
  License: MIT
@@ -12,7 +12,7 @@ Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
13
  Requires-Dist: dbt-duckdb>=1.8
14
14
  Requires-Dist: dbt-core<2.0,>=1.8
15
- Requires-Dist: duckdb==1.5.4
15
+ Requires-Dist: duckdb<1.6.0,>=1.5.4
16
16
  Requires-Dist: deltalake<1.5.1,>=1.5.0
17
17
  Requires-Dist: requests
18
18
  Provides-Extra: local
@@ -35,9 +35,13 @@ Dynamic: license-file
35
35
  > not affiliated with, endorsed by, or supported by any employer or vendor. No warranty —
36
36
  > use it at your own risk.
37
37
 
38
- **duckrun** is a [dbt](https://www.getdbt.com/) adapter that runs your model SQL in
39
- **DuckDB** and writes the results to **Delta Lake** using
40
- [`delta_rs`](https://delta-io.github.io/delta-rs/) (the `deltalake` Python package).
38
+ **duckrun** runs SQL in [DuckDB](https://duckdb.org/) and writes
39
+ [**Delta Lake**](https://delta-io.github.io/delta-rs/) via delta_rs. It gives you:
40
+
41
+ - a [**dbt**](https://www.getdbt.com/) adapter that materializes models as Delta tables;
42
+ - a **`connect()`** helper to write Delta straight from SQL in a notebook;
43
+ - **full snapshot isolation** from read to write — concurrent writers fail loud, never interleave.
44
+
41
45
  duckrun itself is just glue — it owns none of the heavy lifting. The real work is done
42
46
  by **DuckDB** (executes the SQL), **delta-rs** (writes the Delta table), **Arrow** (the
43
47
  zero-copy (kind of) bridge that hands query results from DuckDB to delta-rs), and **dbt** (orchestrates
@@ -67,6 +71,21 @@ pip install duckrun
67
71
 
68
72
  That single install pulls in `dbt-duckdb` (and therefore `duckdb`) plus `deltalake`.
69
73
 
74
+ ### In a Microsoft Fabric Python notebook
75
+
76
+ duckrun needs `duckdb` ≥ 1.5.4 — the release where `delta_scan` gained its `version => N`
77
+ parameter, which duckrun uses for snapshot-pinned reads. Fabric notebooks ship a **stable**
78
+ `duckdb` release, which trails the newest one, so the `duckdb` already loaded in the kernel may
79
+ predate 1.5.4. Upgrade, then restart the Python kernel so the new version loads.
80
+
81
+ ```python
82
+ !pip install duckrun --upgrade
83
+ notebookutils.session.restartPython()
84
+ ```
85
+
86
+ If you skip the restart, duckrun fails loud at `connect()` (and on `dbt run`) and tells you to
87
+ restart — it won't quietly run on the older `duckdb`/`deltalake` still bound in the kernel.
88
+
70
89
  ## Configure your profile
71
90
 
72
91
  ```yaml
@@ -79,12 +98,22 @@ my_project:
79
98
  # No `threads:` needed — duckrun always runs single-threaded.
80
99
  # DuckDB runs in-memory by default — the Delta tables are the only state.
81
100
  # Default Delta location for models that don't set config(location=...).
82
- root_path: './warehouse' # local path, or s3://..., gs://..., abfss://...
101
+ # OneLake — address by GUID, not friendly names (see "OneLake: use GUID paths" below):
102
+ root_path: "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables"
103
+ # Or any other store: './warehouse' (local), 's3://...', 'gs://...'.
83
104
  # storage_options: {} # passed through to deltalake for remote stores
84
105
  ```
85
106
 
86
107
  Persisted models are written to `<root_path>/<schema>/<model>` (e.g.
87
- `./warehouse/dbo/orders`), or to an explicit `config(location=...)`.
108
+ `.../Tables/dbo/orders`), or to an explicit `config(location=...)`.
109
+
110
+ ### OneLake: use GUID paths for now
111
+
112
+ Address OneLake tables by **workspace GUID + lakehouse GUID**, not friendly names —
113
+ `abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/...`. This
114
+ sidesteps an upstream `duckdb-delta` read bug ("No files in log segment") that is **already fixed
115
+ upstream but still rolling out to production OneLake**. Friendly-name paths will work again once
116
+ the fix finishes deploying.
88
117
 
89
118
  ### Fabric Lakehouse without a schema
90
119
 
@@ -95,7 +124,7 @@ let the schema fill that slot:
95
124
 
96
125
  ```yaml
97
126
  schema: Tables
98
- root_path: "abfss://<ws>@onelake.dfs.fabric.microsoft.com/<lh>.Lakehouse"
127
+ root_path: "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>"
99
128
  ```
100
129
 
101
130
  Since models are written to `<root_path>/<schema>/<model>`, this lands them at
@@ -309,7 +338,7 @@ unchanged since the call, else raises `CommitFailedError`.
309
338
 
310
339
  ```python
311
340
  import duckrun
312
- conn = duckrun.connect("abfss://ws@onelake.dfs.fabric.microsoft.com/lh.Lakehouse/Tables/dbo")
341
+ conn = duckrun.connect("abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/dbo")
313
342
  conn.sql("select * from orders").write.mode("overwrite").saveAsTable("orders_copy")
314
343
  conn.table("orders_copy").show()
315
344
 
@@ -355,6 +384,32 @@ None of this is required to use duckrun — `pip install duckrun` is unaffected.
355
384
  runs the official suite (above); `tests/correctness/` proves the concurrency guarantees. The cards
356
385
  in those docs are rendered live by CI, so they always reflect the latest `main`.
357
386
 
387
+ ## Limitations
388
+
389
+ These are core design trade-offs, not bugs — they're inherent to gluing DuckDB to delta_rs and
390
+ won't be "fixed" away:
391
+
392
+ - **A single dbt run is single-threaded — but concurrency works fine.** This is purely a dbt-adapter
393
+ implementation detail: *within one dbt process* models run with `threads: 1`, because the
394
+ in-process delta_rs write path isn't thread-safe (parallel writes to a table in the *same* process
395
+ collide). It is **not** a limit on concurrent writers. Multiple independent writers — separate dbt
396
+ runs, notebooks, jobs, whatever — writing the same tables at the same time is fully supported and
397
+ safe: every write uses optimistic concurrency (snapshot-pinned MERGE, `safeappend` compare-and-swap,
398
+ fail-loud on a conflicting commit). So you can absolutely run many writers in parallel; you just
399
+ can't multi-thread the models *inside a single* dbt invocation.
400
+ - **Two engines share one machine's memory.** DuckDB executes the SQL and delta_rs materializes the
401
+ Delta table — two separate memory systems in the same process, each with its own pool. Under heavy
402
+ memory pressure (large merges especially) the budget has to be split between them, and getting that
403
+ split right is fragile: delta_rs's merge spill-to-disk is itself flaky, and coordinating two
404
+ systems that don't know about each other's allocations is the hard, unavoidable part of this design.
405
+ - **`DROP TABLE` is a soft tombstone, not a physical delete.** delta_rs has no `DROP`, and removing the
406
+ Delta files directly would be a filesystem hack that fails on object stores — so `conn.sql("drop
407
+ table x")` overwrites the table with a one-column tombstone marker and unregisters it. The table
408
+ vanishes from `conn.catalog` and discovery, and a later `create table x as …` revives the path with
409
+ real data, but the **files are not reclaimed** (a human purges them). One consequence: reading the
410
+ path *directly* (`conn.read.delta("…/x")`) bypasses discovery and returns the one-row tombstone
411
+ marker rather than erroring — address dropped tables by name, not by path.
412
+
358
413
  ## License
359
414
 
360
415
  MIT
@@ -6,9 +6,13 @@
6
6
  > not affiliated with, endorsed by, or supported by any employer or vendor. No warranty —
7
7
  > use it at your own risk.
8
8
 
9
- **duckrun** is a [dbt](https://www.getdbt.com/) adapter that runs your model SQL in
10
- **DuckDB** and writes the results to **Delta Lake** using
11
- [`delta_rs`](https://delta-io.github.io/delta-rs/) (the `deltalake` Python package).
9
+ **duckrun** runs SQL in [DuckDB](https://duckdb.org/) and writes
10
+ [**Delta Lake**](https://delta-io.github.io/delta-rs/) via delta_rs. It gives you:
11
+
12
+ - a [**dbt**](https://www.getdbt.com/) adapter that materializes models as Delta tables;
13
+ - a **`connect()`** helper to write Delta straight from SQL in a notebook;
14
+ - **full snapshot isolation** from read to write — concurrent writers fail loud, never interleave.
15
+
12
16
  duckrun itself is just glue — it owns none of the heavy lifting. The real work is done
13
17
  by **DuckDB** (executes the SQL), **delta-rs** (writes the Delta table), **Arrow** (the
14
18
  zero-copy (kind of) bridge that hands query results from DuckDB to delta-rs), and **dbt** (orchestrates
@@ -38,6 +42,21 @@ pip install duckrun
38
42
 
39
43
  That single install pulls in `dbt-duckdb` (and therefore `duckdb`) plus `deltalake`.
40
44
 
45
+ ### In a Microsoft Fabric Python notebook
46
+
47
+ duckrun needs `duckdb` ≥ 1.5.4 — the release where `delta_scan` gained its `version => N`
48
+ parameter, which duckrun uses for snapshot-pinned reads. Fabric notebooks ship a **stable**
49
+ `duckdb` release, which trails the newest one, so the `duckdb` already loaded in the kernel may
50
+ predate 1.5.4. Upgrade, then restart the Python kernel so the new version loads.
51
+
52
+ ```python
53
+ !pip install duckrun --upgrade
54
+ notebookutils.session.restartPython()
55
+ ```
56
+
57
+ If you skip the restart, duckrun fails loud at `connect()` (and on `dbt run`) and tells you to
58
+ restart — it won't quietly run on the older `duckdb`/`deltalake` still bound in the kernel.
59
+
41
60
  ## Configure your profile
42
61
 
43
62
  ```yaml
@@ -50,12 +69,22 @@ my_project:
50
69
  # No `threads:` needed — duckrun always runs single-threaded.
51
70
  # DuckDB runs in-memory by default — the Delta tables are the only state.
52
71
  # Default Delta location for models that don't set config(location=...).
53
- root_path: './warehouse' # local path, or s3://..., gs://..., abfss://...
72
+ # OneLake — address by GUID, not friendly names (see "OneLake: use GUID paths" below):
73
+ root_path: "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables"
74
+ # Or any other store: './warehouse' (local), 's3://...', 'gs://...'.
54
75
  # storage_options: {} # passed through to deltalake for remote stores
55
76
  ```
56
77
 
57
78
  Persisted models are written to `<root_path>/<schema>/<model>` (e.g.
58
- `./warehouse/dbo/orders`), or to an explicit `config(location=...)`.
79
+ `.../Tables/dbo/orders`), or to an explicit `config(location=...)`.
80
+
81
+ ### OneLake: use GUID paths for now
82
+
83
+ Address OneLake tables by **workspace GUID + lakehouse GUID**, not friendly names —
84
+ `abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/...`. This
85
+ sidesteps an upstream `duckdb-delta` read bug ("No files in log segment") that is **already fixed
86
+ upstream but still rolling out to production OneLake**. Friendly-name paths will work again once
87
+ the fix finishes deploying.
59
88
 
60
89
  ### Fabric Lakehouse without a schema
61
90
 
@@ -66,7 +95,7 @@ let the schema fill that slot:
66
95
 
67
96
  ```yaml
68
97
  schema: Tables
69
- root_path: "abfss://<ws>@onelake.dfs.fabric.microsoft.com/<lh>.Lakehouse"
98
+ root_path: "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>"
70
99
  ```
71
100
 
72
101
  Since models are written to `<root_path>/<schema>/<model>`, this lands them at
@@ -280,7 +309,7 @@ unchanged since the call, else raises `CommitFailedError`.
280
309
 
281
310
  ```python
282
311
  import duckrun
283
- conn = duckrun.connect("abfss://ws@onelake.dfs.fabric.microsoft.com/lh.Lakehouse/Tables/dbo")
312
+ conn = duckrun.connect("abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/dbo")
284
313
  conn.sql("select * from orders").write.mode("overwrite").saveAsTable("orders_copy")
285
314
  conn.table("orders_copy").show()
286
315
 
@@ -326,6 +355,32 @@ None of this is required to use duckrun — `pip install duckrun` is unaffected.
326
355
  runs the official suite (above); `tests/correctness/` proves the concurrency guarantees. The cards
327
356
  in those docs are rendered live by CI, so they always reflect the latest `main`.
328
357
 
358
+ ## Limitations
359
+
360
+ These are core design trade-offs, not bugs — they're inherent to gluing DuckDB to delta_rs and
361
+ won't be "fixed" away:
362
+
363
+ - **A single dbt run is single-threaded — but concurrency works fine.** This is purely a dbt-adapter
364
+ implementation detail: *within one dbt process* models run with `threads: 1`, because the
365
+ in-process delta_rs write path isn't thread-safe (parallel writes to a table in the *same* process
366
+ collide). It is **not** a limit on concurrent writers. Multiple independent writers — separate dbt
367
+ runs, notebooks, jobs, whatever — writing the same tables at the same time is fully supported and
368
+ safe: every write uses optimistic concurrency (snapshot-pinned MERGE, `safeappend` compare-and-swap,
369
+ fail-loud on a conflicting commit). So you can absolutely run many writers in parallel; you just
370
+ can't multi-thread the models *inside a single* dbt invocation.
371
+ - **Two engines share one machine's memory.** DuckDB executes the SQL and delta_rs materializes the
372
+ Delta table — two separate memory systems in the same process, each with its own pool. Under heavy
373
+ memory pressure (large merges especially) the budget has to be split between them, and getting that
374
+ split right is fragile: delta_rs's merge spill-to-disk is itself flaky, and coordinating two
375
+ systems that don't know about each other's allocations is the hard, unavoidable part of this design.
376
+ - **`DROP TABLE` is a soft tombstone, not a physical delete.** delta_rs has no `DROP`, and removing the
377
+ Delta files directly would be a filesystem hack that fails on object stores — so `conn.sql("drop
378
+ table x")` overwrites the table with a one-column tombstone marker and unregisters it. The table
379
+ vanishes from `conn.catalog` and discovery, and a later `create table x as …` revives the path with
380
+ real data, but the **files are not reclaimed** (a human purges them). One consequence: reading the
381
+ path *directly* (`conn.read.delta("…/x")`) bypasses discovery and returns the one-row tombstone
382
+ marker rather than erroring — address dropped tables by name, not by path.
383
+
329
384
  ## License
330
385
 
331
386
  MIT
@@ -0,0 +1 @@
1
+ version = "0.3.19"
@@ -129,6 +129,11 @@ _CREATE_TEMP_RE = re.compile(r"\s*create\s+(?:or\s+replace\s+)?(?:temp|temporary
129
129
  # verb would match inside an identifier (e.g. `update` within `last_update`).
130
130
  _LEADING_WITH = re.compile(r"\s*with\b", re.I)
131
131
  _DRIVING_DML = re.compile(r"\b(?:insert\s+into|update|delete\s+from)\b", re.I)
132
+ # DuckDB numeric type names (DECIMAL(p,s) matches on the prefix). Used to scope the lossy-narrowing
133
+ # guard to numeric→numeric casts only, leaving the intentional timestamp/string alignment untouched.
134
+ _NUMERIC_TYPE_RE = re.compile(
135
+ r"^(?:TINYINT|SMALLINT|INTEGER|BIGINT|HUGEINT|UTINYINT|USMALLINT|UINTEGER|UBIGINT|UHUGEINT|"
136
+ r"FLOAT|REAL|DOUBLE|DECIMAL)\b", re.I)
132
137
 
133
138
 
134
139
  def _strip_leading(query: str) -> str:
@@ -382,10 +387,10 @@ class _DeltaDML:
382
387
  if self._with_clause: # `WITH … INSERT INTO t SELECT …`: re-attach the CTE to the body
383
388
  body = f"{self._with_clause} {body}"
384
389
  cols = m.group("cols")
385
- if cols: # `insert into t (a, b) select …` — project the query onto the named columns
386
- self._append_projected(loc, self._provided(cols), f"({body})")
387
- else: # column count/order already matches the target — append as-is
388
- engine.write_delta(loc, self.cursor.sql(body), "append", storage_options=self.so)
390
+ # Always project onto the target schema — a column list maps by name, no list maps
391
+ # positionally. Routing both through _append_projected gives one place for the intentional
392
+ # type alignment AND the lossy-numeric-narrowing guard (so `insert … select 3.9` is caught too).
393
+ self._append_projected(loc, self._provided(cols) if cols else None, f"({body})")
389
394
 
390
395
  def _insert_values(self, m, rel, schema, loc) -> None:
391
396
  # `insert into <rel> [(<cols>)] values (...)`: the literals supply every target column when
@@ -420,6 +425,7 @@ class _DeltaDML:
420
425
 
421
426
  quoted = ", ".join('"' + c + '"' for c in provided)
422
427
  inner = f"{derived} v({quoted})"
428
+ self._reject_lossy_numeric_narrowing(inner, provided, dict(zip(target_cols, target_types)))
423
429
  exprs = [
424
430
  f'cast(v."{col}" as {typ}) as "{col}"' if col in provided_set
425
431
  else f'cast(null as {typ}) as "{col}"'
@@ -428,6 +434,43 @@ class _DeltaDML:
428
434
  data = self.cursor.sql(f"select {', '.join(exprs)} from {inner}")
429
435
  engine.write_delta(loc, data, "append", storage_options=self.so)
430
436
 
437
+ def _reject_lossy_numeric_narrowing(self, inner: str, provided, ttype) -> None:
438
+ """Fail loud when a supplied numeric value would be SILENTLY changed by the cast onto its
439
+ target column — e.g. inserting 3.9 into an INTEGER column (which lands 4). The cast in
440
+ :meth:`_append_projected` aligns types ON PURPOSE — timestamp ntz, int widening — and those are
441
+ lossless and intended, so this guard only fires for a numeric→numeric cast where the value does
442
+ NOT survive a round-trip through the target type. Non-numeric casts (timestamps, strings) are
443
+ deliberately left untouched. Raises ``ValueError`` naming the column and an example value.
444
+
445
+ Costs one extra evaluation of ``inner`` (trivial for VALUES; a second scan for ``insert …
446
+ select`` — acceptable to turn silent corruption into a loud error)."""
447
+ src = self.cursor.sql(
448
+ "select " + ", ".join(f'v."{c}"' for c in provided) + f" from {inner} limit 0")
449
+ stype = {c: str(t) for c, t in zip(provided, src.types)}
450
+ checks = [] # (col, lossy-predicate) for numeric→numeric casts that could narrow
451
+ for col in provided:
452
+ s, t = stype[col], ttype[col]
453
+ if s == t or not (_NUMERIC_TYPE_RE.match(s) and _NUMERIC_TYPE_RE.match(t)):
454
+ continue
455
+ # round-trip through the target type; try_cast so the probe itself never throws — an
456
+ # out-of-range value becomes NULL → distinct → flagged, same as a fractional loss.
457
+ checks.append(
458
+ (col, f'try_cast(try_cast(v."{col}" as {t}) as {s}) is distinct from v."{col}"'))
459
+ if not checks:
460
+ return
461
+ sel = ", ".join(
462
+ f'count(*) filter (where {pred}) as "n{i}", '
463
+ f'any_value(v."{col}") filter (where {pred}) as "ex{i}"'
464
+ for i, (col, pred) in enumerate(checks))
465
+ row = self.cursor.sql(f"select {sel} from {inner}").fetchone()
466
+ for i, (col, _) in enumerate(checks):
467
+ n, ex = row[2 * i], row[2 * i + 1]
468
+ if n:
469
+ raise ValueError(
470
+ f"INSERT would silently narrow {n} value(s) for column '{col}' into "
471
+ f"{ttype[col]} (e.g. {ex!r}). Cast explicitly in the SELECT/VALUES if intended."
472
+ )
473
+
431
474
  def _alter_add(self, m, rel, schema, loc) -> None:
432
475
  col = m.group("col").strip().strip('"')
433
476
  # Keep only the column type (drop any DEFAULT/NULL clause); add it as an all-null column by
@@ -32,6 +32,10 @@ class DuckrunConnectionManager(DuckDBConnectionManager):
32
32
 
33
33
  @classmethod
34
34
  def open(cls, connection):
35
+ # Fail loud if the kernel still has Fabric's stale duckdb/deltalake loaded (installed an
36
+ # upgrade but skipped notebookutils.session.restartPython()). Lazy import: same wheel.
37
+ from duckrun._runtime import check_runtime_versions
38
+ check_runtime_versions()
35
39
  # duckrun runs single-threaded, so it uses ONE DuckDB connection for the whole run
36
40
  # (DuckrunEnvironment) instead of dbt-duckdb's per-handle cursors — see environment.py.
37
41
  # Pre-seed the base class's singleton _ENV with it for the local case; remote/MotherDuck
@@ -0,0 +1,51 @@
1
+ """Runtime version guardrail.
2
+
3
+ duckrun needs ``duckdb`` >= 1.5.4 — the release where ``delta_scan`` gained its ``version => N``
4
+ parameter (used for snapshot-pinned reads) — and ``deltalake`` >= 1.5.0 (for the merge
5
+ ``max_spill_size`` cap). A Microsoft Fabric Python notebook ships a *stable* ``duckdb`` release,
6
+ which trails the newest one, so the ``duckdb`` already imported in the kernel may predate 1.5.4.
7
+ ``pip install duckrun --upgrade`` writes the new wheels to disk, but the already-loaded modules stay
8
+ bound until the kernel restarts — so a user who skips the restart would keep running on the older
9
+ modules, quietly losing snapshot-pinned reads and the spill cap.
10
+
11
+ This check turns that into a loud, actionable error. It inspects the *loaded* versions (not the
12
+ pin), so it fires exactly on the forgot-to-restart case.
13
+ """
14
+ from packaging.version import Version
15
+
16
+ # Floors duckrun needs at *runtime* — keep in sync with the pins in pyproject.toml:
17
+ # duckdb 1.5.4 -> delta_scan('...', version => N) for snapshot-pinned incremental reads
18
+ # deltalake 1.5.0 -> max_spill_size on MERGE to cap merge RAM and avoid OOM on large upserts
19
+ _MIN_DUCKDB = "1.5.4"
20
+ _MIN_DELTALAKE = "1.5.0"
21
+
22
+ _REMEDY = (
23
+ "In a Fabric Python notebook, upgrade then restart the kernel so the new versions load:\n"
24
+ " !pip install duckrun --upgrade\n"
25
+ " notebookutils.session.restartPython()\n"
26
+ "then re-run. (Elsewhere: pip install -U 'duckdb>={duckdb}' 'deltalake>={deltalake}' and "
27
+ "restart the interpreter.)"
28
+ ).format(duckdb=_MIN_DUCKDB, deltalake=_MIN_DELTALAKE)
29
+
30
+
31
+ def check_runtime_versions():
32
+ """Raise ``RuntimeError`` if the *loaded* duckdb/deltalake are older than duckrun requires.
33
+
34
+ Catches the notebook "installed but forgot ``restartPython()``" case: the kernel keeps the
35
+ older duckdb/deltalake bound until restart. Idempotent and cheap; called at each entry point
36
+ (``duckrun.connect()`` and the dbt connection open).
37
+ """
38
+ import duckdb
39
+ import deltalake
40
+
41
+ too_old = []
42
+ if Version(duckdb.__version__) < Version(_MIN_DUCKDB):
43
+ too_old.append(f"duckdb {duckdb.__version__} (need >= {_MIN_DUCKDB})")
44
+ if Version(deltalake.__version__) < Version(_MIN_DELTALAKE):
45
+ too_old.append(f"deltalake {deltalake.__version__} (need >= {_MIN_DELTALAKE})")
46
+
47
+ if too_old:
48
+ raise RuntimeError(
49
+ "duckrun needs a newer " + " and ".join(too_old) + " than the kernel has loaded.\n"
50
+ + _REMEDY
51
+ )
@@ -16,6 +16,7 @@ import duckdb
16
16
 
17
17
  from dbt.adapters.duckrun import delta_dml, engine, remote, secret
18
18
  from . import auth
19
+ from ._runtime import check_runtime_versions
19
20
 
20
21
 
21
22
  # Statements that would WRITE to a table — rejected by the read-only conn.sql() with a pointer to
@@ -25,6 +26,8 @@ from . import auth
25
26
  # TEMP/TEMPORARY TABLE and CREATE VIEW are DuckDB-local scratch by design and pass through.
26
27
  _WRITE_KEYWORD_RE = re.compile(r"^(insert|update|delete|merge)\b", re.IGNORECASE)
27
28
  _CREATE_TABLE_RE = re.compile(r"^create\s+(or\s+replace\s+)?table\b", re.IGNORECASE)
29
+ _DML_TARGET_RE = re.compile(
30
+ r"^(?:insert\s+into|delete\s+from|update)\s+(?P<rel>\"?[\w.]+\"?)", re.IGNORECASE)
28
31
  _CREATE_TEMP_RE = re.compile(r"^create\s+(or\s+replace\s+)?(temp|temporary)\b", re.IGNORECASE)
29
32
 
30
33
  # DML forms that genuinely can't be expressed through delta_rs (delta_dml.handle never applies them):
@@ -95,6 +98,34 @@ def _is_delta_write(query: str) -> bool:
95
98
  return bool(_CREATE_TABLE_RE.match(s)) and not _CREATE_TEMP_RE.match(s)
96
99
 
97
100
 
101
+ def _delta_write_message(query: str) -> str:
102
+ """The error for a raw-SQL write conn.sql() can't route to delta_rs. For an INSERT/UPDATE/DELETE
103
+ whose target isn't a discovered Delta table — the common cause being a typo or a table written
104
+ out-of-band before refresh() — name the table and give form-appropriate guidance, instead of the
105
+ generic 'use the Spark write API' redirect (which misdirects: for UPDATE/DELETE the problem is the
106
+ missing table, not the API)."""
107
+ s = _strip_leading(query)
108
+ m = _DML_TARGET_RE.match(s)
109
+ if m:
110
+ rel = m.group("rel").strip('"')
111
+ verb = s.split(None, 1)[0].lower()
112
+ if verb in ("update", "delete"):
113
+ return (
114
+ f"conn.sql(): no Delta table '{rel}' to {verb}. conn.sql() DML only targets a "
115
+ f"discovered Delta table — check the name, or call conn.refresh() if it was just "
116
+ f"written out-of-band."
117
+ )
118
+ return ( # insert into a table that doesn't exist yet
119
+ f"conn.sql(): no Delta table '{rel}' to insert into. Create it first with "
120
+ f"df.write.saveAsTable('{rel}'), then insert."
121
+ )
122
+ return ( # a CREATE … AS that didn't resolve, or any other unrouted Delta write
123
+ "conn.sql() can't write a Delta table from raw SQL here. "
124
+ "Use the Spark write API: df.write.saveAsTable(...) to create/append, or "
125
+ "conn.delta_table(name).merge(...)/.delete()/.update()/.replaceWhere()."
126
+ )
127
+
128
+
98
129
  def _qid(name: str) -> str:
99
130
  """Quote a SQL identifier (schema/table/view name)."""
100
131
  return '"' + str(name).replace('"', '""') + '"'
@@ -105,6 +136,40 @@ def _qlit(text: str) -> str:
105
136
  return str(text).replace("'", "''")
106
137
 
107
138
 
139
+ def _strip_query_context(msg: str) -> str:
140
+ """DuckDB appends the offending statement to errors as ``\\nLINE N: <sql>\\n ^``. When that
141
+ statement is one duckrun generated internally (the ``delta_scan`` view), echoing it back is
142
+ noise that makes the failure look like it's about the caller's input. Keep the real error
143
+ text; drop the generated-SQL context."""
144
+ idx = msg.find("\nLINE ")
145
+ return msg[:idx].rstrip() if idx != -1 else msg
146
+
147
+
148
+ _GUID = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
149
+
150
+
151
+ def _onelake_guid_hint(root_path: str) -> Optional[str]:
152
+ """Workaround note for the OneLake ``delta_scan`` bug, shown only when a friendly-name
153
+ ``abfss://`` path is involved. OneLake's delta_scan can fail to enumerate a valid table's
154
+ ``_delta_log`` when the path uses friendly workspace/lakehouse names (duckdb-delta#307); the
155
+ GUID form reads fine. Returns ``None`` for non-abfss paths or paths already using GUIDs (no
156
+ point nagging those)."""
157
+ if not remote.is_abfss(root_path):
158
+ return None
159
+ workspace, _host, path = remote._parse_abfss(root_path)
160
+ lakehouse = path.split("/", 1)[0] if path else ""
161
+ if lakehouse.lower().endswith(".lakehouse"):
162
+ lakehouse = lakehouse[: -len(".Lakehouse")]
163
+ if _GUID.match(workspace) and _GUID.match(lakehouse):
164
+ return None
165
+ return (
166
+ "OneLake's delta_scan can fail to read a valid table's _delta_log when the abfss path uses "
167
+ "friendly names — a known upstream issue (duckdb-delta#307). Until it's fixed, use the "
168
+ "workspace and lakehouse GUIDs, e.g. "
169
+ "abfss://<workspace-guid>@onelake.dfs.fabric.microsoft.com/<lakehouse-guid>/Tables"
170
+ )
171
+
172
+
108
173
  def _split_root_schema(path: str, schema: Optional[str]):
109
174
  """Normalize ``path`` into ``(root_path, schema)``.
110
175
 
@@ -227,10 +292,22 @@ class DuckSession:
227
292
 
228
293
  def _register_view(self, schema: str, table: str):
229
294
  path = f"{self.root_path.rstrip('/')}/{schema}/{table}"
230
- self.con.execute(
231
- f"CREATE OR REPLACE VIEW {_qid(schema)}.{_qid(table)} AS "
232
- f"SELECT * FROM delta_scan('{_qlit(path)}')"
233
- )
295
+ try:
296
+ self.con.execute(
297
+ f"CREATE OR REPLACE VIEW {_qid(schema)}.{_qid(table)} AS "
298
+ f"SELECT * FROM delta_scan('{_qlit(path)}')"
299
+ )
300
+ except Exception as exc:
301
+ # delta_scan failed reading the table. Keep the real engine error (it's the signal —
302
+ # e.g. the OneLake "No files in log segment" delta-kernel bug), but drop DuckDB's echo
303
+ # of the CREATE VIEW statement *we* generated, and say which table/path it was. Suppress
304
+ # the chained original (`from None`) so the noisy SQL echo doesn't reappear in tracebacks.
305
+ hint = _onelake_guid_hint(self.root_path)
306
+ raise RuntimeError(
307
+ f"duckrun: could not read Delta table {schema}.{table} at '{path}':\n"
308
+ f"{_strip_query_context(str(exc))}"
309
+ + (f"\n\n{hint}" if hint else "")
310
+ ) from None
234
311
 
235
312
  def _set_search_path(self, schema: str):
236
313
  try:
@@ -275,11 +352,7 @@ class DuckSession:
275
352
  self.refresh(quiet=True)
276
353
  return DataFrame(self.con.sql("SELECT 'ok' AS status"), self)
277
354
  if _is_delta_write(query):
278
- raise ValueError(
279
- "conn.sql() can't write a Delta table from raw SQL here. "
280
- "Use the Spark write API: df.write.saveAsTable(...) to create/append, or "
281
- "conn.delta_table(name).merge(...)/.delete()/.update()/.replaceWhere()."
282
- )
355
+ raise ValueError(_delta_write_message(query))
283
356
  return DataFrame(self.con.sql(query), self)
284
357
 
285
358
  def table(self, name: str) -> "DataFrame":
@@ -436,17 +509,17 @@ class DataFrameWriter:
436
509
  self._partition_by = list(cols)
437
510
  return self
438
511
 
439
- def saveAsTable(self, name: str) -> str:
512
+ def _write(self, path: str, descr: str) -> None:
513
+ """Apply the configured mode to the Delta table at ``path`` (storage-neutral). ``descr``
514
+ names the target in the mode='error' message. Shared by saveAsTable and save."""
440
515
  session = self._df.session
441
- schema, table = session.resolve(name)
442
- path = session.table_path(schema, table)
443
516
  so = session.storage_options
444
517
 
445
518
  mode = self._mode
446
519
  if mode in ("error", "errorifexists"):
447
520
  if engine.table_exists(path, so):
448
521
  raise ValueError(
449
- f"table '{schema}.{table}' already exists (mode='error'). "
522
+ f"{descr} already exists (mode='error'). "
450
523
  f"Use mode('overwrite'), mode('append'), mode('safeappend'), or mode('ignore')."
451
524
  )
452
525
  mode = "overwrite"
@@ -487,6 +560,22 @@ class DataFrameWriter:
487
560
  storage_options=so,
488
561
  compaction_threshold=session.compaction_threshold,
489
562
  )
563
+
564
+ def save(self, path: str) -> str:
565
+ """Spark ``df.write.save(path)`` — write to a Delta table by PATH, not catalog name.
566
+
567
+ Storage-neutral (local / s3:// / gs:// / az:// / abfss://). Unlike :meth:`saveAsTable`,
568
+ the result is addressed only by ``path`` — there is no schema.table name to register a
569
+ view for — so it is read back with ``conn.read.delta(path)`` / ``delta_scan('<path>')``,
570
+ not as an unqualified table. Returns ``path``."""
571
+ self._write(path, f"delta table at '{path}'")
572
+ return path
573
+
574
+ def saveAsTable(self, name: str) -> str:
575
+ session = self._df.session
576
+ schema, table = session.resolve(name)
577
+ path = session.table_path(schema, table)
578
+ self._write(path, f"table '{schema}.{table}'")
490
579
  # Surface the (new or grown) table immediately — no manual refresh() needed.
491
580
  session.con.execute(f"CREATE SCHEMA IF NOT EXISTS {_qid(schema)}")
492
581
  session._register_view(schema, table)
@@ -573,4 +662,5 @@ def connect(path: str, storage_options: Optional[Dict[str, str]] = None,
573
662
  >>> conn.sql("SHOW TABLES").show()
574
663
  >>> conn.sql("select * from orders").write.mode("overwrite").saveAsTable("orders_copy")
575
664
  """
665
+ check_runtime_versions() # fail loud if Fabric's stale duckdb/deltalake are still loaded
576
666
  return DuckSession(path, storage_options, schema, compaction_threshold)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.3.17.dev7
3
+ Version: 0.3.19
4
4
  Summary: A dbt adapter that runs SQL in DuckDB and materializes to Delta Lake (delta_rs).
5
5
  Author: mim
6
6
  License: MIT
@@ -12,7 +12,7 @@ Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
13
  Requires-Dist: dbt-duckdb>=1.8
14
14
  Requires-Dist: dbt-core<2.0,>=1.8
15
- Requires-Dist: duckdb==1.5.4
15
+ Requires-Dist: duckdb<1.6.0,>=1.5.4
16
16
  Requires-Dist: deltalake<1.5.1,>=1.5.0
17
17
  Requires-Dist: requests
18
18
  Provides-Extra: local
@@ -35,9 +35,13 @@ Dynamic: license-file
35
35
  > not affiliated with, endorsed by, or supported by any employer or vendor. No warranty —
36
36
  > use it at your own risk.
37
37
 
38
- **duckrun** is a [dbt](https://www.getdbt.com/) adapter that runs your model SQL in
39
- **DuckDB** and writes the results to **Delta Lake** using
40
- [`delta_rs`](https://delta-io.github.io/delta-rs/) (the `deltalake` Python package).
38
+ **duckrun** runs SQL in [DuckDB](https://duckdb.org/) and writes
39
+ [**Delta Lake**](https://delta-io.github.io/delta-rs/) via delta_rs. It gives you:
40
+
41
+ - a [**dbt**](https://www.getdbt.com/) adapter that materializes models as Delta tables;
42
+ - a **`connect()`** helper to write Delta straight from SQL in a notebook;
43
+ - **full snapshot isolation** from read to write — concurrent writers fail loud, never interleave.
44
+
41
45
  duckrun itself is just glue — it owns none of the heavy lifting. The real work is done
42
46
  by **DuckDB** (executes the SQL), **delta-rs** (writes the Delta table), **Arrow** (the
43
47
  zero-copy (kind of) bridge that hands query results from DuckDB to delta-rs), and **dbt** (orchestrates
@@ -67,6 +71,21 @@ pip install duckrun
67
71
 
68
72
  That single install pulls in `dbt-duckdb` (and therefore `duckdb`) plus `deltalake`.
69
73
 
74
+ ### In a Microsoft Fabric Python notebook
75
+
76
+ duckrun needs `duckdb` ≥ 1.5.4 — the release where `delta_scan` gained its `version => N`
77
+ parameter, which duckrun uses for snapshot-pinned reads. Fabric notebooks ship a **stable**
78
+ `duckdb` release, which trails the newest one, so the `duckdb` already loaded in the kernel may
79
+ predate 1.5.4. Upgrade, then restart the Python kernel so the new version loads.
80
+
81
+ ```python
82
+ !pip install duckrun --upgrade
83
+ notebookutils.session.restartPython()
84
+ ```
85
+
86
+ If you skip the restart, duckrun fails loud at `connect()` (and on `dbt run`) and tells you to
87
+ restart — it won't quietly run on the older `duckdb`/`deltalake` still bound in the kernel.
88
+
70
89
  ## Configure your profile
71
90
 
72
91
  ```yaml
@@ -79,12 +98,22 @@ my_project:
79
98
  # No `threads:` needed — duckrun always runs single-threaded.
80
99
  # DuckDB runs in-memory by default — the Delta tables are the only state.
81
100
  # Default Delta location for models that don't set config(location=...).
82
- root_path: './warehouse' # local path, or s3://..., gs://..., abfss://...
101
+ # OneLake — address by GUID, not friendly names (see "OneLake: use GUID paths" below):
102
+ root_path: "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables"
103
+ # Or any other store: './warehouse' (local), 's3://...', 'gs://...'.
83
104
  # storage_options: {} # passed through to deltalake for remote stores
84
105
  ```
85
106
 
86
107
  Persisted models are written to `<root_path>/<schema>/<model>` (e.g.
87
- `./warehouse/dbo/orders`), or to an explicit `config(location=...)`.
108
+ `.../Tables/dbo/orders`), or to an explicit `config(location=...)`.
109
+
110
+ ### OneLake: use GUID paths for now
111
+
112
+ Address OneLake tables by **workspace GUID + lakehouse GUID**, not friendly names —
113
+ `abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/...`. This
114
+ sidesteps an upstream `duckdb-delta` read bug ("No files in log segment") that is **already fixed
115
+ upstream but still rolling out to production OneLake**. Friendly-name paths will work again once
116
+ the fix finishes deploying.
88
117
 
89
118
  ### Fabric Lakehouse without a schema
90
119
 
@@ -95,7 +124,7 @@ let the schema fill that slot:
95
124
 
96
125
  ```yaml
97
126
  schema: Tables
98
- root_path: "abfss://<ws>@onelake.dfs.fabric.microsoft.com/<lh>.Lakehouse"
127
+ root_path: "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>"
99
128
  ```
100
129
 
101
130
  Since models are written to `<root_path>/<schema>/<model>`, this lands them at
@@ -309,7 +338,7 @@ unchanged since the call, else raises `CommitFailedError`.
309
338
 
310
339
  ```python
311
340
  import duckrun
312
- conn = duckrun.connect("abfss://ws@onelake.dfs.fabric.microsoft.com/lh.Lakehouse/Tables/dbo")
341
+ conn = duckrun.connect("abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/dbo")
313
342
  conn.sql("select * from orders").write.mode("overwrite").saveAsTable("orders_copy")
314
343
  conn.table("orders_copy").show()
315
344
 
@@ -355,6 +384,32 @@ None of this is required to use duckrun — `pip install duckrun` is unaffected.
355
384
  runs the official suite (above); `tests/correctness/` proves the concurrency guarantees. The cards
356
385
  in those docs are rendered live by CI, so they always reflect the latest `main`.
357
386
 
387
+ ## Limitations
388
+
389
+ These are core design trade-offs, not bugs — they're inherent to gluing DuckDB to delta_rs and
390
+ won't be "fixed" away:
391
+
392
+ - **A single dbt run is single-threaded — but concurrency works fine.** This is purely a dbt-adapter
393
+ implementation detail: *within one dbt process* models run with `threads: 1`, because the
394
+ in-process delta_rs write path isn't thread-safe (parallel writes to a table in the *same* process
395
+ collide). It is **not** a limit on concurrent writers. Multiple independent writers — separate dbt
396
+ runs, notebooks, jobs, whatever — writing the same tables at the same time is fully supported and
397
+ safe: every write uses optimistic concurrency (snapshot-pinned MERGE, `safeappend` compare-and-swap,
398
+ fail-loud on a conflicting commit). So you can absolutely run many writers in parallel; you just
399
+ can't multi-thread the models *inside a single* dbt invocation.
400
+ - **Two engines share one machine's memory.** DuckDB executes the SQL and delta_rs materializes the
401
+ Delta table — two separate memory systems in the same process, each with its own pool. Under heavy
402
+ memory pressure (large merges especially) the budget has to be split between them, and getting that
403
+ split right is fragile: delta_rs's merge spill-to-disk is itself flaky, and coordinating two
404
+ systems that don't know about each other's allocations is the hard, unavoidable part of this design.
405
+ - **`DROP TABLE` is a soft tombstone, not a physical delete.** delta_rs has no `DROP`, and removing the
406
+ Delta files directly would be a filesystem hack that fails on object stores — so `conn.sql("drop
407
+ table x")` overwrites the table with a one-column tombstone marker and unregisters it. The table
408
+ vanishes from `conn.catalog` and discovery, and a later `create table x as …` revives the path with
409
+ real data, but the **files are not reclaimed** (a human purges them). One consequence: reading the
410
+ path *directly* (`conn.read.delta("…/x")`) bypasses discovery and returns the one-row tombstone
411
+ marker rather than erroring — address dropped tables by name, not by path.
412
+
358
413
  ## License
359
414
 
360
415
  MIT
@@ -21,6 +21,7 @@ dbt/include/duckrun/macros/materializations/incremental.sql
21
21
  dbt/include/duckrun/macros/materializations/snapshot.sql
22
22
  dbt/include/duckrun/macros/materializations/table.sql
23
23
  duckrun/__init__.py
24
+ duckrun/_runtime.py
24
25
  duckrun/auth.py
25
26
  duckrun/delta_table.py
26
27
  duckrun/session.py
@@ -1,6 +1,6 @@
1
1
  dbt-duckdb>=1.8
2
2
  dbt-core<2.0,>=1.8
3
- duckdb==1.5.4
3
+ duckdb<1.6.0,>=1.5.4
4
4
  deltalake<1.5.1,>=1.5.0
5
5
  requests
6
6
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "duckrun"
7
- version = "0.3.17.dev7"
7
+ version = "0.3.19"
8
8
  description = "A dbt adapter that runs SQL in DuckDB and materializes to Delta Lake (delta_rs)."
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -21,23 +21,22 @@ dependencies = [
21
21
  # fails at `from dbt.cli.main import dbtRunner`. Declared directly because we only depend on
22
22
  # dbt-core transitively, so the ceiling has to live here to bite.
23
23
  "dbt-core>=1.8,<2.0",
24
- # Pin exactly 1.5.4 (stable). NOT a bare ">=1.5.4" floor: this project resolves with pip --pre
25
- # (to pick up dbt pre-releases), and --pre is global, so an open floor lets pip pull the newest
26
- # duckdb *prerelease* — verified: "--pre duckdb>=1.5.4" resolves to 1.6.0.dev12, an unstable,
27
- # untested engine. An upper cap (<1.6.0) blocks 1.6.0.dev but a future 1.5.5.devN would still
28
- # slip through under --pre, so pin exactly, matching the exact deltalake pin below.
29
- # 1.5.4 is the first stable build whose bundled duckdb-delta
30
- # extension supports `delta_scan('...', version => N)` (duckdb-delta #312) — the version-pinned
31
- # read this project now relies on to make the incremental read and the write commit resolve at
32
- # ONE Delta snapshot (Spark single-snapshot MERGE parity; see the staging-read pin in
33
- # _delta_core.sql and merge_delta's read_version). The earlier 1.5.2+ "No files in log segment"
34
- # read regression is avoided by addressing OneLake tables via GUID (workspace_id/lakehouse_id)
35
- # abfss paths, and is fixed upstream.
24
+ # duckdb floor is 1.5.4 with a <1.6.0 cap (a floor, NOT an exact pin): 1.5.4 is the first
25
+ # stable build whose bundled duckdb-delta extension supports `delta_scan('...', version => N)`
26
+ # (duckdb-delta #312) — the version-pinned read this project relies on to make the incremental
27
+ # read and the write commit resolve at ONE Delta snapshot (Spark single-snapshot MERGE parity;
28
+ # see the staging-read pin in _delta_core.sql and merge_delta's read_version). Stable 1.5.x
29
+ # patches above the floor are fine. The <1.6.0 cap matters because this project resolves with
30
+ # pip --pre (to pick up dbt pre-releases) and --pre is global: an open floor would let pip pull
31
+ # an unstable duckdb *prerelease* (verified: "--pre duckdb>=1.5.4" resolves to 1.6.0.dev12).
32
+ # Per PEP 440, "<1.6.0" excludes 1.6.0 AND its prereleases (1.6.0.devN), so no 1.6.0 dev build slips
33
+ # in. The earlier 1.5.2+ "No files in log segment" read regression is avoided by addressing
34
+ # OneLake tables via GUID (workspace_id/lakehouse_id) abfss paths, and is fixed upstream.
36
35
  # deltalake floor stays 1.5.0 (not just a ceiling): 1.5.0 is the first release with MERGE
37
36
  # disk-spill config (max_spill_size), which engine.merge_delta relies on to cap the merge's RAM
38
37
  # and avoid OOM on large upserts; the matching <1.5.1 ceiling avoids the deltalake delta-log
39
38
  # write-side regression, pinning exactly 1.5.0.
40
- "duckdb==1.5.4",
39
+ "duckdb>=1.5.4,<1.6.0",
41
40
  "deltalake>=1.5.0,<1.5.1",
42
41
  # The top-level connection API (duckrun.connect) discovers OneLake tables via the DFS REST
43
42
  # API directly; requests is otherwise only a transitive dbt dependency.
@@ -1 +0,0 @@
1
- version = "0.3.17.dev7"
File without changes
File without changes
File without changes
File without changes