duckrun 0.3.17.dev6__tar.gz → 0.3.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.3.17.dev6/duckrun.egg-info → duckrun-0.3.18}/PKG-INFO +27 -4
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/README.md +25 -2
- duckrun-0.3.18/dbt/adapters/duckrun/__version__.py +1 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/adapters/duckrun/impl.py +4 -0
- duckrun-0.3.18/duckrun/_runtime.py +51 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/duckrun/session.py +52 -4
- {duckrun-0.3.17.dev6 → duckrun-0.3.18/duckrun.egg-info}/PKG-INFO +27 -4
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/duckrun.egg-info/SOURCES.txt +1 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/duckrun.egg-info/requires.txt +1 -1
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/pyproject.toml +13 -9
- duckrun-0.3.17.dev6/dbt/adapters/duckrun/__version__.py +0 -1
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/LICENSE +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/MANIFEST.in +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/adapters/duckrun/__init__.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/adapters/duckrun/credentials.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/adapters/duckrun/delta_dml.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/adapters/duckrun/delta_plugin.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/adapters/duckrun/engine.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/adapters/duckrun/environment.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/adapters/duckrun/remote.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/adapters/duckrun/secret.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/__init__.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/dbt_project.yml +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/catalog.sql +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/_delta_core.sql +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/delta.sql +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/incremental.sql +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/snapshot.sql +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/table.sql +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/duckrun/__init__.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/duckrun/auth.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/duckrun/delta_table.py +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.3.17.dev6 → duckrun-0.3.18}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: duckrun
|
|
3
|
-
Version: 0.3.17.dev6
|
|
3
|
+
Version: 0.3.18
|
|
4
4
|
Summary: A dbt adapter that runs SQL in DuckDB and materializes to Delta Lake (delta_rs).
|
|
5
5
|
Author: mim
|
|
6
6
|
License: MIT
|
|
@@ -12,7 +12,7 @@ Description-Content-Type: text/markdown
|
|
|
12
12
|
License-File: LICENSE
|
|
13
13
|
Requires-Dist: dbt-duckdb>=1.8
|
|
14
14
|
Requires-Dist: dbt-core<2.0,>=1.8
|
|
15
|
-
Requires-Dist: duckdb>=1.5.4
|
|
15
|
+
Requires-Dist: duckdb<1.6.0,>=1.5.4
|
|
16
16
|
Requires-Dist: deltalake<1.5.1,>=1.5.0
|
|
17
17
|
Requires-Dist: requests
|
|
18
18
|
Provides-Extra: local
|
|
@@ -67,6 +67,21 @@ pip install duckrun
|
|
|
67
67
|
|
|
68
68
|
That single install pulls in `dbt-duckdb` (and therefore `duckdb`) plus `deltalake`.
|
|
69
69
|
|
|
70
|
+
### In a Microsoft Fabric Python notebook
|
|
71
|
+
|
|
72
|
+
duckrun needs `duckdb` ≥ 1.5.4 — the release where `delta_scan` gained its `version => N`
|
|
73
|
+
parameter, which duckrun uses for snapshot-pinned reads. Fabric notebooks ship a **stable**
|
|
74
|
+
`duckdb` release, which trails the newest one, so the `duckdb` already loaded in the kernel may
|
|
75
|
+
predate 1.5.4. Upgrade, then restart the Python kernel so the new version loads.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
!pip install duckrun --upgrade
|
|
79
|
+
notebookutils.session.restartPython()
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
If you skip the restart, duckrun fails loud at `connect()` (and on `dbt run`) and tells you to
|
|
83
|
+
restart — it won't quietly run on the older `duckdb`/`deltalake` still bound in the kernel.
|
|
84
|
+
|
|
70
85
|
## Configure your profile
|
|
71
86
|
|
|
72
87
|
```yaml
|
|
@@ -86,6 +101,14 @@ my_project:
|
|
|
86
101
|
Persisted models are written to `<root_path>/<schema>/<model>` (e.g.
|
|
87
102
|
`./warehouse/dbo/orders`), or to an explicit `config(location=...)`.
|
|
88
103
|
|
|
104
|
+
### OneLake: use GUID paths for now
|
|
105
|
+
|
|
106
|
+
Address OneLake tables by **workspace GUID + lakehouse GUID**, not friendly names —
|
|
107
|
+
`abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/...`. This
|
|
108
|
+
sidesteps an upstream `duckdb-delta` read bug ("No files in log segment") that is **already fixed
|
|
109
|
+
upstream but still rolling out to production OneLake**. Friendly-name paths will work again once
|
|
110
|
+
the fix finishes deploying.
|
|
111
|
+
|
|
89
112
|
### Fabric Lakehouse without a schema
|
|
90
113
|
|
|
91
114
|
A schema-less Lakehouse (tables straight under `Tables/`, no `Tables/<schema>/` grouping) is
|
|
@@ -95,7 +118,7 @@ let the schema fill that slot:
|
|
|
95
118
|
|
|
96
119
|
```yaml
|
|
97
120
|
schema: Tables
|
|
98
|
-
root_path: "abfss://<
|
|
121
|
+
root_path: "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>"
|
|
99
122
|
```
|
|
100
123
|
|
|
101
124
|
Since models are written to `<root_path>/<schema>/<model>`, this lands them at
|
|
@@ -309,7 +332,7 @@ unchanged since the call, else raises `CommitFailedError`.
|
|
|
309
332
|
|
|
310
333
|
```python
|
|
311
334
|
import duckrun
|
|
312
|
-
conn = duckrun.connect("abfss
|
|
335
|
+
conn = duckrun.connect("abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/dbo")
|
|
313
336
|
conn.sql("select * from orders").write.mode("overwrite").saveAsTable("orders_copy")
|
|
314
337
|
conn.table("orders_copy").show()
|
|
315
338
|
|
|
@@ -38,6 +38,21 @@ pip install duckrun
|
|
|
38
38
|
|
|
39
39
|
That single install pulls in `dbt-duckdb` (and therefore `duckdb`) plus `deltalake`.
|
|
40
40
|
|
|
41
|
+
### In a Microsoft Fabric Python notebook
|
|
42
|
+
|
|
43
|
+
duckrun needs `duckdb` ≥ 1.5.4 — the release where `delta_scan` gained its `version => N`
|
|
44
|
+
parameter, which duckrun uses for snapshot-pinned reads. Fabric notebooks ship a **stable**
|
|
45
|
+
`duckdb` release, which trails the newest one, so the `duckdb` already loaded in the kernel may
|
|
46
|
+
predate 1.5.4. Upgrade, then restart the Python kernel so the new version loads.
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
!pip install duckrun --upgrade
|
|
50
|
+
notebookutils.session.restartPython()
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
If you skip the restart, duckrun fails loud at `connect()` (and on `dbt run`) and tells you to
|
|
54
|
+
restart — it won't quietly run on the older `duckdb`/`deltalake` still bound in the kernel.
|
|
55
|
+
|
|
41
56
|
## Configure your profile
|
|
42
57
|
|
|
43
58
|
```yaml
|
|
@@ -57,6 +72,14 @@ my_project:
|
|
|
57
72
|
Persisted models are written to `<root_path>/<schema>/<model>` (e.g.
|
|
58
73
|
`./warehouse/dbo/orders`), or to an explicit `config(location=...)`.
|
|
59
74
|
|
|
75
|
+
### OneLake: use GUID paths for now
|
|
76
|
+
|
|
77
|
+
Address OneLake tables by **workspace GUID + lakehouse GUID**, not friendly names —
|
|
78
|
+
`abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/...`. This
|
|
79
|
+
sidesteps an upstream `duckdb-delta` read bug ("No files in log segment") that is **already fixed
|
|
80
|
+
upstream but still rolling out to production OneLake**. Friendly-name paths will work again once
|
|
81
|
+
the fix finishes deploying.
|
|
82
|
+
|
|
60
83
|
### Fabric Lakehouse without a schema
|
|
61
84
|
|
|
62
85
|
A schema-less Lakehouse (tables straight under `Tables/`, no `Tables/<schema>/` grouping) is
|
|
@@ -66,7 +89,7 @@ let the schema fill that slot:
|
|
|
66
89
|
|
|
67
90
|
```yaml
|
|
68
91
|
schema: Tables
|
|
69
|
-
root_path: "abfss://<
|
|
92
|
+
root_path: "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>"
|
|
70
93
|
```
|
|
71
94
|
|
|
72
95
|
Since models are written to `<root_path>/<schema>/<model>`, this lands them at
|
|
@@ -280,7 +303,7 @@ unchanged since the call, else raises `CommitFailedError`.
|
|
|
280
303
|
|
|
281
304
|
```python
|
|
282
305
|
import duckrun
|
|
283
|
-
conn = duckrun.connect("abfss
|
|
306
|
+
conn = duckrun.connect("abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/dbo")
|
|
284
307
|
conn.sql("select * from orders").write.mode("overwrite").saveAsTable("orders_copy")
|
|
285
308
|
conn.table("orders_copy").show()
|
|
286
309
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = "0.3.18"
|
|
@@ -32,6 +32,10 @@ class DuckrunConnectionManager(DuckDBConnectionManager):
|
|
|
32
32
|
|
|
33
33
|
@classmethod
|
|
34
34
|
def open(cls, connection):
|
|
35
|
+
# Fail loud if the kernel still has Fabric's stale duckdb/deltalake loaded (installed an
|
|
36
|
+
# upgrade but skipped notebookutils.session.restartPython()). Lazy import: same wheel.
|
|
37
|
+
from duckrun._runtime import check_runtime_versions
|
|
38
|
+
check_runtime_versions()
|
|
35
39
|
# duckrun runs single-threaded, so it uses ONE DuckDB connection for the whole run
|
|
36
40
|
# (DuckrunEnvironment) instead of dbt-duckdb's per-handle cursors — see environment.py.
|
|
37
41
|
# Pre-seed the base class's singleton _ENV with it for the local case; remote/MotherDuck
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Runtime version guardrail.
|
|
2
|
+
|
|
3
|
+
duckrun needs ``duckdb`` >= 1.5.4 — the release where ``delta_scan`` gained its ``version => N``
|
|
4
|
+
parameter (used for snapshot-pinned reads) — and ``deltalake`` >= 1.5.0 (for the merge
|
|
5
|
+
``max_spill_size`` cap). A Microsoft Fabric Python notebook ships a *stable* ``duckdb`` release,
|
|
6
|
+
which trails the newest one, so the ``duckdb`` already imported in the kernel may predate 1.5.4.
|
|
7
|
+
``pip install duckrun --upgrade`` writes the new wheels to disk, but the already-loaded modules stay
|
|
8
|
+
bound until the kernel restarts — so a user who skips the restart would keep running on the older
|
|
9
|
+
modules, quietly losing snapshot-pinned reads and the spill cap.
|
|
10
|
+
|
|
11
|
+
This check turns that into a loud, actionable error. It inspects the *loaded* versions (not the
|
|
12
|
+
pin), so it fires exactly on the forgot-to-restart case.
|
|
13
|
+
"""
|
|
14
|
+
from packaging.version import Version
|
|
15
|
+
|
|
16
|
+
# Floors duckrun needs at *runtime* — keep in sync with the pins in pyproject.toml:
|
|
17
|
+
# duckdb 1.5.4 -> delta_scan('...', version => N) for snapshot-pinned incremental reads
|
|
18
|
+
# deltalake 1.5.0 -> max_spill_size on MERGE to cap merge RAM and avoid OOM on large upserts
|
|
19
|
+
_MIN_DUCKDB = "1.5.4"
|
|
20
|
+
_MIN_DELTALAKE = "1.5.0"
|
|
21
|
+
|
|
22
|
+
_REMEDY = (
|
|
23
|
+
"In a Fabric Python notebook, upgrade then restart the kernel so the new versions load:\n"
|
|
24
|
+
" !pip install duckrun --upgrade\n"
|
|
25
|
+
" notebookutils.session.restartPython()\n"
|
|
26
|
+
"then re-run. (Elsewhere: pip install -U 'duckdb>={duckdb}' 'deltalake>={deltalake}' and "
|
|
27
|
+
"restart the interpreter.)"
|
|
28
|
+
).format(duckdb=_MIN_DUCKDB, deltalake=_MIN_DELTALAKE)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def check_runtime_versions():
    """Fail with an actionable error when the imported duckdb/deltalake are too old.

    Compares ``duckdb.__version__`` and ``deltalake.__version__`` -- the modules
    as imported in this process -- against the ``_MIN_DUCKDB`` and
    ``_MIN_DELTALAKE`` floors. Every package below its floor is listed in a
    single error, followed by the ``_REMEDY`` upgrade-and-restart instructions.

    Returns:
        None when both packages satisfy their floor.

    Raises:
        RuntimeError: if either loaded package is older than its floor.
    """
    # Imported here rather than at module import time, in the same order as before.
    import duckdb
    import deltalake

    requirements = (
        ("duckdb", duckdb, _MIN_DUCKDB),
        ("deltalake", deltalake, _MIN_DELTALAKE),
    )

    stale = []
    for label, module, floor in requirements:
        # Version objects compare by release semantics, not as plain strings.
        if Version(module.__version__) < Version(floor):
            stale.append(f"{label} {module.__version__} (need >= {floor})")

    if not stale:
        return

    raise RuntimeError(
        f"duckrun needs a newer {' and '.join(stale)} than the kernel has loaded.\n{_REMEDY}"
    )
|
|
@@ -16,6 +16,7 @@ import duckdb
|
|
|
16
16
|
|
|
17
17
|
from dbt.adapters.duckrun import delta_dml, engine, remote, secret
|
|
18
18
|
from . import auth
|
|
19
|
+
from ._runtime import check_runtime_versions
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
# Statements that would WRITE to a table — rejected by the read-only conn.sql() with a pointer to
|
|
@@ -105,6 +106,40 @@ def _qlit(text: str) -> str:
|
|
|
105
106
|
return str(text).replace("'", "''")
|
|
106
107
|
|
|
107
108
|
|
|
109
|
+
def _strip_query_context(msg: str) -> str:
|
|
110
|
+
"""DuckDB appends the offending statement to errors as ``\\nLINE N: <sql>\\n ^``. When that
|
|
111
|
+
statement is one duckrun generated internally (the ``delta_scan`` view), echoing it back is
|
|
112
|
+
noise that makes the failure look like it's about the caller's input. Keep the real error
|
|
113
|
+
text; drop the generated-SQL context."""
|
|
114
|
+
idx = msg.find("\nLINE ")
|
|
115
|
+
return msg[:idx].rstrip() if idx != -1 else msg
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
_GUID = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _onelake_guid_hint(root_path: str) -> Optional[str]:
    """Build the "use GUID paths" workaround note for a friendly-name abfss path.

    ``delta_scan`` on OneLake can fail to enumerate a valid table's ``_delta_log``
    when the path uses friendly workspace/lakehouse names (duckdb-delta#307),
    while the GUID form reads fine. The note is produced only when it is useful.

    Args:
        root_path: The session root path to inspect.

    Returns:
        The hint text, or ``None`` when ``root_path`` is not an abfss path or when
        both its workspace part and its first path segment are already GUIDs.
    """
    if remote.is_abfss(root_path):
        workspace, _host, item_path = remote._parse_abfss(root_path)

        # The lakehouse is the first path segment, optionally carrying a
        # ".Lakehouse" suffix (matched case-insensitively) that is not part of the id.
        item = item_path.partition("/")[0] if item_path else ""
        suffix = ".lakehouse"
        if item.lower().endswith(suffix):
            item = item[: -len(suffix)]

        if not (_GUID.match(workspace) and _GUID.match(item)):
            return (
                "OneLake's delta_scan can fail to read a valid table's _delta_log when the abfss path uses "
                "friendly names — a known upstream issue (duckdb-delta#307). Until it's fixed, use the "
                "workspace and lakehouse GUIDs, e.g. "
                "abfss://<workspace-guid>@onelake.dfs.fabric.microsoft.com/<lakehouse-guid>/Tables"
            )
    return None
|
|
141
|
+
|
|
142
|
+
|
|
108
143
|
def _split_root_schema(path: str, schema: Optional[str]):
|
|
109
144
|
"""Normalize ``path`` into ``(root_path, schema)``.
|
|
110
145
|
|
|
@@ -227,10 +262,22 @@ class DuckSession:
|
|
|
227
262
|
|
|
228
263
|
def _register_view(self, schema: str, table: str):
    """Create or replace the ``schema.table`` view that reads a Delta table via ``delta_scan``.

    The table location is ``<root_path>/<schema>/<table>``, with trailing slashes
    on ``root_path`` removed first.

    Args:
        schema: Schema name; used as the view's schema and as a path segment.
        table: Table name; used as the view's name and as a path segment.

    Raises:
        RuntimeError: if building or executing the statement fails. The message
            names the table and path, carries the engine's error text without
            DuckDB's echo of the generated statement, and ends with the OneLake
            GUID hint when one applies to ``root_path``.
    """
    table_uri = "{}/{}/{}".format(self.root_path.rstrip("/"), schema, table)
    try:
        self.con.execute(
            "CREATE OR REPLACE VIEW {}.{} AS SELECT * FROM delta_scan('{}')".format(
                _qid(schema), _qid(table), _qlit(table_uri)
            )
        )
    except Exception as err:
        # Keep the engine's error text (that is the real signal), drop the echo of
        # the CREATE VIEW statement generated above, and say which table/path failed.
        advice = _onelake_guid_hint(self.root_path)
        message = "\n".join(
            (
                f"duckrun: could not read Delta table {schema}.{table} at '{table_uri}':",
                _strip_query_context(str(err)),
            )
        )
        if advice:
            message += f"\n\n{advice}"
        # `from None` suppresses the chained original so the SQL echo does not
        # reappear in tracebacks.
        raise RuntimeError(message) from None
|
|
234
281
|
|
|
235
282
|
def _set_search_path(self, schema: str):
|
|
236
283
|
try:
|
|
@@ -573,4 +620,5 @@ def connect(path: str, storage_options: Optional[Dict[str, str]] = None,
|
|
|
573
620
|
>>> conn.sql("SHOW TABLES").show()
|
|
574
621
|
>>> conn.sql("select * from orders").write.mode("overwrite").saveAsTable("orders_copy")
|
|
575
622
|
"""
|
|
623
|
+
check_runtime_versions() # fail loud if Fabric's stale duckdb/deltalake are still loaded
|
|
576
624
|
return DuckSession(path, storage_options, schema, compaction_threshold)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: duckrun
|
|
3
|
-
Version: 0.3.17.dev6
|
|
3
|
+
Version: 0.3.18
|
|
4
4
|
Summary: A dbt adapter that runs SQL in DuckDB and materializes to Delta Lake (delta_rs).
|
|
5
5
|
Author: mim
|
|
6
6
|
License: MIT
|
|
@@ -12,7 +12,7 @@ Description-Content-Type: text/markdown
|
|
|
12
12
|
License-File: LICENSE
|
|
13
13
|
Requires-Dist: dbt-duckdb>=1.8
|
|
14
14
|
Requires-Dist: dbt-core<2.0,>=1.8
|
|
15
|
-
Requires-Dist: duckdb>=1.5.4
|
|
15
|
+
Requires-Dist: duckdb<1.6.0,>=1.5.4
|
|
16
16
|
Requires-Dist: deltalake<1.5.1,>=1.5.0
|
|
17
17
|
Requires-Dist: requests
|
|
18
18
|
Provides-Extra: local
|
|
@@ -67,6 +67,21 @@ pip install duckrun
|
|
|
67
67
|
|
|
68
68
|
That single install pulls in `dbt-duckdb` (and therefore `duckdb`) plus `deltalake`.
|
|
69
69
|
|
|
70
|
+
### In a Microsoft Fabric Python notebook
|
|
71
|
+
|
|
72
|
+
duckrun needs `duckdb` ≥ 1.5.4 — the release where `delta_scan` gained its `version => N`
|
|
73
|
+
parameter, which duckrun uses for snapshot-pinned reads. Fabric notebooks ship a **stable**
|
|
74
|
+
`duckdb` release, which trails the newest one, so the `duckdb` already loaded in the kernel may
|
|
75
|
+
predate 1.5.4. Upgrade, then restart the Python kernel so the new version loads.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
!pip install duckrun --upgrade
|
|
79
|
+
notebookutils.session.restartPython()
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
If you skip the restart, duckrun fails loud at `connect()` (and on `dbt run`) and tells you to
|
|
83
|
+
restart — it won't quietly run on the older `duckdb`/`deltalake` still bound in the kernel.
|
|
84
|
+
|
|
70
85
|
## Configure your profile
|
|
71
86
|
|
|
72
87
|
```yaml
|
|
@@ -86,6 +101,14 @@ my_project:
|
|
|
86
101
|
Persisted models are written to `<root_path>/<schema>/<model>` (e.g.
|
|
87
102
|
`./warehouse/dbo/orders`), or to an explicit `config(location=...)`.
|
|
88
103
|
|
|
104
|
+
### OneLake: use GUID paths for now
|
|
105
|
+
|
|
106
|
+
Address OneLake tables by **workspace GUID + lakehouse GUID**, not friendly names —
|
|
107
|
+
`abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/...`. This
|
|
108
|
+
sidesteps an upstream `duckdb-delta` read bug ("No files in log segment") that is **already fixed
|
|
109
|
+
upstream but still rolling out to production OneLake**. Friendly-name paths will work again once
|
|
110
|
+
the fix finishes deploying.
|
|
111
|
+
|
|
89
112
|
### Fabric Lakehouse without a schema
|
|
90
113
|
|
|
91
114
|
A schema-less Lakehouse (tables straight under `Tables/`, no `Tables/<schema>/` grouping) is
|
|
@@ -95,7 +118,7 @@ let the schema fill that slot:
|
|
|
95
118
|
|
|
96
119
|
```yaml
|
|
97
120
|
schema: Tables
|
|
98
|
-
root_path: "abfss://<
|
|
121
|
+
root_path: "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>"
|
|
99
122
|
```
|
|
100
123
|
|
|
101
124
|
Since models are written to `<root_path>/<schema>/<model>`, this lands them at
|
|
@@ -309,7 +332,7 @@ unchanged since the call, else raises `CommitFailedError`.
|
|
|
309
332
|
|
|
310
333
|
```python
|
|
311
334
|
import duckrun
|
|
312
|
-
conn = duckrun.connect("abfss
|
|
335
|
+
conn = duckrun.connect("abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/dbo")
|
|
313
336
|
conn.sql("select * from orders").write.mode("overwrite").saveAsTable("orders_copy")
|
|
314
337
|
conn.table("orders_copy").show()
|
|
315
338
|
|
|
@@ -21,6 +21,7 @@ dbt/include/duckrun/macros/materializations/incremental.sql
|
|
|
21
21
|
dbt/include/duckrun/macros/materializations/snapshot.sql
|
|
22
22
|
dbt/include/duckrun/macros/materializations/table.sql
|
|
23
23
|
duckrun/__init__.py
|
|
24
|
+
duckrun/_runtime.py
|
|
24
25
|
duckrun/auth.py
|
|
25
26
|
duckrun/delta_table.py
|
|
26
27
|
duckrun/session.py
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "duckrun"
|
|
7
|
-
version = "0.3.17.dev6"
|
|
7
|
+
version = "0.3.18"
|
|
8
8
|
description = "A dbt adapter that runs SQL in DuckDB and materializes to Delta Lake (delta_rs)."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -21,18 +21,22 @@ dependencies = [
|
|
|
21
21
|
# fails at `from dbt.cli.main import dbtRunner`. Declared directly because we only depend on
|
|
22
22
|
# dbt-core transitively, so the ceiling has to live here to bite.
|
|
23
23
|
"dbt-core>=1.8,<2.0",
|
|
24
|
-
#
|
|
25
|
-
# extension supports `delta_scan('...', version => N)`
|
|
26
|
-
# read this project
|
|
27
|
-
# ONE Delta snapshot (Spark single-snapshot MERGE parity;
|
|
28
|
-
# _delta_core.sql and merge_delta's read_version).
|
|
29
|
-
#
|
|
30
|
-
#
|
|
24
|
+
# duckdb floor is 1.5.4 with a <1.6.0 cap (a floor, NOT an exact pin): 1.5.4 is the first
|
|
25
|
+
# stable build whose bundled duckdb-delta extension supports `delta_scan('...', version => N)`
|
|
26
|
+
# (duckdb-delta #312) — the version-pinned read this project relies on to make the incremental
|
|
27
|
+
# read and the write commit resolve at ONE Delta snapshot (Spark single-snapshot MERGE parity;
|
|
28
|
+
# see the staging-read pin in _delta_core.sql and merge_delta's read_version). Stable 1.5.x
|
|
29
|
+
# patches above the floor are fine. The <1.6.0 cap matters because this project resolves with
|
|
30
|
+
# pip --pre (to pick up dbt pre-releases) and --pre is global: an open floor would let pip pull
|
|
31
|
+
# an unstable duckdb *prerelease* (verified: "--pre duckdb>=1.5.4" resolves to 1.6.0.dev12).
|
|
32
|
+
# Per PEP 440, "<1.6.0" excludes 1.6.0 AND its prereleases (1.6.0.devN), so no dev build slips
|
|
33
|
+
# in. The earlier 1.5.2+ "No files in log segment" read regression is avoided by addressing
|
|
34
|
+
# OneLake tables via GUID (workspace_id/lakehouse_id) abfss paths, and is fixed upstream.
|
|
31
35
|
# deltalake floor stays 1.5.0 (not just a ceiling): 1.5.0 is the first release with MERGE
|
|
32
36
|
# disk-spill config (max_spill_size), which engine.merge_delta relies on to cap the merge's RAM
|
|
33
37
|
# and avoid OOM on large upserts; the matching <1.5.1 ceiling avoids the deltalake delta-log
|
|
34
38
|
# write-side regression, pinning exactly 1.5.0.
|
|
35
|
-
"duckdb>=1.5.4",
|
|
39
|
+
"duckdb>=1.5.4,<1.6.0",
|
|
36
40
|
"deltalake>=1.5.0,<1.5.1",
|
|
37
41
|
# The top-level connection API (duckrun.connect) discovers OneLake tables via the DFS REST
|
|
38
42
|
# API directly; requests is otherwise only a transitive dbt dependency.
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
version = "0.3.17.dev6"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/_delta_core.sql
RENAMED
|
File without changes
|
{duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/delta.sql
RENAMED
|
File without changes
|
{duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/incremental.sql
RENAMED
|
File without changes
|
{duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/snapshot.sql
RENAMED
|
File without changes
|
{duckrun-0.3.17.dev6 → duckrun-0.3.18}/dbt/include/duckrun/macros/materializations/table.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|