duckrun 0.3.17.dev2__tar.gz → 0.3.17.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {duckrun-0.3.17.dev2/duckrun.egg-info → duckrun-0.3.17.dev3}/PKG-INFO +1 -1
  2. duckrun-0.3.17.dev3/dbt/adapters/duckrun/__version__.py +1 -0
  3. duckrun-0.3.17.dev3/dbt/adapters/duckrun/delta_dml.py +297 -0
  4. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/adapters/duckrun/delta_plugin.py +37 -4
  5. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/adapters/duckrun/engine.py +31 -0
  6. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/adapters/duckrun/environment.py +43 -1
  7. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/adapters/duckrun/impl.py +85 -0
  8. duckrun-0.3.17.dev3/dbt/include/duckrun/macros/catalog.sql +122 -0
  9. duckrun-0.3.17.dev3/dbt/include/duckrun/macros/materializations/snapshot.sql +144 -0
  10. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/duckrun/session.py +25 -12
  11. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3/duckrun.egg-info}/PKG-INFO +1 -1
  12. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/duckrun.egg-info/SOURCES.txt +2 -0
  13. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/pyproject.toml +1 -1
  14. duckrun-0.3.17.dev2/dbt/adapters/duckrun/__version__.py +0 -1
  15. duckrun-0.3.17.dev2/dbt/include/duckrun/macros/catalog.sql +0 -59
  16. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/LICENSE +0 -0
  17. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/MANIFEST.in +0 -0
  18. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/README.md +0 -0
  19. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/adapters/duckrun/__init__.py +0 -0
  20. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/adapters/duckrun/credentials.py +0 -0
  21. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/adapters/duckrun/remote.py +0 -0
  22. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/adapters/duckrun/secret.py +0 -0
  23. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/include/duckrun/__init__.py +0 -0
  24. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/include/duckrun/dbt_project.yml +0 -0
  25. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/include/duckrun/macros/materializations/_delta_core.sql +0 -0
  26. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/include/duckrun/macros/materializations/delta.sql +0 -0
  27. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/include/duckrun/macros/materializations/incremental.sql +0 -0
  28. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/dbt/include/duckrun/macros/materializations/table.sql +0 -0
  29. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/duckrun/__init__.py +0 -0
  30. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/duckrun/auth.py +0 -0
  31. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/duckrun/delta_table.py +0 -0
  32. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/duckrun.egg-info/dependency_links.txt +0 -0
  33. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/duckrun.egg-info/requires.txt +0 -0
  34. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/duckrun.egg-info/top_level.txt +0 -0
  35. {duckrun-0.3.17.dev2 → duckrun-0.3.17.dev3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.3.17.dev2
3
+ Version: 0.3.17.dev3
4
4
  Summary: A dbt adapter that runs SQL in DuckDB and materializes to Delta Lake (delta_rs).
5
5
  Author: mim
6
6
  License: MIT
@@ -0,0 +1 @@
1
+ version = "0.3.17.dev3"
@@ -0,0 +1,297 @@
1
+ """Route raw SQL DML against duckrun-managed (Delta-backed) relations to delta_rs.
2
+
3
+ duckrun intercepts writes at the dbt *materialization* layer (a model/seed/snapshot goes through
4
+ the materialization macros -> store_relation -> delta_rs). But a duckrun relation is surfaced as a
5
+ read-only ``delta_scan`` view, so *raw* DML sent straight to the connection — ``delete from``,
6
+ ``update``, ``insert into ... select``, ``alter table ... add column``, ``create table ... as
7
+ select`` — lands on a view and fails ("Can only delete from base table"), or would create a native
8
+ DuckDB table that bypasses Delta entirely.
9
+
10
+ This module intercepts those statements (at the cursor, see environment.DuckrunCursorWrapper) and
11
+ applies them to the Delta table **via delta_rs only**, then refreshes the ``delta_scan`` view — so
12
+ nothing relies on a native, mutable DuckDB table, and every op works on local AND abfss/OneLake
13
+ stores (delta_rs carries ``storage_options``). ``create table ... as`` writes a new Delta table;
14
+ the mutate forms (delete/update/insert/alter) apply only when a Delta table already exists at the
15
+ target (otherwise the statement passes through — e.g. the test's native ``fact``/``seed``).
16
+
17
+ ``drop table`` unregisters the ``delta_scan`` view AND overwrites the table (via delta_rs) to a
18
+ one-column ``TOMBSTONE_COLUMN`` marker, which discovery recognizes and hides. It does NOT delete
19
+ data: delta_rs has no drop, and removing the Delta files would be a filesystem hack that fails on
20
+ object stores. The directory persists until a human purges it; a later ``create table ... as``
21
+ overwrites the tombstone with real data and the table is live again.
22
+
23
+ The seed loader's own SQL (``create table <t> (<col defs>)``, ``insert ... values``, ``COPY``) lands
24
+ on a native DuckDB table, not a Delta table: ``create table (<col defs>)`` doesn't match the
25
+ ``... as select`` form, and while ``insert ... values`` now *does* match a form here, the mutate
26
+ guard only applies it when a Delta table already exists at the target — the seed's native table has
27
+ none, so it falls through untouched. duckrun's own materializations emit ``create ... view`` (not
28
+ ``table``), so they pass through too.
29
+ """
30
+ import re
31
+ from typing import List, Optional, Tuple
32
+
33
+ from . import engine
34
+
35
+ # `drop table` tombstone: a dropped relation is overwritten (via delta_rs) to a table whose ONLY
36
+ # column is this marker, so (a) discovery recognizes it as dropped and hides it, and (b) anyone who
37
+ # opens the files sees an obviously-not-a-real-table schema rather than a plausible empty table. No
38
+ # data is deleted — the directory stays until a human purges it; a later `create table ... as`
39
+ # overwrites the marker schema with real data and the table is live again.
40
+ TOMBSTONE_COLUMN = "__duckrun_deleted__"
41
+
42
+
43
def _columns_are_tombstone(colnames) -> bool:
    """Return True when ``colnames`` is exactly the single drop-tombstone marker column.

    Each name is converted with ``str()`` and lower-cased before the comparison, so the check is
    case-insensitive with respect to the incoming names.
    """
    lowered = list(map(lambda name: str(name).lower(), colnames))
    return lowered == [TOMBSTONE_COLUMN]
45
+
46
+
47
def is_dropped(con, location: str, storage_options=None) -> bool:
    """Report whether the Delta table at ``location`` is a duckrun drop-tombstone.

    A tombstone is a table whose only column is the marker column. The schema is probed with a
    zero-row ``delta_scan`` query run on ``con``; ``storage_options`` is accepted for signature
    parity but is not used by the probe itself.

    Best-effort by design: any failure to open or scan the location is reported as
    "not a tombstone" (False) so that normal handling deals with it.
    """
    escaped = str(location).replace("'", "''")
    probe = f"select * from delta_scan('{escaped}') limit 0"
    try:
        result = con.execute(probe)
        names = [column[0] for column in result.description]
        return _columns_are_tombstone(names)
    except Exception:
        # Deliberate swallow: an unreadable location must never break discovery.
        return False
59
+
60
+ # --- statement matchers (leading-anchored, DOTALL so multi-line bodies match) ----------------
61
+ _CREATE_AS = re.compile(
62
+ r"\s*create\s+table\s+(?:if\s+not\s+exists\s+)?(?P<rel>.+?)\s+as\s+(?P<body>select\b.*)",
63
+ re.I | re.S,
64
+ )
65
+ _INSERT_SELECT = re.compile(
66
+ r"\s*insert\s+into\s+(?P<rel>.+?)\s+(?P<body>select\b.*)", re.I | re.S
67
+ )
68
+ _INSERT_VALUES = re.compile(
69
+ r"\s*insert\s+into\s+(?P<rel>.+?)\s*(?:\((?P<cols>[^)]*)\))?\s*values\s+(?P<body>\(.+)",
70
+ re.I | re.S,
71
+ )
72
+ _DELETE = re.compile(
73
+ r"\s*delete\s+from\s+(?P<rel>.+?)(?:\s+where\s+(?P<where>.+))?\s*;?\s*", re.I | re.S
74
+ )
75
+ _UPDATE = re.compile(
76
+ r"\s*update\s+(?P<rel>.+?)\s+set\s+(?P<set>.+?)(?:\s+where\s+(?P<where>.+?))?\s*;?\s*",
77
+ re.I | re.S,
78
+ )
79
+ _ALTER_ADD = re.compile(
80
+ r"\s*alter\s+table\s+(?P<rel>.+?)\s+add\s+column\s+(?P<col>\S+)\s+(?P<def>.+?)\s*;?\s*",
81
+ re.I | re.S,
82
+ )
83
+ _DROP = re.compile(
84
+ r"\s*drop\s+table\s+(?:if\s+exists\s+)?(?P<rel>[^\s;]+)\s*;?\s*", re.I | re.S
85
+ )
86
+
87
+
88
+ def _fullmatch(pattern, sql):
89
+ return pattern.fullmatch(sql.strip())
90
+
91
+
92
+ def _split_relation(rel: str) -> Tuple[Optional[str], Optional[str]]:
93
+ """`"db"."schema"."tbl"` / `schema.tbl` / `tbl` -> (schema, identifier), quotes stripped."""
94
+ parts = [p.strip().strip('"') for p in rel.strip().split(".")]
95
+ if not parts or not parts[-1]:
96
+ return None, None
97
+ identifier = parts[-1]
98
+ schema = parts[-2] if len(parts) >= 2 else None
99
+ return schema, identifier
100
+
101
+
102
+ def _split_top_level_commas(s: str) -> List[str]:
103
+ """Split on commas that aren't inside parentheses or quotes (so ``left(email, 3)`` stays whole)."""
104
+ out, depth, start, quote = [], 0, 0, None
105
+ for i, ch in enumerate(s):
106
+ if quote:
107
+ if ch == quote:
108
+ quote = None
109
+ elif ch in ("'", '"'):
110
+ quote = ch
111
+ elif ch in "([":
112
+ depth += 1
113
+ elif ch in ")]":
114
+ depth -= 1
115
+ elif ch == "," and depth == 0:
116
+ out.append(s[start:i])
117
+ start = i + 1
118
+ out.append(s[start:])
119
+ return [p.strip() for p in out if p.strip()]
120
+
121
+
122
+ class _DeltaDML:
123
+ """One attempt to handle a statement; ``run()`` returns True if it was applied to Delta."""
124
+
125
+ def __init__(self, cursor, root_path: str, storage_options, default_schema=None):
126
+ self.cursor = cursor
127
+ self.root_path = root_path.rstrip("/")
128
+ self.so = storage_options
129
+ self.default_schema = default_schema
130
+
131
+ def _loc(self, schema: str, identifier: str) -> str:
132
+ return f"{self.root_path}/{schema}/{identifier}"
133
+
134
+ def _resolve(self, rel: str):
135
+ """(schema, identifier, location) for ``rel``, falling back to default_schema for an
136
+ unqualified name (the connection API relies on a current database). (None, None, None) when
137
+ no schema can be determined."""
138
+ schema, identifier = _split_relation(rel)
139
+ schema = schema or self.default_schema
140
+ if not schema or not identifier:
141
+ return None, None, None
142
+ return schema, identifier, self._loc(schema, identifier)
143
+
144
+ def _exists(self, loc: str) -> bool:
145
+ return engine.table_exists(loc, self.so)
146
+
147
+ def _refresh_view(self, rel: str, schema: str, loc: str) -> None:
148
+ loc_sql = loc.replace("'", "''")
149
+ self.cursor.execute(f'create schema if not exists "{schema}"')
150
+ self.cursor.execute(
151
+ f"create or replace view {rel} as select * from delta_scan('{loc_sql}')"
152
+ )
153
+
154
+ def try_handle(self, sql: str) -> bool:
155
+ m = _fullmatch(_CREATE_AS, sql)
156
+ if m and "__duckrun" not in m.group("rel"):
157
+ return self._create_as(m)
158
+ m = _fullmatch(_INSERT_SELECT, sql)
159
+ if m:
160
+ return self._mutate(m, self._insert_select)
161
+ m = _fullmatch(_INSERT_VALUES, sql)
162
+ if m:
163
+ return self._mutate(m, self._insert_values)
164
+ m = _fullmatch(_DELETE, sql)
165
+ if m:
166
+ return self._mutate(m, self._delete)
167
+ m = _fullmatch(_UPDATE, sql)
168
+ if m:
169
+ return self._mutate(m, self._update)
170
+ m = _fullmatch(_ALTER_ADD, sql)
171
+ if m:
172
+ return self._mutate(m, self._alter_add)
173
+ m = _fullmatch(_DROP, sql)
174
+ if m:
175
+ return self._drop(m)
176
+ return False
177
+
178
+ # -- create table <rel> as <select>: always materialize as a duckrun Delta table -----------
179
+ def _create_as(self, m) -> bool:
180
+ rel = m.group("rel").strip()
181
+ schema, identifier, loc = self._resolve(rel)
182
+ if not loc:
183
+ return False
184
+ data = self.cursor.sql(m.group("body"))
185
+ # overwrite_schema so this replaces a prior table (or a drop-tombstone) wholesale — a live
186
+ # table is recreated with the real schema, clearing any tombstone marker.
187
+ engine.write_delta(loc, data, "overwrite", overwrite_schema=True, storage_options=self.so)
188
+ self._refresh_view(rel, schema, loc)
189
+ return True
190
+
191
+ # -- forms that only apply when a Delta table already exists at the target ------------------
192
+ def _mutate(self, m, op) -> bool:
193
+ rel = m.group("rel").strip()
194
+ schema, identifier, loc = self._resolve(rel)
195
+ if not loc or not self._exists(loc):
196
+ return False # native relation (e.g. the test's `fact`/`seed`) -> let DuckDB handle it
197
+ op(m, rel, schema, loc)
198
+ self._refresh_view(rel, schema, loc)
199
+ return True
200
+
201
+ def _delete(self, m, rel, schema, loc) -> None:
202
+ where = m.group("where")
203
+ engine._delta_table(loc, self.so).delete(predicate=where.strip() if where else None)
204
+
205
+ def _update(self, m, rel, schema, loc) -> None:
206
+ updates = {}
207
+ for assign in _split_top_level_commas(m.group("set")):
208
+ col, _, expr = assign.partition("=")
209
+ updates[col.strip().strip('"')] = expr.strip()
210
+ where = m.group("where")
211
+ engine._delta_table(loc, self.so).update(
212
+ updates=updates, predicate=where.strip() if where else None
213
+ )
214
+
215
+ def _insert_select(self, m, rel, schema, loc) -> None:
216
+ data = self.cursor.sql(m.group("body"))
217
+ engine.write_delta(loc, data, "append", storage_options=self.so)
218
+
219
+ def _insert_values(self, m, rel, schema, loc) -> None:
220
+ # `insert into <rel> [(<cols>)] values (...)`: evaluate the VALUES tuples through DuckDB and
221
+ # project them onto the FULL target Delta schema (so append schemas match) — supplied columns
222
+ # come from the literals, any unsupplied target column is filled with a typed NULL.
223
+ loc_sql = loc.replace("'", "''")
224
+ template = self.cursor.sql(f"select * from delta_scan('{loc_sql}') limit 0")
225
+ target_cols = list(template.columns)
226
+ target_types = [str(t) for t in template.types]
227
+ by_lower = {c.lower(): c for c in target_cols}
228
+
229
+ cols = m.group("cols")
230
+ if cols: # explicit column list → canonicalize to the target's casing
231
+ provided = [by_lower.get(c.strip().strip('"').lower(), c.strip().strip('"'))
232
+ for c in cols.split(",")]
233
+ else: # positional → the literals supply every target column, in order
234
+ provided = target_cols
235
+ provided_set = {c for c in provided}
236
+
237
+ quoted = ", ".join('"' + c + '"' for c in provided)
238
+ inner = f"(values {m.group('body')}) v({quoted})"
239
+ # Cast every projected column to the TARGET column's type — both supplied values and the
240
+ # typed NULLs — so the appended Arrow schema matches the table exactly. This is also what a
241
+ # plain SQL INSERT does (a literal is coerced to the column type), and it stops a literal
242
+ # whose inferred type is wider than the column (e.g. a ::timestamp into a DATE column) from
243
+ # forcing delta_rs to add a new writer feature on append (TimestampWithoutTimezone).
244
+ exprs = [
245
+ f'cast(v."{col}" as {typ}) as "{col}"' if col in provided_set
246
+ else f'cast(null as {typ}) as "{col}"'
247
+ for col, typ in zip(target_cols, target_types)
248
+ ]
249
+ data = self.cursor.sql(f"select {', '.join(exprs)} from {inner}")
250
+ engine.write_delta(loc, data, "append", storage_options=self.so)
251
+
252
+ def _alter_add(self, m, rel, schema, loc) -> None:
253
+ col = m.group("col").strip().strip('"')
254
+ # Keep only the column type (drop any DEFAULT/NULL clause); add it as an all-null column by
255
+ # rewriting the table with overwrite_schema so delta_rs accepts the widened schema.
256
+ coltype = re.split(r"\s+default\b|\s+null\b", m.group("def"), flags=re.I)[0].strip() or "VARCHAR"
257
+ loc_sql = loc.replace("'", "''")
258
+ data = self.cursor.sql(
259
+ f'select *, cast(null as {coltype}) as "{col}" from delta_scan(\'{loc_sql}\')'
260
+ )
261
+ engine.write_delta(loc, data, "overwrite", overwrite_schema=True, storage_options=self.so)
262
+
263
+ def _drop(self, m) -> bool:
264
+ # `drop table` on a duckrun relation: unregister the delta_scan view AND, via delta_rs,
265
+ # overwrite the table to a one-column tombstone (TOMBSTONE_COLUMN) so a later glob discovery
266
+ # hides it. NO data is deleted — delta_rs has no drop, and removing the Delta files would be
267
+ # a filesystem hack that fails on object stores. The directory persists until a human purges
268
+ # it; a later `create table ... as` overwrites the tombstone with real data. If the relation
269
+ # isn't a duckrun-managed Delta table, fall through and let DuckDB drop the native table.
270
+ rel = m.group("rel").strip()
271
+ schema, identifier, loc = self._resolve(rel)
272
+ if not loc or not self._exists(loc):
273
+ return False
274
+ tombstone = self.cursor.sql(f"select true as {TOMBSTONE_COLUMN}")
275
+ engine.write_delta(loc, tombstone, "overwrite", overwrite_schema=True, storage_options=self.so)
276
+ self.cursor.execute(f"drop view if exists {rel}")
277
+ return True
278
+
279
+
280
def handle(cursor, root_path, storage_options, sql: str, default_schema=None) -> bool:
    """Route ``sql`` to Delta when it is a DML form aimed at a duckrun-managed relation.

    ``cursor`` evaluates any SELECT body and (re)creates the ``delta_scan`` view;
    ``storage_options`` is handed on to the delta_rs calls; ``default_schema`` resolves an
    unqualified table name (None when names are always fully qualified).

    Returns True if the statement was handled (the caller must NOT also run it on DuckDB) and
    False to pass it through: no ``root_path``, a statement that does not start with one of the
    candidate verbs, an unrecognized form, or a mutate form whose target is not a Delta table.
    """
    candidate_verbs = ("delete", "update", "insert", "create", "alter", "drop")
    if root_path:
        # Cheap pre-filter on the leading keyword before any regex work.
        leading = sql.lstrip()[:7].lower()
        if leading.startswith(candidate_verbs):
            dml = _DeltaDML(cursor, root_path, storage_options, default_schema)
            return dml.try_handle(sql)
    return False
@@ -6,6 +6,7 @@ connection (``configure_connection``), and on ``store()`` hands the model relati
6
6
  straight to delta_rs. DuckDB relations expose the Arrow C-stream interface, which
7
7
  deltalake 1.x consumes directly, so there is no pyarrow dependency.
8
8
  """
9
+ import re
9
10
  from typing import Any, Optional
10
11
 
11
12
  from dbt.adapters.duckdb.plugins import BasePlugin
@@ -155,11 +156,22 @@ class Plugin(BasePlugin):
155
156
  # Table-like (non-incremental) models always overwrite. Incremental models
156
157
  # overwrite on first run / full-refresh, then apply the incremental strategy.
157
158
  if not incremental or full_refresh or not exists:
159
+ # This branch is a CREATE OR REPLACE: a table model, a --full-refresh, or a first run.
160
+ # When we are REPLACING an existing table (exists), allow delta_rs to replace the schema
161
+ # wholesale (schema_mode="overwrite") — the model SQL defines the new schema, exactly as
162
+ # `CREATE OR REPLACE TABLE` does on every other warehouse. Without it, delta_rs's strict
163
+ # overwrite keeps the OLD schema/protocol and so can't change a column's type or write a
164
+ # column needing a new writer feature the old table lacks (e.g. retyping to ::timestamp /
165
+ # timestampNtz). This is scoped to the full-rebuild replace ONLY — NOT append, safeappend,
166
+ # merge, or microbatch, which must keep their strict, schema-stable writes. A fresh create
167
+ # (not exists) doesn't need it. A user's explicit merge_schema still wins.
168
+ overwrite_schema = exists and not merge_schema
158
169
  with engine.mem_profile("overwrite", con=cur):
159
170
  engine.write_delta(
160
171
  path, data, "overwrite",
161
172
  partition_by=partition_by,
162
173
  merge_schema=merge_schema,
174
+ overwrite_schema=overwrite_schema,
163
175
  storage_options=storage_options,
164
176
  compaction_threshold=self._compaction_threshold,
165
177
  )
@@ -201,7 +213,7 @@ class Plugin(BasePlugin):
201
213
  insert_only=(strategy == "insert"),
202
214
  update_columns=cfg.get("merge_update_columns"),
203
215
  exclude_columns=cfg.get("merge_exclude_columns"),
204
- predicates=self._merge_predicates(cfg),
216
+ predicates=self._merge_predicates(cfg, data.columns),
205
217
  update_condition=self._rewrite_merge_aliases(cfg.get("merge_update_condition")),
206
218
  insert_condition=self._rewrite_merge_aliases(cfg.get("merge_insert_condition")),
207
219
  merge_schema=evolve_schema,
@@ -470,16 +482,37 @@ class Plugin(BasePlugin):
470
482
  return None
471
483
  return str(expr).replace("DBT_INTERNAL_DEST", "target").replace("DBT_INTERNAL_SOURCE", "source")
472
484
 
485
+ @staticmethod
486
+ def _qualify_predicate(expr, columns):
487
+ """Prefix bare references to known target columns with ``target.``.
488
+
489
+ duckrun folds ``incremental_predicates`` into the merge condition
490
+ (``target.k = source.k AND <predicate>``). A bare column there (e.g. ``id != 2``) exists
491
+ on BOTH the source and target, so delta_rs rejects it as an ambiguous reference. dbt's
492
+ ``incremental_predicates`` constrain the existing/target rows (the delete+insert delete, the
493
+ merge ON), so we qualify bare column tokens to ``target.``. Only exact column-name tokens
494
+ that aren't already qualified (preceded by ``.``) or quoted/literal are rewritten — literals
495
+ and functions (e.g. ``current_date``, which is not a column) are left untouched."""
496
+ if not expr or not columns:
497
+ return expr
498
+ # Longest names first so a column that's a prefix of another isn't partially matched.
499
+ for col in sorted({str(c) for c in columns}, key=len, reverse=True):
500
+ # whole-word col, not preceded by '.', a word char, or a quote (already qualified/quoted).
501
+ pattern = re.compile(r'(?<![.\w"\'])' + re.escape(col) + r'\b', re.I)
502
+ expr = pattern.sub(lambda m: "target." + m.group(0), expr)
503
+ return expr
504
+
473
505
  @classmethod
474
- def _merge_predicates(cls, cfg: dict):
506
+ def _merge_predicates(cls, cfg: dict, columns=None):
475
507
  """dbt ``incremental_predicates`` (or ``predicates``), with dbt's standard merge
476
- aliases rewritten to the ones delta_rs uses here."""
508
+ aliases rewritten to the ones delta_rs uses here and bare column refs qualified to
509
+ ``target.`` (see ``_qualify_predicate``)."""
477
510
  preds = cfg.get("incremental_predicates") or cfg.get("predicates")
478
511
  if not preds:
479
512
  return None
480
513
  if isinstance(preds, str):
481
514
  preds = [preds]
482
- return [cls._rewrite_merge_aliases(p) for p in preds]
515
+ return [cls._qualify_predicate(cls._rewrite_merge_aliases(p), columns) for p in preds]
483
516
 
484
517
  @staticmethod
485
518
  def _resolve_schema_change(on_schema_change, path, data, storage_options) -> bool:
@@ -536,6 +536,37 @@ def table_exists(path: str, storage_options: Optional[Dict[str, str]] = None) ->
536
536
  return False
537
537
 
538
538
 
539
def delta_stats(cur, path: str, storage_options: Optional[Dict[str, str]] = None):
    """Cheap table statistics for ``dbt docs generate``, taken from the Delta **log**.

    The per-file add actions (``DeltaTable.get_add_actions()``) expose ``num_records``,
    ``size_bytes`` and ``modification_time``. Rows and bytes are summed and the latest
    modification time is taken, so no data file is opened. The aggregation is run by the DuckDB
    cursor ``cur`` through a replacement scan over the add-actions table — no pyarrow dependency.

    Returns ``{"num_rows", "bytes", "last_modified"}`` (``last_modified`` in epoch milliseconds,
    or None when there is no value), or ``None`` on ANY failure (a drop-tombstone, a missing
    table, an unreachable or credential-less remote store). Best-effort by design: a catalog
    without stats is fine, but a docs build must never break.
    """
    query = (
        "select coalesce(sum(num_records), 0)::bigint, "
        "coalesce(sum(size_bytes), 0)::bigint, "
        "max(modification_time)::bigint from add_actions"
    )
    try:
        # The query refers to this local by NAME (replacement scan), so it must stay a local
        # called `add_actions` in the same frame as the cur.sql() call.
        add_actions = _delta_table(path, storage_options).get_add_actions()  # noqa: F841 (replacement scan)
        row = cur.sql(query).fetchone()
    except Exception as exc:  # best-effort: docs stats must never fail catalog generation
        logger.debug(f"duckrun: no Delta stats for {path!r}: {exc}")
        return None
    if row is None:
        return None
    total_rows, total_bytes, latest_mtime = row[0], row[1], row[2]
    stats = {"num_rows": int(total_rows), "bytes": int(total_bytes)}
    stats["last_modified"] = None if latest_mtime is None else int(latest_mtime)
    return stats
568
+
569
+
539
570
  # Delta column-metadata key under which we stash a dbt column description, and the dollar-quote
540
571
  # label used to embed arbitrary comment text (newlines, quotes, dollar signs) in COMMENT ON SQL.
541
572
  _DELTA_COMMENT_KEY = "comment"
@@ -12,10 +12,52 @@ process. We do the same for plugin sources here: instead of registering a Python
12
12
  ``CREATE OR REPLACE VIEW <source> AS <scan sql>``. No pyarrow, no copying the source into a table,
13
13
  and no dependence on dbt-duckdb's per-cursor relation re-registration.
14
14
  """
15
- from dbt.adapters.duckdb.environments.local import LocalEnvironment
15
+ from dbt.adapters.duckdb.environments.local import (
16
+ DuckDBConnectionWrapper,
17
+ DuckDBCursorWrapper,
18
+ LocalEnvironment,
19
+ )
20
+
21
+ from . import delta_dml
22
+
23
+
24
class DuckrunCursorWrapper(DuckDBCursorWrapper):
    """Cursor wrapper that sends raw DML on duckrun-managed (Delta-backed) relations to delta_rs
    instead of running it on the read-only ``delta_scan`` view.

    Statements without bindings are first offered to ``delta_dml.handle``. Anything it declines
    — non-matching statements, DML against native relations — and every parameterized statement
    (e.g. the seed loader's ``insert ... values (?)``) is executed on DuckDB unchanged.
    """

    def __init__(self, cursor, credentials):
        """Wrap ``cursor`` and keep ``credentials`` for the root path / storage options lookup."""
        super().__init__(cursor)
        self._duckrun_creds = credentials

    def execute(self, sql, bindings=None):
        """Execute ``sql``, diverting it to Delta when ``delta_dml.handle`` accepts it.

        Returns the underlying cursor when the statement was applied to Delta, otherwise whatever
        the parent wrapper's ``execute`` returns.
        """
        if bindings is not None:
            # Parameterized statements are never intercepted.
            return super().execute(sql, bindings)
        credentials = self._duckrun_creds
        applied = delta_dml.handle(
            self._cursor,
            getattr(credentials, "root_path", None),
            getattr(credentials, "storage_options", None),
            sql,
        )
        if applied:
            return self._cursor  # applied to Delta; nothing to run on DuckDB
        return super().execute(sql, bindings)
16
50
 
17
51
 
18
52
  class DuckrunEnvironment(LocalEnvironment):
53
def handle(self):
    """Return the parent environment's handle with its cursor wrapper replaced by ours.

    When the parent returns a ``DuckDBConnectionWrapper``, its cursor wrapper is swapped for a
    ``DuckrunCursorWrapper`` around the same underlying cursor, so raw DML on Delta relations is
    intercepted on every cursor obtained from the handle. Any other handle type is returned
    untouched.
    """
    wrapper = super().handle()
    if not isinstance(wrapper, DuckDBConnectionWrapper):
        return wrapper
    raw_cursor = wrapper._cursor._cursor
    wrapper._cursor = DuckrunCursorWrapper(raw_cursor, self.creds)
    return wrapper
60
+
19
61
  def load_source(self, plugin_name: str, source_config):
20
62
  plugin = self._plugins.get(plugin_name)
21
63
  # Only special-case the duckrun plugin (it knows how to turn a source into scan SQL).
@@ -13,6 +13,7 @@ from dbt.adapters.events.logging import AdapterLogger
13
13
  from dbt.adapters.duckdb.connections import DuckDBConnectionManager
14
14
  from dbt.adapters.duckdb.impl import DuckDBAdapter
15
15
 
16
+ from dbt.adapters.duckrun import delta_dml
16
17
  from dbt.adapters.duckrun import remote
17
18
  from dbt.adapters.duckrun import secret
18
19
  from dbt.adapters.duckrun.credentials import DuckrunCredentials
@@ -248,6 +249,22 @@ class DuckrunAdapter(DuckDBAdapter):
248
249
  if not discovered:
249
250
  return in_memory
250
251
 
252
+ # Hide drop-tombstones: a `drop table` overwrites the table to a one-column marker (no data
253
+ # deleted). Such a table must not surface as a relation. Check before registering.
254
+ root_path = getattr(self.config.credentials, "root_path", "") or ""
255
+ so = getattr(self.config.credentials, "storage_options", None)
256
+ cur = self._cursor()
257
+ live = []
258
+ for rel in discovered:
259
+ loc = (root_path.rstrip("/") + "/" + str(rel.schema).strip('"')
260
+ + "/" + str(rel.identifier).strip('"'))
261
+ if delta_dml.is_dropped(cur, loc, so):
262
+ continue
263
+ live.append(rel)
264
+ discovered = live
265
+ if not discovered:
266
+ return in_memory
267
+
251
268
  # Physically register each discovered Delta table as a delta_scan view so read-only
252
269
  # commands (dbt test/show/docs) can query models without a prior in-process run.
253
270
  for rel in discovered:
@@ -268,3 +285,71 @@ class DuckrunAdapter(DuckDBAdapter):
268
285
  ]
269
286
  merged.extend(discovered)
270
287
  return merged
288
+
289
+ # --- dbt docs: table stats from the Delta log -------------------------------------------------
290
+ # The stock catalog query (duckrun__get_catalog) emits only column metadata, so dbt-docs shows an
291
+ # empty Stats panel (issue #3). dbt assembles the panel from columns named
292
+ # stats:<key>:{label,value,description,include}; we enrich the catalog agate table with those,
293
+ # sourced from each relation's Delta log (engine.delta_stats — no data scan). Done in Python here
294
+ # rather than in SQL because byte size / last-modified live in the Delta log, not DuckDB metadata.
295
+ _STATS_SPEC = (
296
+ ("num_rows", "Row Count", "Number of rows in the table"),
297
+ ("bytes", "Approximate Size", "Approximate size of the table on disk (bytes)"),
298
+ ("last_modified", "Last Modified", "Time of the most recent Delta commit (UTC)"),
299
+ )
300
+
301
def get_catalog(self, *args, **kwargs):
    """Build the catalog via the parent adapter, then append Delta-log stats columns.

    Returns the same ``(table, exceptions)`` pair as the parent, with the table passed through
    ``_with_delta_stats``.
    """
    catalog, errors = super().get_catalog(*args, **kwargs)
    enriched = self._with_delta_stats(catalog)
    return enriched, errors
304
+
305
def get_catalog_by_relations(self, *args, **kwargs):
    """Build the per-relation catalog via the parent adapter, then append Delta-log stats columns.

    Returns the same ``(table, exceptions)`` pair as the parent, with the table passed through
    ``_with_delta_stats``.
    """
    catalog, errors = super().get_catalog_by_relations(*args, **kwargs)
    enriched = self._with_delta_stats(catalog)
    return enriched, errors
308
+
309
+ def _with_delta_stats(self, table):
310
+ """Return ``table`` with stats:* columns appended, sourced per-relation from the Delta log.
311
+
312
+ A relation with no Delta table at ``root_path/schema/name`` (a native ``view``, a
313
+ drop-tombstone) gets ``include=False`` stats, so dbt leaves it statless. Best-effort: if
314
+ anything goes wrong the original table is returned unchanged — docs must never break.
315
+ """
316
+ from datetime import datetime, timezone
317
+ from dbt_common.clients.agate_helper import table_from_data_flat
318
+ from . import engine
319
+
320
+ root_path = getattr(self.config.credentials, "root_path", "") or ""
321
+ if not root_path or len(table.rows) == 0:
322
+ return table
323
+ so = getattr(self.config.credentials, "storage_options", None)
324
+ cur = self._cursor()
325
+
326
+ cache = {}
327
+
328
+ def stats_for(schema, name):
329
+ key = (schema, name)
330
+ if key not in cache:
331
+ loc = (root_path.rstrip("/") + "/" + str(schema).strip('"')
332
+ + "/" + str(name).strip('"'))
333
+ cache[key] = (None if delta_dml.is_dropped(cur, loc, so)
334
+ else engine.delta_stats(cur, loc, so))
335
+ return cache[key]
336
+
337
+ cols = list(table.column_names)
338
+ stat_cols = [f"stats:{k}:{p}" for k, _, _ in self._STATS_SPEC
339
+ for p in ("label", "value", "description", "include")]
340
+ rows = []
341
+ for r in table.rows:
342
+ d = dict(zip(cols, r))
343
+ st = stats_for(d.get("table_schema"), d.get("table_name"))
344
+ for k, label, desc in self._STATS_SPEC:
345
+ present = st is not None and st.get(k) is not None
346
+ if k == "last_modified" and present:
347
+ val = datetime.fromtimestamp(st[k] / 1000, tz=timezone.utc).isoformat()
348
+ else:
349
+ val = st.get(k) if present else None
350
+ d[f"stats:{k}:label"] = label
351
+ d[f"stats:{k}:value"] = val
352
+ d[f"stats:{k}:description"] = desc
353
+ d[f"stats:{k}:include"] = bool(present)
354
+ rows.append(d)
355
+ return table_from_data_flat(rows, cols + stat_cols)
@@ -0,0 +1,122 @@
1
+ {#-- duckrun catalog: report Delta-backed relations as BASE TABLE.
2
+
3
+ Every duckrun model is physically a DuckDB *view* over `delta_scan('<location>')`, so the stock
4
+ dbt-duckdb catalog (duckdb_views() -> 'VIEW') reports them as views — which is dishonest: they are
5
+ Delta *tables*, and `dbt docs generate` / is_incremental() treat them as tables. We override the
6
+ catalog so a view whose definition reads from `delta_scan(...)` is reported as `BASE TABLE`, while
7
+ genuine `view`-materialized models (no delta_scan) stay `VIEW`. Comments (table + column) come
8
+ through unchanged from WS4's COMMENT ON, which get_catalog already reads.
9
+
10
+ Stats are intentionally not synthesized here: the duckrun conformance catalog fixtures use
11
+ `no_stats()`, so row/byte counts from the Delta log aren't required to pass — and computing them
12
+ per relation would re-open every table during docs generate. (If stats are wanted later, pull
13
+ num_records / size_bytes from DeltaTable.get_add_actions and cache per build.)
14
+ #}
15
{% macro duckrun__get_catalog(information_schema, schemas) -%}
  {#-- Build the catalog (one row per column) for every relation in `schemas`.
       Returns the agate table produced by the 'catalog' statement.
       NOTE(review): `database` is not set inside this macro; it presumably resolves from
       dbt's rendering context (the target database) — confirm. --#}
  {%- call statement('catalog', fetch_result=True) -%}
    with relations AS (
      select
        t.table_name
        , t.database_name
        , t.schema_name
        , 'BASE TABLE' as table_type
        , t.comment as table_comment
      from duckdb_tables() t
      WHERE t.database_name = '{{ database }}'
      UNION ALL
      SELECT v.view_name as table_name
      , v.database_name
      , v.schema_name
      -- A delta_scan view is a Delta table surfaced for reads; report it as a table, not a view.
      , case when v.sql ilike '%delta_scan(%' then 'BASE TABLE' else 'VIEW' end as table_type
      , v.comment as table_comment
      from duckdb_views() v
      WHERE v.database_name = '{{ database }}'
    )
    select
      '{{ database }}' as table_database,
      r.schema_name as table_schema,
      r.table_name,
      r.table_type,
      r.table_comment,
      c.column_name,
      c.column_index as column_index,
      c.data_type as column_type,
      c.comment as column_comment,
      NULL as table_owner
    -- join on database too: an attached DB can hold a same-named table in the same schema, and
    -- without this the columns of that shadow relation would bleed into the result.
    FROM relations r JOIN duckdb_columns() c ON r.database_name = c.database_name AND r.schema_name = c.schema_name AND r.table_name = c.table_name
    WHERE (
      -- `false` seeds the OR-chain so an empty schema list still renders valid SQL (no rows).
      false
      {%- for schema in schemas %}
      {#-- Double single quotes so a schema name cannot terminate the string literal. --#}
      or upper(r.schema_name) = upper('{{ schema | replace("'", "''") }}')
      {%- endfor %}
    )
    ORDER BY
      r.schema_name,
      r.table_name,
      c.column_index
  {%- endcall -%}
  {{ return(load_result('catalog').table) }}
{%- endmacro %}
62
+
63
+
64
+ {#-- duckrun catalog, scoped to a specific set of relations (the `get_catalog_relations`
65
+ capability dbt uses when it can ask for just the relations it cares about — see
66
+ BaseAdapter._get_one_catalog_by_relations). Same delta_scan-view → BASE TABLE rule as
67
+ duckrun__get_catalog above; the only difference is the WHERE clause filters to the passed
68
+ relations (by schema, and by identifier when one is given) instead of to whole schemas.
69
+ Without this, dbt falls back to default__get_catalog_relations, which just raises
70
+ "not implemented for duckrun". #}
71
{% macro duckrun__get_catalog_relations(information_schema, relations) -%}
  {#-- Build the catalog (one row per column) for just the passed `relations`.
       Each relation filters by schema, and also by identifier when it has one.
       Returns the agate table produced by the 'catalog' statement. --#}
  {%- set database = information_schema.database -%}
  {%- call statement('catalog', fetch_result=True) -%}
    with relations AS (
      select
        t.table_name
        , t.database_name
        , t.schema_name
        , 'BASE TABLE' as table_type
        , t.comment as table_comment
      from duckdb_tables() t
      WHERE t.database_name = '{{ database }}'
      UNION ALL
      SELECT v.view_name as table_name
      , v.database_name
      , v.schema_name
      -- A delta_scan view is a Delta table surfaced for reads; report it as a table, not a view.
      , case when v.sql ilike '%delta_scan(%' then 'BASE TABLE' else 'VIEW' end as table_type
      , v.comment as table_comment
      from duckdb_views() v
      WHERE v.database_name = '{{ database }}'
    )
    select
      '{{ database }}' as table_database,
      r.schema_name as table_schema,
      r.table_name,
      r.table_type,
      r.table_comment,
      c.column_name,
      c.column_index as column_index,
      c.data_type as column_type,
      c.comment as column_comment,
      NULL as table_owner
    -- join on database too: an attached DB can hold a same-named table in the same schema, and
    -- without this the columns of that shadow relation would bleed into the result.
    FROM relations r JOIN duckdb_columns() c ON r.database_name = c.database_name AND r.schema_name = c.schema_name AND r.table_name = c.table_name
    WHERE (
      -- `false` seeds the OR-chain so an empty relation list still renders valid SQL (no rows).
      false
      {%- for relation in relations %}
      or (
        {#-- Double single quotes so a name cannot terminate the string literal. --#}
        upper(r.schema_name) = upper('{{ relation.schema | replace("'", "''") }}')
        {%- if relation.identifier %}
        and upper(r.table_name) = upper('{{ relation.identifier | replace("'", "''") }}')
        {%- endif %}
      )
      {%- endfor %}
    )
    ORDER BY
      r.schema_name,
      r.table_name,
      c.column_index
  {%- endcall -%}
  {{ return(load_result('catalog').table) }}
{%- endmacro %}
@@ -0,0 +1,144 @@
1
+ {#
2
+ dbt `snapshot` materialization, backed by Delta Lake.
3
+
4
+ Why duckrun needs its own: dbt's default snapshot materialization runs `create table` /
5
+ `merge` as SQL DDL/DML against the warehouse. On duckrun the warehouse is Delta + an
6
+ in-memory DuckDB session, so the default snapshot lands only in the in-memory catalog and is
7
+ LOST across processes — a snapshot appears to work within one `dbt` invocation but never
8
+ persists (real SCD2 is cross-invocation). This materialization persists the snapshot to Delta
9
+ via the same store path the table/incremental materializations use.
10
+
11
+ How it maps onto duckrun's supported merge (no SQL MERGE, no merge_clauses):
12
+
13
+ * First run / --full-refresh: `build_snapshot_table` SELECT -> overwrite the Delta table.
14
+ * Subsequent runs: `snapshot_staging_table` (dbt's own SCD2 change detection) produces the
15
+ 'insert' rows (new versions, fresh dbt_scd_id) and the 'update'/'delete' rows (close the
16
+ open version, carry the new dbt_valid_to under the *existing* dbt_scd_id). We project away
17
+ the dbt_change_type / dbt_unique_key helper columns and MERGE on dbt_scd_id with
18
+ merge_update_columns=[dbt_valid_to]:
19
+ - close rows -> matched -> update dbt_valid_to (only that column)
20
+ - insert rows -> not matched -> insert the new version
21
+ The change_type partition is implicit: closes share the open version's scd_id (so they
22
+ match), inserts get a brand-new scd_id (so they don't). This reproduces dbt's
23
+ default__snapshot_merge_sql exactly with the merge controls delta_rs can express.
24
+ #}
25
{% materialization snapshot, adapter='duckrun' %}

  {%- set strategy_name = config.get('strategy') -%}
  {%- set unique_key = config.get('unique_key') -%}

  {#-- Target/tmp relations and the Delta location, as returned by duckrun__delta_paths(). --#}
  {%- set p = duckrun__delta_paths() -%}
  {%- set target_relation = p['target'] -%}
  {%- set tmp_relation = p['tmp'] -%}
  {%- set location = p['location'] -%}
  {#-- Single quotes doubled so the location can sit inside a SQL string literal. --#}
  {%- set _loc_sql = location | replace("'", "''") -%}

  {#-- Version/existence of the Delta table, captured before the model reads anything, so the
       merge can pin OCC to it (single snapshot for the staging read and the merge commit). --#}
  {%- set read_version = adapter.delta_version(location) -%}
  {%- set exists = adapter.delta_table_exists(location) -%}

  {{ run_hooks(pre_hooks, inside_transaction=False) }}
  {%- do adapter.create_schema(target_relation) -%}
  {{ run_hooks(pre_hooks, inside_transaction=True) }}

  {% set strategy_macro = strategy_dispatch(strategy_name) %}
  {% set strategy = strategy_macro(model, "snapshotted_data", "source_data", model['config'], exists) %}

  {% if not exists or should_full_refresh() %}

    {#-- First run (or full refresh): build the initial snapshot and overwrite the Delta table. --#}
    {% set build_sql = build_snapshot_table(strategy, model['compiled_code']) %}
    {{ check_time_data_types(build_sql) }}

    {% call statement('stage_model') -%}
      create or replace view {{ tmp_relation }} as {{ build_sql }}
    {%- endcall %}
    {{ adapter.commit() }}
    {%- set columns = adapter.get_columns_in_relation(tmp_relation) -%}
    {% do adapter.store_relation('duckrun', tmp_relation, columns, location, 'delta', {
         'incremental': false,
         'full_refresh': true,
         'invocation_id': invocation_id,
       }) %}
    {% call statement('drop_stage') -%}
      drop view if exists {{ tmp_relation }}
    {%- endcall %}

  {% else %}

    {% set snapshot_cols = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() %}
    {{ adapter.assert_valid_snapshot_target_given_strategy(target_relation, snapshot_cols, strategy) }}

    {#-- Pin the existing snapshot read to read_version so the staging change-detection sees one
         consistent version (matches the merge's OCC pin below). --#}
    {% call statement('register_this') -%}
      create or replace view {{ target_relation }} as
        select * from delta_scan('{{ _loc_sql }}'{% if read_version is not none %}, version => {{ read_version }}{% endif %})
    {%- endcall %}

    {% set staging_sql = snapshot_staging_table(strategy, sql, target_relation) %}
    {{ check_time_data_types(staging_sql) }}
    {% call statement('stage_model') -%}
      create or replace view {{ tmp_relation }} as {{ staging_sql }}
    {%- endcall %}
    {{ adapter.commit() }}

    {#-- Drop dbt's staging-only helper columns; the merge source must match the snapshot table. --#}
    {% set remove_columns = ['dbt_change_type', 'DBT_CHANGE_TYPE', 'dbt_unique_key', 'DBT_UNIQUE_KEY'] %}
    {% if unique_key | is_list %}
      {% for key in strategy.unique_key %}
        {% do remove_columns.append('dbt_unique_key_' + loop.index|string) %}
        {% do remove_columns.append('DBT_UNIQUE_KEY_' + loop.index|string) %}
      {% endfor %}
    {% endif %}
    {% set source_columns = adapter.get_columns_in_relation(tmp_relation)
                              | rejectattr('name', 'in', remove_columns) | list %}

    {#-- Quote each projected column so names that are reserved words or contain special
         characters still form a valid select list. --#}
    {% set quoted_columns = [] %}
    {% for column in source_columns %}
      {% do quoted_columns.append(adapter.quote(column.name)) %}
    {% endfor %}
    {% set col_csv = quoted_columns | join(', ') %}

    {%- set merge_src = api.Relation.create(
          database=target_relation.database,
          schema=target_relation.schema,
          identifier=target_relation.identifier ~ '__duckrun_snap_src',
          type='view') -%}
    {% call statement('stage_merge_src') -%}
      create or replace view {{ merge_src }} as select {{ col_csv }} from {{ tmp_relation }}
    {%- endcall %}
    {{ adapter.commit() }}
    {%- set columns = adapter.get_columns_in_relation(merge_src) -%}

    {#-- Merge on dbt_scd_id; matched (closes) update only dbt_valid_to, unmatched (new versions)
         insert. read_version pins OCC to the version the staging step read. --#}
    {% do adapter.store_relation('duckrun', merge_src, columns, location, 'delta', {
         'incremental': true,
         'incremental_strategy': 'merge',
         'unique_key': snapshot_cols.dbt_scd_id,
         'merge_update_columns': [snapshot_cols.dbt_valid_to],
         'read_version': read_version,
         'dbt_believes_exists': true,
         'full_refresh': false,
         'on_schema_change': 'ignore',
         'invocation_id': invocation_id,
       }) %}

    {% call statement('drop_stage') -%}
      drop view if exists {{ merge_src }}; drop view if exists {{ tmp_relation }}
    {%- endcall %}

  {% endif %}

  {#-- Surface the snapshot as a delta_scan view over the freshly written HEAD. --#}
  {%- do adapter.create_schema(target_relation) -%}
  {% call statement('main') -%}
    create or replace view {{ target_relation }} as select * from delta_scan('{{ _loc_sql }}')
  {%- endcall %}

  {% do persist_docs(target_relation, model) %}

  {{ run_hooks(post_hooks, inside_transaction=True) }}
  {{ adapter.commit() }}
  {{ run_hooks(post_hooks, inside_transaction=False) }}

  {{ return({'relations': [target_relation]}) }}

{% endmaterialization %}
@@ -14,7 +14,7 @@ from typing import Dict, List, Optional
14
14
 
15
15
  import duckdb
16
16
 
17
- from dbt.adapters.duckrun import engine, remote, secret
17
+ from dbt.adapters.duckrun import delta_dml, engine, remote, secret
18
18
  from . import auth
19
19
 
20
20
 
@@ -164,6 +164,10 @@ class DuckSession:
164
164
  continue
165
165
  self.con.execute(f"CREATE SCHEMA IF NOT EXISTS {_qid(schema)}")
166
166
  for table in tables:
167
+ # Hide drop-tombstones (a `drop table` overwrites the table to a one-column marker;
168
+ # no data is deleted, the files persist, but the table must not surface).
169
+ if delta_dml.is_dropped(self.con, self.table_path(schema, table), self.storage_options):
170
+ continue
167
171
  self._register_view(schema, table)
168
172
  registered.append(f"{schema}.{table}")
169
173
 
@@ -204,20 +208,29 @@ class DuckSession:
204
208
  # ---- Spark-shaped surface --------------------------------------------------------------
205
209
 
206
210
  def sql(self, query: str) -> "DataFrame":
207
- """Run a **read** query and return a :class:`DataFrame`. ``conn.sql()`` is read-only: the
208
- tables are registered as read-only ``delta_scan`` views, so it passes straight through to
209
- DuckDB. Time-travel works for free ``conn.sql("from delta_scan('path', version => 0)")``.
210
-
211
- Writes go through the Spark-shaped surface, not SQL: ``df.write.saveAsTable`` (create /
212
- append) and the ``conn.delta_table(name)`` handle
213
- ``.merge(...)`` / ``.delete()`` / ``.update()`` / ``.replaceWhere()``. A Delta-write
214
- statement is rejected up front (not executed) — a bare DuckDB ``CREATE TABLE … AS`` would
215
- otherwise silently make an ephemeral DuckDB-local table that never reaches Delta.
216
- ``CREATE TEMP/VIEW`` and other DuckDB-local scratch DDL still pass through.
211
+ """Run a query and return a :class:`DataFrame`.
212
+
213
+ Reads pass straight through to DuckDB over the ``delta_scan`` views (time-travel works for
214
+ free — ``conn.sql("from delta_scan('path', version => 0)")``).
215
+
216
+ Delta **DML** is applied to the Delta table via delta_rs (works local AND on OneLake):
217
+ ``create table … as select`` (overwrite), ``insert into … select``/``insert into … values``
218
+ (append), ``delete``/``update`` (delta_rs delete/update), ``alter table … add column``, and
219
+ ``drop table`` (tombstone marks the table dropped without deleting data; a human purges
220
+ the files). After a DML statement the catalog is refreshed.
221
+
222
+ ``merge`` isn't expressible via delta_rs DML here — use the Spark write surface instead:
223
+ ``df.write.saveAsTable(...)`` or
224
+ ``conn.delta_table(name).merge(...)/.delete()/.update()/.replaceWhere()``.
225
+ ``CREATE TEMP/VIEW`` and other DuckDB-local scratch DDL pass through to DuckDB.
217
226
  """
227
+ if delta_dml.handle(self.con, self.root_path, self.storage_options, query,
228
+ default_schema=self._current_database):
229
+ self.refresh(quiet=True)
230
+ return DataFrame(self.con.sql("SELECT 'ok' AS status"), self)
218
231
  if _is_delta_write(query):
219
232
  raise ValueError(
220
- "conn.sql() is read-only (Delta tables are registered as read-only views). "
233
+ "conn.sql() can't run a SQL MERGE via delta_rs. "
221
234
  "Use the Spark write API: df.write.saveAsTable(...) to create/append, or "
222
235
  "conn.delta_table(name).merge(...)/.delete()/.update()/.replaceWhere()."
223
236
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.3.17.dev2
3
+ Version: 0.3.17.dev3
4
4
  Summary: A dbt adapter that runs SQL in DuckDB and materializes to Delta Lake (delta_rs).
5
5
  Author: mim
6
6
  License: MIT
@@ -5,6 +5,7 @@ pyproject.toml
5
5
  dbt/adapters/duckrun/__init__.py
6
6
  dbt/adapters/duckrun/__version__.py
7
7
  dbt/adapters/duckrun/credentials.py
8
+ dbt/adapters/duckrun/delta_dml.py
8
9
  dbt/adapters/duckrun/delta_plugin.py
9
10
  dbt/adapters/duckrun/engine.py
10
11
  dbt/adapters/duckrun/environment.py
@@ -17,6 +18,7 @@ dbt/include/duckrun/macros/catalog.sql
17
18
  dbt/include/duckrun/macros/materializations/_delta_core.sql
18
19
  dbt/include/duckrun/macros/materializations/delta.sql
19
20
  dbt/include/duckrun/macros/materializations/incremental.sql
21
+ dbt/include/duckrun/macros/materializations/snapshot.sql
20
22
  dbt/include/duckrun/macros/materializations/table.sql
21
23
  duckrun/__init__.py
22
24
  duckrun/auth.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "duckrun"
7
- version = "0.3.17.dev2"
7
+ version = "0.3.17.dev3"
8
8
  description = "A dbt adapter that runs SQL in DuckDB and materializes to Delta Lake (delta_rs)."
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -1 +0,0 @@
1
- version = "0.3.17.dev2"
@@ -1,59 +0,0 @@
1
- {#-- duckrun catalog: report Delta-backed relations as BASE TABLE.
2
-
3
- Every duckrun model is physically a DuckDB *view* over `delta_scan('<location>')`, so the stock
4
- dbt-duckdb catalog (duckdb_views() -> 'VIEW') reports them as views — which is dishonest: they are
5
- Delta *tables*, and `dbt docs generate` / is_incremental() treat them as tables. We override the
6
- catalog so a view whose definition reads from `delta_scan(...)` is reported as `BASE TABLE`, while
7
- genuine `view`-materialized models (no delta_scan) stay `VIEW`. Comments (table + column) come
8
- through unchanged from WS4's COMMENT ON, which get_catalog already reads.
9
-
10
- Stats are intentionally not synthesized here: the duckrun conformance catalog fixtures use
11
- `no_stats()`, so row/byte counts from the Delta log aren't required to pass — and computing them
12
- per relation would re-open every table during docs generate. (If stats are wanted later, pull
13
- num_records / size_bytes from DeltaTable.get_add_actions and cache per build.)
14
- #}
15
- {% macro duckrun__get_catalog(information_schema, schemas) -%}
16
- {%- call statement('catalog', fetch_result=True) -%}
17
- with relations AS (
18
- select
19
- t.table_name
20
- , t.database_name
21
- , t.schema_name
22
- , 'BASE TABLE' as table_type
23
- , t.comment as table_comment
24
- from duckdb_tables() t
25
- WHERE t.database_name = '{{ database }}'
26
- UNION ALL
27
- SELECT v.view_name as table_name
28
- , v.database_name
29
- , v.schema_name
30
- -- A delta_scan view is a Delta table surfaced for reads; report it as a table, not a view.
31
- , case when v.sql ilike '%delta_scan(%' then 'BASE TABLE' else 'VIEW' end as table_type
32
- , v.comment as table_comment
33
- from duckdb_views() v
34
- WHERE v.database_name = '{{ database }}'
35
- )
36
- select
37
- '{{ database }}' as table_database,
38
- r.schema_name as table_schema,
39
- r.table_name,
40
- r.table_type,
41
- r.table_comment,
42
- c.column_name,
43
- c.column_index as column_index,
44
- c.data_type as column_type,
45
- c.comment as column_comment,
46
- NULL as table_owner
47
- FROM relations r JOIN duckdb_columns() c ON r.schema_name = c.schema_name AND r.table_name = c.table_name
48
- WHERE (
49
- {%- for schema in schemas -%}
50
- upper(r.schema_name) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
51
- {%- endfor -%}
52
- )
53
- ORDER BY
54
- r.schema_name,
55
- r.table_name,
56
- c.column_index
57
- {%- endcall -%}
58
- {{ return(load_result('catalog').table) }}
59
- {%- endmacro %}
File without changes
File without changes
File without changes
File without changes