duckrun 0.1.4__py3-none-any.whl → 0.1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckrun/core.py +104 -3
- {duckrun-0.1.4.dist-info → duckrun-0.1.5.1.dist-info}/METADATA +2 -1
- duckrun-0.1.5.1.dist-info/RECORD +7 -0
- duckrun-0.1.4.dist-info/RECORD +0 -7
- {duckrun-0.1.4.dist-info → duckrun-0.1.5.1.dist-info}/WHEEL +0 -0
- {duckrun-0.1.4.dist-info → duckrun-0.1.5.1.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.1.4.dist-info → duckrun-0.1.5.1.dist-info}/top_level.txt +0 -0
duckrun/core.py
CHANGED
@@ -6,6 +6,100 @@ from deltalake import DeltaTable, write_deltalake
|
|
6
6
|
from typing import List, Tuple, Union, Optional, Callable, Dict, Any
|
7
7
|
from string import Template
|
8
8
|
|
9
|
+
|
10
|
+
class DeltaWriter:
    """Spark-style write API for Delta Lake.

    Builder object returned by ``QueryResult.write``.  Configure it with
    :meth:`format` and :meth:`mode` (both return ``self`` so calls can be
    chained) and finish with :meth:`saveAsTable`, which materialises the
    wrapped relation as a Delta table and exposes it as a DuckDB view.
    """

    def __init__(self, relation, duckrun_instance):
        """Store the relation to write and the owning Duckrun instance.

        Args:
            relation: Query result to persist; ``record_batch()`` is called
                on it when the table is saved.
            duckrun_instance: Object providing ``schema``, ``table_base_url``,
                ``compaction_threshold``, ``con`` and
                ``_create_onelake_secret()``.
        """
        self.relation = relation
        self.duckrun = duckrun_instance
        self._format = "delta"  # Default to delta format
        self._mode = "overwrite"

    def format(self, format_type: str):
        """Set output format (only 'delta' supported, case-insensitive).

        Raises:
            ValueError: If ``format_type`` is anything other than delta.
        """
        if format_type.lower() != "delta":
            raise ValueError(f"Only 'delta' format is supported, got '{format_type}'")
        self._format = "delta"
        return self

    def mode(self, write_mode: str):
        """Set write mode: 'overwrite' or 'append' (case-insensitive).

        Raises:
            ValueError: If ``write_mode`` is not one of the supported modes.
        """
        # Normalise case so mode("APPEND") behaves like format("DELTA") does.
        normalized = write_mode.lower() if isinstance(write_mode, str) else write_mode
        if normalized not in {"overwrite", "append"}:
            raise ValueError(f"Mode must be 'overwrite' or 'append', got '{write_mode}'")
        self._mode = normalized
        return self

    def _split_table_name(self, table_name: str):
        """Return ``(schema, table)`` for ``table`` or ``schema.table`` input.

        Only the first dot separates the schema; a bare name falls back to
        the Duckrun instance's default schema.

        Raises:
            ValueError: If either resulting part is empty.
        """
        if "." in table_name:
            schema, table = table_name.split(".", 1)
        else:
            schema, table = self.duckrun.schema, table_name
        if not schema or not table:
            raise ValueError(
                f"Invalid table name '{table_name}': expected 'table' or 'schema.table'"
            )
        return schema, table

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return ``name`` as a double-quoted SQL identifier."""
        return '"' + name.replace('"', '""') + '"'

    @staticmethod
    def _quote_literal(value: str) -> str:
        """Return ``value`` as a single-quoted SQL string literal."""
        return "'" + value.replace("'", "''") + "'"

    def saveAsTable(self, table_name: str):
        """Save query result as Delta table and register a DuckDB view for it.

        Args:
            table_name: ``"table"`` (written to the default schema) or
                ``"schema.table"``.

        Returns:
            The table name without its schema part.

        Raises:
            RuntimeError: If the configured format is not delta.
            ValueError: If ``table_name`` has an empty schema or table part.
        """
        # Defensive: format() already rejects anything but delta.
        if self._format != "delta":
            raise RuntimeError(f"Only 'delta' format is supported, got '{self._format}'")

        schema, table = self._split_table_name(table_name)

        # Ensure OneLake secret is created
        self.duckrun._create_onelake_secret()

        path = f"{self.duckrun.table_base_url}{schema}/{table}"

        # Execute query and get result
        batches = self.relation.record_batch()

        print(f"Writing to Delta table: {schema}.{table} (mode={self._mode})")

        write_deltalake(path, batches, mode=self._mode)

        # CREATE OR REPLACE already replaces an existing view, so no separate
        # DROP is needed.  The identifier and path are quoted so names with
        # spaces, dots or quotes cannot break (or alter) the statement.
        view_name = self._quote_identifier(table)
        source = self._quote_literal(path)
        self.duckrun.con.sql(
            f"CREATE OR REPLACE VIEW {view_name} AS SELECT * FROM delta_scan({source})"
        )

        # Post-write maintenance
        dt = DeltaTable(path)

        if self._mode == "overwrite":
            # Zero retention with enforcement disabled: files from the
            # overwritten version are removed right away.
            dt.vacuum(retention_hours=0, dry_run=False, enforce_retention_duration=False)
            dt.cleanup_metadata()
            print(f"✅ Table {schema}.{table} created/overwritten")
        else:  # append
            file_count = len(dt.file_uris())
            # Only compact once appends have left more files than the
            # configured threshold.
            if file_count > self.duckrun.compaction_threshold:
                print(f"Compacting {schema}.{table} ({file_count} files)")
                dt.optimize.compact()
                dt.vacuum(dry_run=False)
                dt.cleanup_metadata()
            print(f"✅ Data appended to {schema}.{table}")

        return table
|
84
|
+
|
85
|
+
|
86
|
+
class QueryResult:
    """Wrapper for DuckDB relation with write API.

    Adds a Spark-style ``write`` accessor on top of a relation; every other
    attribute is forwarded to the wrapped relation unchanged.
    """

    def __init__(self, relation, duckrun_instance):
        """Store the wrapped relation and the Duckrun instance that made it."""
        self.relation = relation
        self.duckrun = duckrun_instance

    @property
    def write(self):
        """Access write API: a new ``DeltaWriter`` bound to this relation."""
        return DeltaWriter(self.relation, self.duckrun)

    def __getattr__(self, name):
        """Delegate all other methods to underlying DuckDB relation.

        Raises:
            AttributeError: If the wrapper's own attributes are not set yet,
                or the relation has no attribute called ``name``.
        """
        # __getattr__ only runs when normal lookup failed.  If that happens
        # for our own attributes (instance built without __init__, e.g. by
        # copy or pickle), reading self.relation below would re-enter this
        # method forever, so fail fast instead.
        if name in ("relation", "duckrun"):
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )
        return getattr(self.relation, name)

    def __repr__(self):
        """Show the wrapped relation's repr (dunders bypass ``__getattr__``)."""
        if "relation" not in self.__dict__:
            return f"<{type(self).__name__} (uninitialised)>"
        return repr(self.relation)

    def __str__(self):
        """Show the wrapped relation's string form."""
        if "relation" not in self.__dict__:
            return repr(self)
        return str(self.relation)
|
101
|
+
|
102
|
+
|
9
103
|
class Duckrun:
|
10
104
|
"""
|
11
105
|
Lakehouse task runner with clean tuple-based API.
|
@@ -20,9 +114,10 @@ class Duckrun:
|
|
20
114
|
dr = Duckrun.connect(workspace, lakehouse, schema, sql_folder)
|
21
115
|
dr.run(pipeline)
|
22
116
|
|
23
|
-
# For data exploration
|
117
|
+
# For data exploration with Spark-style API:
|
24
118
|
dr = Duckrun.connect(workspace, lakehouse, schema)
|
25
119
|
dr.sql("SELECT * FROM table").show()
|
120
|
+
dr.sql("SELECT 43").write.mode("append").saveAsTable("test")
|
26
121
|
"""
|
27
122
|
|
28
123
|
def __init__(self, workspace: str, lakehouse_name: str, schema: str,
|
@@ -312,13 +407,19 @@ class Duckrun:
|
|
312
407
|
|
313
408
|
def sql(self, query: str):
    """
    Execute raw SQL query with Spark-style write API.

    Returns a QueryResult wrapping the relation produced by the query, or
    None when the connection returns no relation (DuckDB does this for
    statements that produce no result set, such as DDL).

    Example:
        # Traditional DuckDB style
        dr.sql("SELECT * FROM table").show()
        df = dr.sql("SELECT * FROM table").df()

        # New Spark-style write API (format is optional, defaults to delta)
        dr.sql("SELECT 43 as value").write.mode("append").saveAsTable("test")
        dr.sql("SELECT * FROM source").write.mode("overwrite").saveAsTable("target")
    """
    relation = self.con.sql(query)
    if relation is None:
        # Nothing to wrap: a QueryResult around None would fail on every
        # delegated attribute, so pass None through unchanged.
        return None
    return QueryResult(relation, self)
|
322
423
|
|
323
424
|
def get_connection(self):
|
324
425
|
"""Get underlying DuckDB connection"""
|
@@ -1,7 +1,8 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: duckrun
|
3
|
-
Version: 0.1.4
|
3
|
+
Version: 0.1.5.1
|
4
4
|
Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
|
5
|
+
Author: mim
|
5
6
|
License-Expression: MIT
|
6
7
|
Project-URL: Homepage, https://github.com/djouallah/duckrun
|
7
8
|
Project-URL: Repository, https://github.com/djouallah/duckrun
|
@@ -0,0 +1,7 @@
|
|
1
|
+
duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
|
2
|
+
duckrun/core.py,sha256=EoXlQsx7i3BS2a26zB90n4xDBy_WQu1sNicPNYU3DgY,18110
|
3
|
+
duckrun-0.1.5.1.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
|
4
|
+
duckrun-0.1.5.1.dist-info/METADATA,sha256=piXLbt2nRJoAngkOFojRNVX1-nfEGta6p7WKyAKcxEU,4392
|
5
|
+
duckrun-0.1.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
+
duckrun-0.1.5.1.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
|
7
|
+
duckrun-0.1.5.1.dist-info/RECORD,,
|
duckrun-0.1.4.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
|
2
|
-
duckrun/core.py,sha256=u56bWZDKevbplARgnFdI0wm9BfrIVyAiu3eOIwE5FJc,14259
|
3
|
-
duckrun-0.1.4.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
|
4
|
-
duckrun-0.1.4.dist-info/METADATA,sha256=eoPhYn2zC0s_YyEGdiCe1Gs7iWfKY9vakYm3rZdMrrs,4377
|
5
|
-
duckrun-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
-
duckrun-0.1.4.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
|
7
|
-
duckrun-0.1.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|