duckrun 0.1.4__tar.gz → 0.1.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.4
3
+ Version: 0.1.5.1
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
+ Author: mim
5
6
  License-Expression: MIT
6
7
  Project-URL: Homepage, https://github.com/djouallah/duckrun
7
8
  Project-URL: Repository, https://github.com/djouallah/duckrun
@@ -6,6 +6,100 @@ from deltalake import DeltaTable, write_deltalake
6
6
  from typing import List, Tuple, Union, Optional, Callable, Dict, Any
7
7
  from string import Template
8
8
 
9
+
10
class DeltaWriter:
    """Spark-style write API that saves a DuckDB relation as a Delta table.

    Instances are used fluently; ``format`` and ``mode`` return ``self``::

        dr.sql("SELECT 43 AS value").write.mode("append").saveAsTable("test")

    Args:
        relation: Object exposing ``record_batch()`` (a DuckDB relation).
        duckrun_instance: Owner object. This class reads its ``schema``,
            ``table_base_url``, ``con`` and ``compaction_threshold``
            attributes and calls its ``_create_onelake_secret()`` method.
    """

    _SUPPORTED_MODES = ("overwrite", "append")

    def __init__(self, relation, duckrun_instance):
        self.relation = relation
        self.duckrun = duckrun_instance
        self._format = "delta"  # the only supported format
        self._mode = "overwrite"

    def format(self, format_type: str):
        """Set the output format; only ``'delta'`` (any letter case) is accepted.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``format_type`` is anything other than delta.
        """
        if format_type.lower() != "delta":
            raise ValueError(f"Only 'delta' format is supported, got '{format_type}'")
        self._format = "delta"
        return self

    def mode(self, write_mode: str):
        """Set the write mode: ``'overwrite'`` or ``'append'`` (any letter case).

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``write_mode`` is not a supported mode.
        """
        # Normalise case so mode() is as forgiving as format().
        normalized = write_mode.lower() if isinstance(write_mode, str) else write_mode
        if normalized not in self._SUPPORTED_MODES:
            raise ValueError(f"Mode must be 'overwrite' or 'append', got '{write_mode}'")
        self._mode = normalized
        return self

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return ``name`` as a double-quoted SQL identifier."""
        return '"' + name.replace('"', '""') + '"'

    def _split_table_name(self, table_name: str) -> Tuple[str, str]:
        """Split ``'schema.table'`` into its parts, defaulting the schema.

        A name without a dot uses ``self.duckrun.schema`` as the schema.

        Raises:
            ValueError: If the schema or table part is empty.
        """
        if "." in table_name:
            schema, table = table_name.split(".", 1)
        else:
            schema, table = self.duckrun.schema, table_name
        if not schema or not table:
            raise ValueError(f"Invalid table name: '{table_name}'")
        return schema, table

    def saveAsTable(self, table_name: str):
        """Write the relation to a Delta table and expose it as a DuckDB view.

        Args:
            table_name: ``'table'`` or ``'schema.table'``.

        Returns:
            The table part of ``table_name`` (without the schema).

        Raises:
            RuntimeError: If the configured format is not delta.
            ValueError: If ``table_name`` has an empty schema or table part.
        """
        # Defensive guard: format() already rejects everything except delta,
        # so this only trips if _format was changed directly.
        if self._format != "delta":
            raise RuntimeError(f"Only 'delta' format is supported, got '{self._format}'")

        schema, table = self._split_table_name(table_name)

        # Ensure OneLake secret is created
        self.duckrun._create_onelake_secret()

        path = f"{self.duckrun.table_base_url}{schema}/{table}"

        # Execute query and get result
        batches = self.relation.record_batch()

        print(f"Writing to Delta table: {schema}.{table} (mode={self._mode})")

        write_deltalake(path, batches, mode=self._mode)

        # Quote the view name and escape the path so that names containing
        # spaces or quote characters cannot break or alter the statements.
        view_name = self._quote_identifier(table)
        path_literal = path.replace("'", "''")
        self.duckrun.con.sql(f"DROP VIEW IF EXISTS {view_name}")
        self.duckrun.con.sql(f"""
            CREATE OR REPLACE VIEW {view_name}
            AS SELECT * FROM delta_scan('{path_literal}')
        """)

        # Table maintenance after the write
        dt = DeltaTable(path)

        if self._mode == "overwrite":
            # Zero retention with enforcement disabled; presumably this removes
            # the files superseded by the overwrite -- confirm against the
            # deltalake vacuum documentation.
            dt.vacuum(retention_hours=0, dry_run=False, enforce_retention_duration=False)
            dt.cleanup_metadata()
            print(f"✅ Table {schema}.{table} created/overwritten")
        else:  # append
            file_count = len(dt.file_uris())
            # Compact only once the file count exceeds the configured threshold.
            if file_count > self.duckrun.compaction_threshold:
                print(f"Compacting {schema}.{table} ({file_count} files)")
                dt.optimize.compact()
                dt.vacuum(dry_run=False)
                dt.cleanup_metadata()
            print(f"✅ Data appended to {schema}.{table}")

        return table
84
+
85
+
86
class QueryResult:
    """Wrapper around a DuckDB relation that adds a Spark-style write API.

    Any attribute not defined here is looked up on the wrapped relation, so
    the wrapper can be used wherever the relation itself was used.

    Args:
        relation: The wrapped relation object.
        duckrun_instance: Owner object handed on to ``DeltaWriter``.
    """

    def __init__(self, relation, duckrun_instance):
        self.relation = relation
        self.duckrun = duckrun_instance

    @property
    def write(self):
        """Return a new ``DeltaWriter`` bound to this relation and owner."""
        return DeltaWriter(self.relation, self.duckrun)

    def __getattr__(self, name):
        """Delegate attributes not found on the wrapper to the relation.

        Raises:
            AttributeError: If the wrapper has no relation yet, or the
                relation does not have the attribute either.
        """
        # __getattr__ only runs after normal lookup has failed. Reading
        # self.relation here while 'relation' is unset (an instance built
        # without __init__, e.g. by copy or pickle) would re-enter this
        # method forever, so fetch it from the instance dict instead.
        try:
            relation = self.__dict__["relation"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            ) from None
        return getattr(relation, name)
101
+
102
+
9
103
  class Duckrun:
10
104
  """
11
105
  Lakehouse task runner with clean tuple-based API.
@@ -20,9 +114,10 @@ class Duckrun:
20
114
  dr = Duckrun.connect(workspace, lakehouse, schema, sql_folder)
21
115
  dr.run(pipeline)
22
116
 
23
- # For data exploration only:
117
+ # For data exploration with Spark-style API:
24
118
  dr = Duckrun.connect(workspace, lakehouse, schema)
25
119
  dr.sql("SELECT * FROM table").show()
120
+ dr.sql("SELECT 43").write.mode("append").saveAsTable("test")
26
121
  """
27
122
 
28
123
  def __init__(self, workspace: str, lakehouse_name: str, schema: str,
@@ -312,13 +407,19 @@ class Duckrun:
312
407
 
313
408
  def sql(self, query: str):
314
409
  """
315
- Execute raw SQL query.
410
+ Execute raw SQL query with Spark-style write API.
316
411
 
317
412
  Example:
413
+ # Traditional DuckDB style
318
414
  dr.sql("SELECT * FROM table").show()
319
415
  df = dr.sql("SELECT * FROM table").df()
416
+
417
+ # New Spark-style write API (format is optional, defaults to delta)
418
+ dr.sql("SELECT 43 as value").write.mode("append").saveAsTable("test")
419
+ dr.sql("SELECT * FROM source").write.mode("overwrite").saveAsTable("target")
320
420
  """
321
- return self.con.sql(query)
421
+ relation = self.con.sql(query)
422
+ return QueryResult(relation, self)
322
423
 
323
424
  def get_connection(self):
324
425
  """Get underlying DuckDB connection"""
@@ -1,7 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.4
3
+ Version: 0.1.5.1
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
+ Author: mim
5
6
  License-Expression: MIT
6
7
  Project-URL: Homepage, https://github.com/djouallah/duckrun
7
8
  Project-URL: Repository, https://github.com/djouallah/duckrun
@@ -1,14 +1,16 @@
1
-
2
1
  [build-system]
3
2
  requires = ["setuptools>=61.0", "wheel"]
4
3
  build-backend = "setuptools.build_meta"
5
4
 
6
5
  [project]
7
6
  name = "duckrun"
8
- version = "0.1.4"
7
+ version = "0.1.5.1"
9
8
  description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
10
9
  readme = "README.md"
11
10
  license = "MIT"
11
+ authors = [
12
+ {name = "mim"}
13
+ ]
12
14
  requires-python = ">=3.9"
13
15
  dependencies = [
14
16
  "duckdb>=1.2.0",
@@ -19,5 +21,4 @@ dependencies = [
19
21
  [project.urls]
20
22
  Homepage = "https://github.com/djouallah/duckrun"
21
23
  Repository = "https://github.com/djouallah/duckrun"
22
- Issues = "https://github.com/djouallah/duckrun/issues"
23
-
24
+ Issues = "https://github.com/djouallah/duckrun/issues"
File without changes
File without changes
File without changes
File without changes