duckrun 0.1.4__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://github.com/djouallah/duckrun
@@ -6,6 +6,99 @@ from deltalake import DeltaTable, write_deltalake
6
6
  from typing import List, Tuple, Union, Optional, Callable, Dict, Any
7
7
  from string import Template
8
8
 
9
+
10
class DeltaWriter:
    """Fluent, Spark-style writer that persists a DuckDB relation as a Delta table."""

    def __init__(self, relation, duckrun_instance):
        # The relation holding the query result and the owning Duckrun session.
        self.relation = relation
        self.duckrun = duckrun_instance
        self._format = None          # must be set to "delta" via .format() before saving
        self._mode = "overwrite"     # default write mode

    def format(self, format_type: str):
        """Select the output format; 'delta' is the only accepted value. Returns self."""
        if format_type.lower() == "delta":
            self._format = "delta"
            return self
        raise ValueError(f"Only 'delta' format is supported, got '{format_type}'")

    def mode(self, write_mode: str):
        """Choose the write mode: 'overwrite' or 'append'. Returns self."""
        if write_mode in ("overwrite", "append"):
            self._mode = write_mode
            return self
        raise ValueError(f"Mode must be 'overwrite' or 'append', got '{write_mode}'")

    def saveAsTable(self, table_name: str):
        """
        Materialize the relation as a Delta table and register a DuckDB view over it.

        Accepts "schema.table" or a bare table name (resolved against the
        session's default schema). Returns the bare table name.
        Raises RuntimeError if .format('delta') was not called first.
        """
        if self._format != "delta":
            raise RuntimeError("Must call .format('delta') before saveAsTable()")

        # Split an optional schema qualifier off the table name.
        if "." in table_name:
            schema, table = table_name.split(".", 1)
        else:
            schema, table = self.duckrun.schema, table_name

        # Make sure DuckDB can authenticate against OneLake before touching storage.
        self.duckrun._create_onelake_secret()

        target = f"{self.duckrun.table_base_url}{schema}/{table}"

        # Pull the query result as record batches for the Delta writer.
        batches = self.relation.record_batch()

        print(f"Writing to Delta table: {schema}.{table} (mode={self._mode})")
        write_deltalake(target, batches, mode=self._mode)

        # Re-register the DuckDB view so the fresh data is immediately queryable.
        self.duckrun.con.sql(f"DROP VIEW IF EXISTS {table}")
        self.duckrun.con.sql(f"""
            CREATE OR REPLACE VIEW {table}
            AS SELECT * FROM delta_scan('{target}')
        """)

        dt = DeltaTable(target)
        if self._mode == "overwrite":
            # Fresh table: superseded files can be reclaimed immediately.
            dt.vacuum(retention_hours=0, dry_run=False, enforce_retention_duration=False)
            dt.cleanup_metadata()
            print(f"✅ Table {schema}.{table} created/overwritten")
        else:
            # Append path: compact only once the file count crosses the threshold.
            file_count = len(dt.file_uris())
            if file_count > self.duckrun.compaction_threshold:
                print(f"Compacting {schema}.{table} ({file_count} files)")
                dt.optimize.compact()
                dt.vacuum(dry_run=False)
                dt.cleanup_metadata()
            print(f"✅ Data appended to {schema}.{table}")

        return table
83
+
84
+
85
class QueryResult:
    """Thin proxy over a DuckDB relation that adds a Spark-style `.write` entry point."""

    def __init__(self, relation, duckrun_instance):
        self.relation = relation
        self.duckrun = duckrun_instance

    @property
    def write(self):
        """Return a DeltaWriter bound to this relation and session."""
        return DeltaWriter(self.relation, self.duckrun)

    def __getattr__(self, name):
        # Anything not defined on the wrapper falls through to the relation,
        # so .show(), .df(), .fetchall(), etc. keep working unchanged.
        return getattr(self.relation, name)
100
+
101
+
9
102
  class Duckrun:
10
103
  """
11
104
  Lakehouse task runner with clean tuple-based API.
@@ -20,9 +113,10 @@ class Duckrun:
20
113
  dr = Duckrun.connect(workspace, lakehouse, schema, sql_folder)
21
114
  dr.run(pipeline)
22
115
 
23
- # For data exploration only:
116
+ # For data exploration with Spark-style API:
24
117
  dr = Duckrun.connect(workspace, lakehouse, schema)
25
118
  dr.sql("SELECT * FROM table").show()
119
+ dr.sql("SELECT 43").write.format("delta").mode("append").saveAsTable("aemo.test")
26
120
  """
27
121
 
28
122
  def __init__(self, workspace: str, lakehouse_name: str, schema: str,
@@ -312,13 +406,19 @@ class Duckrun:
312
406
 
313
407
  def sql(self, query: str):
314
408
  """
315
- Execute raw SQL query.
409
+ Execute raw SQL query with Spark-style write API.
316
410
 
317
411
  Example:
412
+ # Traditional DuckDB style
318
413
  dr.sql("SELECT * FROM table").show()
319
414
  df = dr.sql("SELECT * FROM table").df()
415
+
416
+ # New Spark-style write API
417
+ dr.sql("SELECT 43 as value").write.format("delta").mode("append").saveAsTable("aemo.test")
418
+ dr.sql("SELECT * FROM source").write.format("delta").mode("overwrite").saveAsTable("target")
320
419
  """
321
- return self.con.sql(query)
420
+ relation = self.con.sql(query)
421
+ return QueryResult(relation, self)
322
422
 
323
423
  def get_connection(self):
324
424
  """Get underlying DuckDB connection"""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://github.com/djouallah/duckrun
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "duckrun"
8
- version = "0.1.4"
8
+ version = "0.1.5"
9
9
  description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
10
10
  readme = "README.md"
11
11
  license = "MIT"
File without changes
File without changes
File without changes
File without changes