duckrun 0.1.6.3__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckrun/core.py +35 -55
- {duckrun-0.1.6.3.dist-info → duckrun-0.1.7.dist-info}/METADATA +7 -3
- duckrun-0.1.7.dist-info/RECORD +7 -0
- duckrun-0.1.6.3.dist-info/RECORD +0 -7
- {duckrun-0.1.6.3.dist-info → duckrun-0.1.7.dist-info}/WHEEL +0 -0
- {duckrun-0.1.6.3.dist-info → duckrun-0.1.7.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.1.6.3.dist-info → duckrun-0.1.7.dist-info}/top_level.txt +0 -0
duckrun/core.py
CHANGED
@@ -127,77 +127,57 @@ class Duckrun:
|
|
127
127
|
self._attach_lakehouse()
|
128
128
|
|
129
129
|
@classmethod
|
130
|
-
def connect(cls,
|
131
|
-
schema: str = "dbo", sql_folder: Optional[str] = None,
|
130
|
+
def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
|
132
131
|
compaction_threshold: int = 100):
|
133
132
|
"""
|
134
133
|
Create and connect to lakehouse.
|
135
134
|
|
136
|
-
|
137
|
-
1. Compact: connect("ws/lh.lakehouse/schema", sql_folder=...) or connect("ws/lh.lakehouse")
|
138
|
-
2. Traditional: connect("ws", "lh", "schema", sql_folder) or connect("ws", "lh")
|
135
|
+
Uses compact format: connect("ws/lh.lakehouse/schema") or connect("ws/lh.lakehouse")
|
139
136
|
|
140
137
|
Args:
|
141
|
-
|
142
|
-
lakehouse_name: Lakehouse name (optional if using compact format)
|
143
|
-
schema: Schema name (defaults to "dbo")
|
138
|
+
connection_string: OneLake path "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
|
144
139
|
sql_folder: Optional path or URL to SQL files folder
|
145
140
|
compaction_threshold: File count threshold for compaction
|
146
141
|
|
147
142
|
Examples:
|
148
|
-
|
149
|
-
dr = Duckrun.connect("temp/power.lakehouse/wa", "https://github.com/.../sql/")
|
150
|
-
dr = Duckrun.connect("ws/lh.lakehouse/schema", "./sql")
|
143
|
+
dr = Duckrun.connect("ws/lh.lakehouse/schema", sql_folder="./sql")
|
151
144
|
dr = Duckrun.connect("ws/lh.lakehouse/schema") # no SQL folder
|
152
|
-
|
153
|
-
# Traditional format
|
154
|
-
dr = Duckrun.connect("ws", "lh", "schema", "./sql")
|
155
|
-
dr = Duckrun.connect("ws", "lh", "schema")
|
145
|
+
dr = Duckrun.connect("ws/lh.lakehouse") # defaults to dbo schema
|
156
146
|
"""
|
157
147
|
print("Connecting to Lakehouse...")
|
158
148
|
|
159
149
|
scan_all_schemas = False
|
160
150
|
|
161
|
-
#
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
if lakehouse_name.endswith(".lakehouse"):
|
186
|
-
lakehouse_name = lakehouse_name[:-10]
|
187
|
-
elif lakehouse_name is not None:
|
188
|
-
# Traditional format - check if schema was explicitly provided
|
189
|
-
if schema == "dbo":
|
190
|
-
scan_all_schemas = True
|
191
|
-
print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
|
192
|
-
print(f" Scanning all schemas for table discovery...\n")
|
151
|
+
# Only support compact format: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
|
152
|
+
if not connection_string or "/" not in connection_string:
|
153
|
+
raise ValueError(
|
154
|
+
"Invalid connection string format. "
|
155
|
+
"Expected format: 'workspace/lakehouse.lakehouse/schema' or 'workspace/lakehouse.lakehouse'"
|
156
|
+
)
|
157
|
+
|
158
|
+
parts = connection_string.split("/")
|
159
|
+
if len(parts) == 2:
|
160
|
+
workspace, lakehouse_name = parts
|
161
|
+
scan_all_schemas = True
|
162
|
+
schema = "dbo"
|
163
|
+
print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
|
164
|
+
print(f" Scanning all schemas for table discovery...\n")
|
165
|
+
elif len(parts) == 3:
|
166
|
+
workspace, lakehouse_name, schema = parts
|
167
|
+
else:
|
168
|
+
raise ValueError(
|
169
|
+
f"Invalid connection string format: '{connection_string}'. "
|
170
|
+
"Expected format: 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'"
|
171
|
+
)
|
172
|
+
|
173
|
+
if lakehouse_name.endswith(".lakehouse"):
|
174
|
+
lakehouse_name = lakehouse_name[:-10]
|
193
175
|
|
194
176
|
if not workspace or not lakehouse_name:
|
195
177
|
raise ValueError(
|
196
|
-
"Missing required parameters. Use
|
178
|
+
"Missing required parameters. Use compact format:\n"
|
197
179
|
" connect('workspace/lakehouse.lakehouse/schema', 'sql_folder')\n"
|
198
|
-
" connect('workspace/lakehouse.lakehouse') # defaults to dbo
|
199
|
-
" connect('workspace', 'lakehouse', 'schema', 'sql_folder')\n"
|
200
|
-
" connect('workspace', 'lakehouse') # defaults to dbo"
|
180
|
+
" connect('workspace/lakehouse.lakehouse') # defaults to dbo"
|
201
181
|
)
|
202
182
|
|
203
183
|
return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold, scan_all_schemas)
|
@@ -210,7 +190,7 @@ class Duckrun:
|
|
210
190
|
if token != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
|
211
191
|
self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
|
212
192
|
else:
|
213
|
-
print("
|
193
|
+
print("Authenticating with Azure (trying CLI, will fallback to browser if needed)...")
|
214
194
|
from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
|
215
195
|
credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
|
216
196
|
token = credential.get_token("https://storage.azure.com/.default")
|
@@ -227,7 +207,7 @@ class Duckrun:
|
|
227
207
|
"""
|
228
208
|
token = self._get_storage_token()
|
229
209
|
if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
|
230
|
-
print("
|
210
|
+
print("Authenticating with Azure for table discovery (trying CLI, will fallback to browser if needed)...")
|
231
211
|
from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
|
232
212
|
credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
|
233
213
|
token_obj = credential.get_token("https://storage.azure.com/.default")
|
@@ -542,7 +522,7 @@ class Duckrun:
|
|
542
522
|
# Get Azure token
|
543
523
|
token = self._get_storage_token()
|
544
524
|
if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
|
545
|
-
print("
|
525
|
+
print("Authenticating with Azure for file upload (trying CLI, will fallback to browser if needed)...")
|
546
526
|
from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
|
547
527
|
credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
|
548
528
|
token_obj = credential.get_token("https://storage.azure.com/.default")
|
@@ -649,7 +629,7 @@ class Duckrun:
|
|
649
629
|
# Get Azure token
|
650
630
|
token = self._get_storage_token()
|
651
631
|
if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
|
652
|
-
print("
|
632
|
+
print("Authenticating with Azure for file download (trying CLI, will fallback to browser if needed)...")
|
653
633
|
from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
|
654
634
|
credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
|
655
635
|
token_obj = credential.get_token("https://storage.azure.com/.default")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: duckrun
|
3
|
-
Version: 0.1.6.3
|
3
|
+
Version: 0.1.7
|
4
4
|
Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
|
5
5
|
Author: mim
|
6
6
|
License: MIT
|
@@ -20,7 +20,7 @@ Dynamic: license-file
|
|
20
20
|
|
21
21
|
<img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
|
22
22
|
|
23
|
-
|
23
|
+
A helper package for stuff that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy
|
24
24
|
|
25
25
|
## Important Notes
|
26
26
|
|
@@ -30,6 +30,10 @@ Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and
|
|
30
30
|
|
31
31
|
**Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
|
32
32
|
|
33
|
+
## What It Does
|
34
|
+
|
35
|
+
It does orchestration, arbitrary SQL statements, and file manipulation. That's it - just stuff I encounter in my daily workflow when working with Fabric notebooks.
|
36
|
+
|
33
37
|
## Installation
|
34
38
|
|
35
39
|
```bash
|
@@ -101,7 +105,7 @@ con.sql("SELECT * FROM dbo_customers").show()
|
|
101
105
|
con.sql("SELECT * FROM bronze_raw_data").show()
|
102
106
|
```
|
103
107
|
|
104
|
-
##
|
108
|
+
## Three Ways to Use Duckrun
|
105
109
|
|
106
110
|
### 1. Data Exploration (Spark-Style API)
|
107
111
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
|
2
|
+
duckrun/core.py,sha256=PzeY1WJVhAGTOuN5Yf86oNhKpK_zw6GYdylZ_BdSJfg,32982
|
3
|
+
duckrun-0.1.7.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
|
4
|
+
duckrun-0.1.7.dist-info/METADATA,sha256=BIsqAq6Z1JwSv7RwJ6wthzTC7xKSDeigZfVom5RJH0s,13847
|
5
|
+
duckrun-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
+
duckrun-0.1.7.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
|
7
|
+
duckrun-0.1.7.dist-info/RECORD,,
|
duckrun-0.1.6.3.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
|
2
|
-
duckrun/core.py,sha256=CT2NH5hCLsv4uB5zH3VxTuCVQy0nWkPBG-cICLPhG_8,34245
|
3
|
-
duckrun-0.1.6.3.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
|
4
|
-
duckrun-0.1.6.3.dist-info/METADATA,sha256=ny5DcRSU1B4SdHdJqHCYk0-hNo9-zqFABqMY9ulAVNk,13595
|
5
|
-
duckrun-0.1.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
-
duckrun-0.1.6.3.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
|
7
|
-
duckrun-0.1.6.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|