duckrun 0.1.6.3__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -127,77 +127,57 @@ class Duckrun:
127
127
  self._attach_lakehouse()
128
128
 
129
129
  @classmethod
130
- def connect(cls, workspace: Union[str, None] = None, lakehouse_name: Optional[str] = None,
131
- schema: str = "dbo", sql_folder: Optional[str] = None,
130
+ def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
132
131
  compaction_threshold: int = 100):
133
132
  """
134
133
  Create and connect to lakehouse.
135
134
 
136
- Supports two formats:
137
- 1. Compact: connect("ws/lh.lakehouse/schema", sql_folder=...) or connect("ws/lh.lakehouse")
138
- 2. Traditional: connect("ws", "lh", "schema", sql_folder) or connect("ws", "lh")
135
+ Uses compact format: connect("ws/lh.lakehouse/schema") or connect("ws/lh.lakehouse")
139
136
 
140
137
  Args:
141
- workspace: Workspace name or full path "ws/lh.lakehouse/schema"
142
- lakehouse_name: Lakehouse name (optional if using compact format)
143
- schema: Schema name (defaults to "dbo")
138
+ connection_string: OneLake path "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
144
139
  sql_folder: Optional path or URL to SQL files folder
145
140
  compaction_threshold: File count threshold for compaction
146
141
 
147
142
  Examples:
148
- # Compact format (second param treated as sql_folder if it's a URL/path string)
149
- dr = Duckrun.connect("temp/power.lakehouse/wa", "https://github.com/.../sql/")
150
- dr = Duckrun.connect("ws/lh.lakehouse/schema", "./sql")
143
+ dr = Duckrun.connect("ws/lh.lakehouse/schema", sql_folder="./sql")
151
144
  dr = Duckrun.connect("ws/lh.lakehouse/schema") # no SQL folder
152
-
153
- # Traditional format
154
- dr = Duckrun.connect("ws", "lh", "schema", "./sql")
155
- dr = Duckrun.connect("ws", "lh", "schema")
145
+ dr = Duckrun.connect("ws/lh.lakehouse") # defaults to dbo schema
156
146
  """
157
147
  print("Connecting to Lakehouse...")
158
148
 
159
149
  scan_all_schemas = False
160
150
 
161
- # Check if using compact format: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
162
- # If second param looks like a path/URL and not a lakehouse name, treat it as sql_folder
163
- if workspace and "/" in workspace and (lakehouse_name is None or
164
- (isinstance(lakehouse_name, str) and ('/' in lakehouse_name or lakehouse_name.startswith('http') or lakehouse_name.startswith('.')))):
165
-
166
- # If lakehouse_name looks like a sql_folder, shift it
167
- if lakehouse_name and ('/' in lakehouse_name or lakehouse_name.startswith('http') or lakehouse_name.startswith('.')):
168
- sql_folder = lakehouse_name
169
- lakehouse_name = None
170
-
171
- parts = workspace.split("/")
172
- if len(parts) == 2:
173
- workspace, lakehouse_name = parts
174
- scan_all_schemas = True
175
- print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
176
- print(f" Scanning all schemas for table discovery...\n")
177
- elif len(parts) == 3:
178
- workspace, lakehouse_name, schema = parts
179
- else:
180
- raise ValueError(
181
- f"Invalid connection string format: '{workspace}'. "
182
- "Expected format: 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'"
183
- )
184
-
185
- if lakehouse_name.endswith(".lakehouse"):
186
- lakehouse_name = lakehouse_name[:-10]
187
- elif lakehouse_name is not None:
188
- # Traditional format - check if schema was explicitly provided
189
- if schema == "dbo":
190
- scan_all_schemas = True
191
- print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
192
- print(f" Scanning all schemas for table discovery...\n")
151
+ # Only support compact format: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
152
+ if not connection_string or "/" not in connection_string:
153
+ raise ValueError(
154
+ "Invalid connection string format. "
155
+ "Expected format: 'workspace/lakehouse.lakehouse/schema' or 'workspace/lakehouse.lakehouse'"
156
+ )
157
+
158
+ parts = connection_string.split("/")
159
+ if len(parts) == 2:
160
+ workspace, lakehouse_name = parts
161
+ scan_all_schemas = True
162
+ schema = "dbo"
163
+ print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
164
+ print(f" Scanning all schemas for table discovery...\n")
165
+ elif len(parts) == 3:
166
+ workspace, lakehouse_name, schema = parts
167
+ else:
168
+ raise ValueError(
169
+ f"Invalid connection string format: '{connection_string}'. "
170
+ "Expected format: 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'"
171
+ )
172
+
173
+ if lakehouse_name.endswith(".lakehouse"):
174
+ lakehouse_name = lakehouse_name[:-10]
193
175
 
194
176
  if not workspace or not lakehouse_name:
195
177
  raise ValueError(
196
- "Missing required parameters. Use either:\n"
178
+ "Missing required parameters. Use compact format:\n"
197
179
  " connect('workspace/lakehouse.lakehouse/schema', 'sql_folder')\n"
198
- " connect('workspace/lakehouse.lakehouse') # defaults to dbo\n"
199
- " connect('workspace', 'lakehouse', 'schema', 'sql_folder')\n"
200
- " connect('workspace', 'lakehouse') # defaults to dbo"
180
+ " connect('workspace/lakehouse.lakehouse') # defaults to dbo"
201
181
  )
202
182
 
203
183
  return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold, scan_all_schemas)
@@ -210,7 +190,7 @@ class Duckrun:
210
190
  if token != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
211
191
  self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
212
192
  else:
213
- print("Please login to Azure CLI")
193
+ print("Authenticating with Azure (trying CLI, will fallback to browser if needed)...")
214
194
  from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
215
195
  credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
216
196
  token = credential.get_token("https://storage.azure.com/.default")
@@ -227,7 +207,7 @@ class Duckrun:
227
207
  """
228
208
  token = self._get_storage_token()
229
209
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
230
- print("Getting Azure token for table discovery...")
210
+ print("Authenticating with Azure for table discovery (trying CLI, will fallback to browser if needed)...")
231
211
  from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
232
212
  credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
233
213
  token_obj = credential.get_token("https://storage.azure.com/.default")
@@ -542,7 +522,7 @@ class Duckrun:
542
522
  # Get Azure token
543
523
  token = self._get_storage_token()
544
524
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
545
- print("Getting Azure token for file upload...")
525
+ print("Authenticating with Azure for file upload (trying CLI, will fallback to browser if needed)...")
546
526
  from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
547
527
  credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
548
528
  token_obj = credential.get_token("https://storage.azure.com/.default")
@@ -649,7 +629,7 @@ class Duckrun:
649
629
  # Get Azure token
650
630
  token = self._get_storage_token()
651
631
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
652
- print("Getting Azure token for file download...")
632
+ print("Authenticating with Azure for file download (trying CLI, will fallback to browser if needed)...")
653
633
  from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
654
634
  credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
655
635
  token_obj = credential.get_token("https://storage.azure.com/.default")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.6.3
3
+ Version: 0.1.7
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -20,7 +20,7 @@ Dynamic: license-file
20
20
 
21
21
  <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
22
22
 
23
- Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and Delta Lake.
23
+ A helper package for the things that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy.
24
24
 
25
25
  ## Important Notes
26
26
 
@@ -30,6 +30,10 @@ Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and
30
30
 
31
31
  **Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
32
32
 
33
+ ## What It Does
34
+
35
+ It handles orchestration, arbitrary SQL statements, and file manipulation. That's it - just the things I encounter in my daily workflow when working with Fabric notebooks.
36
+
33
37
  ## Installation
34
38
 
35
39
  ```bash
@@ -101,7 +105,7 @@ con.sql("SELECT * FROM dbo_customers").show()
101
105
  con.sql("SELECT * FROM bronze_raw_data").show()
102
106
  ```
103
107
 
104
- ## Two Ways to Use Duckrun
108
+ ## Three Ways to Use Duckrun
105
109
 
106
110
  ### 1. Data Exploration (Spark-Style API)
107
111
 
@@ -0,0 +1,7 @@
1
+ duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
+ duckrun/core.py,sha256=PzeY1WJVhAGTOuN5Yf86oNhKpK_zw6GYdylZ_BdSJfg,32982
3
+ duckrun-0.1.7.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
4
+ duckrun-0.1.7.dist-info/METADATA,sha256=BIsqAq6Z1JwSv7RwJ6wthzTC7xKSDeigZfVom5RJH0s,13847
5
+ duckrun-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ duckrun-0.1.7.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
7
+ duckrun-0.1.7.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
- duckrun/core.py,sha256=CT2NH5hCLsv4uB5zH3VxTuCVQy0nWkPBG-cICLPhG_8,34245
3
- duckrun-0.1.6.3.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
4
- duckrun-0.1.6.3.dist-info/METADATA,sha256=ny5DcRSU1B4SdHdJqHCYk0-hNo9-zqFABqMY9ulAVNk,13595
5
- duckrun-0.1.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- duckrun-0.1.6.3.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
7
- duckrun-0.1.6.3.dist-info/RECORD,,