duckrun 0.2.16.dev2.tar.gz → 0.2.18.dev1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of duckrun might be problematic.

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.16.dev2
-Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
+Version: 0.2.18.dev1
+Summary: Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)
 Author: mim
 License: MIT
 Project-URL: Homepage, https://github.com/djouallah/duckrun
@@ -0,0 +1,11 @@
+"""Duckrun - Lakehouse task runner powered by DuckDB"""
+
+from duckrun.core import Duckrun
+from duckrun.notebook import import_notebook_from_web, import_notebook
+
+__version__ = "0.2.18.dev1"
+
+# Expose unified connect method at module level
+connect = Duckrun.connect
+
+__all__ = ["Duckrun", "connect", "import_notebook_from_web", "import_notebook"]
@@ -12,7 +12,71 @@ from .runner import run as _run
 from .files import copy as _copy, download as _download
 from .writer import QueryResult
 
-class Duckrun:
+
+class WorkspaceOperationsMixin:
+    """
+    Mixin class for workspace-level operations that work for both
+    full Duckrun connections and workspace-only connections.
+    """
+
+    def import_notebook_from_web(self, url: str,
+                                 notebook_name: Optional[str] = None,
+                                 overwrite: bool = False) -> dict:
+        """
+        Import a Jupyter notebook from a web URL into the workspace.
+
+        Args:
+            url: URL to the notebook file (e.g., GitHub raw URL). Required.
+            notebook_name: Name for the imported notebook. Optional - derived from URL if not provided.
+            overwrite: Whether to overwrite if notebook already exists (default: False)
+
+        Returns:
+            Dictionary with import result
+
+        Examples:
+            con = duckrun.connect("workspace/lakehouse.lakehouse")
+            result = con.import_notebook_from_web(
+                url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb"
+            )
+
+            ws = duckrun.connect("workspace")
+            result = ws.import_notebook_from_web(
+                url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb"
+            )
+        """
+        from .notebook import import_notebook_from_web as _import_notebook_from_web
+
+        # Get workspace name from either self.workspace or self.workspace_name
+        workspace_name = getattr(self, 'workspace', None) or getattr(self, 'workspace_name', None)
+
+        return _import_notebook_from_web(
+            url=url,
+            notebook_name=notebook_name,
+            overwrite=overwrite,
+            workspace_name=workspace_name
+        )
+
+    def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
+        """Helper method to get workspace ID from name"""
+        try:
+            url = "https://api.fabric.microsoft.com/v1/workspaces"
+            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+
+            workspaces = response.json().get("value", [])
+            for workspace in workspaces:
+                if workspace.get("displayName") == workspace_name:
+                    return workspace.get("id")
+
+            return None
+
+        except Exception:
+            return None
+
+
+class Duckrun(WorkspaceOperationsMixin):
     """
     OneLake task runner with clean tuple-based API.
     Supports lakehouses, warehouses, databases, and other OneLake items.
@@ -1162,25 +1226,6 @@ class Duckrun:
             wait_seconds=wait_seconds
         )
 
-    def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
-        """Helper method to get workspace ID from name"""
-        try:
-            url = "https://api.fabric.microsoft.com/v1/workspaces"
-            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-            response = requests.get(url, headers=headers)
-            response.raise_for_status()
-
-            workspaces = response.json().get("value", [])
-            for workspace in workspaces:
-                if workspace.get("displayName") == workspace_name:
-                    return workspace.get("id")
-
-            return None
-
-        except Exception:
-            return None
-
     def close(self):
         """Close DuckDB connection"""
         if self.con:
@@ -1188,7 +1233,7 @@
         print("Connection closed")
 
 
-class WorkspaceConnection:
+class WorkspaceConnection(WorkspaceOperationsMixin):
     """
     Simple workspace connection for lakehouse management operations.
    """
@@ -1428,23 +1473,4 @@ class WorkspaceConnection:
             print(f"❌ Error downloading semantic model: {e}")
             import traceback
             traceback.print_exc()
-            return None
-
-    def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
-        """Helper method to get workspace ID from name"""
-        try:
-            url = "https://api.fabric.microsoft.com/v1/workspaces"
-            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-            response = requests.get(url, headers=headers)
-            response.raise_for_status()
-
-            workspaces = response.json().get("value", [])
-            for workspace in workspaces:
-                if workspace.get("displayName") == workspace_name:
-                    return workspace.get("id")
-
-            return None
-
-        except Exception:
             return None
@@ -0,0 +1,322 @@
+"""
+Notebook operations functionality for duckrun - Import notebooks from web using Fabric REST API
+"""
+import requests
+import base64
+from typing import Optional
+
+
+def import_notebook_from_web(
+    url: str,
+    notebook_name: Optional[str] = None,
+    overwrite: bool = False,
+    workspace_name: Optional[str] = None
+) -> dict:
+    """
+    Import a Jupyter notebook from a web URL into Microsoft Fabric workspace using REST API only.
+    Uses duckrun.connect context by default or explicit workspace name.
+
+    Args:
+        url: URL to the notebook file (e.g., GitHub raw URL). Required.
+        notebook_name: Name for the imported notebook in Fabric. Optional - will use filename from URL if not provided.
+        overwrite: Whether to overwrite if notebook already exists (default: False)
+        workspace_name: Target workspace name. Optional - will use current workspace from duckrun context if available.
+
+    Returns:
+        Dictionary with import result:
+        {
+            "success": bool,
+            "message": str,
+            "notebook": dict (if successful),
+            "overwritten": bool
+        }
+
+    Examples:
+        # Basic usage with duckrun context
+        import duckrun
+        dr = duckrun.connect("MyWorkspace/MyLakehouse.lakehouse")
+        from duckrun.notebook import import_notebook_from_web
+
+        result = import_notebook_from_web(
+            url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",
+            notebook_name="MyNotebook"
+        )
+
+        # With explicit workspace
+        result = import_notebook_from_web(
+            url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",
+            notebook_name="MyNotebook",
+            workspace_name="Analytics Workspace",
+            overwrite=True
+        )
+
+        # Minimal usage - derives name from URL
+        result = import_notebook_from_web(
+            url="https://raw.githubusercontent.com/user/repo/main/RunPerfScenario.ipynb"
+        )
+    """
+    try:
+        # Get authentication token
+        from duckrun.auth import get_fabric_api_token
+        token = get_fabric_api_token()
+        if not token:
+            return {
+                "success": False,
+                "message": "Failed to get authentication token",
+                "notebook": None,
+                "overwritten": False
+            }
+
+        base_url = "https://api.fabric.microsoft.com/v1"
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json"
+        }
+
+        # Determine workspace ID
+        workspace_id = None
+
+        # Try to get from duckrun context if not provided
+        if not workspace_name:
+            try:
+                # Try to get from notebook context first
+                import notebookutils  # type: ignore
+                workspace_id = notebookutils.runtime.context.get("workspaceId")
+                print("📓 Using current workspace from Fabric notebook context")
+            except (ImportError, Exception):
+                # Not in notebook, try to get from environment/last connection
+                pass
+
+        # If still no workspace_id, resolve from workspace_name
+        if not workspace_id:
+            if not workspace_name:
+                return {
+                    "success": False,
+                    "message": "workspace_name must be provided when not in Fabric notebook context",
+                    "notebook": None,
+                    "overwritten": False
+                }
+
+            # Get workspace ID by name
+            print(f"🔍 Resolving workspace: {workspace_name}")
+            ws_url = f"{base_url}/workspaces"
+            response = requests.get(ws_url, headers=headers)
+            response.raise_for_status()
+
+            workspaces = response.json().get("value", [])
+            workspace = next((ws for ws in workspaces if ws.get("displayName") == workspace_name), None)
+
+            if not workspace:
+                return {
+                    "success": False,
+                    "message": f"Workspace '{workspace_name}' not found",
+                    "notebook": None,
+                    "overwritten": False
+                }
+
+            workspace_id = workspace.get("id")
+            print(f"✓ Found workspace: {workspace_name}")
+
+        # Derive notebook name from URL if not provided
+        if not notebook_name:
+            # Extract filename from URL
+            notebook_name = url.split("/")[-1]
+            if notebook_name.endswith(".ipynb"):
+                notebook_name = notebook_name[:-6]  # Remove .ipynb extension
+            print(f"📝 Using notebook name from URL: {notebook_name}")
+
+        # Check if notebook already exists
+        notebooks_url = f"{base_url}/workspaces/{workspace_id}/notebooks"
+        response = requests.get(notebooks_url, headers=headers)
+        response.raise_for_status()
+
+        notebooks = response.json().get("value", [])
+        existing_notebook = next((nb for nb in notebooks if nb.get("displayName") == notebook_name), None)
+
+        if existing_notebook and not overwrite:
+            return {
+                "success": True,
+                "message": f"Notebook '{notebook_name}' already exists (use overwrite=True to replace)",
+                "notebook": existing_notebook,
+                "overwritten": False
+            }
+
+        # Download notebook content from URL
+        print(f"⬇️ Downloading notebook from: {url}")
+        response = requests.get(url)
+        response.raise_for_status()
+        notebook_content = response.text
+        print(f"✓ Notebook downloaded successfully")
+
+        # Convert notebook content to base64
+        notebook_base64 = base64.b64encode(notebook_content.encode('utf-8')).decode('utf-8')
+
+        # Prepare the payload for creating/updating the notebook
+        if existing_notebook and overwrite:
+            # Update existing notebook
+            notebook_id = existing_notebook.get("id")
+            print(f"🔄 Updating existing notebook: {notebook_name}")
+
+            update_url = f"{base_url}/workspaces/{workspace_id}/notebooks/{notebook_id}/updateDefinition"
+            payload = {
+                "definition": {
+                    "parts": [
+                        {
+                            "path": "notebook-content.py",
+                            "payload": notebook_base64,
+                            "payloadType": "InlineBase64"
+                        }
+                    ]
+                }
+            }
+
+            response = requests.post(update_url, headers=headers, json=payload)
+            response.raise_for_status()
+
+            # Handle long-running operation
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                if operation_id:
+                    _wait_for_operation(operation_id, headers)
+
+            return {
+                "success": True,
+                "message": f"Notebook '{notebook_name}' updated successfully",
+                "notebook": existing_notebook,
+                "overwritten": True
+            }
+        else:
+            # Create new notebook
+            print(f"➕ Creating new notebook: {notebook_name}")
+
+            payload = {
+                "displayName": notebook_name,
+                "definition": {
+                    "parts": [
+                        {
+                            "path": "notebook-content.py",
+                            "payload": notebook_base64,
+                            "payloadType": "InlineBase64"
+                        }
+                    ]
+                }
+            }
+
+            response = requests.post(notebooks_url, headers=headers, json=payload)
+            response.raise_for_status()
+
+            # Handle long-running operation
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                if operation_id:
+                    _wait_for_operation(operation_id, headers)
+
+            created_notebook = response.json()
+
+            return {
+                "success": True,
+                "message": f"Notebook '{notebook_name}' created successfully",
+                "notebook": created_notebook,
+                "overwritten": False
+            }
+
+    except requests.exceptions.RequestException as e:
+        return {
+            "success": False,
+            "message": f"HTTP Error: {str(e)}",
+            "notebook": None,
+            "overwritten": False
+        }
+    except Exception as e:
+        return {
+            "success": False,
+            "message": f"Error: {str(e)}",
+            "notebook": None,
+            "overwritten": False
+        }
+
+
+def _wait_for_operation(operation_id: str, headers: dict, max_attempts: int = 30) -> bool:
+    """
+    Wait for a long-running Fabric API operation to complete.
+
+    Args:
+        operation_id: The operation ID to monitor
+        headers: Request headers with authentication
+        max_attempts: Maximum number of polling attempts (default: 30)
+
+    Returns:
+        True if operation succeeded, False otherwise
+    """
+    import time
+
+    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+
+    for attempt in range(max_attempts):
+        time.sleep(2)
+
+        try:
+            response = requests.get(status_url, headers=headers)
+            response.raise_for_status()
+
+            status_data = response.json()
+            status = status_data.get('status')
+
+            if status == 'Succeeded':
+                print(f"✓ Operation completed successfully")
+                return True
+            elif status == 'Failed':
+                error = status_data.get('error', {})
+                print(f"❌ Operation failed: {error.get('message', 'Unknown error')}")
+                return False
+            else:
+                print(f"⏳ Operation in progress... ({status})")
+
+        except Exception as e:
+            print(f"⚠️ Error checking operation status: {e}")
+            return False
+
+    print(f"⚠️ Operation timed out after {max_attempts} attempts")
+    return False
+
+
+# Convenience wrapper for the try-except pattern mentioned in the request
+def import_notebook(
+    url: str,
+    notebook_name: Optional[str] = None,
+    overwrite: bool = False,
+    workspace_name: Optional[str] = None
+) -> None:
+    """
+    Convenience wrapper that prints results and handles errors.
+
+    Args:
+        url: URL to the notebook file
+        notebook_name: Name for the imported notebook
+        overwrite: Whether to overwrite if exists
+        workspace_name: Target workspace name
+
+    Examples:
+        from duckrun.notebook import import_notebook
+
+        import_notebook(
+            url="https://raw.githubusercontent.com/djouallah/fabric_demo/refs/heads/main/Benchmark/RunPerfScenario.ipynb",
+            notebook_name="RunPerfScenario",
+            overwrite=False
+        )
+    """
+    try:
+        result = import_notebook_from_web(
+            url=url,
+            notebook_name=notebook_name,
+            overwrite=overwrite,
+            workspace_name=workspace_name
+        )
+
+        if result["success"]:
+            print(f"✅ {result['message']}")
+        else:
+            print(f"❌ {result['message']}")
+
+    except Exception as e:
+        print(f"Error: {e}")
@@ -137,7 +137,7 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
         dataset_id = get_dataset_id(dataset_name, workspace_id, client)
 
     payload = {
-        "type": "full",
+        "type": "clearValues",
         "commitMode": "transactional",
         "maxParallelism": 10,
        "retryCount": 2,
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.16.dev2
-Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
+Version: 0.2.18.dev1
+Summary: Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)
 Author: mim
 License: MIT
 Project-URL: Homepage, https://github.com/djouallah/duckrun
@@ -6,6 +6,7 @@ duckrun/auth.py
 duckrun/core.py
 duckrun/files.py
 duckrun/lakehouse.py
+duckrun/notebook.py
 duckrun/runner.py
 duckrun/semantic_model.py
 duckrun/stats.py
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "duckrun"
-version = "0.2.16.dev2"
-description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
+version = "0.2.18.dev1"
+description = "Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)"
 readme = "README.md"
 license = {text = "MIT"}
 authors = [
@@ -1,10 +0,0 @@
-"""Duckrun - Lakehouse task runner powered by DuckDB"""
-
-from duckrun.core import Duckrun
-
-__version__ = "0.2.14.dev2"
-
-# Expose unified connect method at module level
-connect = Duckrun.connect
-
-__all__ = ["Duckrun", "connect"]