duckrun 0.2.13__py3-none-any.whl → 0.2.19.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckrun/__init__.py +4 -2
- duckrun/auth.py +12 -0
- duckrun/core.py +703 -179
- duckrun/notebook.py +324 -0
- duckrun/rle.py +860 -0
- duckrun/runner.py +15 -45
- duckrun/semantic_model.py +143 -17
- duckrun/stats.py +267 -62
- duckrun/writer.py +35 -6
- {duckrun-0.2.13.dist-info → duckrun-0.2.19.dev5.dist-info}/METADATA +3 -3
- duckrun-0.2.19.dev5.dist-info/RECORD +16 -0
- duckrun-0.2.13.dist-info/RECORD +0 -14
- {duckrun-0.2.13.dist-info → duckrun-0.2.19.dev5.dist-info}/WHEEL +0 -0
- {duckrun-0.2.13.dist-info → duckrun-0.2.19.dev5.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.2.13.dist-info → duckrun-0.2.19.dev5.dist-info}/top_level.txt +0 -0
duckrun/notebook.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Notebook operations functionality for duckrun - Import notebooks from web using Fabric REST API
|
|
3
|
+
"""
|
|
4
|
+
import requests
|
|
5
|
+
import base64
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def import_notebook_from_web(
    url: str,
    notebook_name: Optional[str] = None,
    overwrite: bool = False,
    workspace_name: Optional[str] = None
) -> dict:
    """
    Import a Jupyter notebook from a web URL into a Microsoft Fabric workspace using the REST API only.

    The target workspace is resolved as follows: when ``workspace_name`` is omitted, the
    workspace ID of the current Fabric notebook session is read from ``notebookutils``;
    otherwise (or when that lookup yields nothing) the workspace is looked up by display name.

    Args:
        url: URL to the notebook file (e.g., GitHub raw URL). Required.
        notebook_name: Name for the imported notebook in Fabric. Optional - when omitted, the last
            path segment of the URL is used (query string ignored, percent-escapes decoded,
            trailing ".ipynb" removed).
        overwrite: Whether to overwrite if a notebook with that name already exists (default: False).
            When False and the notebook exists, nothing is downloaded or changed.
        workspace_name: Target workspace display name. Required when not running inside a Fabric notebook.

    Returns:
        Dictionary with import result (this function does not raise; failures are reported here):
        {
            "success": bool,
            "message": str,
            "notebook": dict or None (the existing/created notebook item when available),
            "overwritten": bool
        }
        "success" is False when authentication, workspace resolution, any HTTP call, or the
        Fabric long-running create/update operation fails or times out.

    Examples:
        # Inside a Fabric notebook - the current workspace is used
        from duckrun.notebook import import_notebook_from_web

        result = import_notebook_from_web(
            url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",
            notebook_name="MyNotebook"
        )

        # With explicit workspace
        result = import_notebook_from_web(
            url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",
            notebook_name="MyNotebook",
            workspace_name="Analytics Workspace",
            overwrite=True
        )

        # Minimal usage - derives name from URL
        result = import_notebook_from_web(
            url="https://raw.githubusercontent.com/user/repo/main/RunPerfScenario.ipynb"
        )
    """
    # Upper bound (seconds) for each HTTP call so a stalled connection cannot block forever.
    request_timeout = 60

    try:
        # Get authentication token
        from duckrun.auth import get_fabric_api_token
        token = get_fabric_api_token()
        if not token:
            return _notebook_import_result(False, "Failed to get authentication token")

        base_url = "https://api.fabric.microsoft.com/v1"
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json"
        }

        # Determine workspace ID
        workspace_id = None

        # Without an explicit name, try the Fabric notebook runtime context first
        if not workspace_name:
            try:
                import notebookutils  # type: ignore
                workspace_id = notebookutils.runtime.context.get("workspaceId")
            except Exception:
                # Not inside a Fabric notebook (or the context is unavailable);
                # fall through to the name-based lookup below.
                workspace_id = None
            if workspace_id:
                print("📓 Using current workspace from Fabric notebook context")

        # If still no workspace_id, resolve from workspace_name
        if not workspace_id:
            if not workspace_name:
                return _notebook_import_result(
                    False,
                    "workspace_name must be provided when not in Fabric notebook context"
                )

            print(f"🔍 Resolving workspace: {workspace_name}")
            workspaces = _list_fabric_items(f"{base_url}/workspaces", headers, request_timeout)
            workspace = next((ws for ws in workspaces if ws.get("displayName") == workspace_name), None)

            if not workspace:
                return _notebook_import_result(False, f"Workspace '{workspace_name}' not found")

            workspace_id = workspace.get("id")
            print(f"✓ Found workspace: {workspace_name}")

        # Derive notebook name from URL if not provided
        if not notebook_name:
            notebook_name = _notebook_name_from_url(url)
            if not notebook_name:
                return _notebook_import_result(
                    False,
                    f"Could not derive a notebook name from URL '{url}'; pass notebook_name explicitly"
                )
            print(f"📝 Using notebook name from URL: {notebook_name}")

        # Check if notebook already exists
        notebooks_url = f"{base_url}/workspaces/{workspace_id}/notebooks"
        notebooks = _list_fabric_items(notebooks_url, headers, request_timeout)
        existing_notebook = next((nb for nb in notebooks if nb.get("displayName") == notebook_name), None)

        if existing_notebook and not overwrite:
            return _notebook_import_result(
                True,
                f"Notebook '{notebook_name}' already exists (use overwrite=True to replace)",
                notebook=existing_notebook
            )

        # Download notebook content from URL (no Fabric headers: the token must not leave Fabric)
        print(f"⬇️ Downloading notebook from: {url}")
        response = requests.get(url, timeout=request_timeout)
        response.raise_for_status()
        print(f"✓ Notebook downloaded successfully")

        # Base64-encode the raw bytes. Going through response.text would re-decode the body with
        # a guessed charset and can corrupt non-ASCII characters.
        notebook_base64 = base64.b64encode(response.content).decode('ascii')

        # NOTE(review): the part path is "notebook-content.py" although the format is "ipynb";
        # kept exactly as before - confirm against the Fabric notebook definition docs.
        definition = {
            "format": "ipynb",
            "parts": [
                {
                    "path": "notebook-content.py",
                    "payload": notebook_base64,
                    "payloadType": "InlineBase64"
                }
            ]
        }

        if existing_notebook and overwrite:
            # Update existing notebook
            notebook_id = existing_notebook.get("id")
            print(f"🔄 Updating existing notebook: {notebook_name}")

            update_url = f"{base_url}/workspaces/{workspace_id}/notebooks/{notebook_id}/updateDefinition"
            response = requests.post(
                update_url,
                headers=headers,
                json={"definition": definition},
                timeout=request_timeout
            )
            response.raise_for_status()

            # Handle long-running operation; a failed/timed-out operation is a failed import
            if response.status_code == 202:
                operation_id = response.headers.get('x-ms-operation-id')
                if operation_id and not _wait_for_operation(operation_id, headers):
                    return _notebook_import_result(
                        False,
                        f"Update of notebook '{notebook_name}' failed or timed out (operation {operation_id})"
                    )

            return _notebook_import_result(
                True,
                f"Notebook '{notebook_name}' updated successfully",
                notebook=existing_notebook,
                overwritten=True
            )

        # Create new notebook
        print(f"➕ Creating new notebook: {notebook_name}")

        response = requests.post(
            notebooks_url,
            headers=headers,
            json={"displayName": notebook_name, "definition": definition},
            timeout=request_timeout
        )
        response.raise_for_status()

        # Handle long-running operation; a failed/timed-out operation is a failed import
        operation_id = None
        if response.status_code == 202:
            operation_id = response.headers.get('x-ms-operation-id')
            if operation_id and not _wait_for_operation(operation_id, headers):
                return _notebook_import_result(
                    False,
                    f"Creation of notebook '{notebook_name}' failed or timed out (operation {operation_id})"
                )

        # A synchronous create returns the item in the body; an accepted (202) create may
        # return no body at all, so the item is then fetched separately.
        created_notebook = _json_or_none(response)
        if not (isinstance(created_notebook, dict) and created_notebook.get("id")):
            created_notebook = _fetch_created_notebook(
                base_url, operation_id, notebooks_url, notebook_name, headers, request_timeout
            )

        return _notebook_import_result(
            True,
            f"Notebook '{notebook_name}' created successfully",
            notebook=created_notebook
        )

    except requests.exceptions.RequestException as e:
        return _notebook_import_result(False, f"HTTP Error: {str(e)}")
    except Exception as e:
        return _notebook_import_result(False, f"Error: {str(e)}")


def _notebook_import_result(success: bool, message: str, notebook: Optional[dict] = None,
                            overwritten: bool = False) -> dict:
    """Build the result dictionary returned by import_notebook_from_web."""
    return {
        "success": success,
        "message": message,
        "notebook": notebook,
        "overwritten": overwritten
    }


def _notebook_name_from_url(url: str) -> str:
    """Return the last URL path segment, percent-decoded and without a trailing ".ipynb"."""
    from urllib.parse import unquote, urlsplit

    # urlsplit separates the path from any "?query" / "#fragment" part.
    filename = unquote(urlsplit(url).path.split("/")[-1])
    if filename.endswith(".ipynb"):
        filename = filename[:-len(".ipynb")]
    return filename


def _json_or_none(response):
    """Return the decoded JSON body of a response, or None when it is empty or not JSON."""
    if not response.content:
        return None
    try:
        return response.json()
    except ValueError:
        return None


def _list_fabric_items(list_url: str, headers: dict, timeout: float) -> list:
    """Collect the "value" entries of a Fabric list endpoint, following "continuationUri" pages."""
    items = []
    visited = set()  # guards against a server repeating the same continuation URI
    next_url = list_url
    while next_url and next_url not in visited:
        visited.add(next_url)
        response = requests.get(next_url, headers=headers, timeout=timeout)
        response.raise_for_status()
        page = response.json()
        items.extend(page.get("value", []))
        next_url = page.get("continuationUri")
    return items


def _fetch_created_notebook(base_url: str, operation_id: Optional[str], notebooks_url: str,
                            notebook_name: str, headers: dict, timeout: float) -> Optional[dict]:
    """Best-effort lookup of a just-created notebook item; returns None when it cannot be found."""
    if operation_id:
        try:
            response = requests.get(
                f"{base_url}/operations/{operation_id}/result",
                headers=headers,
                timeout=timeout
            )
            response.raise_for_status()
            item = response.json()
            if isinstance(item, dict) and item.get("id"):
                return item
        except (requests.exceptions.RequestException, ValueError):
            # The notebook was already created; fall back to a lookup by name.
            pass

    try:
        notebooks = _list_fabric_items(notebooks_url, headers, timeout)
    except (requests.exceptions.RequestException, ValueError):
        # Creation succeeded; failing to re-read the item must not turn it into an error.
        return None
    return next((nb for nb in notebooks if nb.get("displayName") == notebook_name), None)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _wait_for_operation(
    operation_id: str,
    headers: dict,
    max_attempts: int = 30,
    poll_interval: float = 2.0,
    request_timeout: float = 30.0
) -> bool:
    """
    Wait for a long-running Fabric API operation to complete.

    Polls the operation status endpoint, sleeping ``poll_interval`` seconds before each
    attempt, until the status is 'Succeeded' or 'Failed' or the attempts are exhausted.

    Args:
        operation_id: The operation ID to monitor
        headers: Request headers with authentication
        max_attempts: Maximum number of polling attempts (default: 30)
        poll_interval: Seconds to sleep before each polling attempt (default: 2.0)
        request_timeout: Timeout in seconds for each status request (default: 30.0)

    Returns:
        True if operation succeeded, False otherwise (failed, timed out, or the
        status could not be retrieved)
    """
    import time

    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"

    for _ in range(max_attempts):
        time.sleep(poll_interval)

        try:
            # The timeout keeps a stalled connection from blocking the poll loop forever.
            response = requests.get(status_url, headers=headers, timeout=request_timeout)
            response.raise_for_status()

            status_data = response.json()
            status = status_data.get('status')

            if status == 'Succeeded':
                print(f"✓ Operation completed successfully")
                return True
            elif status == 'Failed':
                # "error" may be present but null; treat that like a missing error object.
                error = status_data.get('error') or {}
                print(f"❌ Operation failed: {error.get('message', 'Unknown error')}")
                return False
            else:
                print(f"⏳ Operation in progress... ({status})")

        except Exception as e:
            # Any failure while checking the status ends the wait and reports failure.
            print(f"⚠️ Error checking operation status: {e}")
            return False

    print(f"⚠️ Operation timed out after {max_attempts} attempts")
    return False
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# Convenience wrapper: runs the import and prints the outcome instead of returning it
|
|
286
|
+
def import_notebook(
    url: str,
    notebook_name: Optional[str] = None,
    overwrite: bool = False,
    workspace_name: Optional[str] = None
) -> None:
    """
    Run import_notebook_from_web and print the outcome instead of returning it.

    A ✅ line is printed when the result reports success and a ❌ line otherwise;
    any exception raised along the way is printed as "Error: ...". Nothing is returned.

    Args:
        url: URL to the notebook file
        notebook_name: Name for the imported notebook
        overwrite: Whether to overwrite if exists
        workspace_name: Target workspace name

    Examples:
        from duckrun.notebook import import_notebook

        import_notebook(
            url="https://raw.githubusercontent.com/djouallah/fabric_demo/refs/heads/main/Benchmark/RunPerfScenario.ipynb",
            notebook_name="RunPerfScenario",
            overwrite=False
        )
    """
    try:
        outcome = import_notebook_from_web(
            url=url,
            notebook_name=notebook_name,
            overwrite=overwrite,
            workspace_name=workspace_name,
        )
        # Pick the status marker first, then emit a single line with the message.
        marker = "✅" if outcome["success"] else "❌"
        print(f"{marker} {outcome['message']}")
    except Exception as e:
        print(f"Error: {e}")
|