duckrun 0.2.17__py3-none-any.whl → 0.2.18.dev2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- duckrun/__init__.py +3 -2
- duckrun/core.py +72 -42
- duckrun/notebook.py +322 -0
- duckrun/semantic_model.py +33 -9
- duckrun/stats.py +10 -2
- {duckrun-0.2.17.dist-info → duckrun-0.2.18.dev2.dist-info}/METADATA +2 -2
- duckrun-0.2.18.dev2.dist-info/RECORD +15 -0
- duckrun-0.2.17.dist-info/RECORD +0 -14
- {duckrun-0.2.17.dist-info → duckrun-0.2.18.dev2.dist-info}/WHEEL +0 -0
- {duckrun-0.2.17.dist-info → duckrun-0.2.18.dev2.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.2.17.dist-info → duckrun-0.2.18.dev2.dist-info}/top_level.txt +0 -0
duckrun/__init__.py
CHANGED
@@ -1,10 +1,11 @@
 """Duckrun - Lakehouse task runner powered by DuckDB"""
 
 from duckrun.core import Duckrun
+from duckrun.notebook import import_notebook_from_web, import_notebook
 
-__version__ = "0.2.17"
+__version__ = "0.2.18.dev2"
 
 # Expose unified connect method at module level
 connect = Duckrun.connect
 
-__all__ = ["Duckrun", "connect"]
+__all__ = ["Duckrun", "connect", "import_notebook_from_web", "import_notebook"]
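
The new module-level exports make the notebook helpers callable straight from the package. A minimal usage sketch (the URL and workspace name below are placeholders, not values from this release):

import duckrun

# import_notebook prints the result; import_notebook_from_web returns a result dict
duckrun.import_notebook(
    url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",  # placeholder URL
    workspace_name="My Workspace"  # placeholder; optional inside a Fabric notebook
)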
duckrun/core.py
CHANGED
@@ -12,7 +12,71 @@ from .runner import run as _run
 from .files import copy as _copy, download as _download
 from .writer import QueryResult
 
-class Duckrun:
+
+class WorkspaceOperationsMixin:
+    """
+    Mixin class for workspace-level operations that work for both
+    full Duckrun connections and workspace-only connections.
+    """
+
+    def import_notebook_from_web(self, url: str,
+                                 notebook_name: Optional[str] = None,
+                                 overwrite: bool = False) -> dict:
+        """
+        Import a Jupyter notebook from a web URL into the workspace.
+
+        Args:
+            url: URL to the notebook file (e.g., GitHub raw URL). Required.
+            notebook_name: Name for the imported notebook. Optional - derived from URL if not provided.
+            overwrite: Whether to overwrite if notebook already exists (default: False)
+
+        Returns:
+            Dictionary with import result
+
+        Examples:
+            con = duckrun.connect("workspace/lakehouse.lakehouse")
+            result = con.import_notebook_from_web(
+                url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb"
+            )
+
+            ws = duckrun.connect("workspace")
+            result = ws.import_notebook_from_web(
+                url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb"
+            )
+        """
+        from .notebook import import_notebook_from_web as _import_notebook_from_web
+
+        # Get workspace name from either self.workspace or self.workspace_name
+        workspace_name = getattr(self, 'workspace', None) or getattr(self, 'workspace_name', None)
+
+        return _import_notebook_from_web(
+            url=url,
+            notebook_name=notebook_name,
+            overwrite=overwrite,
+            workspace_name=workspace_name
+        )
+
+    def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
+        """Helper method to get workspace ID from name"""
+        try:
+            url = "https://api.fabric.microsoft.com/v1/workspaces"
+            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+
+            workspaces = response.json().get("value", [])
+            for workspace in workspaces:
+                if workspace.get("displayName") == workspace_name:
+                    return workspace.get("id")
+
+            return None
+
+        except Exception:
+            return None
+
+
+class Duckrun(WorkspaceOperationsMixin):
     """
     OneLake task runner with clean tuple-based API.
     Supports lakehouses, warehouses, databases, and other OneLake items.
@@ -971,12 +1035,13 @@ class Duckrun:
         """Get underlying DuckDB connection"""
         return self.con
 
-    def get_stats(self, source: str):
+    def get_stats(self, source: str = None):
         """
         Get comprehensive statistics for Delta Lake tables.
 
         Args:
-            source: Can be one of:
+            source: Optional. Can be one of:
+                - None: Use all tables in the connection's schema (default)
                 - Table name: 'table_name' (uses current schema)
                 - Schema.table: 'schema.table_name' (specific table in schema)
                 - Schema only: 'schema' (all tables in schema)
@@ -988,6 +1053,9 @@ class Duckrun:
         Examples:
             con = duckrun.connect("tmp/data.lakehouse/aemo")
 
+            # All tables in current schema (aemo)
+            stats = con.get_stats()
+
             # Single table in current schema
             stats = con.get_stats('price')
 
@@ -1162,25 +1230,6 @@ class Duckrun:
             wait_seconds=wait_seconds
         )
 
-    def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
-        """Helper method to get workspace ID from name"""
-        try:
-            url = "https://api.fabric.microsoft.com/v1/workspaces"
-            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-            response = requests.get(url, headers=headers)
-            response.raise_for_status()
-
-            workspaces = response.json().get("value", [])
-            for workspace in workspaces:
-                if workspace.get("displayName") == workspace_name:
-                    return workspace.get("id")
-
-            return None
-
-        except Exception:
-            return None
-
     def close(self):
         """Close DuckDB connection"""
         if self.con:
@@ -1188,7 +1237,7 @@ class Duckrun:
             print("Connection closed")
 
 
-class WorkspaceConnection:
+class WorkspaceConnection(WorkspaceOperationsMixin):
     """
     Simple workspace connection for lakehouse management operations.
     """
@@ -1428,23 +1477,4 @@ class WorkspaceConnection:
             print(f"❌ Error downloading semantic model: {e}")
             import traceback
             traceback.print_exc()
-            return None
-
-    def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
-        """Helper method to get workspace ID from name"""
-        try:
-            url = "https://api.fabric.microsoft.com/v1/workspaces"
-            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-            response = requests.get(url, headers=headers)
-            response.raise_for_status()
-
-            workspaces = response.json().get("value", [])
-            for workspace in workspaces:
-                if workspace.get("displayName") == workspace_name:
-                    return workspace.get("id")
-
-            return None
-
-        except Exception:
             return None
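
Net effect of the refactor above: the `_get_workspace_id_by_name` helper, previously duplicated in `Duckrun` and `WorkspaceConnection`, moves into `WorkspaceOperationsMixin`, and both classes inherit it along with `import_notebook_from_web`. A sketch of the two call paths this enables (the connect strings are placeholders):

import duckrun

# Full lakehouse connection (Duckrun) now exposes the mixin method
con = duckrun.connect("My Workspace/Sales.lakehouse")  # placeholder workspace/lakehouse
con.import_notebook_from_web(url="https://raw.githubusercontent.com/user/repo/main/etl.ipynb")

# Workspace-only connection (WorkspaceConnection) gets the same method via the mixin
ws = duckrun.connect("My Workspace")  # placeholder workspace
ws.import_notebook_from_web(
    url="https://raw.githubusercontent.com/user/repo/main/etl.ipynb",
    overwrite=True
)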
duckrun/notebook.py
ADDED
@@ -0,0 +1,322 @@
+"""
+Notebook operations functionality for duckrun - Import notebooks from web using Fabric REST API
+"""
+import requests
+import base64
+from typing import Optional
+
+
+def import_notebook_from_web(
+    url: str,
+    notebook_name: Optional[str] = None,
+    overwrite: bool = False,
+    workspace_name: Optional[str] = None
+) -> dict:
+    """
+    Import a Jupyter notebook from a web URL into Microsoft Fabric workspace using REST API only.
+    Uses duckrun.connect context by default or explicit workspace name.
+
+    Args:
+        url: URL to the notebook file (e.g., GitHub raw URL). Required.
+        notebook_name: Name for the imported notebook in Fabric. Optional - will use filename from URL if not provided.
+        overwrite: Whether to overwrite if notebook already exists (default: False)
+        workspace_name: Target workspace name. Optional - will use current workspace from duckrun context if available.
+
+    Returns:
+        Dictionary with import result:
+        {
+            "success": bool,
+            "message": str,
+            "notebook": dict (if successful),
+            "overwritten": bool
+        }
+
+    Examples:
+        # Basic usage with duckrun context
+        import duckrun
+        dr = duckrun.connect("MyWorkspace/MyLakehouse.lakehouse")
+        from duckrun.notebook import import_notebook_from_web
+
+        result = import_notebook_from_web(
+            url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",
+            notebook_name="MyNotebook"
+        )
+
+        # With explicit workspace
+        result = import_notebook_from_web(
+            url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",
+            notebook_name="MyNotebook",
+            workspace_name="Analytics Workspace",
+            overwrite=True
+        )
+
+        # Minimal usage - derives name from URL
+        result = import_notebook_from_web(
+            url="https://raw.githubusercontent.com/user/repo/main/RunPerfScenario.ipynb"
+        )
+    """
+    try:
+        # Get authentication token
+        from duckrun.auth import get_fabric_api_token
+        token = get_fabric_api_token()
+        if not token:
+            return {
+                "success": False,
+                "message": "Failed to get authentication token",
+                "notebook": None,
+                "overwritten": False
+            }
+
+        base_url = "https://api.fabric.microsoft.com/v1"
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json"
+        }
+
+        # Determine workspace ID
+        workspace_id = None
+
+        # Try to get from duckrun context if not provided
+        if not workspace_name:
+            try:
+                # Try to get from notebook context first
+                import notebookutils  # type: ignore
+                workspace_id = notebookutils.runtime.context.get("workspaceId")
+                print("📓 Using current workspace from Fabric notebook context")
+            except (ImportError, Exception):
+                # Not in notebook, try to get from environment/last connection
+                pass
+
+        # If still no workspace_id, resolve from workspace_name
+        if not workspace_id:
+            if not workspace_name:
+                return {
+                    "success": False,
+                    "message": "workspace_name must be provided when not in Fabric notebook context",
+                    "notebook": None,
+                    "overwritten": False
+                }
+
+            # Get workspace ID by name
+            print(f"🔍 Resolving workspace: {workspace_name}")
+            ws_url = f"{base_url}/workspaces"
+            response = requests.get(ws_url, headers=headers)
+            response.raise_for_status()
+
+            workspaces = response.json().get("value", [])
+            workspace = next((ws for ws in workspaces if ws.get("displayName") == workspace_name), None)
+
+            if not workspace:
+                return {
+                    "success": False,
+                    "message": f"Workspace '{workspace_name}' not found",
+                    "notebook": None,
+                    "overwritten": False
+                }
+
+            workspace_id = workspace.get("id")
+            print(f"✓ Found workspace: {workspace_name}")
+
+        # Derive notebook name from URL if not provided
+        if not notebook_name:
+            # Extract filename from URL
+            notebook_name = url.split("/")[-1]
+            if notebook_name.endswith(".ipynb"):
+                notebook_name = notebook_name[:-6]  # Remove .ipynb extension
+            print(f"📝 Using notebook name from URL: {notebook_name}")
+
+        # Check if notebook already exists
+        notebooks_url = f"{base_url}/workspaces/{workspace_id}/notebooks"
+        response = requests.get(notebooks_url, headers=headers)
+        response.raise_for_status()
+
+        notebooks = response.json().get("value", [])
+        existing_notebook = next((nb for nb in notebooks if nb.get("displayName") == notebook_name), None)
+
+        if existing_notebook and not overwrite:
+            return {
+                "success": True,
+                "message": f"Notebook '{notebook_name}' already exists (use overwrite=True to replace)",
+                "notebook": existing_notebook,
+                "overwritten": False
+            }
+
+        # Download notebook content from URL
+        print(f"⬇️ Downloading notebook from: {url}")
+        response = requests.get(url)
+        response.raise_for_status()
+        notebook_content = response.text
+        print(f"✓ Notebook downloaded successfully")
+
+        # Convert notebook content to base64
+        notebook_base64 = base64.b64encode(notebook_content.encode('utf-8')).decode('utf-8')
+
+        # Prepare the payload for creating/updating the notebook
+        if existing_notebook and overwrite:
+            # Update existing notebook
+            notebook_id = existing_notebook.get("id")
+            print(f"🔄 Updating existing notebook: {notebook_name}")
+
+            update_url = f"{base_url}/workspaces/{workspace_id}/notebooks/{notebook_id}/updateDefinition"
+            payload = {
+                "definition": {
+                    "parts": [
+                        {
+                            "path": "notebook-content.py",
+                            "payload": notebook_base64,
+                            "payloadType": "InlineBase64"
+                        }
+                    ]
+                }
+            }
+
+            response = requests.post(update_url, headers=headers, json=payload)
+            response.raise_for_status()
+
+            # Handle long-running operation
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                if operation_id:
+                    _wait_for_operation(operation_id, headers)
+
+            return {
+                "success": True,
+                "message": f"Notebook '{notebook_name}' updated successfully",
+                "notebook": existing_notebook,
+                "overwritten": True
+            }
+        else:
+            # Create new notebook
+            print(f"➕ Creating new notebook: {notebook_name}")
+
+            payload = {
+                "displayName": notebook_name,
+                "definition": {
+                    "parts": [
+                        {
+                            "path": "notebook-content.py",
+                            "payload": notebook_base64,
+                            "payloadType": "InlineBase64"
+                        }
+                    ]
+                }
+            }
+
+            response = requests.post(notebooks_url, headers=headers, json=payload)
+            response.raise_for_status()
+
+            # Handle long-running operation
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                if operation_id:
+                    _wait_for_operation(operation_id, headers)
+
+            created_notebook = response.json()
+
+            return {
+                "success": True,
+                "message": f"Notebook '{notebook_name}' created successfully",
+                "notebook": created_notebook,
+                "overwritten": False
+            }
+
+    except requests.exceptions.RequestException as e:
+        return {
+            "success": False,
+            "message": f"HTTP Error: {str(e)}",
+            "notebook": None,
+            "overwritten": False
+        }
+    except Exception as e:
+        return {
+            "success": False,
+            "message": f"Error: {str(e)}",
+            "notebook": None,
+            "overwritten": False
+        }
+
+
+def _wait_for_operation(operation_id: str, headers: dict, max_attempts: int = 30) -> bool:
+    """
+    Wait for a long-running Fabric API operation to complete.
+
+    Args:
+        operation_id: The operation ID to monitor
+        headers: Request headers with authentication
+        max_attempts: Maximum number of polling attempts (default: 30)
+
+    Returns:
+        True if operation succeeded, False otherwise
+    """
+    import time
+
+    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+
+    for attempt in range(max_attempts):
+        time.sleep(2)
+
+        try:
+            response = requests.get(status_url, headers=headers)
+            response.raise_for_status()
+
+            status_data = response.json()
+            status = status_data.get('status')
+
+            if status == 'Succeeded':
+                print(f"✓ Operation completed successfully")
+                return True
+            elif status == 'Failed':
+                error = status_data.get('error', {})
+                print(f"❌ Operation failed: {error.get('message', 'Unknown error')}")
+                return False
+            else:
+                print(f"⏳ Operation in progress... ({status})")
+
+        except Exception as e:
+            print(f"⚠️ Error checking operation status: {e}")
+            return False
+
+    print(f"⚠️ Operation timed out after {max_attempts} attempts")
+    return False
+
+
+# Convenience wrapper for the try-except pattern mentioned in the request
+def import_notebook(
+    url: str,
+    notebook_name: Optional[str] = None,
+    overwrite: bool = False,
+    workspace_name: Optional[str] = None
+) -> None:
+    """
+    Convenience wrapper that prints results and handles errors.
+
+    Args:
+        url: URL to the notebook file
+        notebook_name: Name for the imported notebook
+        overwrite: Whether to overwrite if exists
+        workspace_name: Target workspace name
+
+    Examples:
+        from duckrun.notebook import import_notebook
+
+        import_notebook(
+            url="https://raw.githubusercontent.com/djouallah/fabric_demo/refs/heads/main/Benchmark/RunPerfScenario.ipynb",
+            notebook_name="RunPerfScenario",
+            overwrite=False
+        )
+    """
+    try:
+        result = import_notebook_from_web(
+            url=url,
+            notebook_name=notebook_name,
+            overwrite=overwrite,
+            workspace_name=workspace_name
+        )
+
+        if result["success"]:
+            print(f"✅ {result['message']}")
+        else:
+            print(f"❌ {result['message']}")
+
+    except Exception as e:
+        print(f"Error: {e}")
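
Since import_notebook_from_web reports failures through its result dict rather than raising, callers that want hard failures must check the "success" flag themselves. A small sketch of that pattern (URL and workspace name are placeholders):

from duckrun.notebook import import_notebook_from_web

result = import_notebook_from_web(
    url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",  # placeholder
    workspace_name="Analytics Workspace",  # placeholder; omit when running inside Fabric
    overwrite=True
)
if result["success"]:
    print(result["message"], "(replaced existing)" if result["overwritten"] else "")
else:
    raise RuntimeError(result["message"])  # surface the failure instead of continuing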
duckrun/semantic_model.py
CHANGED
@@ -130,25 +130,49 @@ def check_dataset_exists(dataset_name, workspace_id, client):
 
 
 def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
-    """Refresh a dataset and monitor progress using Power BI API"""
+    """Refresh a dataset and monitor progress using Power BI API
+
+    For DirectLake models, performs a two-step refresh:
+    1. clearValues - Purges data from memory
+    2. full - Reframes data from Delta tables
+    """
 
     # If dataset_id not provided, look it up by name
     if not dataset_id:
         dataset_id = get_dataset_id(dataset_name, workspace_id, client)
 
-
-
+    # Use Power BI API for refresh (not Fabric API)
+    powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
+    headers = client._get_headers()
+
+    # Step 1: clearValues - Purge data from memory
+    print(" Step 1: Clearing values from memory...")
+    clearvalues_payload = {
+        "type": "clearValues",
         "commitMode": "transactional",
         "maxParallelism": 10,
         "retryCount": 2,
         "objects": []
     }
 
-
-
-
+    response = requests.post(powerbi_url, headers=headers, json=clearvalues_payload)
+
+    if response.status_code in [200, 202]:
+        print(" ✓ Clear values completed")
+    else:
+        response.raise_for_status()
+
+    # Step 2: full refresh - Reframe data from Delta tables
+    print(" Step 2: Full refresh to reframe data...")
+    full_payload = {
+        "type": "full",
+        "commitMode": "transactional",
+        "maxParallelism": 10,
+        "retryCount": 2,
+        "objects": []
+    }
 
-    response = requests.post(powerbi_url, headers=headers, json=
+    response = requests.post(powerbi_url, headers=headers, json=full_payload)
 
     if response.status_code in [200, 202]:
         print(f"✓ Refresh initiated")
@@ -471,13 +495,13 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
     dataset_exists = check_dataset_exists(dataset_name, workspace_id, client)
 
     if dataset_exists:
-        print(f"
+        print(f"✓ Dataset '{dataset_name}' already exists - skipping deployment")
 
         if wait_seconds > 0:
            print(f" Waiting {wait_seconds} seconds...")
            time.sleep(wait_seconds)
 
-        print("\n[Step
+        print("\n[Step 3/3] Refreshing existing semantic model...")
        refresh_dataset(dataset_name, workspace_id, client)
 
    print("\n" + "=" * 70)
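
The refresh change above replaces a single refresh call with two sequential POSTs to the Power BI refreshes endpoint. A minimal standalone sketch of the same two-step sequence (dataset ID and token are placeholders; the real code takes both from the client):

import requests

dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder dataset ID
url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
headers = {"Authorization": "Bearer <TOKEN>", "Content-Type": "application/json"}  # placeholder token

# Step 1 (clearValues) purges the model from memory; step 2 (full) reframes it from Delta tables
for refresh_type in ("clearValues", "full"):
    payload = {
        "type": refresh_type,
        "commitMode": "transactional",
        "maxParallelism": 10,
        "retryCount": 2,
        "objects": []  # empty list = refresh the whole model
    }
    response = requests.post(url, headers=headers, json=payload)
    if response.status_code not in (200, 202):
        response.raise_for_status()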
duckrun/stats.py
CHANGED
@@ -60,13 +60,14 @@ def _get_existing_tables_in_schema(duckrun_instance, schema_name: str) -> list:
         return []
 
 
-def get_stats(duckrun_instance, source: str):
+def get_stats(duckrun_instance, source: str = None):
     """
     Get comprehensive statistics for Delta Lake tables.
 
     Args:
         duckrun_instance: The Duckrun connection instance
-        source: Can be one of:
+        source: Optional. Can be one of:
+            - None: Use all tables in the connection's schema (default)
            - Table name: 'table_name' (uses main schema in DuckDB)
            - Schema.table: 'schema.table_name' (specific table in schema, if multi-schema)
            - Schema only: 'schema' (all tables in schema, if multi-schema)
@@ -78,6 +79,9 @@ def get_stats(duckrun_instance, source: str):
     Examples:
         con = duckrun.connect("tmp/data.lakehouse/test")
 
+        # All tables in the connection's schema
+        stats = con.get_stats()
+
         # Single table in main schema (DuckDB uses 'main', not 'test')
         stats = con.get_stats('price_today')
 
@@ -93,6 +97,10 @@ def get_stats(duckrun_instance, source: str):
     duckdb_schema = "main"
     url_schema = duckrun_instance.schema  # This is from the connection URL path
 
+    # If source is not provided, default to all tables in the connection's schema
+    if source is None:
+        source = url_schema
+
     # Parse the source and validate existence
     if '.' in source:
         # Format: schema.table - only valid if multi-schema is enabled
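
With the new default, source=None now resolves to the schema from the connection URL before parsing. A short sketch of the resulting call styles (the connect string mirrors the docstring examples):

import duckrun

con = duckrun.connect("tmp/data.lakehouse/aemo")

stats_all = con.get_stats()           # new: all tables in the connection's schema
stats_one = con.get_stats("price")    # unchanged: a single table
stats_sch = con.get_stats("aemo")     # unchanged: every table in an explicit schema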
{duckrun-0.2.17.dist-info → duckrun-0.2.18.dev2.dist-info}/METADATA
CHANGED

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.17
-Summary:
+Version: 0.2.18.dev2
+Summary: Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)
 Author: mim
 License: MIT
 Project-URL: Homepage, https://github.com/djouallah/duckrun
duckrun-0.2.18.dev2.dist-info/RECORD
ADDED

@@ -0,0 +1,15 @@
+duckrun/__init__.py,sha256=vqv_bJjHjrrXGs8Zyxuy-GKTCyJlZ5z3npPQgE9ipBY,355
+duckrun/auth.py,sha256=EMaf-L2zeNOjbHOT97xYxfZNfWo4WrwrU1h3vBQTgEc,9624
+duckrun/core.py,sha256=tWLFOSVZHoJ0r5YJaj0lG1s_kehiIrnxPMrQQIcyh94,68367
+duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+duckrun/notebook.py,sha256=SzdKTpvzHiWMrvg7mCd3DN6R4gU_6Gm7gfkuETzylaE,12103
+duckrun/runner.py,sha256=NGVyerJA44UP2umRdndfL0fuFM_gdOZmuJUz-PLOFf0,13461
+duckrun/semantic_model.py,sha256=X3VKdo4BehAg681Ucq7fzB2KPY2mwPLbfIZqI5Gbqp4,30377
+duckrun/stats.py,sha256=qvWnPk2P8Ob_tzaiNfdQmUQqMVq2FWv3EgArE7hPl44,15482
+duckrun/writer.py,sha256=wIsU77DSj4J7d9_bIhvk6AbC51uUrLW0e6pcSPQOY1c,9424
+duckrun-0.2.18.dev2.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.18.dev2.dist-info/METADATA,sha256=JpewTO7QqHrdUn_G3Lz-1jxFifVyBxj9lNX_Qodhe2A,20807
+duckrun-0.2.18.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.18.dev2.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.18.dev2.dist-info/RECORD,,
duckrun-0.2.17.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
-duckrun/__init__.py,sha256=oPQXpJEgHpX_KgMrx_TWax9awIbr2B9z32cFuuG_p30,236
-duckrun/auth.py,sha256=EMaf-L2zeNOjbHOT97xYxfZNfWo4WrwrU1h3vBQTgEc,9624
-duckrun/core.py,sha256=c98sASAWlq0DDIR9gYbj5ZaKOa6MoO8Z09qhRhG4JWI,67097
-duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
-duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
-duckrun/runner.py,sha256=NGVyerJA44UP2umRdndfL0fuFM_gdOZmuJUz-PLOFf0,13461
-duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
-duckrun/stats.py,sha256=EqrCN1xwGo5nZgwezBvb6RepXT6b8H7xgK0yJJGFLfE,15155
-duckrun/writer.py,sha256=wIsU77DSj4J7d9_bIhvk6AbC51uUrLW0e6pcSPQOY1c,9424
-duckrun-0.2.17.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.17.dist-info/METADATA,sha256=Id1vgyEEjsd4-sklkqOy1w1g3sdydmTdFFj6DMPd6bY,20766
-duckrun-0.2.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.17.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.17.dist-info/RECORD,,
{duckrun-0.2.17.dist-info → duckrun-0.2.18.dev2.dist-info}/WHEEL
File without changes

{duckrun-0.2.17.dist-info → duckrun-0.2.18.dev2.dist-info}/licenses/LICENSE
File without changes

{duckrun-0.2.17.dist-info → duckrun-0.2.18.dev2.dist-info}/top_level.txt
File without changes