duckrun 0.2.13__py3-none-any.whl → 0.2.19.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of duckrun might be problematic.

duckrun/notebook.py ADDED
@@ -0,0 +1,324 @@
+ """
+ Notebook operations functionality for duckrun - Import notebooks from web using Fabric REST API
+ """
+ import requests
+ import base64
+ from typing import Optional
+
+
+ def import_notebook_from_web(
+ url: str,
+ notebook_name: Optional[str] = None,
+ overwrite: bool = False,
+ workspace_name: Optional[str] = None
+ ) -> dict:
+ """
+ Import a Jupyter notebook from a web URL into Microsoft Fabric workspace using REST API only.
+ Uses duckrun.connect context by default or explicit workspace name.
+
+ Args:
+ url: URL to the notebook file (e.g., GitHub raw URL). Required.
+ notebook_name: Name for the imported notebook in Fabric. Optional - will use filename from URL if not provided.
+ overwrite: Whether to overwrite if notebook already exists (default: False)
+ workspace_name: Target workspace name. Optional - will use current workspace from duckrun context if available.
+
+ Returns:
+ Dictionary with import result:
+ {
+ "success": bool,
+ "message": str,
+ "notebook": dict (if successful),
+ "overwritten": bool
+ }
+
+ Examples:
+ # Basic usage with duckrun context
+ import duckrun
+ dr = duckrun.connect("MyWorkspace/MyLakehouse.lakehouse")
+ from duckrun.notebook import import_notebook_from_web
+
+ result = import_notebook_from_web(
+ url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",
+ notebook_name="MyNotebook"
+ )
+
+ # With explicit workspace
+ result = import_notebook_from_web(
+ url="https://raw.githubusercontent.com/user/repo/main/notebook.ipynb",
+ notebook_name="MyNotebook",
+ workspace_name="Analytics Workspace",
+ overwrite=True
+ )
+
+ # Minimal usage - derives name from URL
+ result = import_notebook_from_web(
+ url="https://raw.githubusercontent.com/user/repo/main/RunPerfScenario.ipynb"
+ )
+ """
+ try:
+ # Get authentication token
+ from duckrun.auth import get_fabric_api_token
+ token = get_fabric_api_token()
+ if not token:
+ return {
+ "success": False,
+ "message": "Failed to get authentication token",
+ "notebook": None,
+ "overwritten": False
+ }
+
+ base_url = "https://api.fabric.microsoft.com/v1"
+ headers = {
+ "Authorization": f"Bearer {token}",
+ "Content-Type": "application/json"
+ }
+
+ # Determine workspace ID
+ workspace_id = None
+
+ # Try to get from duckrun context if not provided
+ if not workspace_name:
+ try:
+ # Try to get from notebook context first
+ import notebookutils # type: ignore
+ workspace_id = notebookutils.runtime.context.get("workspaceId")
+ print("📓 Using current workspace from Fabric notebook context")
+ except (ImportError, Exception):
+ # Not in notebook, try to get from environment/last connection
+ pass
+
+ # If still no workspace_id, resolve from workspace_name
+ if not workspace_id:
+ if not workspace_name:
+ return {
+ "success": False,
+ "message": "workspace_name must be provided when not in Fabric notebook context",
+ "notebook": None,
+ "overwritten": False
+ }
+
+ # Get workspace ID by name
+ print(f"🔍 Resolving workspace: {workspace_name}")
+ ws_url = f"{base_url}/workspaces"
+ response = requests.get(ws_url, headers=headers)
+ response.raise_for_status()
+
+ workspaces = response.json().get("value", [])
+ workspace = next((ws for ws in workspaces if ws.get("displayName") == workspace_name), None)
+
+ if not workspace:
+ return {
+ "success": False,
+ "message": f"Workspace '{workspace_name}' not found",
+ "notebook": None,
+ "overwritten": False
+ }
+
+ workspace_id = workspace.get("id")
+ print(f"✓ Found workspace: {workspace_name}")
+
+ # Derive notebook name from URL if not provided
+ if not notebook_name:
+ # Extract filename from URL
+ notebook_name = url.split("/")[-1]
+ if notebook_name.endswith(".ipynb"):
+ notebook_name = notebook_name[:-6] # Remove .ipynb extension
+ print(f"📝 Using notebook name from URL: {notebook_name}")
+
+ # Check if notebook already exists
+ notebooks_url = f"{base_url}/workspaces/{workspace_id}/notebooks"
+ response = requests.get(notebooks_url, headers=headers)
+ response.raise_for_status()
+
+ notebooks = response.json().get("value", [])
+ existing_notebook = next((nb for nb in notebooks if nb.get("displayName") == notebook_name), None)
+
+ if existing_notebook and not overwrite:
+ return {
+ "success": True,
+ "message": f"Notebook '{notebook_name}' already exists (use overwrite=True to replace)",
+ "notebook": existing_notebook,
+ "overwritten": False
+ }
+
+ # Download notebook content from URL
+ print(f"⬇️ Downloading notebook from: {url}")
+ response = requests.get(url)
+ response.raise_for_status()
+ notebook_content = response.text
+ print(f"✓ Notebook downloaded successfully")
+
+ # Convert notebook content to base64
+ notebook_base64 = base64.b64encode(notebook_content.encode('utf-8')).decode('utf-8')
+
+ # Prepare the payload for creating/updating the notebook
+ if existing_notebook and overwrite:
+ # Update existing notebook
+ notebook_id = existing_notebook.get("id")
+ print(f"🔄 Updating existing notebook: {notebook_name}")
+
+ update_url = f"{base_url}/workspaces/{workspace_id}/notebooks/{notebook_id}/updateDefinition"
+ payload = {
+ "definition": {
+ "format": "ipynb",
+ "parts": [
+ {
+ "path": "notebook-content.py",
+ "payload": notebook_base64,
+ "payloadType": "InlineBase64"
+ }
+ ]
+ }
+ }
+
+ response = requests.post(update_url, headers=headers, json=payload)
+ response.raise_for_status()
+
+ # Handle long-running operation
+ if response.status_code == 202:
+ operation_id = response.headers.get('x-ms-operation-id')
+ if operation_id:
+ _wait_for_operation(operation_id, headers)
+
+ return {
+ "success": True,
+ "message": f"Notebook '{notebook_name}' updated successfully",
+ "notebook": existing_notebook,
+ "overwritten": True
+ }
+ else:
+ # Create new notebook
+ print(f"➕ Creating new notebook: {notebook_name}")
+
+ payload = {
+ "displayName": notebook_name,
+ "definition": {
+ "format": "ipynb",
+ "parts": [
+ {
+ "path": "notebook-content.py",
+ "payload": notebook_base64,
+ "payloadType": "InlineBase64"
+ }
+ ]
+ }
+ }
+
+ response = requests.post(notebooks_url, headers=headers, json=payload)
+ response.raise_for_status()
+
+ # Handle long-running operation
+ if response.status_code == 202:
+ operation_id = response.headers.get('x-ms-operation-id')
+ if operation_id:
+ _wait_for_operation(operation_id, headers)
+
+ created_notebook = response.json()
+
+ return {
+ "success": True,
+ "message": f"Notebook '{notebook_name}' created successfully",
+ "notebook": created_notebook,
+ "overwritten": False
+ }
+
+ except requests.exceptions.RequestException as e:
+ return {
+ "success": False,
+ "message": f"HTTP Error: {str(e)}",
+ "notebook": None,
+ "overwritten": False
+ }
+ except Exception as e:
+ return {
+ "success": False,
+ "message": f"Error: {str(e)}",
+ "notebook": None,
+ "overwritten": False
+ }
+
+
+ def _wait_for_operation(operation_id: str, headers: dict, max_attempts: int = 30) -> bool:
+ """
+ Wait for a long-running Fabric API operation to complete.
+
+ Args:
+ operation_id: The operation ID to monitor
+ headers: Request headers with authentication
+ max_attempts: Maximum number of polling attempts (default: 30)
+
+ Returns:
+ True if operation succeeded, False otherwise
+ """
+ import time
+
+ status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+
+ for attempt in range(max_attempts):
+ time.sleep(2)
+
+ try:
+ response = requests.get(status_url, headers=headers)
+ response.raise_for_status()
+
+ status_data = response.json()
+ status = status_data.get('status')
+
+ if status == 'Succeeded':
+ print(f"✓ Operation completed successfully")
+ return True
+ elif status == 'Failed':
+ error = status_data.get('error', {})
+ print(f"❌ Operation failed: {error.get('message', 'Unknown error')}")
+ return False
+ else:
+ print(f"⏳ Operation in progress... ({status})")
+
+ except Exception as e:
+ print(f"⚠️ Error checking operation status: {e}")
+ return False
+
+ print(f"⚠️ Operation timed out after {max_attempts} attempts")
+ return False
+
+
+ # Convenience wrapper for the try-except pattern mentioned in the request
+ def import_notebook(
+ url: str,
+ notebook_name: Optional[str] = None,
+ overwrite: bool = False,
+ workspace_name: Optional[str] = None
+ ) -> None:
+ """
+ Convenience wrapper that prints results and handles errors.
+
+ Args:
+ url: URL to the notebook file
+ notebook_name: Name for the imported notebook
+ overwrite: Whether to overwrite if exists
+ workspace_name: Target workspace name
+
+ Examples:
+ from duckrun.notebook import import_notebook
+
+ import_notebook(
+ url="https://raw.githubusercontent.com/djouallah/fabric_demo/refs/heads/main/Benchmark/RunPerfScenario.ipynb",
+ notebook_name="RunPerfScenario",
+ overwrite=False
+ )
+ """
+ try:
+ result = import_notebook_from_web(
+ url=url,
+ notebook_name=notebook_name,
+ overwrite=overwrite,
+ workspace_name=workspace_name
+ )
+
+ if result["success"]:
+ print(f"✅ {result['message']}")
+ else:
+ print(f"❌ {result['message']}")
+
+ except Exception as e:
+ print(f"Error: {e}")
duckrun/runner.py CHANGED
@@ -7,45 +7,7 @@ import importlib.util
  from typing import List, Tuple, Dict, Optional, Callable, Any
  from string import Template
  from deltalake import DeltaTable, write_deltalake
- # Row Group configuration for optimal Delta Lake performance
- RG = 8_000_000
-
-
- def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=None):
- """
- Build arguments for write_deltalake based on requirements:
- - If schema_mode='merge': use rust engine (no row group params)
- - Otherwise: use pyarrow engine with row group optimization (if supported)
- """
- args = {
- 'table_or_uri': path,
- 'data': df,
- 'mode': mode
- }
-
- # Add partition_by if specified
- if partition_by:
- args['partition_by'] = partition_by
-
- # Engine selection based on schema_mode
- if schema_mode == 'merge':
- # Use rust engine for schema merging (no row group params supported)
- args['schema_mode'] = 'merge'
- args['engine'] = 'rust'
- else:
- # Try to use pyarrow engine with row group optimization
- # Check if row group parameters are supported by inspecting function signature
- import inspect
- sig = inspect.signature(write_deltalake)
-
- if 'max_rows_per_file' in sig.parameters:
- # Older deltalake version - use row group optimization
- args['max_rows_per_file'] = RG
- args['max_rows_per_group'] = RG
- args['min_rows_per_group'] = RG
- # For newer versions, just use default parameters
-
- return args
+ from .writer import _build_write_deltalake_args


  def run(duckrun_instance, pipeline: List[Tuple]) -> bool:
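
For reference, the removed helper now lives in duckrun/writer.py and is imported above. A minimal caller-side sketch, assuming the relocated function keeps the signature shown in the removed code (the table path and DataFrame are placeholders):

    import pandas as pd
    from deltalake import write_deltalake
    from duckrun.writer import _build_write_deltalake_args

    df = pd.DataFrame({"id": [1, 2, 3]})
    path = "/tmp/demo_delta_table"  # placeholder; duckrun normally targets OneLake table URIs

    # With schema_mode left unset, the helper only adds row-group settings when the
    # installed deltalake version still accepts them; schema_mode="merge" would
    # instead switch to the rust engine.
    args = _build_write_deltalake_args(path, df, mode="overwrite")
    write_deltalake(**args)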
@@ -130,9 +92,12 @@ def _run_python(duckrun_instance, name: str, args: tuple) -> Any:

  # Get original and resolved names
  original_workspace = duckrun_instance.workspace
- original_lakehouse = duckrun_instance.lakehouse_name
+ original_lakehouse = duckrun_instance.lakehouse_display_name # Base name without suffix (e.g., "data")
  resolved_workspace = duckrun_instance.workspace_id
- resolved_lakehouse = duckrun_instance.lakehouse_id
+
+ # Always pass base lakehouse name (without .Lakehouse suffix) to user functions
+ # User functions expect just the name like "data", not "data.Lakehouse"
+ resolved_lakehouse = duckrun_instance.lakehouse_display_name

  # Substitute workspace/lakehouse names in args if they differ
  # This prevents URL encoding issues when names contain spaces
@@ -149,7 +114,7 @@ def _run_python(duckrun_instance, name: str, args: tuple) -> Any:
  else:
  substituted_args.append(arg)
  args = tuple(substituted_args)
- print(f"📝 Auto-substituted workspace/lakehouse names in args for URL compatibility")
+ print(f"📝 Auto-substituted workspace/lakehouse names in args")

  print(f"Running Python: {name}{args}")
  result = func(*args)
@@ -282,12 +247,17 @@ def _read_sql_file(duckrun_instance, table_name: str, params: Optional[Dict] = N
  # If GUID, use just the GUID
  content = content.replace('${lh}.Lakehouse', duckrun_instance.lakehouse_name)
  else:
- # If not GUID, use legacy format
- content = content.replace('${lh}.Lakehouse', f'{duckrun_instance.lakehouse_name}.Lakehouse')
+ # If not GUID, check if lakehouse_name already has .ItemType suffix
+ if duckrun_instance.lakehouse_name.endswith(('.Lakehouse', '.Warehouse', '.Database', '.SnowflakeDatabase')):
+ # Already has suffix - use as is
+ content = content.replace('${lh}.Lakehouse', duckrun_instance.lakehouse_name)
+ else:
+ # No suffix - add .Lakehouse for legacy format
+ content = content.replace('${lh}.Lakehouse', f'{duckrun_instance.lakehouse_name}.Lakehouse')

  full_params = {
  'ws': duckrun_instance.workspace,
- 'lh': duckrun_instance.lakehouse_name,
+ 'lh': duckrun_instance.lakehouse_display_name, # Use display name (without suffix) for backward compat
  'schema': duckrun_instance.schema,
  'storage_account': duckrun_instance.storage_account,
  'tables_url': duckrun_instance.table_base_url,
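
The new suffix handling in _read_sql_file is easier to read outside the hunk; a standalone sketch of the same branching (the helper name and sample values are illustrative, not part of duckrun):

    # Hypothetical helper mirroring the branch above: resolve "${lh}.Lakehouse"
    # whether the lakehouse name is a GUID, a bare name, or already suffixed.
    def resolve_lh_placeholder(content: str, lakehouse_name: str, is_guid: bool) -> str:
        if is_guid:
            return content.replace('${lh}.Lakehouse', lakehouse_name)
        if lakehouse_name.endswith(('.Lakehouse', '.Warehouse', '.Database', '.SnowflakeDatabase')):
            return content.replace('${lh}.Lakehouse', lakehouse_name)
        return content.replace('${lh}.Lakehouse', f'{lakehouse_name}.Lakehouse')

    sql = "SELECT * FROM ${lh}.Lakehouse.dbo.sales"
    print(resolve_lh_placeholder(sql, "data.Warehouse", is_guid=False))  # data.Warehouse.dbo.sales
    print(resolve_lh_placeholder(sql, "data", is_guid=False))            # data.Lakehouse.dbo.sales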
duckrun/semantic_model.py CHANGED
@@ -129,29 +129,136 @@ def check_dataset_exists(dataset_name, workspace_id, client):
  return False


- def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
- """Refresh a dataset and monitor progress using Power BI API"""
+ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None, refresh="full"):
+ """Refresh a dataset and monitor progress using Power BI API
+
+ For DirectLake models, performs refresh based on refresh parameter:
+ - refresh="full": Two-step refresh (clearValues + full reframe)
+ - refresh="ignore": Skip refresh entirely
+
+ If a refresh is already in progress, waits for it to complete before starting a new one.
+ """
+
+ # Skip refresh entirely if refresh is "ignore"
+ if refresh == "ignore":
+ print(" Ignoring refresh - skipping refresh")
+ return

  # If dataset_id not provided, look it up by name
  if not dataset_id:
  dataset_id = get_dataset_id(dataset_name, workspace_id, client)

- payload = {
- "type": "full",
+ # Use Power BI API for refresh (not Fabric API)
+ powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
+ headers = client._get_headers()
+
+ # Check for in-progress refreshes
+ print(" Checking for in-progress refreshes...")
+ try:
+ status_response = requests.get(f"{powerbi_url}?$top=1", headers=headers)
+ if status_response.status_code == 200:
+ refreshes = status_response.json().get('value', [])
+ if refreshes:
+ latest_refresh = refreshes[0]
+ status = latest_refresh.get('status')
+ if status in ['InProgress', 'Unknown']:
+ refresh_id = latest_refresh.get('requestId')
+ print(f" ⚠️ Found in-progress refresh (ID: {refresh_id})")
+ print(f" Waiting for current refresh to complete...")
+
+ # Wait for the in-progress refresh to complete
+ max_wait_attempts = 60
+ for attempt in range(max_wait_attempts):
+ time.sleep(5)
+ check_response = requests.get(f"{powerbi_url}/{refresh_id}", headers=headers)
+ if check_response.status_code == 200:
+ current_status = check_response.json().get('status')
+
+ if current_status == 'Completed':
+ print(f" ✓ Previous refresh completed")
+ break
+ elif current_status == 'Failed':
+ print(f" ⚠️ Previous refresh failed, continuing with new refresh")
+ break
+ elif current_status == 'Cancelled':
+ print(f" ⚠️ Previous refresh was cancelled, continuing with new refresh")
+ break
+
+ if attempt % 6 == 0:
+ print(f" Still waiting... (status: {current_status})")
+ else:
+ print(f" ⚠️ Timeout waiting for previous refresh, will attempt new refresh anyway")
+ except Exception as e:
+ print(f" ⚠️ Could not check refresh status: {e}")
+ print(f" Continuing with refresh attempt...")
+
+ # Step 1: clearValues - Purge data from memory
+ print(" Step 1: Clearing values from memory...")
+ clearvalues_payload = {
+ "type": "clearValues",
  "commitMode": "transactional",
  "maxParallelism": 10,
  "retryCount": 2,
  "objects": []
  }

- # Use Power BI API for refresh (not Fabric API)
- powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
- headers = client._get_headers()
+ response = requests.post(powerbi_url, headers=headers, json=clearvalues_payload)

- response = requests.post(powerbi_url, headers=headers, json=payload)
+ if response.status_code in [200, 202]:
+ # For 202, monitor the clearValues operation
+ if response.status_code == 202:
+ location = response.headers.get('Location')
+ if location:
+ clear_refresh_id = location.split('/')[-1]
+ print(" ✓ Clear values initiated, monitoring progress...")
+
+ max_attempts = 60
+ for attempt in range(max_attempts):
+ time.sleep(2)
+
+ status_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes/{clear_refresh_id}"
+ status_response = requests.get(status_url, headers=headers)
+ status_response.raise_for_status()
+ status = status_response.json().get('status')
+
+ if status == 'Completed':
+ print(f" ✓ Clear values completed")
+ break
+ elif status == 'Failed':
+ error = status_response.json().get('serviceExceptionJson', '')
+ raise Exception(f"Clear values failed: {error}")
+ elif status == 'Cancelled':
+ raise Exception("Clear values was cancelled")
+
+ if attempt % 10 == 0 and attempt > 0:
+ print(f" Clear values status: {status}...")
+ else:
+ raise Exception(f"Clear values timed out")
+ else:
+ print(" ✓ Clear values completed")
+ else:
+ # Provide detailed error message
+ try:
+ error_details = response.json()
+ error_message = error_details.get('error', {}).get('message', response.text)
+ raise Exception(f"Clear values failed with status {response.status_code}: {error_message}")
+ except (json.JSONDecodeError, ValueError):
+ response.raise_for_status()
+
+ # Step 2: full refresh - Reframe data from Delta tables
+ print(" Step 2: Full refresh to reframe data...")
+ full_payload = {
+ "type": "full",
+ "commitMode": "transactional",
+ "maxParallelism": 10,
+ "retryCount": 2,
+ "objects": []
+ }
+
+ response = requests.post(powerbi_url, headers=headers, json=full_payload)

  if response.status_code in [200, 202]:
- print(f"✓ Refresh initiated")
+ print(f" ✓ Refresh initiated")

  # For 202, get the refresh_id from the Location header
  if response.status_code == 202:
@@ -183,7 +290,13 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):

  raise Exception(f"Refresh timed out")
  else:
- response.raise_for_status()
+ # Provide detailed error message
+ try:
+ error_details = response.json()
+ error_message = error_details.get('error', {}).get('message', response.text)
+ raise Exception(f"Refresh request failed with status {response.status_code}: {error_message}")
+ except (json.JSONDecodeError, ValueError):
+ response.raise_for_status()


  def download_bim_from_github(url_or_path):
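
The two-step DirectLake refresh introduced above is easier to follow end to end in a compact sketch; the dataset ID and bearer token below are placeholders, while the endpoint, payload shape, and polling pattern come from the hunk:

    import time
    import requests

    dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder
    headers = {"Authorization": "Bearer <token>", "Content-Type": "application/json"}  # placeholder token
    refreshes_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"

    # Step 1 purges data from memory (clearValues); step 2 reframes the model
    # against the latest Delta tables (full).
    for refresh_type in ("clearValues", "full"):
        payload = {"type": refresh_type, "commitMode": "transactional",
                   "maxParallelism": 10, "retryCount": 2, "objects": []}
        response = requests.post(refreshes_url, headers=headers, json=payload)
        if response.status_code == 202:  # long-running: poll the refresh entry
            refresh_id = response.headers.get("Location", "").split("/")[-1]
            for _ in range(60):
                time.sleep(2)
                status = requests.get(f"{refreshes_url}/{refresh_id}", headers=headers).json().get("status")
                if status in ("Completed", "Failed", "Cancelled"):
                    break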
@@ -431,7 +544,7 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):


  def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name, dataset_name,
- bim_url_or_path, wait_seconds=5):
+ bim_url_or_path, wait_seconds=5, refresh="full"):
  """
  Deploy a semantic model using DirectLake mode.

@@ -442,6 +555,9 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
  dataset_name: Name for the semantic model
  bim_url_or_path: URL to the BIM file or local file path (e.g., 'model.bim' or 'https://...')
  wait_seconds: Seconds to wait before refresh (default: 5)
+ refresh: Refresh strategy (default: "full")
+ - "full": Clear values and process full refresh
+ - "ignore": Skip refresh entirely

  Returns:
  1 for success, 0 for failure
@@ -454,6 +570,9 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
  # Using a local file
  dr.deploy("./my_model.bim")
  dr.deploy("C:/path/to/model.bim")
+
+ # Deploy without refresh
+ dr.deploy("./my_model.bim", refresh="ignore")
  """
  print("=" * 70)
  print("Semantic Model Deployment (DirectLake)")
@@ -471,14 +590,14 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
  dataset_exists = check_dataset_exists(dataset_name, workspace_id, client)

  if dataset_exists:
- print(f"\n✓ Dataset exists - refreshing...")
+ print(f"✓ Dataset '{dataset_name}' already exists - skipping deployment")

  if wait_seconds > 0:
  print(f" Waiting {wait_seconds} seconds...")
  time.sleep(wait_seconds)

- print("\n[Step 6/6] Refreshing semantic model...")
- refresh_dataset(dataset_name, workspace_id, client)
+ print("\n[Step 3/3] Refreshing existing semantic model...")
+ refresh_dataset(dataset_name, workspace_id, client, refresh=refresh)

  print("\n" + "=" * 70)
  print("🎉 Refresh Completed!")
@@ -510,7 +629,7 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam

  # Step 6: Refresh using the dataset ID returned from creation
  print("\n[Step 6/6] Refreshing semantic model...")
- refresh_dataset(dataset_name, workspace_id, client, dataset_id=dataset_id)
+ refresh_dataset(dataset_name, workspace_id, client, dataset_id=dataset_id, refresh=refresh)

  print("\n" + "=" * 70)
  print("🎉 Deployment Completed!")
@@ -537,7 +656,7 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
  return 0


- def copy_model(ws_source, model_name, destination, new_model_name=None, wait_seconds=5):
+ def copy_model(ws_source, model_name, destination, new_model_name=None, wait_seconds=5, refresh="full"):
  """
  Copy a semantic model from one workspace to another.

@@ -550,6 +669,9 @@ def copy_model(ws_source, model_name, destination, new_model_name=None, wait_sec
  destination: Destination in format "workspace/lakehouse.lakehouse/schema"
  new_model_name: Name for the new semantic model (default: same as source)
  wait_seconds: Seconds to wait before refresh (default: 5)
+ refresh: Refresh strategy (default: "full")
+ - "full": Clear values and process full refresh
+ - "ignore": Skip refresh entirely

  Returns:
  1 for success, 0 for failure
@@ -562,6 +684,9 @@ def copy_model(ws_source, model_name, destination, new_model_name=None, wait_sec
  copy_model("Source WS", "Production Model", "Target WS/Data Lake.lakehouse/analytics",
  new_model_name="Production Model - Copy")

+ # Copy without refresh
+ copy_model("Source WS", "Model", "Target WS/LH.lakehouse/dbo", refresh="ignore")
+
  # Using the connect pattern
  import duckrun
  duckrun.semantic_model.copy_model("Source", "Model", "Target/LH.lakehouse/dbo")
@@ -688,7 +813,8 @@ def copy_model(ws_source, model_name, destination, new_model_name=None, wait_sec
  schema_name=schema,
  dataset_name=new_model_name,
  bim_url_or_path=temp_bim_path,
- wait_seconds=wait_seconds
+ wait_seconds=wait_seconds,
+ refresh=refresh
  )

  # Clean up temp file
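
Taken together, the new refresh argument threads from deploy_semantic_model and copy_model down into refresh_dataset; a short usage sketch with placeholder workspace, lakehouse, and model names:

    from duckrun.semantic_model import deploy_semantic_model, copy_model

    # Deploy a DirectLake model but skip the post-deployment refresh entirely.
    deploy_semantic_model(
        workspace_name_or_id="Analytics Workspace",  # placeholder
        lakehouse_name_or_id="data.Lakehouse",       # placeholder
        schema_name="dbo",
        dataset_name="Sales Model",
        bim_url_or_path="./model.bim",
        refresh="ignore",
    )

    # Copy a model between workspaces and let the default two-step refresh run.
    copy_model("Source WS", "Sales Model", "Target WS/data.lakehouse/dbo", refresh="full")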