duckrun 0.2.21.dev1__tar.gz → 0.2.22.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/PKG-INFO +2 -2
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/README.md +1 -1
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/core.py +177 -39
- duckrun-0.2.22.dev0/duckrun/rle.py +362 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/semantic_model.py +290 -40
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun.egg-info/PKG-INFO +2 -2
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun.egg-info/SOURCES.txt +3 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/pyproject.toml +1 -1
- duckrun-0.2.22.dev0/tests/test_deploy_fresh.py +18 -0
- duckrun-0.2.22.dev0/tests/test_filename.py +10 -0
- duckrun-0.2.22.dev0/tests/test_rle_analysis.py +149 -0
- duckrun-0.2.21.dev1/duckrun/rle.py +0 -940
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/LICENSE +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/__init__.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/auth.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/ducklake_metadata.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/files.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/notebook.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/runner.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/stats.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun/writer.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/setup.cfg +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/tests/test_checkpoint_format.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/tests/test_ducklake_export.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/tests/test_register.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/tests/test_rle.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/tests/test_writer_dictionary.py +0 -0
- {duckrun-0.2.21.dev1 → duckrun-0.2.22.dev0}/tests/test_writer_integration.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: duckrun
|
|
3
|
-
Version: 0.2.21.dev1
|
|
3
|
+
Version: 0.2.22.dev0
|
|
4
4
|
Summary: Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)
|
|
5
5
|
Author: mim
|
|
6
6
|
License: MIT
|
|
@@ -28,7 +28,7 @@ A helper package for working with Microsoft Fabric lakehouses - orchestration, S
|
|
|
28
28
|
## Important Notes
|
|
29
29
|
|
|
30
30
|
**Requirements:**
|
|
31
|
-
-
|
|
31
|
+
- Lakehouses without schema are not supported
|
|
32
32
|
|
|
33
33
|
**Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
|
|
34
34
|
|
|
@@ -8,7 +8,7 @@ A helper package for working with Microsoft Fabric lakehouses - orchestration, S
|
|
|
8
8
|
## Important Notes
|
|
9
9
|
|
|
10
10
|
**Requirements:**
|
|
11
|
-
-
|
|
11
|
+
- Lakehouses without schema are not supported
|
|
12
12
|
|
|
13
13
|
**Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
|
|
14
14
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import duckdb
|
|
1
|
+
import duckdb
|
|
2
2
|
import requests
|
|
3
3
|
import os
|
|
4
4
|
import importlib.util
|
|
@@ -197,7 +197,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
197
197
|
else:
|
|
198
198
|
# In token_only mode, just create the secret for authentication
|
|
199
199
|
self._create_onelake_secret()
|
|
200
|
-
print("
|
|
200
|
+
print("OK Token authenticated (fast mode - tables not listed)")
|
|
201
201
|
|
|
202
202
|
@classmethod
|
|
203
203
|
def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
|
|
@@ -364,7 +364,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
364
364
|
workspace_id = cls._resolve_workspace_id_by_name(token, workspace_name)
|
|
365
365
|
if not workspace_id:
|
|
366
366
|
# Fallback to current workspace if name resolution fails
|
|
367
|
-
print(f"
|
|
367
|
+
print(f" WARNING: Could not validate workspace name '{workspace_name}', using current workspace")
|
|
368
368
|
workspace_id = current_workspace_id
|
|
369
369
|
else:
|
|
370
370
|
# External environment - must resolve by name
|
|
@@ -385,9 +385,9 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
385
385
|
return workspace_id, item_id
|
|
386
386
|
|
|
387
387
|
except Exception as e:
|
|
388
|
-
print(f"
|
|
389
|
-
print(f"
|
|
390
|
-
print("
|
|
388
|
+
print(f"ERROR Failed to resolve names to GUIDs: {e}")
|
|
389
|
+
print(f"ERROR Cannot resolve '{workspace_name}'/'{item_name}' ({item_type}) to GUIDs")
|
|
390
|
+
print("ERROR Microsoft Fabric requires actual workspace and item GUIDs for ABFSS access")
|
|
391
391
|
raise ValueError(
|
|
392
392
|
f"Unable to resolve workspace '{workspace_name}' and {item_type.lower()} '{item_name}' to GUIDs. "
|
|
393
393
|
f"ABFSS URLs require actual GUIDs. "
|
|
@@ -540,7 +540,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
540
540
|
from .auth import get_token
|
|
541
541
|
token = get_token()
|
|
542
542
|
if not token:
|
|
543
|
-
print("
|
|
543
|
+
print("ERROR Failed to authenticate for table discovery")
|
|
544
544
|
return []
|
|
545
545
|
|
|
546
546
|
# OneLake Delta Table API endpoint (Unity Catalog compatible)
|
|
@@ -640,7 +640,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
640
640
|
return tables_found
|
|
641
641
|
|
|
642
642
|
except Exception as e:
|
|
643
|
-
print(f"
|
|
643
|
+
print(f"ERROR Error during table discovery: {e}")
|
|
644
644
|
import traceback
|
|
645
645
|
traceback.print_exc()
|
|
646
646
|
return []
|
|
@@ -655,9 +655,9 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
655
655
|
|
|
656
656
|
if not tables:
|
|
657
657
|
if self.scan_all_schemas:
|
|
658
|
-
print(f"
|
|
658
|
+
print(f" WARNING: No tables found in any schema")
|
|
659
659
|
else:
|
|
660
|
-
print(f"
|
|
660
|
+
print(f" WARNING: No tables found in {self.schema} schema")
|
|
661
661
|
return
|
|
662
662
|
|
|
663
663
|
# Collect table names for display
|
|
@@ -680,7 +680,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
680
680
|
AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
|
|
681
681
|
""")
|
|
682
682
|
except Exception as e:
|
|
683
|
-
print(f"
|
|
683
|
+
print(f" WARNING: Failed to attach table {schema_name}.{table_name}: {e}")
|
|
684
684
|
continue
|
|
685
685
|
|
|
686
686
|
# Print discovered tables as comma-separated list
|
|
@@ -688,7 +688,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
688
688
|
print(", ".join(table_names))
|
|
689
689
|
|
|
690
690
|
except Exception as e:
|
|
691
|
-
print(f"
|
|
691
|
+
print(f"ERROR Error attaching lakehouse: {e}")
|
|
692
692
|
import traceback
|
|
693
693
|
traceback.print_exc()
|
|
694
694
|
|
|
@@ -834,7 +834,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
834
834
|
self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name, null_handling='SPECIAL')
|
|
835
835
|
self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name, null_handling='SPECIAL')
|
|
836
836
|
except Exception as e:
|
|
837
|
-
print(f"
|
|
837
|
+
print(f" WARNING: Warning: Could not register lookup functions: {e}")
|
|
838
838
|
|
|
839
839
|
def get_workspace_id(self, force: bool = False) -> str:
|
|
840
840
|
"""
|
|
@@ -1050,6 +1050,98 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1050
1050
|
"""
|
|
1051
1051
|
self.con.register(name, df)
|
|
1052
1052
|
|
|
1053
|
+
def get_rle_stats(self, table_name: str, top_n_values: int = 10):
|
|
1054
|
+
"""
|
|
1055
|
+
Get comprehensive table statistics including NDV and value frequency analysis.
|
|
1056
|
+
|
|
1057
|
+
Analyzes column characteristics for RLE compression optimization.
|
|
1058
|
+
|
|
1059
|
+
Args:
|
|
1060
|
+
table_name: Name of the table to analyze
|
|
1061
|
+
top_n_values: Number of top frequent values to show per column (default: 10)
|
|
1062
|
+
|
|
1063
|
+
Returns:
|
|
1064
|
+
DataFrame with statistics for each column:
|
|
1065
|
+
- column_name: Name of the column
|
|
1066
|
+
- data_type: Data type
|
|
1067
|
+
- total_rows: Total number of rows
|
|
1068
|
+
- null_count, null_pct: NULL statistics
|
|
1069
|
+
- ndv: Number of distinct values (exact)
|
|
1070
|
+
- cardinality_ratio: NDV / total_rows (lower = better for RLE)
|
|
1071
|
+
- top_value, top_value_count, top_value_pct: Most frequent value stats
|
|
1072
|
+
- top_n_coverage: Percentage covered by top N values
|
|
1073
|
+
- repetition_score: RLE potential score (higher = better)
|
|
1074
|
+
|
|
1075
|
+
Examples:
|
|
1076
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse")
|
|
1077
|
+
|
|
1078
|
+
# Analyze a table
|
|
1079
|
+
stats = con.get_rle_stats('sales')
|
|
1080
|
+
print(stats)
|
|
1081
|
+
|
|
1082
|
+
# Show top 20 values per column
|
|
1083
|
+
stats = con.get_rle_stats('sales', top_n_values=20)
|
|
1084
|
+
"""
|
|
1085
|
+
from .rle import get_table_stats as _get_rle_stats
|
|
1086
|
+
return _get_rle_stats(self, table_name, top_n_values)
|
|
1087
|
+
|
|
1088
|
+
def get_value_frequency(self, table_name: str, column_name: str, limit: int = 20):
|
|
1089
|
+
"""
|
|
1090
|
+
Get detailed value frequency distribution for a specific column.
|
|
1091
|
+
|
|
1092
|
+
Args:
|
|
1093
|
+
table_name: Name of the table
|
|
1094
|
+
column_name: Name of the column to analyze
|
|
1095
|
+
limit: Maximum number of values to return (default: 20)
|
|
1096
|
+
|
|
1097
|
+
Returns:
|
|
1098
|
+
DataFrame with value frequencies:
|
|
1099
|
+
- value: The distinct value
|
|
1100
|
+
- count: Number of occurrences
|
|
1101
|
+
- percentage: Percentage of total rows
|
|
1102
|
+
- cumulative_pct: Cumulative percentage
|
|
1103
|
+
|
|
1104
|
+
Examples:
|
|
1105
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse")
|
|
1106
|
+
|
|
1107
|
+
# Get top 20 values for a column
|
|
1108
|
+
freq = con.get_value_frequency('sales', 'status')
|
|
1109
|
+
print(freq)
|
|
1110
|
+
"""
|
|
1111
|
+
from .rle import get_value_frequency_details as _get_value_frequency
|
|
1112
|
+
return _get_value_frequency(self, table_name, column_name, limit)
|
|
1113
|
+
|
|
1114
|
+
def find_optimal_sort_order(self, table_name: str, max_combinations: int = 10):
|
|
1115
|
+
"""
|
|
1116
|
+
Find optimal column sort order for compression using V-Order-like testing.
|
|
1117
|
+
|
|
1118
|
+
Tests different column orderings and measures RLE compression effectiveness.
|
|
1119
|
+
This simulates how V-Order/VertiPaq optimizes data layout.
|
|
1120
|
+
|
|
1121
|
+
Args:
|
|
1122
|
+
table_name: Name of the table to analyze
|
|
1123
|
+
max_combinations: Maximum sort orderings to test (default: 10)
|
|
1124
|
+
|
|
1125
|
+
Returns:
|
|
1126
|
+
DataFrame with tested orderings ranked by compression:
|
|
1127
|
+
- sort_order: Column ordering (e.g., "date → DUID → time")
|
|
1128
|
+
- total_runs: Total RLE runs (fewer = better compression)
|
|
1129
|
+
- compression_score: Compression effectiveness (higher = better)
|
|
1130
|
+
- Individual RLE counts per column
|
|
1131
|
+
|
|
1132
|
+
Examples:
|
|
1133
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse")
|
|
1134
|
+
|
|
1135
|
+
# Find optimal sort order
|
|
1136
|
+
optimal = con.find_optimal_sort_order('energy_data')
|
|
1137
|
+
print(optimal)
|
|
1138
|
+
|
|
1139
|
+
# Test more combinations
|
|
1140
|
+
optimal = con.find_optimal_sort_order('energy_data', max_combinations=20)
|
|
1141
|
+
"""
|
|
1142
|
+
from .rle import find_optimal_sort_order as _find_optimal_sort_order
|
|
1143
|
+
return _find_optimal_sort_order(self, table_name, max_combinations)
|
|
1144
|
+
|
|
1053
1145
|
def get_stats(self, source: str = None, detailed = False):
|
|
1054
1146
|
"""
|
|
1055
1147
|
Get comprehensive statistics for Delta Lake tables.
|
|
@@ -1101,7 +1193,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1101
1193
|
from .auth import get_fabric_api_token
|
|
1102
1194
|
token = get_fabric_api_token()
|
|
1103
1195
|
if not token:
|
|
1104
|
-
print("
|
|
1196
|
+
print("ERROR Failed to authenticate for listing lakehouses")
|
|
1105
1197
|
return []
|
|
1106
1198
|
|
|
1107
1199
|
# Try to get current workspace ID if in notebook environment
|
|
@@ -1151,7 +1243,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1151
1243
|
from .auth import get_fabric_api_token
|
|
1152
1244
|
token = get_fabric_api_token()
|
|
1153
1245
|
if not token:
|
|
1154
|
-
print("
|
|
1246
|
+
print("ERROR Failed to authenticate for lakehouse creation")
|
|
1155
1247
|
return False
|
|
1156
1248
|
|
|
1157
1249
|
# Try to get current workspace ID if in notebook environment
|
|
@@ -1193,11 +1285,11 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1193
1285
|
response = requests.post(url, headers=headers, json=payload)
|
|
1194
1286
|
response.raise_for_status()
|
|
1195
1287
|
|
|
1196
|
-
print(f"
|
|
1288
|
+
print(f"OK Lakehouse '{lakehouse_name}' created successfully")
|
|
1197
1289
|
return True
|
|
1198
1290
|
|
|
1199
1291
|
except Exception as e:
|
|
1200
|
-
print(f"
|
|
1292
|
+
print(f"ERROR Error creating lakehouse '{lakehouse_name}': {e}")
|
|
1201
1293
|
return False
|
|
1202
1294
|
|
|
1203
1295
|
def deploy(self, bim_url: str, dataset_name: Optional[str] = None,
|
|
@@ -1261,6 +1353,52 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1261
1353
|
refresh=refresh
|
|
1262
1354
|
)
|
|
1263
1355
|
|
|
1356
|
+
def deploy_pbix(self, pbix_url: str, semantic_model_name: str, report_name: Optional[str] = None) -> int:
|
|
1357
|
+
"""
|
|
1358
|
+
Download a PBIX file and bind it to an existing semantic model.
|
|
1359
|
+
|
|
1360
|
+
This method downloads a PBIX report from a URL and binds it to an existing
|
|
1361
|
+
semantic model in the workspace. The PBIX file should contain only report pages
|
|
1362
|
+
(visualizations) without a data model, as it will be bound to the specified
|
|
1363
|
+
semantic model.
|
|
1364
|
+
|
|
1365
|
+
Args:
|
|
1366
|
+
pbix_url: URL to download the PBIX file from
|
|
1367
|
+
semantic_model_name: Name of the existing semantic model to bind to
|
|
1368
|
+
report_name: Name for the new report (default: semantic model name)
|
|
1369
|
+
|
|
1370
|
+
Returns:
|
|
1371
|
+
1 for success, 0 for failure
|
|
1372
|
+
|
|
1373
|
+
Examples:
|
|
1374
|
+
con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
|
|
1375
|
+
|
|
1376
|
+
# Download and bind PBIX to existing semantic model
|
|
1377
|
+
con.deploy_pbix(
|
|
1378
|
+
pbix_url="https://github.com/user/repo/raw/main/report.pbix",
|
|
1379
|
+
semantic_model_name="Sales Model"
|
|
1380
|
+
)
|
|
1381
|
+
|
|
1382
|
+
# With custom report name
|
|
1383
|
+
con.deploy_pbix(
|
|
1384
|
+
pbix_url="https://example.com/reports/dashboard.pbix",
|
|
1385
|
+
semantic_model_name="Analytics Model",
|
|
1386
|
+
report_name="Monthly Dashboard"
|
|
1387
|
+
)
|
|
1388
|
+
"""
|
|
1389
|
+
from .semantic_model import upload_pbix_and_bind
|
|
1390
|
+
|
|
1391
|
+
# Use semantic model name as report name if not provided
|
|
1392
|
+
if report_name is None:
|
|
1393
|
+
report_name = semantic_model_name
|
|
1394
|
+
|
|
1395
|
+
return upload_pbix_and_bind(
|
|
1396
|
+
workspace_name_or_id=self.workspace,
|
|
1397
|
+
pbix_url=pbix_url,
|
|
1398
|
+
report_name=report_name,
|
|
1399
|
+
semantic_model_name=semantic_model_name
|
|
1400
|
+
)
|
|
1401
|
+
|
|
1264
1402
|
def export_ducklake_to_delta(self, db_path: str, data_root: str = None) -> bool:
|
|
1265
1403
|
"""
|
|
1266
1404
|
Export DuckLake metadata to Delta Lake format for Spark compatibility.
|
|
@@ -1302,7 +1440,7 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1302
1440
|
print("Authenticating with Azure for DuckLake export...")
|
|
1303
1441
|
token = get_token()
|
|
1304
1442
|
if not token:
|
|
1305
|
-
print("
|
|
1443
|
+
print("ERROR Failed to authenticate for DuckLake export")
|
|
1306
1444
|
return False
|
|
1307
1445
|
|
|
1308
1446
|
# Setup OneLake store for uploading checkpoint files
|
|
@@ -1316,10 +1454,10 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1316
1454
|
|
|
1317
1455
|
try:
|
|
1318
1456
|
generate_latest_delta_log(full_db_path, data_root, store, token)
|
|
1319
|
-
print(f"
|
|
1457
|
+
print(f"OK DuckLake export completed successfully")
|
|
1320
1458
|
return True
|
|
1321
1459
|
except Exception as e:
|
|
1322
|
-
print(f"
|
|
1460
|
+
print(f"ERROR DuckLake export failed: {e}")
|
|
1323
1461
|
import traceback
|
|
1324
1462
|
traceback.print_exc()
|
|
1325
1463
|
return False
|
|
@@ -1384,10 +1522,10 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1384
1522
|
# Parse table name and construct path
|
|
1385
1523
|
if table_name is None:
|
|
1386
1524
|
if mode != "summary":
|
|
1387
|
-
print("
|
|
1525
|
+
print(" WARNING: Table name is required for 'smart' and 'full' modes")
|
|
1388
1526
|
return None
|
|
1389
1527
|
# TODO: Implement all-tables summary
|
|
1390
|
-
print("
|
|
1528
|
+
print(" WARNING: All-tables summary not yet implemented. Please specify a table name.")
|
|
1391
1529
|
return None
|
|
1392
1530
|
|
|
1393
1531
|
# Parse schema.table or just table
|
|
@@ -1408,11 +1546,11 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1408
1546
|
delta_files = dt.files()
|
|
1409
1547
|
|
|
1410
1548
|
if not delta_files:
|
|
1411
|
-
print("
|
|
1549
|
+
print(" WARNING: Table is empty (no files)")
|
|
1412
1550
|
return None
|
|
1413
1551
|
|
|
1414
1552
|
except Exception as e:
|
|
1415
|
-
print(f"
|
|
1553
|
+
print(f"ERROR Error accessing Delta table: {e}")
|
|
1416
1554
|
return None
|
|
1417
1555
|
|
|
1418
1556
|
# Check if mode is a list of columns (custom ordering)
|
|
@@ -1537,7 +1675,7 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1537
1675
|
from .auth import get_fabric_api_token
|
|
1538
1676
|
token = get_fabric_api_token()
|
|
1539
1677
|
if not token:
|
|
1540
|
-
print("
|
|
1678
|
+
print("ERROR Failed to authenticate for listing lakehouses")
|
|
1541
1679
|
return []
|
|
1542
1680
|
|
|
1543
1681
|
# Always resolve workspace name to ID, even in notebook environment
|
|
@@ -1577,7 +1715,7 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1577
1715
|
from .auth import get_fabric_api_token
|
|
1578
1716
|
token = get_fabric_api_token()
|
|
1579
1717
|
if not token:
|
|
1580
|
-
print("
|
|
1718
|
+
print("ERROR Failed to authenticate for lakehouse creation")
|
|
1581
1719
|
return False
|
|
1582
1720
|
|
|
1583
1721
|
# Always resolve workspace name to ID, even in notebook environment
|
|
@@ -1613,11 +1751,11 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1613
1751
|
response = requests.post(url, headers=headers, json=payload)
|
|
1614
1752
|
response.raise_for_status()
|
|
1615
1753
|
|
|
1616
|
-
print(f"
|
|
1754
|
+
print(f"OK Lakehouse '{lakehouse_name}' created successfully")
|
|
1617
1755
|
return True
|
|
1618
1756
|
|
|
1619
1757
|
except Exception as e:
|
|
1620
|
-
print(f"
|
|
1758
|
+
print(f"ERROR Error creating lakehouse '{lakehouse_name}': {e}")
|
|
1621
1759
|
return False
|
|
1622
1760
|
|
|
1623
1761
|
def download_bim(self, semantic_model_name: str, output_path: Optional[str] = None) -> Optional[str]:
|
|
@@ -1643,13 +1781,13 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1643
1781
|
from .auth import get_fabric_api_token
|
|
1644
1782
|
token = get_fabric_api_token()
|
|
1645
1783
|
if not token:
|
|
1646
|
-
print("
|
|
1784
|
+
print("ERROR Failed to authenticate for downloading semantic model")
|
|
1647
1785
|
return None
|
|
1648
1786
|
|
|
1649
1787
|
# Resolve workspace name to ID
|
|
1650
1788
|
workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
|
|
1651
1789
|
if not workspace_id:
|
|
1652
|
-
print(f"
|
|
1790
|
+
print(f"ERROR Workspace '{self.workspace_name}' not found")
|
|
1653
1791
|
return None
|
|
1654
1792
|
|
|
1655
1793
|
# Get semantic model ID
|
|
@@ -1664,11 +1802,11 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1664
1802
|
model = next((m for m in models if m.get("displayName") == semantic_model_name), None)
|
|
1665
1803
|
|
|
1666
1804
|
if not model:
|
|
1667
|
-
print(f"
|
|
1805
|
+
print(f"ERROR Semantic model '{semantic_model_name}' not found in workspace '{self.workspace_name}'")
|
|
1668
1806
|
return None
|
|
1669
1807
|
|
|
1670
1808
|
model_id = model.get("id")
|
|
1671
|
-
print(f"
|
|
1809
|
+
print(f"OK Found semantic model: {semantic_model_name} (ID: {model_id})")
|
|
1672
1810
|
|
|
1673
1811
|
# Get the model definition using the generic items API
|
|
1674
1812
|
print("📥 Downloading BIM definition...")
|
|
@@ -1702,10 +1840,10 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1702
1840
|
break
|
|
1703
1841
|
elif status == 'Failed':
|
|
1704
1842
|
error = status_response.json().get('error', {})
|
|
1705
|
-
print(f"
|
|
1843
|
+
print(f"ERROR Operation failed: {error.get('message')}")
|
|
1706
1844
|
return None
|
|
1707
1845
|
elif attempt == max_attempts - 1:
|
|
1708
|
-
print("
|
|
1846
|
+
print("ERROR Operation timed out")
|
|
1709
1847
|
return None
|
|
1710
1848
|
else:
|
|
1711
1849
|
result_data = response.json()
|
|
@@ -1716,7 +1854,7 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1716
1854
|
|
|
1717
1855
|
# Debug: show what parts we have
|
|
1718
1856
|
if not parts:
|
|
1719
|
-
print("
|
|
1857
|
+
print("ERROR No definition parts found in response")
|
|
1720
1858
|
print(f" Result data keys: {list(result_data.keys())}")
|
|
1721
1859
|
print(f" Definition keys: {list(definition.keys()) if definition else 'None'}")
|
|
1722
1860
|
return None
|
|
@@ -1727,7 +1865,7 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1727
1865
|
|
|
1728
1866
|
bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
|
|
1729
1867
|
if not bim_part:
|
|
1730
|
-
print("
|
|
1868
|
+
print("ERROR No BIM file found in semantic model definition")
|
|
1731
1869
|
print(f" Looking for files ending with '.bim', found: {[p.get('path') for p in parts]}")
|
|
1732
1870
|
return None
|
|
1733
1871
|
|
|
@@ -1740,7 +1878,7 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1740
1878
|
# Format as pretty JSON
|
|
1741
1879
|
bim_formatted = json.dumps(bim_json, indent=2)
|
|
1742
1880
|
|
|
1743
|
-
print(f"
|
|
1881
|
+
print(f"OK BIM file downloaded successfully")
|
|
1744
1882
|
print(f" - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
|
|
1745
1883
|
print(f" - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
|
|
1746
1884
|
|
|
@@ -1748,13 +1886,13 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1748
1886
|
if output_path:
|
|
1749
1887
|
with open(output_path, 'w', encoding='utf-8') as f:
|
|
1750
1888
|
f.write(bim_formatted)
|
|
1751
|
-
print(f"
|
|
1889
|
+
print(f"OK Saved to: {output_path}")
|
|
1752
1890
|
return output_path
|
|
1753
1891
|
else:
|
|
1754
1892
|
return bim_formatted
|
|
1755
1893
|
|
|
1756
1894
|
except Exception as e:
|
|
1757
|
-
print(f"
|
|
1895
|
+
print(f"ERROR Error downloading semantic model: {e}")
|
|
1758
1896
|
import traceback
|
|
1759
1897
|
traceback.print_exc()
|
|
1760
1898
|
return None
|