duckrun 0.2.19.dev8__tar.gz → 0.2.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/PKG-INFO +1 -1
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/auth.py +3 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/core.py +76 -2
- duckrun-0.2.20/duckrun/ducklake_metadata.py +592 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/semantic_model.py +134 -13
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/writer.py +3 -1
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/PKG-INFO +1 -1
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/SOURCES.txt +6 -1
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/pyproject.toml +1 -1
- duckrun-0.2.20/tests/test_checkpoint_format.py +102 -0
- duckrun-0.2.20/tests/test_ducklake_export.py +4 -0
- duckrun-0.2.20/tests/test_writer_dictionary.py +142 -0
- duckrun-0.2.20/tests/test_writer_integration.py +152 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/LICENSE +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/README.md +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/__init__.py +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/files.py +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/notebook.py +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/rle.py +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/runner.py +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/stats.py +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/setup.cfg +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/tests/test_register.py +0 -0
- {duckrun-0.2.19.dev8 → duckrun-0.2.20}/tests/test_rle.py +0 -0
|
@@ -104,6 +104,8 @@ def _get_local_token() -> Optional[str]:
|
|
|
104
104
|
|
|
105
105
|
except Exception as cli_error:
|
|
106
106
|
print(f"⚠️ Azure CLI authentication failed: {cli_error}")
|
|
107
|
+
print("💡 TIP: Due to MFA requirements, you now need to login with scope:")
|
|
108
|
+
print(" az login --scope https://storage.azure.com/.default")
|
|
107
109
|
print("🔐 Falling back to interactive browser authentication...")
|
|
108
110
|
|
|
109
111
|
# Fallback to interactive browser
|
|
@@ -119,6 +121,7 @@ def _get_local_token() -> Optional[str]:
|
|
|
119
121
|
|
|
120
122
|
except Exception as browser_error:
|
|
121
123
|
print(f"❌ Interactive browser authentication failed: {browser_error}")
|
|
124
|
+
print("💡 Please run: az login --scope https://storage.azure.com/.default")
|
|
122
125
|
return None
|
|
123
126
|
|
|
124
127
|
|
|
@@ -1249,9 +1249,11 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1249
1249
|
dataset_name = self.schema # Use schema name
|
|
1250
1250
|
|
|
1251
1251
|
# Call the deployment function (DirectLake only)
|
|
1252
|
+
# Use lakehouse_id (with .ItemType suffix) instead of lakehouse_name (without suffix)
|
|
1253
|
+
# This ensures proper item resolution for non-lakehouse items like .SnowflakeDatabase
|
|
1252
1254
|
return deploy_semantic_model(
|
|
1253
1255
|
workspace_name_or_id=self.workspace,
|
|
1254
|
-
lakehouse_name_or_id=self.lakehouse_name,
|
|
1256
|
+
lakehouse_name_or_id=self.lakehouse_id,
|
|
1255
1257
|
schema_name=self.schema,
|
|
1256
1258
|
dataset_name=dataset_name,
|
|
1257
1259
|
bim_url_or_path=bim_url,
|
|
@@ -1259,6 +1261,69 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1259
1261
|
refresh=refresh
|
|
1260
1262
|
)
|
|
1261
1263
|
|
|
1264
|
+
def export_ducklake_to_delta(self, db_path: str, data_root: str = None) -> bool:
|
|
1265
|
+
"""
|
|
1266
|
+
Export DuckLake metadata to Delta Lake format for Spark compatibility.
|
|
1267
|
+
|
|
1268
|
+
Reads a DuckLake database file from the Files section and generates Delta Lake
|
|
1269
|
+
checkpoint files and JSON logs for all tables, making them readable by Spark
|
|
1270
|
+
and other Delta Lake tools.
|
|
1271
|
+
|
|
1272
|
+
Args:
|
|
1273
|
+
db_path: Relative path to DuckLake DB file in Files section (e.g., "db/test/test.db")
|
|
1274
|
+
data_root: Optional base path for lakehouse data. If None, reads from DuckLake metadata.
|
|
1275
|
+
|
|
1276
|
+
Returns:
|
|
1277
|
+
True if export succeeded, False otherwise
|
|
1278
|
+
|
|
1279
|
+
Examples:
|
|
1280
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
|
|
1281
|
+
|
|
1282
|
+
# Export DuckLake tables to Delta format
|
|
1283
|
+
con.export_ducklake_to_delta("meta.db")
|
|
1284
|
+
|
|
1285
|
+
# With explicit data root
|
|
1286
|
+
con.export_ducklake_to_delta("db/ducklake.db", data_root="abfss://...")
|
|
1287
|
+
"""
|
|
1288
|
+
from .ducklake_metadata import generate_latest_delta_log
|
|
1289
|
+
import obstore as obs
|
|
1290
|
+
from obstore.store import AzureStore
|
|
1291
|
+
|
|
1292
|
+
# Construct full ABFSS path to DB file in Files section
|
|
1293
|
+
full_db_path = f"{self.files_base_url}{db_path}"
|
|
1294
|
+
|
|
1295
|
+
print(f"🔍 Exporting DuckLake metadata from: {db_path}")
|
|
1296
|
+
print(f"📂 Full DB path: {full_db_path}")
|
|
1297
|
+
|
|
1298
|
+
# Get Azure token
|
|
1299
|
+
from .auth import get_token
|
|
1300
|
+
token = self._get_storage_token()
|
|
1301
|
+
if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
|
|
1302
|
+
print("Authenticating with Azure for DuckLake export...")
|
|
1303
|
+
token = get_token()
|
|
1304
|
+
if not token:
|
|
1305
|
+
print("❌ Failed to authenticate for DuckLake export")
|
|
1306
|
+
return False
|
|
1307
|
+
|
|
1308
|
+
# Setup OneLake store for uploading checkpoint files
|
|
1309
|
+
# Use table_base_url as the base since we'll be writing to Tables section
|
|
1310
|
+
store = AzureStore.from_url(self.table_base_url, bearer_token=token)
|
|
1311
|
+
|
|
1312
|
+
# If data_root not provided, use table_base_url (which includes /Tables/)
|
|
1313
|
+
# This will be used to construct full paths for checkpoint files
|
|
1314
|
+
if data_root is None:
|
|
1315
|
+
data_root = self.table_base_url.rstrip('/')
|
|
1316
|
+
|
|
1317
|
+
try:
|
|
1318
|
+
generate_latest_delta_log(full_db_path, data_root, store, token)
|
|
1319
|
+
print(f"✅ DuckLake export completed successfully")
|
|
1320
|
+
return True
|
|
1321
|
+
except Exception as e:
|
|
1322
|
+
print(f"❌ DuckLake export failed: {e}")
|
|
1323
|
+
import traceback
|
|
1324
|
+
traceback.print_exc()
|
|
1325
|
+
return False
|
|
1326
|
+
|
|
1262
1327
|
def rle(self, table_name: str = None, mode = "natural",
|
|
1263
1328
|
min_distinct_threshold: int = 2, max_cardinality_pct: float = 0.01,
|
|
1264
1329
|
max_ordering_depth: int = 3, limit: int = None):
|
|
@@ -1692,4 +1757,13 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1692
1757
|
print(f"❌ Error downloading semantic model: {e}")
|
|
1693
1758
|
import traceback
|
|
1694
1759
|
traceback.print_exc()
|
|
1695
|
-
return None
|
|
1760
|
+
return None
|
|
1761
|
+
|
|
1762
|
+
def close(self):
|
|
1763
|
+
"""
|
|
1764
|
+
Close the workspace connection.
|
|
1765
|
+
|
|
1766
|
+
Note: WorkspaceConnection doesn't maintain persistent connections,
|
|
1767
|
+
so this is a no-op for compatibility with code patterns that call close().
|
|
1768
|
+
"""
|
|
1769
|
+
pass
|