duckrun 0.2.19.dev7__tar.gz → 0.2.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/PKG-INFO +1 -1
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/auth.py +3 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/core.py +91 -2
- duckrun-0.2.20/duckrun/ducklake_metadata.py +592 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/semantic_model.py +134 -13
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/writer.py +3 -1
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/PKG-INFO +1 -1
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/SOURCES.txt +7 -1
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/pyproject.toml +1 -1
- duckrun-0.2.20/tests/test_checkpoint_format.py +102 -0
- duckrun-0.2.20/tests/test_ducklake_export.py +4 -0
- duckrun-0.2.20/tests/test_register.py +275 -0
- duckrun-0.2.20/tests/test_writer_dictionary.py +142 -0
- duckrun-0.2.20/tests/test_writer_integration.py +152 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/LICENSE +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/README.md +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/__init__.py +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/files.py +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/notebook.py +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/rle.py +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/runner.py +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/stats.py +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/setup.cfg +0 -0
- {duckrun-0.2.19.dev7 → duckrun-0.2.20}/tests/test_rle.py +0 -0
|
@@ -104,6 +104,8 @@ def _get_local_token() -> Optional[str]:
|
|
|
104
104
|
|
|
105
105
|
except Exception as cli_error:
|
|
106
106
|
print(f"⚠️ Azure CLI authentication failed: {cli_error}")
|
|
107
|
+
print("💡 TIP: Due to MFA requirements, you now need to login with scope:")
|
|
108
|
+
print(" az login --scope https://storage.azure.com/.default")
|
|
107
109
|
print("🔐 Falling back to interactive browser authentication...")
|
|
108
110
|
|
|
109
111
|
# Fallback to interactive browser
|
|
@@ -119,6 +121,7 @@ def _get_local_token() -> Optional[str]:
|
|
|
119
121
|
|
|
120
122
|
except Exception as browser_error:
|
|
121
123
|
print(f"❌ Interactive browser authentication failed: {browser_error}")
|
|
124
|
+
print("💡 Please run: az login --scope https://storage.azure.com/.default")
|
|
122
125
|
return None
|
|
123
126
|
|
|
124
127
|
|
|
@@ -1035,6 +1035,21 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1035
1035
|
"""Get underlying DuckDB connection"""
|
|
1036
1036
|
return self.con
|
|
1037
1037
|
|
|
1038
|
+
def register(self, name: str, df):
|
|
1039
|
+
"""
|
|
1040
|
+
Register a pandas DataFrame as a virtual table in DuckDB.
|
|
1041
|
+
|
|
1042
|
+
Args:
|
|
1043
|
+
name: Name for the virtual table
|
|
1044
|
+
df: pandas DataFrame to register
|
|
1045
|
+
|
|
1046
|
+
Example:
|
|
1047
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse")
|
|
1048
|
+
con.register("tb", df)
|
|
1049
|
+
con.sql("SELECT * FROM tb").show()
|
|
1050
|
+
"""
|
|
1051
|
+
self.con.register(name, df)
|
|
1052
|
+
|
|
1038
1053
|
def get_stats(self, source: str = None, detailed = False):
|
|
1039
1054
|
"""
|
|
1040
1055
|
Get comprehensive statistics for Delta Lake tables.
|
|
@@ -1234,9 +1249,11 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1234
1249
|
dataset_name = self.schema # Use schema name
|
|
1235
1250
|
|
|
1236
1251
|
# Call the deployment function (DirectLake only)
|
|
1252
|
+
# Use lakehouse_id (with .ItemType suffix) instead of lakehouse_name (without suffix)
|
|
1253
|
+
# This ensures proper item resolution for non-lakehouse items like .SnowflakeDatabase
|
|
1237
1254
|
return deploy_semantic_model(
|
|
1238
1255
|
workspace_name_or_id=self.workspace,
|
|
1239
|
-
lakehouse_name_or_id=self.lakehouse_name,
|
|
1256
|
+
lakehouse_name_or_id=self.lakehouse_id,
|
|
1240
1257
|
schema_name=self.schema,
|
|
1241
1258
|
dataset_name=dataset_name,
|
|
1242
1259
|
bim_url_or_path=bim_url,
|
|
@@ -1244,6 +1261,69 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1244
1261
|
refresh=refresh
|
|
1245
1262
|
)
|
|
1246
1263
|
|
|
1264
|
+
def export_ducklake_to_delta(self, db_path: str, data_root: str = None) -> bool:
|
|
1265
|
+
"""
|
|
1266
|
+
Export DuckLake metadata to Delta Lake format for Spark compatibility.
|
|
1267
|
+
|
|
1268
|
+
Reads a DuckLake database file from the Files section and generates Delta Lake
|
|
1269
|
+
checkpoint files and JSON logs for all tables, making them readable by Spark
|
|
1270
|
+
and other Delta Lake tools.
|
|
1271
|
+
|
|
1272
|
+
Args:
|
|
1273
|
+
db_path: Relative path to DuckLake DB file in Files section (e.g., "db/test/test.db")
|
|
1274
|
+
data_root: Optional base path for lakehouse data. If None, reads from DuckLake metadata.
|
|
1275
|
+
|
|
1276
|
+
Returns:
|
|
1277
|
+
True if export succeeded, False otherwise
|
|
1278
|
+
|
|
1279
|
+
Examples:
|
|
1280
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
|
|
1281
|
+
|
|
1282
|
+
# Export DuckLake tables to Delta format
|
|
1283
|
+
con.export_ducklake_to_delta("meta.db")
|
|
1284
|
+
|
|
1285
|
+
# With explicit data root
|
|
1286
|
+
con.export_ducklake_to_delta("db/ducklake.db", data_root="abfss://...")
|
|
1287
|
+
"""
|
|
1288
|
+
from .ducklake_metadata import generate_latest_delta_log
|
|
1289
|
+
import obstore as obs
|
|
1290
|
+
from obstore.store import AzureStore
|
|
1291
|
+
|
|
1292
|
+
# Construct full ABFSS path to DB file in Files section
|
|
1293
|
+
full_db_path = f"{self.files_base_url}{db_path}"
|
|
1294
|
+
|
|
1295
|
+
print(f"🔍 Exporting DuckLake metadata from: {db_path}")
|
|
1296
|
+
print(f"📂 Full DB path: {full_db_path}")
|
|
1297
|
+
|
|
1298
|
+
# Get Azure token
|
|
1299
|
+
from .auth import get_token
|
|
1300
|
+
token = self._get_storage_token()
|
|
1301
|
+
if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
|
|
1302
|
+
print("Authenticating with Azure for DuckLake export...")
|
|
1303
|
+
token = get_token()
|
|
1304
|
+
if not token:
|
|
1305
|
+
print("❌ Failed to authenticate for DuckLake export")
|
|
1306
|
+
return False
|
|
1307
|
+
|
|
1308
|
+
# Setup OneLake store for uploading checkpoint files
|
|
1309
|
+
# Use table_base_url as the base since we'll be writing to Tables section
|
|
1310
|
+
store = AzureStore.from_url(self.table_base_url, bearer_token=token)
|
|
1311
|
+
|
|
1312
|
+
# If data_root not provided, use table_base_url (which includes /Tables/)
|
|
1313
|
+
# This will be used to construct full paths for checkpoint files
|
|
1314
|
+
if data_root is None:
|
|
1315
|
+
data_root = self.table_base_url.rstrip('/')
|
|
1316
|
+
|
|
1317
|
+
try:
|
|
1318
|
+
generate_latest_delta_log(full_db_path, data_root, store, token)
|
|
1319
|
+
print(f"✅ DuckLake export completed successfully")
|
|
1320
|
+
return True
|
|
1321
|
+
except Exception as e:
|
|
1322
|
+
print(f"❌ DuckLake export failed: {e}")
|
|
1323
|
+
import traceback
|
|
1324
|
+
traceback.print_exc()
|
|
1325
|
+
return False
|
|
1326
|
+
|
|
1247
1327
|
def rle(self, table_name: str = None, mode = "natural",
|
|
1248
1328
|
min_distinct_threshold: int = 2, max_cardinality_pct: float = 0.01,
|
|
1249
1329
|
max_ordering_depth: int = 3, limit: int = None):
|
|
@@ -1677,4 +1757,13 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
|
|
|
1677
1757
|
print(f"❌ Error downloading semantic model: {e}")
|
|
1678
1758
|
import traceback
|
|
1679
1759
|
traceback.print_exc()
|
|
1680
|
-
return None
|
|
1760
|
+
return None
|
|
1761
|
+
|
|
1762
|
+
def close(self):
|
|
1763
|
+
"""
|
|
1764
|
+
Close the workspace connection.
|
|
1765
|
+
|
|
1766
|
+
Note: WorkspaceConnection doesn't maintain persistent connections,
|
|
1767
|
+
so this is a no-op for compatibility with code patterns that call close().
|
|
1768
|
+
"""
|
|
1769
|
+
pass
|