duckrun 0.2.19.dev8__tar.gz → 0.2.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/PKG-INFO +1 -1
  2. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/auth.py +3 -0
  3. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/core.py +76 -2
  4. duckrun-0.2.20/duckrun/ducklake_metadata.py +592 -0
  5. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/semantic_model.py +134 -13
  6. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/writer.py +3 -1
  7. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/PKG-INFO +1 -1
  8. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/SOURCES.txt +6 -1
  9. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/pyproject.toml +1 -1
  10. duckrun-0.2.20/tests/test_checkpoint_format.py +102 -0
  11. duckrun-0.2.20/tests/test_ducklake_export.py +4 -0
  12. duckrun-0.2.20/tests/test_writer_dictionary.py +142 -0
  13. duckrun-0.2.20/tests/test_writer_integration.py +152 -0
  14. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/LICENSE +0 -0
  15. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/README.md +0 -0
  16. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/__init__.py +0 -0
  17. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/files.py +0 -0
  18. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/lakehouse.py +0 -0
  19. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/notebook.py +0 -0
  20. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/rle.py +0 -0
  21. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/runner.py +0 -0
  22. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun/stats.py +0 -0
  23. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/dependency_links.txt +0 -0
  24. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/requires.txt +0 -0
  25. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/duckrun.egg-info/top_level.txt +0 -0
  26. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/setup.cfg +0 -0
  27. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/tests/test_register.py +0 -0
  28. {duckrun-0.2.19.dev8 → duckrun-0.2.20}/tests/test_rle.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.19.dev8
3
+ Version: 0.2.20
4
4
  Summary: Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)
5
5
  Author: mim
6
6
  License: MIT
@@ -104,6 +104,8 @@ def _get_local_token() -> Optional[str]:
104
104
 
105
105
  except Exception as cli_error:
106
106
  print(f"⚠️ Azure CLI authentication failed: {cli_error}")
107
+ print("💡 TIP: Due to MFA requirements, you now need to login with scope:")
108
+ print(" az login --scope https://storage.azure.com/.default")
107
109
  print("🔐 Falling back to interactive browser authentication...")
108
110
 
109
111
  # Fallback to interactive browser
@@ -119,6 +121,7 @@ def _get_local_token() -> Optional[str]:
119
121
 
120
122
  except Exception as browser_error:
121
123
  print(f"❌ Interactive browser authentication failed: {browser_error}")
124
+ print("💡 Please run: az login --scope https://storage.azure.com/.default")
122
125
  return None
123
126
 
124
127
 
@@ -1249,9 +1249,11 @@ class Duckrun(WorkspaceOperationsMixin):
1249
1249
  dataset_name = self.schema # Use schema name
1250
1250
 
1251
1251
  # Call the deployment function (DirectLake only)
1252
+ # Use lakehouse_id (with .ItemType suffix) instead of lakehouse_name (without suffix)
1253
+ # This ensures proper item resolution for non-lakehouse items like .SnowflakeDatabase
1252
1254
  return deploy_semantic_model(
1253
1255
  workspace_name_or_id=self.workspace,
1254
- lakehouse_name_or_id=self.lakehouse_name,
1256
+ lakehouse_name_or_id=self.lakehouse_id,
1255
1257
  schema_name=self.schema,
1256
1258
  dataset_name=dataset_name,
1257
1259
  bim_url_or_path=bim_url,
@@ -1259,6 +1261,69 @@ class Duckrun(WorkspaceOperationsMixin):
1259
1261
  refresh=refresh
1260
1262
  )
1261
1263
 
1264
+ def export_ducklake_to_delta(self, db_path: str, data_root: str = None) -> bool:
1265
+ """
1266
+ Export DuckLake metadata to Delta Lake format for Spark compatibility.
1267
+
1268
+ Reads a DuckLake database file from the Files section and generates Delta Lake
1269
+ checkpoint files and JSON logs for all tables, making them readable by Spark
1270
+ and other Delta Lake tools.
1271
+
1272
+ Args:
1273
+ db_path: Relative path to DuckLake DB file in Files section (e.g., "db/test/test.db")
1274
+ data_root: Optional base path for lakehouse data. If None, reads from DuckLake metadata.
1275
+
1276
+ Returns:
1277
+ True if export succeeded, False otherwise
1278
+
1279
+ Examples:
1280
+ con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
1281
+
1282
+ # Export DuckLake tables to Delta format
1283
+ con.export_ducklake_to_delta("meta.db")
1284
+
1285
+ # With explicit data root
1286
+ con.export_ducklake_to_delta("db/ducklake.db", data_root="abfss://...")
1287
+ """
1288
+ from .ducklake_metadata import generate_latest_delta_log
1289
+ import obstore as obs
1290
+ from obstore.store import AzureStore
1291
+
1292
+ # Construct full ABFSS path to DB file in Files section
1293
+ full_db_path = f"{self.files_base_url}{db_path}"
1294
+
1295
+ print(f"🔍 Exporting DuckLake metadata from: {db_path}")
1296
+ print(f"📂 Full DB path: {full_db_path}")
1297
+
1298
+ # Get Azure token
1299
+ from .auth import get_token
1300
+ token = self._get_storage_token()
1301
+ if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
1302
+ print("Authenticating with Azure for DuckLake export...")
1303
+ token = get_token()
1304
+ if not token:
1305
+ print("❌ Failed to authenticate for DuckLake export")
1306
+ return False
1307
+
1308
+ # Setup OneLake store for uploading checkpoint files
1309
+ # Use table_base_url as the base since we'll be writing to Tables section
1310
+ store = AzureStore.from_url(self.table_base_url, bearer_token=token)
1311
+
1312
+ # If data_root not provided, use table_base_url (which includes /Tables/)
1313
+ # This will be used to construct full paths for checkpoint files
1314
+ if data_root is None:
1315
+ data_root = self.table_base_url.rstrip('/')
1316
+
1317
+ try:
1318
+ generate_latest_delta_log(full_db_path, data_root, store, token)
1319
+ print(f"✅ DuckLake export completed successfully")
1320
+ return True
1321
+ except Exception as e:
1322
+ print(f"❌ DuckLake export failed: {e}")
1323
+ import traceback
1324
+ traceback.print_exc()
1325
+ return False
1326
+
1262
1327
  def rle(self, table_name: str = None, mode = "natural",
1263
1328
  min_distinct_threshold: int = 2, max_cardinality_pct: float = 0.01,
1264
1329
  max_ordering_depth: int = 3, limit: int = None):
@@ -1692,4 +1757,13 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
1692
1757
  print(f"❌ Error downloading semantic model: {e}")
1693
1758
  import traceback
1694
1759
  traceback.print_exc()
1695
- return None
1760
+ return None
1761
+
1762
+ def close(self):
1763
+ """
1764
+ Close the workspace connection.
1765
+
1766
+ Note: WorkspaceConnection doesn't maintain persistent connections,
1767
+ so this is a no-op for compatibility with code patterns that call close().
1768
+ """
1769
+ pass