duckrun 0.2.19.dev7__tar.gz → 0.2.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/PKG-INFO +1 -1
  2. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/auth.py +3 -0
  3. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/core.py +91 -2
  4. duckrun-0.2.20/duckrun/ducklake_metadata.py +592 -0
  5. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/semantic_model.py +134 -13
  6. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/writer.py +3 -1
  7. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/PKG-INFO +1 -1
  8. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/SOURCES.txt +7 -1
  9. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/pyproject.toml +1 -1
  10. duckrun-0.2.20/tests/test_checkpoint_format.py +102 -0
  11. duckrun-0.2.20/tests/test_ducklake_export.py +4 -0
  12. duckrun-0.2.20/tests/test_register.py +275 -0
  13. duckrun-0.2.20/tests/test_writer_dictionary.py +142 -0
  14. duckrun-0.2.20/tests/test_writer_integration.py +152 -0
  15. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/LICENSE +0 -0
  16. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/README.md +0 -0
  17. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/__init__.py +0 -0
  18. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/files.py +0 -0
  19. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/lakehouse.py +0 -0
  20. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/notebook.py +0 -0
  21. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/rle.py +0 -0
  22. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/runner.py +0 -0
  23. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun/stats.py +0 -0
  24. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/dependency_links.txt +0 -0
  25. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/requires.txt +0 -0
  26. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/duckrun.egg-info/top_level.txt +0 -0
  27. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/setup.cfg +0 -0
  28. {duckrun-0.2.19.dev7 → duckrun-0.2.20}/tests/test_rle.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.19.dev7
3
+ Version: 0.2.20
4
4
  Summary: Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)
5
5
  Author: mim
6
6
  License: MIT
@@ -104,6 +104,8 @@ def _get_local_token() -> Optional[str]:
104
104
 
105
105
  except Exception as cli_error:
106
106
  print(f"⚠️ Azure CLI authentication failed: {cli_error}")
107
+ print("💡 TIP: Due to MFA requirements, you now need to login with scope:")
108
+ print(" az login --scope https://storage.azure.com/.default")
107
109
  print("🔐 Falling back to interactive browser authentication...")
108
110
 
109
111
  # Fallback to interactive browser
@@ -119,6 +121,7 @@ def _get_local_token() -> Optional[str]:
119
121
 
120
122
  except Exception as browser_error:
121
123
  print(f"❌ Interactive browser authentication failed: {browser_error}")
124
+ print("💡 Please run: az login --scope https://storage.azure.com/.default")
122
125
  return None
123
126
 
124
127
 
@@ -1035,6 +1035,21 @@ class Duckrun(WorkspaceOperationsMixin):
1035
1035
  """Get underlying DuckDB connection"""
1036
1036
  return self.con
1037
1037
 
1038
+ def register(self, name: str, df):
1039
+ """
1040
+ Register a pandas DataFrame as a virtual table in DuckDB.
1041
+
1042
+ Args:
1043
+ name: Name for the virtual table
1044
+ df: pandas DataFrame to register
1045
+
1046
+ Example:
1047
+ con = duckrun.connect("workspace/lakehouse.lakehouse")
1048
+ con.register("tb", df)
1049
+ con.sql("SELECT * FROM tb").show()
1050
+ """
1051
+ self.con.register(name, df)
1052
+
1038
1053
  def get_stats(self, source: str = None, detailed = False):
1039
1054
  """
1040
1055
  Get comprehensive statistics for Delta Lake tables.
@@ -1234,9 +1249,11 @@ class Duckrun(WorkspaceOperationsMixin):
1234
1249
  dataset_name = self.schema # Use schema name
1235
1250
 
1236
1251
  # Call the deployment function (DirectLake only)
1252
+ # Use lakehouse_id (with .ItemType suffix) instead of lakehouse_name (without suffix)
1253
+ # This ensures proper item resolution for non-lakehouse items like .SnowflakeDatabase
1237
1254
  return deploy_semantic_model(
1238
1255
  workspace_name_or_id=self.workspace,
1239
- lakehouse_name_or_id=self.lakehouse_name,
1256
+ lakehouse_name_or_id=self.lakehouse_id,
1240
1257
  schema_name=self.schema,
1241
1258
  dataset_name=dataset_name,
1242
1259
  bim_url_or_path=bim_url,
@@ -1244,6 +1261,69 @@ class Duckrun(WorkspaceOperationsMixin):
1244
1261
  refresh=refresh
1245
1262
  )
1246
1263
 
1264
+ def export_ducklake_to_delta(self, db_path: str, data_root: str = None) -> bool:
1265
+ """
1266
+ Export DuckLake metadata to Delta Lake format for Spark compatibility.
1267
+
1268
+ Reads a DuckLake database file from the Files section and generates Delta Lake
1269
+ checkpoint files and JSON logs for all tables, making them readable by Spark
1270
+ and other Delta Lake tools.
1271
+
1272
+ Args:
1273
+ db_path: Relative path to DuckLake DB file in Files section (e.g., "db/test/test.db")
1274
+ data_root: Optional base path for lakehouse data. If None, reads from DuckLake metadata.
1275
+
1276
+ Returns:
1277
+ True if export succeeded, False otherwise
1278
+
1279
+ Examples:
1280
+ con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
1281
+
1282
+ # Export DuckLake tables to Delta format
1283
+ con.export_ducklake_to_delta("meta.db")
1284
+
1285
+ # With explicit data root
1286
+ con.export_ducklake_to_delta("db/ducklake.db", data_root="abfss://...")
1287
+ """
1288
+ from .ducklake_metadata import generate_latest_delta_log
1289
+ import obstore as obs
1290
+ from obstore.store import AzureStore
1291
+
1292
+ # Construct full ABFSS path to DB file in Files section
1293
+ full_db_path = f"{self.files_base_url}{db_path}"
1294
+
1295
+ print(f"🔍 Exporting DuckLake metadata from: {db_path}")
1296
+ print(f"📂 Full DB path: {full_db_path}")
1297
+
1298
+ # Get Azure token
1299
+ from .auth import get_token
1300
+ token = self._get_storage_token()
1301
+ if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
1302
+ print("Authenticating with Azure for DuckLake export...")
1303
+ token = get_token()
1304
+ if not token:
1305
+ print("❌ Failed to authenticate for DuckLake export")
1306
+ return False
1307
+
1308
+ # Setup OneLake store for uploading checkpoint files
1309
+ # Use table_base_url as the base since we'll be writing to Tables section
1310
+ store = AzureStore.from_url(self.table_base_url, bearer_token=token)
1311
+
1312
+ # If data_root not provided, use table_base_url (which includes /Tables/)
1313
+ # This will be used to construct full paths for checkpoint files
1314
+ if data_root is None:
1315
+ data_root = self.table_base_url.rstrip('/')
1316
+
1317
+ try:
1318
+ generate_latest_delta_log(full_db_path, data_root, store, token)
1319
+ print(f"✅ DuckLake export completed successfully")
1320
+ return True
1321
+ except Exception as e:
1322
+ print(f"❌ DuckLake export failed: {e}")
1323
+ import traceback
1324
+ traceback.print_exc()
1325
+ return False
1326
+
1247
1327
  def rle(self, table_name: str = None, mode = "natural",
1248
1328
  min_distinct_threshold: int = 2, max_cardinality_pct: float = 0.01,
1249
1329
  max_ordering_depth: int = 3, limit: int = None):
@@ -1677,4 +1757,13 @@ class WorkspaceConnection(WorkspaceOperationsMixin):
1677
1757
  print(f"❌ Error downloading semantic model: {e}")
1678
1758
  import traceback
1679
1759
  traceback.print_exc()
1680
- return None
1760
+ return None
1761
+
1762
+ def close(self):
1763
+ """
1764
+ Close the workspace connection.
1765
+
1766
+ Note: WorkspaceConnection doesn't maintain persistent connections,
1767
+ so this is a no-op for compatibility with code patterns that call close().
1768
+ """
1769
+ pass