duckrun 0.2.13.dev0__tar.gz → 0.2.14.dev1__tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of duckrun might be problematic.
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/PKG-INFO +1 -1
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/__init__.py +1 -1
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/auth.py +0 -30
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/core.py +30 -20
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/PKG-INFO +1 -1
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/pyproject.toml +1 -1
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/LICENSE +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/README.md +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/files.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/runner.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/semantic_model.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/stats.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/writer.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/setup.cfg +0 -0
duckrun/auth.py

```diff
@@ -20,7 +20,6 @@ def get_token() -> Optional[str]:
     # Check if we already have a cached token
     token_env = os.environ.get("AZURE_STORAGE_TOKEN")
     if token_env and token_env != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-        print("✅ Using existing Azure Storage token")
         return token_env
 
     print("🔐 Starting Azure authentication...")
```
```diff
@@ -77,35 +76,6 @@ def _get_device_code_token() -> Optional[str]:
     return None
 
 
-def _is_databricks() -> bool:
-    """Check if we're running in a Databricks environment"""
-    # Databricks sets specific environment variables
-    return (
-        os.environ.get("DATABRICKS_RUNTIME_VERSION") is not None or
-        os.environ.get("DB_HOME") is not None or
-        "databricks" in os.environ.get("SPARK_HOME", "").lower()
-    )
-
-
-def _get_databricks_token() -> Optional[str]:
-    """Get token using DefaultAzureCredential for Databricks environments"""
-    try:
-        from azure.identity import DefaultAzureCredential
-
-        # DefaultAzureCredential will automatically use Databricks managed identity
-        credential = DefaultAzureCredential()
-        token_obj = credential.get_token("https://storage.azure.com/.default")
-
-        os.environ["AZURE_STORAGE_TOKEN"] = token_obj.token
-        print("✅ Databricks authentication successful!")
-        return token_obj.token
-
-    except Exception as e:
-        print(f"❌ Databricks authentication failed: {e}")
-        print("💡 Make sure your Databricks cluster has the required Azure permissions")
-        return None
-
-
 def _get_local_token() -> Optional[str]:
     """Get token using CLI first, then browser fallback for local environments"""
     # First try Azure CLI directly
```
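The Databricks detection and token helpers are removed entirely in 0.2.14.dev1. For environments that relied on them, a possible workaround (my sketch, not part of the package) is to mint the token yourself and export it as AZURE_STORAGE_TOKEN, which `get_token()` still checks first, as the hunk above shows. This assumes azure-identity is installed and the running identity has storage permissions:

```python
import os
from azure.identity import DefaultAzureCredential

# Mirrors what the removed _get_databricks_token() did: request a
# storage-scoped token and cache it where get_token() looks first.
credential = DefaultAzureCredential()
token = credential.get_token("https://storage.azure.com/.default")
os.environ["AZURE_STORAGE_TOKEN"] = token.token
```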
duckrun/core.py

```diff
@@ -53,7 +53,8 @@ class Duckrun:
 
     def __init__(self, workspace_id: str, lakehouse_id: str, schema: str = "dbo",
                  sql_folder: Optional[str] = None, compaction_threshold: int = 10,
-                 scan_all_schemas: bool = False, storage_account: str = "onelake"):
+                 scan_all_schemas: bool = False, storage_account: str = "onelake",
+                 token_only: bool = False):
         # Store GUIDs for internal use
         self.workspace_id = workspace_id
         self.lakehouse_id = lakehouse_id
```
```diff
@@ -62,6 +63,7 @@ class Duckrun:
         self.compaction_threshold = compaction_threshold
         self.scan_all_schemas = scan_all_schemas
         self.storage_account = storage_account
+        self.token_only = token_only
 
         # Construct proper ABFSS URLs
         import re
```
```diff
@@ -93,12 +95,19 @@ class Duckrun:
         except ImportError:
             pass  # Not in Colab, use default transport
 
-
-
+        # Only attach lakehouse and register functions if not token_only mode
+        if not token_only:
+            self._attach_lakehouse()
+            self._register_lookup_functions()
+        else:
+            # In token_only mode, just create the secret for authentication
+            self._create_onelake_secret()
+            print("✓ Token authenticated (fast mode - tables not listed)")
 
     @classmethod
     def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
-                compaction_threshold: int = 100, storage_account: str = "onelake"):
+                compaction_threshold: int = 100, storage_account: str = "onelake",
+                token_only: bool = False):
        """
        Create and connect to lakehouse or workspace.
 
```
```diff
@@ -112,6 +121,7 @@ class Duckrun:
            sql_folder: Optional path or URL to SQL files folder
            compaction_threshold: File count threshold for compaction
            storage_account: Storage account name (default: "onelake")
+           token_only: If True, only authenticate without listing tables (faster connection)
 
        Examples:
            # Workspace management only (supports spaces in names)
```
```diff
@@ -125,6 +135,9 @@ class Duckrun:
            dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse")  # defaults to dbo schema
            dr = Duckrun.connect("workspace/lakehouse.lakehouse", storage_account="xxx-onelake")  # custom storage
 
+           # Fast connection without table listing (token only)
+           dr = Duckrun.connect("workspace/lakehouse.lakehouse", token_only=True)
+
        Note:
            Internally resolves friendly names (with spaces) to GUIDs and constructs proper ABFSS URLs:
            "My Workspace/My Lakehouse.lakehouse/schema" becomes
```
```diff
@@ -169,7 +182,7 @@ class Duckrun:
        # Resolve friendly names to GUIDs and construct proper ABFSS path
        workspace_id, lakehouse_id = cls._resolve_names_to_guids(workspace_name, lakehouse_name)
 
-       return cls(workspace_id, lakehouse_id, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account)
+       return cls(workspace_id, lakehouse_id, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account, token_only)
 
    @classmethod
    def _resolve_names_to_guids(cls, workspace_name: str, lakehouse_name: str) -> tuple[str, str]:
```
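With `token_only` now threaded from `connect()` through `__init__` into `cls(...)`, usage follows the docstring examples above. A sketch (workspace and lakehouse names are placeholders; the import path assumes the class lives in `duckrun/core.py` as this diff shows):

```python
from duckrun.core import Duckrun

# Fast path: authenticates and creates the OneLake secret, but skips
# table discovery and view attachment.
dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse", token_only=True)

# Default path: additionally attaches every Delta table as a DuckDB view.
dr_full = Duckrun.connect("My Workspace/My Lakehouse.lakehouse")
```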
```diff
@@ -401,15 +414,8 @@ class Duckrun:
             if not tables:
                 return
 
-            #
-
-            for schema_name, table_name in tables:
-                if schema_name not in schema_tables:
-                    schema_tables[schema_name] = []
-                schema_tables[schema_name].append(table_name)
-
-            attached_count = 0
-            skipped_tables = []
+            # Collect table names for display
+            table_names = []
 
             for schema_name, table_name in tables:
                 try:
```
```diff
@@ -417,18 +423,22 @@ class Duckrun:
                         # Create proper schema.table structure in DuckDB
                         self.con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
                         view_name = f"{schema_name}.{table_name}"
+                        table_names.append(view_name)
                     else:
                         # Single schema mode - use just table name
                         view_name = table_name
+                        table_names.append(table_name)
 
                     self.con.sql(f"""
                         CREATE OR REPLACE VIEW {view_name}
                         AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
                     """)
-                    attached_count += 1
                 except Exception as e:
-                    skipped_tables.append(f"{schema_name}.{table_name}")
                     continue
+
+            # Print discovered tables as comma-separated list
+            if table_names:
+                print(", ".join(table_names))
 
         except Exception as e:
             print(f"❌ Error attaching lakehouse: {e}")
```
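The loop above is DuckDB's standard view-per-Delta-table pattern, now also collecting names for the comma-separated listing. A standalone sketch of the same mechanics, assuming Delta tables exist at the illustrative local paths (duckrun itself points `delta_scan` at ABFSS URLs):

```python
import duckdb

con = duckdb.connect()
con.sql("INSTALL delta; LOAD delta;")  # delta_scan lives in the delta extension

# Illustrative inputs; duckrun derives these from the lakehouse listing.
table_base_url = "/tmp/lake/Tables/"
tables = [("dbo", "sales"), ("dbo", "customers")]

table_names = []
for schema_name, table_name in tables:
    con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
    view_name = f"{schema_name}.{table_name}"
    con.sql(f"""
        CREATE OR REPLACE VIEW {view_name}
        AS SELECT * FROM delta_scan('{table_base_url}{schema_name}/{table_name}');
    """)
    table_names.append(view_name)

print(", ".join(table_names))  # matches the new comma-separated listing
```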
```diff
@@ -570,10 +580,10 @@ class Duckrun:
 
        # Register functions in DuckDB
        try:
-           self.con.create_function("get_workspace_name", get_workspace_name)
-           self.con.create_function("get_lakehouse_name", get_lakehouse_name)
-           self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name)
-           self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name)
+           self.con.create_function("get_workspace_name", get_workspace_name, null_handling='SPECIAL')
+           self.con.create_function("get_lakehouse_name", get_lakehouse_name, null_handling='SPECIAL')
+           self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name, null_handling='SPECIAL')
+           self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name, null_handling='SPECIAL')
        except Exception as e:
            print(f"⚠️ Warning: Could not register lookup functions: {e}")
 
```
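The only change here opts the lookup UDFs into DuckDB's special null handling: with `null_handling='SPECIAL'`, a NULL argument reaches the Python function instead of short-circuiting the whole call to NULL. A minimal standalone sketch of the mechanism (function name and values are illustrative, not duckrun APIs):

```python
import duckdb
from duckdb.typing import VARCHAR

def name_or_default(name):
    # Called even for NULL input (None) because of null_handling='SPECIAL';
    # with default handling, DuckDB would return NULL without calling us.
    return "unknown" if name is None else name

con = duckdb.connect()
con.create_function("name_or_default", name_or_default,
                    [VARCHAR], VARCHAR, null_handling='SPECIAL')
print(con.sql("SELECT name_or_default(NULL), name_or_default('ws1')").fetchall())
# -> [('unknown', 'ws1')]
```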