duckrun 0.2.5.dev1__tar.gz → 0.2.5.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/PKG-INFO +1 -1
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun/core.py +57 -7
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun/runner.py +29 -1
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun.egg-info/PKG-INFO +1 -1
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/pyproject.toml +1 -1
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/LICENSE +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/README.md +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun/__init__.py +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun/files.py +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun/stats.py +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun/writer.py +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.5.dev1 → duckrun-0.2.5.dev4}/setup.cfg +0 -0
@@ -61,10 +61,20 @@ class Duckrun:
|
|
61
61
|
self.scan_all_schemas = scan_all_schemas
|
62
62
|
self.storage_account = storage_account
|
63
63
|
|
64
|
-
# Construct proper ABFSS URLs
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
# Construct proper ABFSS URLs
|
65
|
+
import re
|
66
|
+
guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
|
67
|
+
# If lakehouse_id is a GUID, use as-is
|
68
|
+
if guid_pattern.match(lakehouse_id):
|
69
|
+
lakehouse_url_part = lakehouse_id
|
70
|
+
else:
|
71
|
+
# If workspace name has no spaces, always append .lakehouse unless already present
|
72
|
+
if " " not in workspace_id and not lakehouse_id.endswith('.lakehouse'):
|
73
|
+
lakehouse_url_part = f'{lakehouse_id}.lakehouse'
|
74
|
+
else:
|
75
|
+
lakehouse_url_part = lakehouse_id
|
76
|
+
self.table_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_url_part}/Tables/'
|
77
|
+
self.files_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_url_part}/Files/'
|
68
78
|
|
69
79
|
# Keep legacy properties for backward compatibility
|
70
80
|
self.workspace = workspace_id
|
@@ -326,7 +336,19 @@ class Duckrun:
|
|
326
336
|
url = f"abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/"
|
327
337
|
store = AzureStore.from_url(url, bearer_token=token)
|
328
338
|
|
329
|
-
|
339
|
+
# Use the same lakehouse URL part logic as in __init__ to ensure .lakehouse suffix is added when needed
|
340
|
+
import re
|
341
|
+
guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
|
342
|
+
if guid_pattern.match(self.lakehouse_id):
|
343
|
+
lakehouse_url_part = self.lakehouse_id
|
344
|
+
else:
|
345
|
+
# If workspace name has no spaces, always append .lakehouse unless already present
|
346
|
+
if " " not in self.workspace_id and not self.lakehouse_id.endswith('.lakehouse'):
|
347
|
+
lakehouse_url_part = f'{self.lakehouse_id}.lakehouse'
|
348
|
+
else:
|
349
|
+
lakehouse_url_part = self.lakehouse_id
|
350
|
+
|
351
|
+
base_path = f"{lakehouse_url_part}/Tables/"
|
330
352
|
tables_found = []
|
331
353
|
|
332
354
|
if self.scan_all_schemas:
|
@@ -419,6 +441,26 @@ class Duckrun:
|
|
419
441
|
print(f"❌ Error attaching lakehouse: {e}")
|
420
442
|
print("Continuing without pre-attached tables.")
|
421
443
|
|
444
|
+
def get_workspace_id(self) -> str:
|
445
|
+
"""
|
446
|
+
Get the workspace ID (GUID or name without spaces).
|
447
|
+
Use this when passing workspace parameter to Python functions.
|
448
|
+
|
449
|
+
Returns:
|
450
|
+
Workspace ID - either a GUID or workspace name without spaces
|
451
|
+
"""
|
452
|
+
return self.workspace_id
|
453
|
+
|
454
|
+
def get_lakehouse_id(self) -> str:
|
455
|
+
"""
|
456
|
+
Get the lakehouse ID (GUID or name).
|
457
|
+
Use this when passing lakehouse parameter to Python functions.
|
458
|
+
|
459
|
+
Returns:
|
460
|
+
Lakehouse ID - either a GUID or lakehouse name
|
461
|
+
"""
|
462
|
+
return self.lakehouse_id
|
463
|
+
|
422
464
|
def run(self, pipeline: List[Tuple]) -> bool:
|
423
465
|
"""
|
424
466
|
Execute pipeline of tasks.
|
@@ -680,7 +722,11 @@ class WorkspaceConnection:
|
|
680
722
|
try:
|
681
723
|
import notebookutils # type: ignore
|
682
724
|
token = notebookutils.credentials.getToken("pbi")
|
683
|
-
|
725
|
+
# Always resolve workspace name to ID, even in notebook environment
|
726
|
+
workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
|
727
|
+
if not workspace_id:
|
728
|
+
print(f"Workspace '{self.workspace_name}' not found")
|
729
|
+
return []
|
684
730
|
except ImportError:
|
685
731
|
# Fallback to azure-identity
|
686
732
|
print("Getting authentication token...")
|
@@ -727,7 +773,11 @@ class WorkspaceConnection:
|
|
727
773
|
try:
|
728
774
|
import notebookutils # type: ignore
|
729
775
|
token = notebookutils.credentials.getToken("pbi")
|
730
|
-
|
776
|
+
# Always resolve workspace name to ID, even in notebook environment
|
777
|
+
workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
|
778
|
+
if not workspace_id:
|
779
|
+
print(f"Workspace '{self.workspace_name}' not found")
|
780
|
+
return False
|
731
781
|
except ImportError:
|
732
782
|
# Fallback to azure-identity
|
733
783
|
print("Getting authentication token...")
|
@@ -110,12 +110,40 @@ def run(duckrun_instance, pipeline: List[Tuple]) -> bool:
|
|
110
110
|
|
111
111
|
|
112
112
|
def _run_python(duckrun_instance, name: str, args: tuple) -> Any:
|
113
|
-
"""
|
113
|
+
"""
|
114
|
+
Execute Python task, return result.
|
115
|
+
|
116
|
+
Automatically substitutes workspace/lakehouse names in args with their resolved IDs
|
117
|
+
to prevent URL encoding issues with names containing spaces.
|
118
|
+
"""
|
114
119
|
duckrun_instance._create_onelake_secret()
|
115
120
|
func = _load_py_function(duckrun_instance, name)
|
116
121
|
if not func:
|
117
122
|
raise RuntimeError(f"Python function '{name}' not found")
|
118
123
|
|
124
|
+
# Get original and resolved names
|
125
|
+
original_workspace = duckrun_instance.workspace
|
126
|
+
original_lakehouse = duckrun_instance.lakehouse_name
|
127
|
+
resolved_workspace = duckrun_instance.workspace_id
|
128
|
+
resolved_lakehouse = duckrun_instance.lakehouse_id
|
129
|
+
|
130
|
+
# Substitute workspace/lakehouse names in args if they differ
|
131
|
+
# This prevents URL encoding issues when names contain spaces
|
132
|
+
substituted_args = []
|
133
|
+
needs_substitution = (original_workspace != resolved_workspace or
|
134
|
+
original_lakehouse != resolved_lakehouse)
|
135
|
+
|
136
|
+
if needs_substitution:
|
137
|
+
for arg in args:
|
138
|
+
if arg == original_workspace:
|
139
|
+
substituted_args.append(resolved_workspace)
|
140
|
+
elif arg == original_lakehouse:
|
141
|
+
substituted_args.append(resolved_lakehouse)
|
142
|
+
else:
|
143
|
+
substituted_args.append(arg)
|
144
|
+
args = tuple(substituted_args)
|
145
|
+
print(f"📝 Auto-substituted workspace/lakehouse names in args for URL compatibility")
|
146
|
+
|
119
147
|
print(f"Running Python: {name}{args}")
|
120
148
|
result = func(*args)
|
121
149
|
print(f"✅ Python '{name}' completed")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|