duckrun 0.2.5.dev1__tar.gz → 0.2.5.dev4__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.5.dev1
3
+ Version: 0.2.5.dev4
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -61,10 +61,20 @@ class Duckrun:
61
61
  self.scan_all_schemas = scan_all_schemas
62
62
  self.storage_account = storage_account
63
63
 
64
- # Construct proper ABFSS URLs using GUIDs
65
- # Both Tables and Files use lakehouse GUID directly (no .Lakehouse suffix)
66
- self.table_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_id}/Tables/'
67
- self.files_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_id}/Files/'
64
+ # Construct proper ABFSS URLs
65
+ import re
66
+ guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
67
+ # If lakehouse_id is a GUID, use as-is
68
+ if guid_pattern.match(lakehouse_id):
69
+ lakehouse_url_part = lakehouse_id
70
+ else:
71
+ # If workspace name has no spaces, always append .lakehouse unless already present
72
+ if " " not in workspace_id and not lakehouse_id.endswith('.lakehouse'):
73
+ lakehouse_url_part = f'{lakehouse_id}.lakehouse'
74
+ else:
75
+ lakehouse_url_part = lakehouse_id
76
+ self.table_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_url_part}/Tables/'
77
+ self.files_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_url_part}/Files/'
68
78
 
69
79
  # Keep legacy properties for backward compatibility
70
80
  self.workspace = workspace_id
@@ -326,7 +336,19 @@ class Duckrun:
326
336
  url = f"abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/"
327
337
  store = AzureStore.from_url(url, bearer_token=token)
328
338
 
329
- base_path = f"{self.lakehouse_name}/Tables/"
339
+ # Use the same lakehouse URL part logic as in __init__ to ensure .lakehouse suffix is added when needed
340
+ import re
341
+ guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
342
+ if guid_pattern.match(self.lakehouse_id):
343
+ lakehouse_url_part = self.lakehouse_id
344
+ else:
345
+ # If workspace name has no spaces, always append .lakehouse unless already present
346
+ if " " not in self.workspace_id and not self.lakehouse_id.endswith('.lakehouse'):
347
+ lakehouse_url_part = f'{self.lakehouse_id}.lakehouse'
348
+ else:
349
+ lakehouse_url_part = self.lakehouse_id
350
+
351
+ base_path = f"{lakehouse_url_part}/Tables/"
330
352
  tables_found = []
331
353
 
332
354
  if self.scan_all_schemas:
@@ -419,6 +441,26 @@ class Duckrun:
419
441
  print(f"❌ Error attaching lakehouse: {e}")
420
442
  print("Continuing without pre-attached tables.")
421
443
 
444
+ def get_workspace_id(self) -> str:
445
+ """
446
+ Get the workspace ID (GUID or name without spaces).
447
+ Use this when passing workspace parameter to Python functions.
448
+
449
+ Returns:
450
+ Workspace ID - either a GUID or workspace name without spaces
451
+ """
452
+ return self.workspace_id
453
+
454
+ def get_lakehouse_id(self) -> str:
455
+ """
456
+ Get the lakehouse ID (GUID or name).
457
+ Use this when passing lakehouse parameter to Python functions.
458
+
459
+ Returns:
460
+ Lakehouse ID - either a GUID or lakehouse name
461
+ """
462
+ return self.lakehouse_id
463
+
422
464
  def run(self, pipeline: List[Tuple]) -> bool:
423
465
  """
424
466
  Execute pipeline of tasks.
@@ -680,7 +722,11 @@ class WorkspaceConnection:
680
722
  try:
681
723
  import notebookutils # type: ignore
682
724
  token = notebookutils.credentials.getToken("pbi")
683
- workspace_id = notebookutils.runtime.context.get("workspaceId")
725
+ # Always resolve workspace name to ID, even in notebook environment
726
+ workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
727
+ if not workspace_id:
728
+ print(f"Workspace '{self.workspace_name}' not found")
729
+ return []
684
730
  except ImportError:
685
731
  # Fallback to azure-identity
686
732
  print("Getting authentication token...")
@@ -727,7 +773,11 @@ class WorkspaceConnection:
727
773
  try:
728
774
  import notebookutils # type: ignore
729
775
  token = notebookutils.credentials.getToken("pbi")
730
- workspace_id = notebookutils.runtime.context.get("workspaceId")
776
+ # Always resolve workspace name to ID, even in notebook environment
777
+ workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
778
+ if not workspace_id:
779
+ print(f"Workspace '{self.workspace_name}' not found")
780
+ return False
731
781
  except ImportError:
732
782
  # Fallback to azure-identity
733
783
  print("Getting authentication token...")
@@ -110,12 +110,40 @@ def run(duckrun_instance, pipeline: List[Tuple]) -> bool:
110
110
 
111
111
 
112
112
  def _run_python(duckrun_instance, name: str, args: tuple) -> Any:
113
- """Execute Python task, return result"""
113
+ """
114
+ Execute Python task, return result.
115
+
116
+ Automatically substitutes workspace/lakehouse names in args with their resolved IDs
117
+ to prevent URL encoding issues with names containing spaces.
118
+ """
114
119
  duckrun_instance._create_onelake_secret()
115
120
  func = _load_py_function(duckrun_instance, name)
116
121
  if not func:
117
122
  raise RuntimeError(f"Python function '{name}' not found")
118
123
 
124
+ # Get original and resolved names
125
+ original_workspace = duckrun_instance.workspace
126
+ original_lakehouse = duckrun_instance.lakehouse_name
127
+ resolved_workspace = duckrun_instance.workspace_id
128
+ resolved_lakehouse = duckrun_instance.lakehouse_id
129
+
130
+ # Substitute workspace/lakehouse names in args if they differ
131
+ # This prevents URL encoding issues when names contain spaces
132
+ substituted_args = []
133
+ needs_substitution = (original_workspace != resolved_workspace or
134
+ original_lakehouse != resolved_lakehouse)
135
+
136
+ if needs_substitution:
137
+ for arg in args:
138
+ if arg == original_workspace:
139
+ substituted_args.append(resolved_workspace)
140
+ elif arg == original_lakehouse:
141
+ substituted_args.append(resolved_lakehouse)
142
+ else:
143
+ substituted_args.append(arg)
144
+ args = tuple(substituted_args)
145
+ print(f"📝 Auto-substituted workspace/lakehouse names in args for URL compatibility")
146
+
119
147
  print(f"Running Python: {name}{args}")
120
148
  result = func(*args)
121
149
  print(f"✅ Python '{name}' completed")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.5.dev1
3
+ Version: 0.2.5.dev4
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "duckrun"
7
- version = "0.2.5.dev1"
7
+ version = "0.2.5.dev4"
8
8
  description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
File without changes
File without changes
File without changes