duckrun 0.2.5.dev2__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -441,6 +441,26 @@ class Duckrun:
441
441
  print(f"❌ Error attaching lakehouse: {e}")
442
442
  print("Continuing without pre-attached tables.")
443
443
 
444
+ def get_workspace_id(self) -> str:
445
+ """
446
+ Get the workspace ID (GUID or name without spaces).
447
+ Use this when passing workspace parameter to Python functions.
448
+
449
+ Returns:
450
+ Workspace ID - either a GUID or workspace name without spaces
451
+ """
452
+ return self.workspace_id
453
+
454
+ def get_lakehouse_id(self) -> str:
455
+ """
456
+ Get the lakehouse ID (GUID or name).
457
+ Use this when passing lakehouse parameter to Python functions.
458
+
459
+ Returns:
460
+ Lakehouse ID - either a GUID or lakehouse name
461
+ """
462
+ return self.lakehouse_id
463
+
444
464
  def run(self, pipeline: List[Tuple]) -> bool:
445
465
  """
446
466
  Execute pipeline of tasks.
@@ -702,7 +722,11 @@ class WorkspaceConnection:
702
722
  try:
703
723
  import notebookutils # type: ignore
704
724
  token = notebookutils.credentials.getToken("pbi")
705
- workspace_id = notebookutils.runtime.context.get("workspaceId")
725
+ # Always resolve workspace name to ID, even in notebook environment
726
+ workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
727
+ if not workspace_id:
728
+ print(f"Workspace '{self.workspace_name}' not found")
729
+ return []
706
730
  except ImportError:
707
731
  # Fallback to azure-identity
708
732
  print("Getting authentication token...")
@@ -727,7 +751,6 @@ class WorkspaceConnection:
727
751
  lakehouses = response.json().get("value", [])
728
752
  lakehouse_names = [lh.get("displayName", "") for lh in lakehouses]
729
753
 
730
- print(f"Found {len(lakehouse_names)} lakehouses: {lakehouse_names}")
731
754
  return lakehouse_names
732
755
 
733
756
  except Exception as e:
@@ -749,7 +772,11 @@ class WorkspaceConnection:
749
772
  try:
750
773
  import notebookutils # type: ignore
751
774
  token = notebookutils.credentials.getToken("pbi")
752
- workspace_id = notebookutils.runtime.context.get("workspaceId")
775
+ # Always resolve workspace name to ID, even in notebook environment
776
+ workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
777
+ if not workspace_id:
778
+ print(f"Workspace '{self.workspace_name}' not found")
779
+ return False
753
780
  except ImportError:
754
781
  # Fallback to azure-identity
755
782
  print("Getting authentication token...")
duckrun/runner.py CHANGED
@@ -110,12 +110,40 @@ def run(duckrun_instance, pipeline: List[Tuple]) -> bool:
110
110
 
111
111
 
112
112
  def _run_python(duckrun_instance, name: str, args: tuple) -> Any:
113
- """Execute Python task, return result"""
113
+ """
114
+ Execute Python task, return result.
115
+
116
+ Automatically substitutes workspace/lakehouse names in args with their resolved IDs
117
+ to prevent URL encoding issues with names containing spaces.
118
+ """
114
119
  duckrun_instance._create_onelake_secret()
115
120
  func = _load_py_function(duckrun_instance, name)
116
121
  if not func:
117
122
  raise RuntimeError(f"Python function '{name}' not found")
118
123
 
124
+ # Get original and resolved names
125
+ original_workspace = duckrun_instance.workspace
126
+ original_lakehouse = duckrun_instance.lakehouse_name
127
+ resolved_workspace = duckrun_instance.workspace_id
128
+ resolved_lakehouse = duckrun_instance.lakehouse_id
129
+
130
+ # Substitute workspace/lakehouse names in args if they differ
131
+ # This prevents URL encoding issues when names contain spaces
132
+ substituted_args = []
133
+ needs_substitution = (original_workspace != resolved_workspace or
134
+ original_lakehouse != resolved_lakehouse)
135
+
136
+ if needs_substitution:
137
+ for arg in args:
138
+ if arg == original_workspace:
139
+ substituted_args.append(resolved_workspace)
140
+ elif arg == original_lakehouse:
141
+ substituted_args.append(resolved_lakehouse)
142
+ else:
143
+ substituted_args.append(arg)
144
+ args = tuple(substituted_args)
145
+ print(f"📝 Auto-substituted workspace/lakehouse names in args for URL compatibility")
146
+
119
147
  print(f"Running Python: {name}{args}")
120
148
  result = func(*args)
121
149
  print(f"✅ Python '{name}' completed")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.5.dev2
3
+ Version: 0.2.6
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -26,12 +26,10 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
26
26
 
27
27
  **Requirements:**
28
28
  - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
29
- - Workspace and lakehouse names cannot contain spaces
29
+ - **Workspace and lakehouse names with spaces are now fully supported!** ✅
30
30
 
31
31
  **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
32
32
 
33
- **Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
34
-
35
33
  ## What It Does
36
34
 
37
35
  It does orchestration, arbitrary SQL statements, and file manipulation. That's it - just stuff I encounter in my daily workflow when working with Fabric notebooks.
@@ -52,20 +50,28 @@ pip install duckrun[local]
52
50
  ```python
53
51
  import duckrun
54
52
 
55
- # Connect to your Fabric lakehouse with a specific schema
56
- con = duckrun.connect("my_workspace/my_lakehouse.lakehouse/dbo")
53
+ # 1. Workspace Management (list and create lakehouses)
54
+ ws = duckrun.connect("My Workspace")
55
+ lakehouses = ws.list_lakehouses() # Returns list of lakehouse names
56
+ ws.create_lakehouse_if_not_exists("New Lakehouse")
57
+
58
+ # 2. Connect to lakehouse with a specific schema
59
+ con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
60
+
61
+ # Works with workspace names containing spaces!
62
+ con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
57
63
 
58
64
  # If no schema is specified, it defaults to 'dbo' and all schemas are scanned
59
65
  # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
60
- con = duckrun.connect("my_workspace/my_lakehouse.lakehouse")
66
+ con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
61
67
 
62
- # Explore data
68
+ # 3. Explore data
63
69
  con.sql("SELECT * FROM my_table LIMIT 10").show()
64
70
 
65
- # Write to Delta tables (Spark-style API)
71
+ # 4. Write to Delta tables (Spark-style API)
66
72
  con.sql("SELECT * FROM source").write.mode("overwrite").saveAsTable("target")
67
73
 
68
- # Upload/download files to/from OneLake Files
74
+ # 5. Upload/download files to/from OneLake Files
69
75
  con.copy("./local_folder", "target_folder") # Upload files
70
76
  con.download("target_folder", "./downloaded") # Download files
71
77
  ```
@@ -75,15 +81,23 @@ That's it! No `sql_folder` needed for data exploration.
75
81
  ## Connection Format
76
82
 
77
83
  ```python
78
- # With schema (recommended for better performance)
79
- con = duckrun.connect("workspace/lakehouse.lakehouse/schema")
84
+ # Workspace management (list and create lakehouses)
85
+ ws = duckrun.connect("My Workspace")
86
+ ws.list_lakehouses() # Returns: ['lakehouse1', 'lakehouse2', ...]
87
+ ws.create_lakehouse_if_not_exists("New Lakehouse")
88
+
89
+ # Lakehouse connection with schema (recommended for best performance)
90
+ con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
91
+
92
+ # Supports workspace names with spaces!
93
+ con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
80
94
 
81
95
  # Without schema (defaults to 'dbo', scans all schemas)
82
96
  # ⚠️ This can be slow for large lakehouses!
83
- con = duckrun.connect("workspace/lakehouse.lakehouse")
97
+ con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
84
98
 
85
- # With options
86
- con = duckrun.connect("workspace/lakehouse.lakehouse/dbo", sql_folder="./sql")
99
+ # With SQL folder for pipeline orchestration
100
+ con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo", sql_folder="./sql")
87
101
  ```
88
102
 
89
103
  ### Multi-Schema Support
@@ -0,0 +1,12 @@
1
+ duckrun/__init__.py,sha256=XA85pL2vK1AkmBic8e7WxeqNvcd6SjFX4zsQpImDO6E,230
2
+ duckrun/core.py,sha256=Y4-5H83Xw0mZa12QM5pcC7qOPidrDFASLcGIoUW3zwY,39394
3
+ duckrun/files.py,sha256=piWRU5w9jHrW-wuV4Gf-SKY_jhFv9eflxgWO8AZCQTI,10495
4
+ duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
5
+ duckrun/runner.py,sha256=XsQqWlesFD2cuhH2gsQj3Astg0XN7xhW15WPmr8D65I,13797
6
+ duckrun/stats.py,sha256=2FTqoQNVjD84-H1HjStHxZkOpAGKXS79M55B00pOlok,9804
7
+ duckrun/writer.py,sha256=eWrGtDQTbXi8H3sSt2WucYTdEQUjK97KmQxzCbqAuMs,6221
8
+ duckrun-0.2.6.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
9
+ duckrun-0.2.6.dist-info/METADATA,sha256=i8kvmnqpsddtJGq5GD44SFif0YTN-UFFviPyQZMPHn0,18799
10
+ duckrun-0.2.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ duckrun-0.2.6.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
12
+ duckrun-0.2.6.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- duckrun/__init__.py,sha256=XA85pL2vK1AkmBic8e7WxeqNvcd6SjFX4zsQpImDO6E,230
2
- duckrun/core.py,sha256=UgBE90zTFvnieTrUEb4tDA2cWSwFfh24M_e46FTmFvg,38345
3
- duckrun/files.py,sha256=piWRU5w9jHrW-wuV4Gf-SKY_jhFv9eflxgWO8AZCQTI,10495
4
- duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
5
- duckrun/runner.py,sha256=lfwNoU1CZXh6bPTHvGWVaUWjzG5crvT7Pzq4onMEVjw,12576
6
- duckrun/stats.py,sha256=2FTqoQNVjD84-H1HjStHxZkOpAGKXS79M55B00pOlok,9804
7
- duckrun/writer.py,sha256=eWrGtDQTbXi8H3sSt2WucYTdEQUjK97KmQxzCbqAuMs,6221
8
- duckrun-0.2.5.dev2.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
9
- duckrun-0.2.5.dev2.dist-info/METADATA,sha256=wiB12-pG_jlyUbghVNMyE3KHcwt8GFJd04VNZ2C4VwE,18344
10
- duckrun-0.2.5.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
- duckrun-0.2.5.dev2.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
12
- duckrun-0.2.5.dev2.dist-info/RECORD,,