duckrun 0.2.5.dev4.tar.gz → 0.2.7.tar.gz

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.5.dev4
+Version: 0.2.7
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -26,11 +26,10 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
 
 **Requirements:**
 - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
-- Workspace and lakehouse names cannot contain spaces
+- **Workspace names with spaces are fully supported!** ✅
 
-**Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
 
-**Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
+**Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
 
 ## What It Does
 
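For context on the **Delta Lake Version** note retained above: a minimal sketch of the row-group control it refers to, assuming a deltalake 0.x release with the pyarrow writer engine (the target path and row counts below are illustrative, not taken from duckrun):

```python
# Sketch only: assumes deltalake 0.x with the pyarrow writer engine,
# where write_deltalake exposes row-group sizing parameters. The path
# and sizes are illustrative; duckrun writes to OneLake table paths.
import pyarrow as pa
from deltalake import write_deltalake

data = pa.table({"id": list(range(1_000_000))})

write_deltalake(
    "/tmp/demo_table",
    data,
    mode="overwrite",
    max_rows_per_file=1_000_000,  # roughly one parquet file per 1M rows
    min_rows_per_group=500_000,   # row-group sizes DirectLake scans efficiently
    max_rows_per_group=1_000_000,
)
```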
@@ -49,23 +48,48 @@ pip install duckrun[local]
 
 ## Quick Start
 
+### Simple Example for New Users
+
+```python
+import duckrun
+
+# Connect to a workspace and manage lakehouses
+con = duckrun.connect('My Workspace')
+con.list_lakehouses() # See what lakehouses exist
+con.create_lakehouse_if_not_exists('data') # Create if needed
+
+# Connect to a specific lakehouse and query data
+con = duckrun.connect("My Workspace/data.lakehouse/dbo")
+con.sql("SELECT * FROM my_table LIMIT 10").show()
+```
+
+### Full Feature Overview
+
 ```python
 import duckrun
 
-# Connect to your Fabric lakehouse with a specific schema
-con = duckrun.connect("my_workspace/my_lakehouse.lakehouse/dbo")
+# 1. Workspace Management (list and create lakehouses)
+ws = duckrun.connect("My Workspace")
+lakehouses = ws.list_lakehouses() # Returns list of lakehouse names
+ws.create_lakehouse_if_not_exists("New_Lakehouse")
+
+# 2. Connect to lakehouse with a specific schema
+con = duckrun.connect("My Workspace/MyLakehouse.lakehouse/dbo")
+
+# Workspace names with spaces are supported!
+con = duckrun.connect("Data Analytics/SalesData.lakehouse/analytics")
 
 # Schema defaults to 'dbo' if not specified (scans all schemas)
 # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
-con = duckrun.connect("my_workspace/my_lakehouse.lakehouse")
+con = duckrun.connect("My Workspace/My_Lakehouse.lakehouse")
 
-# Explore data
+# 3. Explore data
 con.sql("SELECT * FROM my_table LIMIT 10").show()
 
-# Write to Delta tables (Spark-style API)
+# 4. Write to Delta tables (Spark-style API)
 con.sql("SELECT * FROM source").write.mode("overwrite").saveAsTable("target")
 
-# Upload/download files to/from OneLake Files
+# 5. Upload/download files to/from OneLake Files
 con.copy("./local_folder", "target_folder") # Upload files
 con.download("target_folder", "./downloaded") # Download files
 ```
@@ -75,15 +99,23 @@ That's it! No `sql_folder` needed for data exploration.
 ## Connection Format
 
 ```python
-# With schema (recommended for better performance)
-con = duckrun.connect("workspace/lakehouse.lakehouse/schema")
+# Workspace management (list and create lakehouses)
+ws = duckrun.connect("My Workspace")
+ws.list_lakehouses() # Returns: ['lakehouse1', 'lakehouse2', ...]
+ws.create_lakehouse_if_not_exists("New Lakehouse")
+
+# Lakehouse connection with schema (recommended for best performance)
+con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
+
+# Supports workspace names with spaces!
+con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
 
 # Without schema (defaults to 'dbo', scans all schemas)
 # ⚠️ This can be slow for large lakehouses!
-con = duckrun.connect("workspace/lakehouse.lakehouse")
+con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
 
-# With options
-con = duckrun.connect("workspace/lakehouse.lakehouse/dbo", sql_folder="./sql")
+# With SQL folder for pipeline orchestration
+con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo", sql_folder="./sql")
 ```
 
 ### Multi-Schema Support
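All of the connection strings above share the `workspace/lakehouse.lakehouse/schema` shape. Purely for illustration, a hypothetical helper (not duckrun's actual parser) showing how such a string splits apart:

```python
# Hypothetical helper, not duckrun's internal parser: splits the
# "workspace/lakehouse.lakehouse/schema" strings shown in the diff above.
def parse_connection(path: str):
    parts = path.split("/")
    if len(parts) == 1:
        return parts[0], None, None  # workspace-only: management mode
    workspace, lakehouse = parts[0], parts[1]
    if lakehouse.endswith(".lakehouse"):
        lakehouse = lakehouse[: -len(".lakehouse")]
    schema = parts[2] if len(parts) > 2 else "dbo"  # default per the README
    return workspace, lakehouse, schema

print(parse_connection("Data Analytics/Sales Data.lakehouse/analytics"))
# ('Data Analytics', 'Sales Data', 'analytics')
```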
@@ -6,11 +6,10 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
 
 **Requirements:**
 - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
-- Workspace and lakehouse names cannot contain spaces
+- **Workspace names with spaces are fully supported!** ✅
 
-**Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
 
-**Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
+**Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
 
 ## What It Does
 
@@ -29,23 +28,48 @@ pip install duckrun[local]
 
 ## Quick Start
 
+### Simple Example for New Users
+
+```python
+import duckrun
+
+# Connect to a workspace and manage lakehouses
+con = duckrun.connect('My Workspace')
+con.list_lakehouses() # See what lakehouses exist
+con.create_lakehouse_if_not_exists('data') # Create if needed
+
+# Connect to a specific lakehouse and query data
+con = duckrun.connect("My Workspace/data.lakehouse/dbo")
+con.sql("SELECT * FROM my_table LIMIT 10").show()
+```
+
+### Full Feature Overview
+
 ```python
 import duckrun
 
-# Connect to your Fabric lakehouse with a specific schema
-con = duckrun.connect("my_workspace/my_lakehouse.lakehouse/dbo")
+# 1. Workspace Management (list and create lakehouses)
+ws = duckrun.connect("My Workspace")
+lakehouses = ws.list_lakehouses() # Returns list of lakehouse names
+ws.create_lakehouse_if_not_exists("New_Lakehouse")
+
+# 2. Connect to lakehouse with a specific schema
+con = duckrun.connect("My Workspace/MyLakehouse.lakehouse/dbo")
+
+# Workspace names with spaces are supported!
+con = duckrun.connect("Data Analytics/SalesData.lakehouse/analytics")
 
 # Schema defaults to 'dbo' if not specified (scans all schemas)
 # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
-con = duckrun.connect("my_workspace/my_lakehouse.lakehouse")
+con = duckrun.connect("My Workspace/My_Lakehouse.lakehouse")
 
-# Explore data
+# 3. Explore data
 con.sql("SELECT * FROM my_table LIMIT 10").show()
 
-# Write to Delta tables (Spark-style API)
+# 4. Write to Delta tables (Spark-style API)
 con.sql("SELECT * FROM source").write.mode("overwrite").saveAsTable("target")
 
-# Upload/download files to/from OneLake Files
+# 5. Upload/download files to/from OneLake Files
 con.copy("./local_folder", "target_folder") # Upload files
 con.download("target_folder", "./downloaded") # Download files
 ```
@@ -55,15 +79,23 @@ That's it! No `sql_folder` needed for data exploration.
 ## Connection Format
 
 ```python
-# With schema (recommended for better performance)
-con = duckrun.connect("workspace/lakehouse.lakehouse/schema")
+# Workspace management (list and create lakehouses)
+ws = duckrun.connect("My Workspace")
+ws.list_lakehouses() # Returns: ['lakehouse1', 'lakehouse2', ...]
+ws.create_lakehouse_if_not_exists("New Lakehouse")
+
+# Lakehouse connection with schema (recommended for best performance)
+con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
+
+# Supports workspace names with spaces!
+con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
 
 # Without schema (defaults to 'dbo', scans all schemas)
 # ⚠️ This can be slow for large lakehouses!
-con = duckrun.connect("workspace/lakehouse.lakehouse")
+con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
 
-# With options
-con = duckrun.connect("workspace/lakehouse.lakehouse/dbo", sql_folder="./sql")
+# With SQL folder for pipeline orchestration
+con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo", sql_folder="./sql")
 ```
 
 ### Multi-Schema Support
@@ -751,7 +751,6 @@ class WorkspaceConnection:
             lakehouses = response.json().get("value", [])
             lakehouse_names = [lh.get("displayName", "") for lh in lakehouses]
             
-            print(f"Found {len(lakehouse_names)} lakehouses: {lakehouse_names}")
             return lakehouse_names
             
         except Exception as e:
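The only change in this hunk is that `list_lakehouses` no longer prints its results before returning them. For orientation, a hedged sketch of how the surrounding method might read after the change; the endpoint URL, auth handling, signature, and error fallback are assumptions, and only the list construction and the now-silent return come from the diff:

```python
# Sketch under assumptions: the Fabric REST endpoint, token handling, and
# exception fallback below are illustrative, not copied from duckrun.
import requests

class WorkspaceConnection:
    def list_lakehouses(self) -> list[str]:
        try:
            url = (f"https://api.fabric.microsoft.com/v1/workspaces/"
                   f"{self.workspace_id}/lakehouses")
            response = requests.get(
                url, headers={"Authorization": f"Bearer {self.token}"}
            )
            response.raise_for_status()

            lakehouses = response.json().get("value", [])
            lakehouse_names = [lh.get("displayName", "") for lh in lakehouses]

            # As of 0.2.7, names are returned without being printed
            return lakehouse_names

        except Exception as e:
            print(f"Failed to list lakehouses: {e}")
            return []
```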
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "duckrun"
-version = "0.2.5.dev4"
+version = "0.2.7"
 description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
 readme = "README.md"
 license = {text = "MIT"}
6 files without changes