duckrun 0.1.5.2__tar.gz → 0.1.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.5.2
3
+ Version: 0.1.5.4
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License-Expression: MIT
@@ -111,16 +111,17 @@ class Duckrun:
111
111
 
112
112
  Usage:
113
113
  # For pipelines:
114
- dr = Duckrun.connect(workspace, lakehouse, schema, sql_folder)
114
+ dr = Duckrun.connect("workspace/lakehouse.lakehouse/schema", sql_folder="./sql")
115
+ dr = Duckrun.connect("workspace/lakehouse.lakehouse") # defaults to dbo schema
115
116
  dr.run(pipeline)
116
117
 
117
118
  # For data exploration with Spark-style API:
118
- dr = Duckrun.connect(workspace, lakehouse, schema)
119
+ dr = Duckrun.connect("workspace/lakehouse.lakehouse")
119
120
  dr.sql("SELECT * FROM table").show()
120
121
  dr.sql("SELECT 43").write.mode("append").saveAsTable("test")
121
122
  """
122
123
 
123
- def __init__(self, workspace: str, lakehouse_name: str, schema: str,
124
+ def __init__(self, workspace: str, lakehouse_name: str, schema: str = "dbo",
124
125
  sql_folder: Optional[str] = None, compaction_threshold: int = 10):
125
126
  self.workspace = workspace
126
127
  self.lakehouse_name = lakehouse_name
@@ -133,10 +134,60 @@ class Duckrun:
133
134
  self._attach_lakehouse()
134
135
 
135
136
  @classmethod
136
- def connect(cls, workspace: str, lakehouse_name: str, schema: str,
137
- sql_folder: Optional[str] = None, compaction_threshold: int = 100):
138
- """Create and connect to lakehouse"""
137
+ def connect(cls, workspace: Union[str, None] = None, lakehouse_name: Optional[str] = None,
138
+ schema: str = "dbo", sql_folder: Optional[str] = None,
139
+ compaction_threshold: int = 100):
140
+ """
141
+ Create and connect to lakehouse.
142
+
143
+ Supports two formats:
144
+ 1. Compact: connect("ws/lh.lakehouse/schema") or connect("ws/lh.lakehouse")
145
+ 2. Traditional: connect("ws", "lh", "schema") or connect("ws", "lh")
146
+
147
+ Schema defaults to "dbo" if not specified.
148
+
149
+ Examples:
150
+ dr = Duckrun.connect("myworkspace/mylakehouse.lakehouse/bronze")
151
+ dr = Duckrun.connect("myworkspace/mylakehouse.lakehouse") # uses dbo
152
+ dr = Duckrun.connect("myworkspace", "mylakehouse", "bronze")
153
+ dr = Duckrun.connect("myworkspace", "mylakehouse") # uses dbo
154
+ dr = Duckrun.connect("ws/lh.lakehouse", sql_folder="./sql")
155
+ """
139
156
  print("Connecting to Lakehouse...")
157
+
158
+ # Check if using compact format: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
159
+ if workspace and "/" in workspace and lakehouse_name is None:
160
+ parts = workspace.split("/")
161
+ if len(parts) == 2:
162
+ # Format: "ws/lh.lakehouse" (schema will use default)
163
+ workspace, lakehouse_name = parts
164
+ # schema already has default value "dbo"
165
+ print(f"ℹ️ No schema specified. Using default schema 'dbo'.")
166
+ print(f" To specify a schema, use: {workspace}/{lakehouse_name}.lakehouse/schema")
167
+ print(f" Note: Scanning all schemas will be added in a future update.\n")
168
+ elif len(parts) == 3:
169
+ # Format: "ws/lh.lakehouse/schema"
170
+ workspace, lakehouse_name, schema = parts
171
+ else:
172
+ raise ValueError(
173
+ f"Invalid connection string format: '{workspace}'. "
174
+ "Expected format: 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'"
175
+ )
176
+
177
+ # Remove .lakehouse suffix if present
178
+ if lakehouse_name.endswith(".lakehouse"):
179
+ lakehouse_name = lakehouse_name[:-10]
180
+
181
+ # Validate all required parameters are present
182
+ if not workspace or not lakehouse_name:
183
+ raise ValueError(
184
+ "Missing required parameters. Use either:\n"
185
+ " connect('workspace/lakehouse.lakehouse/schema')\n"
186
+ " connect('workspace/lakehouse.lakehouse') # defaults to dbo\n"
187
+ " connect('workspace', 'lakehouse', 'schema')\n"
188
+ " connect('workspace', 'lakehouse') # defaults to dbo"
189
+ )
190
+
140
191
  return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold)
141
192
 
142
193
  def _get_storage_token(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.5.2
3
+ Version: 0.1.5.4
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "duckrun"
7
- version = "0.1.5.2"
7
+ version = "0.1.5.4"
8
8
  description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
9
9
  readme = "README.md"
10
10
  license = "MIT"
File without changes
File without changes
File without changes
File without changes