duckrun 0.1.5.3__tar.gz → 0.1.5.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/PKG-INFO +25 -17
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/README.md +24 -16
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/duckrun/core.py +3 -0
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/duckrun.egg-info/PKG-INFO +25 -17
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/pyproject.toml +1 -1
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/LICENSE +0 -0
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/duckrun/__init__.py +0 -0
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.1.5.3 → duckrun-0.1.5.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: duckrun
|
3
|
-
Version: 0.1.5.3
|
3
|
+
Version: 0.1.5.5
|
4
4
|
Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
|
5
5
|
Author: mim
|
6
6
|
License-Expression: MIT
|
@@ -39,11 +39,10 @@ pip install duckrun
|
|
39
39
|
import duckrun
|
40
40
|
|
41
41
|
# Connect to your Fabric lakehouse
|
42
|
-
con = duckrun.connect(
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
)
|
42
|
+
con = duckrun.connect("my_workspace/my_lakehouse.lakehouse/dbo")
|
43
|
+
|
44
|
+
# Schema defaults to 'dbo' if not specified
|
45
|
+
con = duckrun.connect("my_workspace/my_lakehouse.lakehouse")
|
47
46
|
|
48
47
|
# Explore data
|
49
48
|
con.sql("SELECT * FROM my_table LIMIT 10").show()
|
@@ -54,6 +53,21 @@ con.sql("SELECT * FROM source").write.mode("overwrite").saveAsTable("target")
|
|
54
53
|
|
55
54
|
That's it! No `sql_folder` needed for data exploration.
|
56
55
|
|
56
|
+
## Connection Format
|
57
|
+
|
58
|
+
```python
|
59
|
+
# With schema
|
60
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/schema")
|
61
|
+
|
62
|
+
# Without schema (uses 'dbo' by default)
|
63
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse")
|
64
|
+
|
65
|
+
# With options
|
66
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/dbo", sql_folder="./sql")
|
67
|
+
```
|
68
|
+
|
69
|
+
**Note:** When schema is not specified, Duckrun defaults to `dbo`. Multi-schema scanning will be added in a future update.
|
70
|
+
|
57
71
|
## Two Ways to Use Duckrun
|
58
72
|
|
59
73
|
### 1. Data Exploration (Spark-Style API)
|
@@ -61,7 +75,7 @@ That's it! No `sql_folder` needed for data exploration.
|
|
61
75
|
Perfect for ad-hoc analysis and interactive notebooks:
|
62
76
|
|
63
77
|
```python
|
64
|
-
con = duckrun.connect("workspace
|
78
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
|
65
79
|
|
66
80
|
# Query existing tables
|
67
81
|
con.sql("SELECT * FROM sales WHERE year = 2024").show()
|
@@ -90,9 +104,7 @@ For production workflows with reusable SQL and Python tasks:
|
|
90
104
|
|
91
105
|
```python
|
92
106
|
con = duckrun.connect(
|
93
|
-
|
94
|
-
lakehouse_name="my_lakehouse",
|
95
|
-
schema="dbo",
|
107
|
+
"my_workspace/my_lakehouse.lakehouse/dbo",
|
96
108
|
sql_folder="./sql" # folder with .sql and .py files
|
97
109
|
)
|
98
110
|
|
@@ -185,9 +197,7 @@ Load tasks from GitHub or any URL:
|
|
185
197
|
|
186
198
|
```python
|
187
199
|
con = duckrun.connect(
|
188
|
-
|
189
|
-
lakehouse_name="Sales",
|
190
|
-
schema="dbo",
|
200
|
+
"Analytics/Sales.lakehouse/dbo",
|
191
201
|
sql_folder="https://raw.githubusercontent.com/user/repo/main/sql"
|
192
202
|
)
|
193
203
|
```
|
@@ -242,9 +252,7 @@ Customize compaction threshold:
|
|
242
252
|
|
243
253
|
```python
|
244
254
|
con = duckrun.connect(
|
245
|
-
|
246
|
-
lakehouse_name="lakehouse",
|
247
|
-
schema="dbo",
|
255
|
+
"workspace/lakehouse.lakehouse/dbo",
|
248
256
|
compaction_threshold=50 # compact after 50 files
|
249
257
|
)
|
250
258
|
```
|
@@ -255,7 +263,7 @@ con = duckrun.connect(
|
|
255
263
|
import duckrun
|
256
264
|
|
257
265
|
# Connect
|
258
|
-
con = duckrun.connect("Analytics
|
266
|
+
con = duckrun.connect("Analytics/Sales.lakehouse/dbo", sql_folder="./sql")
|
259
267
|
|
260
268
|
# Pipeline with mixed tasks
|
261
269
|
pipeline = [
|
@@ -22,11 +22,10 @@ pip install duckrun
|
|
22
22
|
import duckrun
|
23
23
|
|
24
24
|
# Connect to your Fabric lakehouse
|
25
|
-
con = duckrun.connect(
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
)
|
25
|
+
con = duckrun.connect("my_workspace/my_lakehouse.lakehouse/dbo")
|
26
|
+
|
27
|
+
# Schema defaults to 'dbo' if not specified
|
28
|
+
con = duckrun.connect("my_workspace/my_lakehouse.lakehouse")
|
30
29
|
|
31
30
|
# Explore data
|
32
31
|
con.sql("SELECT * FROM my_table LIMIT 10").show()
|
@@ -37,6 +36,21 @@ con.sql("SELECT * FROM source").write.mode("overwrite").saveAsTable("target")
|
|
37
36
|
|
38
37
|
That's it! No `sql_folder` needed for data exploration.
|
39
38
|
|
39
|
+
## Connection Format
|
40
|
+
|
41
|
+
```python
|
42
|
+
# With schema
|
43
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/schema")
|
44
|
+
|
45
|
+
# Without schema (uses 'dbo' by default)
|
46
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse")
|
47
|
+
|
48
|
+
# With options
|
49
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/dbo", sql_folder="./sql")
|
50
|
+
```
|
51
|
+
|
52
|
+
**Note:** When schema is not specified, Duckrun defaults to `dbo`. Multi-schema scanning will be added in a future update.
|
53
|
+
|
40
54
|
## Two Ways to Use Duckrun
|
41
55
|
|
42
56
|
### 1. Data Exploration (Spark-Style API)
|
@@ -44,7 +58,7 @@ That's it! No `sql_folder` needed for data exploration.
|
|
44
58
|
Perfect for ad-hoc analysis and interactive notebooks:
|
45
59
|
|
46
60
|
```python
|
47
|
-
con = duckrun.connect("workspace
|
61
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
|
48
62
|
|
49
63
|
# Query existing tables
|
50
64
|
con.sql("SELECT * FROM sales WHERE year = 2024").show()
|
@@ -73,9 +87,7 @@ For production workflows with reusable SQL and Python tasks:
|
|
73
87
|
|
74
88
|
```python
|
75
89
|
con = duckrun.connect(
|
76
|
-
|
77
|
-
lakehouse_name="my_lakehouse",
|
78
|
-
schema="dbo",
|
90
|
+
"my_workspace/my_lakehouse.lakehouse/dbo",
|
79
91
|
sql_folder="./sql" # folder with .sql and .py files
|
80
92
|
)
|
81
93
|
|
@@ -168,9 +180,7 @@ Load tasks from GitHub or any URL:
|
|
168
180
|
|
169
181
|
```python
|
170
182
|
con = duckrun.connect(
|
171
|
-
|
172
|
-
lakehouse_name="Sales",
|
173
|
-
schema="dbo",
|
183
|
+
"Analytics/Sales.lakehouse/dbo",
|
174
184
|
sql_folder="https://raw.githubusercontent.com/user/repo/main/sql"
|
175
185
|
)
|
176
186
|
```
|
@@ -225,9 +235,7 @@ Customize compaction threshold:
|
|
225
235
|
|
226
236
|
```python
|
227
237
|
con = duckrun.connect(
|
228
|
-
|
229
|
-
lakehouse_name="lakehouse",
|
230
|
-
schema="dbo",
|
238
|
+
"workspace/lakehouse.lakehouse/dbo",
|
231
239
|
compaction_threshold=50 # compact after 50 files
|
232
240
|
)
|
233
241
|
```
|
@@ -238,7 +246,7 @@ con = duckrun.connect(
|
|
238
246
|
import duckrun
|
239
247
|
|
240
248
|
# Connect
|
241
|
-
con = duckrun.connect("Analytics
|
249
|
+
con = duckrun.connect("Analytics/Sales.lakehouse/dbo", sql_folder="./sql")
|
242
250
|
|
243
251
|
# Pipeline with mixed tasks
|
244
252
|
pipeline = [
|
@@ -162,6 +162,9 @@ class Duckrun:
|
|
162
162
|
# Format: "ws/lh.lakehouse" (schema will use default)
|
163
163
|
workspace, lakehouse_name = parts
|
164
164
|
# schema already has default value "dbo"
|
165
|
+
print(f"ℹ️ No schema specified. Using default schema 'dbo'.")
|
166
|
+
print(f" To specify a schema, use: {workspace}/{lakehouse_name}.lakehouse/schema")
|
167
|
+
print(f" Note: Scanning all schemas will be added in a future update.\n")
|
165
168
|
elif len(parts) == 3:
|
166
169
|
# Format: "ws/lh.lakehouse/schema"
|
167
170
|
workspace, lakehouse_name, schema = parts
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: duckrun
|
3
|
-
Version: 0.1.5.3
|
3
|
+
Version: 0.1.5.5
|
4
4
|
Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
|
5
5
|
Author: mim
|
6
6
|
License-Expression: MIT
|
@@ -39,11 +39,10 @@ pip install duckrun
|
|
39
39
|
import duckrun
|
40
40
|
|
41
41
|
# Connect to your Fabric lakehouse
|
42
|
-
con = duckrun.connect(
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
)
|
42
|
+
con = duckrun.connect("my_workspace/my_lakehouse.lakehouse/dbo")
|
43
|
+
|
44
|
+
# Schema defaults to 'dbo' if not specified
|
45
|
+
con = duckrun.connect("my_workspace/my_lakehouse.lakehouse")
|
47
46
|
|
48
47
|
# Explore data
|
49
48
|
con.sql("SELECT * FROM my_table LIMIT 10").show()
|
@@ -54,6 +53,21 @@ con.sql("SELECT * FROM source").write.mode("overwrite").saveAsTable("target")
|
|
54
53
|
|
55
54
|
That's it! No `sql_folder` needed for data exploration.
|
56
55
|
|
56
|
+
## Connection Format
|
57
|
+
|
58
|
+
```python
|
59
|
+
# With schema
|
60
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/schema")
|
61
|
+
|
62
|
+
# Without schema (uses 'dbo' by default)
|
63
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse")
|
64
|
+
|
65
|
+
# With options
|
66
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/dbo", sql_folder="./sql")
|
67
|
+
```
|
68
|
+
|
69
|
+
**Note:** When schema is not specified, Duckrun defaults to `dbo`. Multi-schema scanning will be added in a future update.
|
70
|
+
|
57
71
|
## Two Ways to Use Duckrun
|
58
72
|
|
59
73
|
### 1. Data Exploration (Spark-Style API)
|
@@ -61,7 +75,7 @@ That's it! No `sql_folder` needed for data exploration.
|
|
61
75
|
Perfect for ad-hoc analysis and interactive notebooks:
|
62
76
|
|
63
77
|
```python
|
64
|
-
con = duckrun.connect("workspace
|
78
|
+
con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
|
65
79
|
|
66
80
|
# Query existing tables
|
67
81
|
con.sql("SELECT * FROM sales WHERE year = 2024").show()
|
@@ -90,9 +104,7 @@ For production workflows with reusable SQL and Python tasks:
|
|
90
104
|
|
91
105
|
```python
|
92
106
|
con = duckrun.connect(
|
93
|
-
|
94
|
-
lakehouse_name="my_lakehouse",
|
95
|
-
schema="dbo",
|
107
|
+
"my_workspace/my_lakehouse.lakehouse/dbo",
|
96
108
|
sql_folder="./sql" # folder with .sql and .py files
|
97
109
|
)
|
98
110
|
|
@@ -185,9 +197,7 @@ Load tasks from GitHub or any URL:
|
|
185
197
|
|
186
198
|
```python
|
187
199
|
con = duckrun.connect(
|
188
|
-
|
189
|
-
lakehouse_name="Sales",
|
190
|
-
schema="dbo",
|
200
|
+
"Analytics/Sales.lakehouse/dbo",
|
191
201
|
sql_folder="https://raw.githubusercontent.com/user/repo/main/sql"
|
192
202
|
)
|
193
203
|
```
|
@@ -242,9 +252,7 @@ Customize compaction threshold:
|
|
242
252
|
|
243
253
|
```python
|
244
254
|
con = duckrun.connect(
|
245
|
-
|
246
|
-
lakehouse_name="lakehouse",
|
247
|
-
schema="dbo",
|
255
|
+
"workspace/lakehouse.lakehouse/dbo",
|
248
256
|
compaction_threshold=50 # compact after 50 files
|
249
257
|
)
|
250
258
|
```
|
@@ -255,7 +263,7 @@ con = duckrun.connect(
|
|
255
263
|
import duckrun
|
256
264
|
|
257
265
|
# Connect
|
258
|
-
con = duckrun.connect("Analytics
|
266
|
+
con = duckrun.connect("Analytics/Sales.lakehouse/dbo", sql_folder="./sql")
|
259
267
|
|
260
268
|
# Pipeline with mixed tasks
|
261
269
|
pipeline = [
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|