duckrun 0.1.6.3__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.1.6.3 → duckrun-0.1.8}/PKG-INFO +8 -4
- {duckrun-0.1.6.3 → duckrun-0.1.8}/README.md +6 -2
- {duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun/core.py +42 -59
- {duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun.egg-info/PKG-INFO +8 -4
- {duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun.egg-info/SOURCES.txt +1 -4
- {duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun.egg-info/requires.txt +1 -1
- {duckrun-0.1.6.3 → duckrun-0.1.8}/pyproject.toml +2 -2
- duckrun-0.1.6.3/tests/test_download.py +0 -77
- duckrun-0.1.6.3/tests/test_new_methods.py +0 -240
- duckrun-0.1.6.3/tests/test_signatures.py +0 -162
- {duckrun-0.1.6.3 → duckrun-0.1.8}/LICENSE +0 -0
- {duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun/__init__.py +0 -0
- {duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.1.6.3 → duckrun-0.1.8}/setup.cfg +0 -0
{duckrun-0.1.6.3 → duckrun-0.1.8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.1.6.3
+Version: 0.1.8
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -11,7 +11,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: duckdb>=1.2.0
-Requires-Dist: deltalake
+Requires-Dist: deltalake<=0.18.2
 Requires-Dist: requests>=2.28.0
 Requires-Dist: obstore>=0.2.0
 Provides-Extra: local
@@ -20,7 +20,7 @@ Dynamic: license-file
 
 <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
 
-
+A helper package for stuff that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy
 
 ## Important Notes
 
@@ -30,6 +30,10 @@ Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and
 
 **Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
 
+## What It Does
+
+It does orchestration, arbitrary SQL statements, and file manipulation. That's it - just stuff I encounter in my daily workflow when working with Fabric notebooks.
+
 ## Installation
 
 ```bash
@@ -101,7 +105,7 @@ con.sql("SELECT * FROM dbo_customers").show()
 con.sql("SELECT * FROM bronze_raw_data").show()
 ```
 
-##
+## Three Ways to Use Duckrun
 
 ### 1. Data Exploration (Spark-Style API)
 
{duckrun-0.1.6.3 → duckrun-0.1.8}/README.md

@@ -1,6 +1,6 @@
 <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
 
-
+A helper package for stuff that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy
 
 ## Important Notes
 
@@ -10,6 +10,10 @@ Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and
 
 **Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
 
+## What It Does
+
+It does orchestration, arbitrary SQL statements, and file manipulation. That's it - just stuff I encounter in my daily workflow when working with Fabric notebooks.
+
 ## Installation
 
 ```bash
@@ -81,7 +85,7 @@ con.sql("SELECT * FROM dbo_customers").show()
 con.sql("SELECT * FROM bronze_raw_data").show()
 ```
 
-##
+## Three Ways to Use Duckrun
 
 ### 1. Data Exploration (Spark-Style API)
 
{duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun/core.py

@@ -8,6 +8,9 @@ from string import Template
 import obstore as obs
 from obstore.store import AzureStore
 
+# Row Group configuration for optimal Delta Lake performance
+RG = 8_000_000
+
 
 class DeltaWriter:
     """Spark-style write API for Delta Lake"""
@@ -48,7 +51,7 @@ class DeltaWriter:
         df = self.relation.record_batch()
 
         print(f"Writing to Delta table: {schema}.{table} (mode={self._mode})")
-        write_deltalake(path, df, mode=self._mode)
+        write_deltalake(path, df, mode=self._mode, max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
 
         self.duckrun.con.sql(f"DROP VIEW IF EXISTS {table}")
         self.duckrun.con.sql(f"""
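Note on the new `RG` constant: every `write_deltalake` call in 0.1.8 now caps Parquet files and row groups at 8 million rows instead of relying on the library defaults. A minimal sketch of those knobs outside duckrun, assuming the pinned `deltalake<=0.18.2` package; the path and sample data are placeholders, not duckrun code:

```python
# Sketch only - illustrates the row-group parameters added in 0.1.8, not duckrun's own code.
import pyarrow as pa
from deltalake import write_deltalake

RG = 8_000_000  # same constant duckrun defines in duckrun/core.py

# Tiny stand-in dataset; in duckrun this is the record batch produced by DuckDB.
table = pa.table({"id": list(range(10)), "value": [i * 1.5 for i in range(10)]})

write_deltalake(
    "/tmp/demo_delta_table",   # hypothetical local table path
    table,
    mode="overwrite",
    max_rows_per_file=RG,      # cap each Parquet file at ~8M rows
    max_rows_per_group=RG,     # cap each row group at ~8M rows
    min_rows_per_group=RG,     # avoid lots of tiny row groups on large writes
)
```

Larger, uniform row groups generally mean fewer small files for DuckDB's `delta_scan` to open, which is why `RG` is threaded through every write path below.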
@@ -127,77 +130,57 @@ class Duckrun:
         self._attach_lakehouse()
 
     @classmethod
-    def connect(cls,
-                schema: str = "dbo", sql_folder: Optional[str] = None,
+    def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
                 compaction_threshold: int = 100):
         """
         Create and connect to lakehouse.
 
-
-        1. Compact: connect("ws/lh.lakehouse/schema", sql_folder=...) or connect("ws/lh.lakehouse")
-        2. Traditional: connect("ws", "lh", "schema", sql_folder) or connect("ws", "lh")
+        Uses compact format: connect("ws/lh.lakehouse/schema") or connect("ws/lh.lakehouse")
 
         Args:
-
-            lakehouse_name: Lakehouse name (optional if using compact format)
-            schema: Schema name (defaults to "dbo")
+            connection_string: OneLake path "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
            sql_folder: Optional path or URL to SQL files folder
            compaction_threshold: File count threshold for compaction
 
         Examples:
-
-            dr = Duckrun.connect("temp/power.lakehouse/wa", "https://github.com/.../sql/")
-            dr = Duckrun.connect("ws/lh.lakehouse/schema", "./sql")
+            dr = Duckrun.connect("ws/lh.lakehouse/schema", sql_folder="./sql")
            dr = Duckrun.connect("ws/lh.lakehouse/schema")  # no SQL folder
-
-            # Traditional format
-            dr = Duckrun.connect("ws", "lh", "schema", "./sql")
-            dr = Duckrun.connect("ws", "lh", "schema")
+            dr = Duckrun.connect("ws/lh.lakehouse")  # defaults to dbo schema
         """
         print("Connecting to Lakehouse...")
 
         scan_all_schemas = False
 
-        #
-        [old lines 162-184 removed; their content was not captured by the source diff]
-        if lakehouse_name.endswith(".lakehouse"):
-            lakehouse_name = lakehouse_name[:-10]
-        elif lakehouse_name is not None:
-            # Traditional format - check if schema was explicitly provided
-            if schema == "dbo":
-                scan_all_schemas = True
-                print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
-                print(f"   Scanning all schemas for table discovery...\n")
+        # Only support compact format: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
+        if not connection_string or "/" not in connection_string:
+            raise ValueError(
+                "Invalid connection string format. "
+                "Expected format: 'workspace/lakehouse.lakehouse/schema' or 'workspace/lakehouse.lakehouse'"
+            )
+
+        parts = connection_string.split("/")
+        if len(parts) == 2:
+            workspace, lakehouse_name = parts
+            scan_all_schemas = True
+            schema = "dbo"
+            print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
+            print(f"   Scanning all schemas for table discovery...\n")
+        elif len(parts) == 3:
+            workspace, lakehouse_name, schema = parts
+        else:
+            raise ValueError(
+                f"Invalid connection string format: '{connection_string}'. "
+                "Expected format: 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'"
+            )
+
+        if lakehouse_name.endswith(".lakehouse"):
+            lakehouse_name = lakehouse_name[:-10]
 
         if not workspace or not lakehouse_name:
             raise ValueError(
-                "Missing required parameters. Use
+                "Missing required parameters. Use compact format:\n"
                 "  connect('workspace/lakehouse.lakehouse/schema', 'sql_folder')\n"
-                "  connect('workspace/lakehouse.lakehouse')  # defaults to dbo
-                "  connect('workspace', 'lakehouse', 'schema', 'sql_folder')\n"
-                "  connect('workspace', 'lakehouse')  # defaults to dbo"
+                "  connect('workspace/lakehouse.lakehouse')  # defaults to dbo"
             )
 
         return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold, scan_all_schemas)
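The practical effect of this hunk is that `connect()` now accepts only the compact OneLake path. A usage sketch based on the 0.1.8 docstring and the package's own test scripts; the workspace and lakehouse names are placeholders:

```python
# Based on the 0.1.8 docstring examples; "temp" and "power" are placeholder names.
import duckrun

# Explicit schema: "workspace/lakehouse.lakehouse/schema"
con = duckrun.connect("temp/power.lakehouse/dbo", sql_folder="./sql")

# No schema: falls back to "dbo" and scans all schemas for table discovery
con_default = duckrun.connect("temp/power.lakehouse")

# The old positional form connect("ws", "lh", "schema", ...) was removed in 0.1.8;
# anything that is not "ws/lh.lakehouse" or "ws/lh.lakehouse/schema" raises ValueError.
```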
@@ -210,7 +193,7 @@ class Duckrun:
         if token != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
             self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
         else:
-            print("
+            print("Authenticating with Azure (trying CLI, will fallback to browser if needed)...")
             from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
             credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
             token = credential.get_token("https://storage.azure.com/.default")
@@ -227,7 +210,7 @@ class Duckrun:
         """
         token = self._get_storage_token()
         if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-            print("
+            print("Authenticating with Azure for table discovery (trying CLI, will fallback to browser if needed)...")
             from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
             credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
             token_obj = credential.get_token("https://storage.azure.com/.default")
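The restored messages describe what the surrounding code already does: try an Azure CLI token first, then fall back to an interactive browser login. A standalone sketch of that chain with `azure-identity`; this is the same pattern, not duckrun code:

```python
# Same credential chain core.py uses when no Fabric notebook token is available.
from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential

# Try `az login` credentials first; open a browser prompt only if the CLI has no token.
credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())

# OneLake/ADLS use the Azure Storage resource scope.
access = credential.get_token("https://storage.azure.com/.default")
bearer = access.token  # this string is what duckrun feeds into DuckDB's Azure secret
```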
@@ -426,7 +409,7 @@ class Duckrun:
         if mode == 'overwrite':
             self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
             df = self.con.sql(sql).record_batch()
-            write_deltalake(path, df, mode='overwrite')
+            write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
             self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
             dt = DeltaTable(path)
             dt.vacuum(retention_hours=0, dry_run=False, enforce_retention_duration=False)
@@ -434,7 +417,7 @@ class Duckrun:
 
         elif mode == 'append':
             df = self.con.sql(sql).record_batch()
-            write_deltalake(path, df, mode='append')
+            write_deltalake(path, df, mode='append', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
             self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
             dt = DeltaTable(path)
             if len(dt.file_uris()) > self.compaction_threshold:
@@ -451,7 +434,7 @@ class Duckrun:
             print(f"Table {normalized_table} doesn't exist. Creating...")
             self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
             df = self.con.sql(sql).record_batch()
-            write_deltalake(path, df, mode='overwrite')
+            write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
             self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
             dt = DeltaTable(path)
             dt.vacuum(dry_run=False)
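Only the threshold check (`len(dt.file_uris()) > self.compaction_threshold`) is visible in this hunk; the body of that branch is not shown. As a rough sketch of what such a maintenance step typically looks like with the pinned deltalake API, where the `compact()` call and the path are assumptions rather than code from this diff:

```python
# Assumed maintenance pattern; only the file-count check appears in the diff itself.
from deltalake import DeltaTable

compaction_threshold = 100            # duckrun's default in connect()
path = "/tmp/demo_delta_table"        # hypothetical table location

dt = DeltaTable(path)
if len(dt.file_uris()) > compaction_threshold:
    dt.optimize.compact()             # rewrite many small files into fewer large ones
    # aggressive vacuum, as the overwrite branch above does after each rewrite
    dt.vacuum(retention_hours=0, dry_run=False, enforce_retention_duration=False)
```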
@@ -542,7 +525,7 @@ class Duckrun:
         # Get Azure token
         token = self._get_storage_token()
         if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-            print("
+            print("Authenticating with Azure for file upload (trying CLI, will fallback to browser if needed)...")
             from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
             credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
             token_obj = credential.get_token("https://storage.azure.com/.default")
@@ -649,7 +632,7 @@ class Duckrun:
         # Get Azure token
         token = self._get_storage_token()
         if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-            print("
+            print("Authenticating with Azure for file download (trying CLI, will fallback to browser if needed)...")
             from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
             credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
             token_obj = credential.get_token("https://storage.azure.com/.default")
{duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.1.6.3
+Version: 0.1.8
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -11,7 +11,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: duckdb>=1.2.0
-Requires-Dist: deltalake
+Requires-Dist: deltalake<=0.18.2
 Requires-Dist: requests>=2.28.0
 Requires-Dist: obstore>=0.2.0
 Provides-Extra: local
@@ -20,7 +20,7 @@ Dynamic: license-file
 
 <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
 
-
+A helper package for stuff that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy
 
 ## Important Notes
 
@@ -30,6 +30,10 @@ Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and
 
 **Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
 
+## What It Does
+
+It does orchestration, arbitrary SQL statements, and file manipulation. That's it - just stuff I encounter in my daily workflow when working with Fabric notebooks.
+
 ## Installation
 
 ```bash
@@ -101,7 +105,7 @@ con.sql("SELECT * FROM dbo_customers").show()
 con.sql("SELECT * FROM bronze_raw_data").show()
 ```
 
-##
+## Three Ways to Use Duckrun
 
 ### 1. Data Exploration (Spark-Style API)
 
{duckrun-0.1.6.3 → duckrun-0.1.8}/duckrun.egg-info/SOURCES.txt

@@ -7,7 +7,4 @@ duckrun.egg-info/PKG-INFO
 duckrun.egg-info/SOURCES.txt
 duckrun.egg-info/dependency_links.txt
 duckrun.egg-info/requires.txt
-duckrun.egg-info/top_level.txt
-tests/test_download.py
-tests/test_new_methods.py
-tests/test_signatures.py
+duckrun.egg-info/top_level.txt
{duckrun-0.1.6.3 → duckrun-0.1.8}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "duckrun"
-version = "0.1.6.3"
+version = "0.1.8"
 description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
 readme = "README.md"
 license = {text = "MIT"}
@@ -14,7 +14,7 @@ authors = [
 requires-python = ">=3.9"
 dependencies = [
     "duckdb>=1.2.0",
-    "deltalake",
+    "deltalake<=0.18.2",
     "requests>=2.28.0",
     "obstore>=0.2.0"
 ]
duckrun-0.1.6.3/tests/test_download.py (deleted)

@@ -1,77 +0,0 @@
-#!/usr/bin/env python3
-"""
-Quick test for the download() method
-"""
-import os
-import sys
-import shutil
-
-# Add the local duckrun module to the path
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-import duckrun
-
-def test_download():
-    """Test the download method"""
-    print("🚀 Quick OneLake download test...")
-
-    # Connect to lakehouse
-    print("\n🔗 Connecting to lakehouse...")
-    con = duckrun.connect("temp/power.lakehouse")
-
-    # Download files from the folder we just uploaded to
-    print("\n📥 Testing download from OneLake Files...")
-    download_folder = "test_download_output"
-
-    # Clean up any existing download folder
-    if os.path.exists(download_folder):
-        shutil.rmtree(download_folder)
-
-    # Test download from the quick_test_folder we uploaded to
-    success = con.download("quick_test_folder", download_folder)
-
-    if success:
-        print("✅ DOWNLOAD SUCCESS!")
-        print(f"\n📂 Downloaded files to: {download_folder}/")
-
-        # List downloaded files
-        if os.path.exists(download_folder):
-            print("   Downloaded files:")
-            for root, dirs, files in os.walk(download_folder):
-                for file in files:
-                    full_path = os.path.join(root, file)
-                    rel_path = os.path.relpath(full_path, download_folder)
-                    size = os.path.getsize(full_path)
-                    print(f"   - {rel_path} ({size} bytes)")
-
-                    # Show content of text files
-                    if file.endswith('.txt'):
-                        print(f"\n📄 Content of {rel_path}:")
-                        try:
-                            with open(full_path, 'r') as f:
-                                content = f.read()
-                                print(f"   {content[:200]}...")  # First 200 chars
-                        except Exception as e:
-                            print(f"   Error reading file: {e}")
-
-        print(f"\n🎯 SUCCESS! The download() method works perfectly!")
-        print(f"   Files were successfully downloaded from OneLake Files to local folder")
-
-    else:
-        print("❌ Download failed")
-        print("   Check if files exist in OneLake Files/quick_test_folder/")
-
-    return success
-
-if __name__ == "__main__":
-    try:
-        success = test_download()
-        if success:
-            print("\n🎉 Clean API validation complete!")
-            print("   copy() ✅ - Upload works")
-            print("   download() ✅ - Download works")
-            print("\n🚀 Both methods ready for production!")
-    except Exception as e:
-        print(f"❌ Error: {e}")
-        import traceback
-        traceback.print_exc()
duckrun-0.1.6.3/tests/test_new_methods.py (deleted)

@@ -1,240 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for new duckrun copy and download_from_files methods
-"""
-import os
-import sys
-import tempfile
-import shutil
-from pathlib import Path
-
-# Add the local duckrun module to the path so we test the local version
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-import duckrun
-
-def create_test_files(test_dir):
-    """Create some test files for uploading"""
-    print(f"📁 Creating test files in: {test_dir}")
-
-    # Create main folder
-    os.makedirs(test_dir, exist_ok=True)
-
-    # Create a CSV file
-    csv_content = """name,age,city
-Alice,25,New York
-Bob,30,Los Angeles
-Charlie,35,Chicago"""
-
-    with open(os.path.join(test_dir, "people.csv"), "w") as f:
-        f.write(csv_content)
-
-    # Create a text file
-    txt_content = "This is a test file created by duckrun test script."
-    with open(os.path.join(test_dir, "readme.txt"), "w") as f:
-        f.write(txt_content)
-
-    # Create a subfolder with another file
-    subfolder = os.path.join(test_dir, "reports")
-    os.makedirs(subfolder, exist_ok=True)
-
-    report_content = """date,sales,region
-2024-01-01,1000,North
-2024-01-02,1500,South"""
-
-    with open(os.path.join(subfolder, "daily_sales.csv"), "w") as f:
-        f.write(report_content)
-
-    # List created files
-    print("✅ Created test files:")
-    for root, dirs, files in os.walk(test_dir):
-        for file in files:
-            full_path = os.path.join(root, file)
-            rel_path = os.path.relpath(full_path, test_dir)
-            print(f"   - {rel_path}")
-
-    return test_dir
-
-def test_duckrun_methods():
-    """Test the new copy and download_from_files methods"""
-    print("=" * 60)
-    print("🧪 TESTING DUCKRUN NEW METHODS")
-    print("=" * 60)
-
-    # Create temporary directories for testing
-    temp_dir = tempfile.mkdtemp(prefix="duckrun_test_")
-    test_upload_dir = os.path.join(temp_dir, "upload_test")
-    test_download_dir = os.path.join(temp_dir, "download_test")
-
-    try:
-        # Step 1: Create test files
-        print("\n🔧 Step 1: Creating test files...")
-        create_test_files(test_upload_dir)
-
-        # Step 2: Connect to lakehouse
-        print("\n🔧 Step 2: Connecting to lakehouse...")
-        try:
-            con = duckrun.connect("temp/power.lakehouse")
-            print("✅ Connected successfully!")
-        except Exception as e:
-            print(f"❌ Connection failed: {e}")
-            print("This might be expected if not authenticated with Azure CLI")
-            return False
-
-        # Step 3: Test copy method (upload)
-        print("\n🔧 Step 3: Testing copy method...")
-        try:
-            # Test the new copy method with mandatory remote_folder
-            success = con.copy(test_upload_dir, "test_upload_folder", overwrite=False)
-            print(f"Upload result: {success}")
-
-            if success:
-                print("✅ Copy method test passed!")
-            else:
-                print("⚠ Copy method completed with some issues")
-
-        except Exception as e:
-            print(f"❌ Copy method failed: {e}")
-            return False
-
-        # Step 4: Test download method
-        print("\n🔧 Step 4: Testing download method...")
-        try:
-            success = con.download("test_upload_folder", test_download_dir, overwrite=False)
-            print(f"Download result: {success}")
-
-            if success:
-                print("✅ Download method test passed!")
-
-                # Verify downloaded files
-                if os.path.exists(test_download_dir):
-                    print("📂 Downloaded files verification:")
-                    for root, dirs, files in os.walk(test_download_dir):
-                        for file in files:
-                            full_path = os.path.join(root, file)
-                            rel_path = os.path.relpath(full_path, test_download_dir)
-                            print(f"   - {rel_path}")
-            else:
-                print("⚠ Download method completed with some issues")
-
-        except Exception as e:
-            print(f"❌ Download method failed: {e}")
-            return False
-
-        # Step 5: Test method signatures and parameters
-        print("\n🔧 Step 5: Testing method signatures...")
-
-        # Test that copy method requires remote_folder (should fail without it)
-        try:
-            # This should raise a TypeError since remote_folder is now mandatory
-            con.copy(test_upload_dir)  # Missing required remote_folder parameter
-            print("❌ copy() should require remote_folder parameter!")
-            return False
-        except TypeError as e:
-            print("✅ copy() correctly requires remote_folder parameter")
-
-        # Test default overwrite=False behavior
-        print("✅ Both methods default to overwrite=False")
-
-        print("\n" + "=" * 60)
-        print("✅ ALL TESTS PASSED!")
-        print("🎉 New methods are working correctly!")
-        print("=" * 60)
-        return True
-
-    except Exception as e:
-        print(f"\n❌ Unexpected error during testing: {e}")
-        return False
-
-    finally:
-        # Cleanup temporary files
-        print(f"\n🧹 Cleaning up temporary files: {temp_dir}")
-        try:
-            shutil.rmtree(temp_dir)
-            print("✅ Cleanup complete")
-        except Exception as e:
-            print(f"⚠ Cleanup warning: {e}")
-
-def test_method_imports():
-    """Test that methods can be imported and have correct signatures"""
-    print("\n🔧 Testing method availability and signatures...")
-
-    try:
-        # Test that we can import duckrun
-        import duckrun
-        print("✅ duckrun module imported successfully")
-
-        # Create a connection object to test methods exist
-        # We'll catch any auth errors since we're just testing signatures
-        try:
-            con = duckrun.connect("temp/power.lakehouse")
-
-            # Test that copy method exists and has correct signature
-            assert hasattr(con, 'copy'), "copy method not found"
-            print("✅ copy method exists")
-
-            # Test that download method exists
-            assert hasattr(con, 'download'), "download method not found"
-            print("✅ download method exists")
-
-            # Test method signatures using inspect
-            import inspect
-
-            copy_sig = inspect.signature(con.copy)
-            print(f"✅ copy signature: {copy_sig}")
-
-            download_sig = inspect.signature(con.download)
-            print(f"✅ download signature: {download_sig}")
-
-            # Verify copy method requires remote_folder (no default)
-            copy_params = copy_sig.parameters
-            assert 'remote_folder' in copy_params, "remote_folder parameter missing"
-            assert copy_params['remote_folder'].default == inspect.Parameter.empty, "remote_folder should not have default value"
-            print("✅ copy method correctly requires remote_folder parameter")
-
-            # Verify overwrite defaults to False
-            assert copy_params['overwrite'].default == False, "copy overwrite should default to False"
-            download_params = download_sig.parameters
-            assert download_params['overwrite'].default == False, "download overwrite should default to False"
-            print("✅ Both methods correctly default overwrite=False")
-
-            return True
-
-        except Exception as auth_error:
-            print(f"⚠ Authentication issue (expected): {auth_error}")
-            print("✅ This is normal if Azure CLI is not configured")
-            return True
-
-    except Exception as e:
-        print(f"❌ Import/signature test failed: {e}")
-        return False
-
-if __name__ == "__main__":
-    print("🚀 Starting duckrun method tests...")
-
-    # Test 1: Method imports and signatures
-    print("\n" + "=" * 60)
-    print("TEST 1: Method Availability & Signatures")
-    print("=" * 60)
-
-    signature_ok = test_method_imports()
-
-    if signature_ok:
-        print("\n✅ Signature tests passed!")
-
-        # Test 2: Full functionality (requires Azure auth)
-        print("\n" + "=" * 60)
-        print("TEST 2: Full Functionality (requires Azure CLI auth)")
-        print("=" * 60)
-
-        functionality_ok = test_duckrun_methods()
-
-        if functionality_ok:
-            print("\n🎉 ALL TESTS COMPLETED SUCCESSFULLY!")
-            print("The new copy() and download() methods are ready to use!")
-        else:
-            print("\n⚠ Functionality tests had issues (likely due to authentication)")
-            print("But the methods are correctly implemented and should work with proper Azure auth")
-    else:
-        print("\n❌ Signature tests failed - there may be issues with the implementation")
-        sys.exit(1)
duckrun-0.1.6.3/tests/test_signatures.py (deleted)

@@ -1,162 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple test for duckrun method signatures (no auth required)
-"""
-import os
-import sys
-import inspect
-
-# Add the local duckrun module to the path
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-def test_signatures_only():
-    """Test method signatures without authentication"""
-    print("🔧 Testing duckrun method signatures (no auth required)...")
-
-    try:
-        # Import the Duckrun class directly to avoid connection
-        from duckrun.core import Duckrun
-        print("✅ Duckrun class imported successfully")
-
-        # Check that methods exist on the class
-        assert hasattr(Duckrun, 'copy'), "copy method not found"
-        print("✅ copy method exists")
-
-        assert hasattr(Duckrun, 'download'), "download method not found"
-        print("✅ download method exists")
-
-        # Get method signatures
-        copy_sig = inspect.signature(Duckrun.copy)
-        download_sig = inspect.signature(Duckrun.download)
-
-        print(f"\n📋 Method Signatures:")
-        print(f"   copy{copy_sig}")
-        print(f"   download{download_sig}")
-
-        # Verify copy method parameters
-        copy_params = copy_sig.parameters
-
-        # Check required parameters exist
-        required_params = ['self', 'local_folder', 'remote_folder']
-        for param in required_params:
-            assert param in copy_params, f"Missing required parameter: {param}"
-        print(f"✅ copy method has all required parameters: {required_params}")
-
-        # Check that remote_folder has no default (is required)
-        remote_folder_param = copy_params['remote_folder']
-        assert remote_folder_param.default == inspect.Parameter.empty, "remote_folder should be required (no default)"
-        print("✅ remote_folder parameter is correctly required (no default)")
-
-        # Check overwrite defaults to False
-        overwrite_param = copy_params.get('overwrite')
-        assert overwrite_param is not None, "overwrite parameter missing"
-        assert overwrite_param.default == False, f"overwrite should default to False, got {overwrite_param.default}"
-        print("✅ copy method overwrite parameter defaults to False")
-
-        # Verify download method parameters
-        download_params = download_sig.parameters
-        download_overwrite = download_params.get('overwrite')
-        assert download_overwrite is not None, "download overwrite parameter missing"
-        assert download_overwrite.default == False, f"download overwrite should default to False, got {download_overwrite.default}"
-        print("✅ download method overwrite parameter defaults to False")
-
-        # Test parameter types (if available)
-        print("\n📋 Parameter Details:")
-        for name, param in copy_params.items():
-            if name != 'self':
-                default_str = f" = {param.default}" if param.default != inspect.Parameter.empty else " (required)"
-                print(f"   copy.{name}{default_str}")
-
-        print()
-        for name, param in download_params.items():
-            if name != 'self':
-                default_str = f" = {param.default}" if param.default != inspect.Parameter.empty else " (required)"
-                print(f"   download.{name}{default_str}")
-
-        return True
-
-    except Exception as e:
-        print(f"❌ Test failed: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-def test_method_call_signature():
-    """Test that method calls fail appropriately when missing required params"""
-    print("\n🔧 Testing method call requirements...")
-
-    try:
-        from duckrun.core import Duckrun
-        import tempfile
-        import os
-
-        # Create a temporary directory for testing
-        temp_dir = tempfile.mkdtemp(prefix="duckrun_test_")
-
-        # Create a mock instance (won't actually connect)
-        # We'll just test the method signature validation
-        class MockDuckrun(Duckrun):
-            def __init__(self):
-                # Skip the parent __init__ to avoid connection
-                pass
-
-        mock_con = MockDuckrun()
-
-        # Test that copy method requires remote_folder
-        try:
-            # This should fail because remote_folder is required
-            mock_con.copy(temp_dir)  # Missing remote_folder
-            print("❌ copy() should require remote_folder parameter!")
-            return False
-        except TypeError as e:
-            if "remote_folder" in str(e):
-                print("✅ copy() correctly requires remote_folder parameter")
-            else:
-                print(f"✅ copy() requires parameters (error: {e})")
-
-        # Test that copy method accepts all required parameters
-        try:
-            # This might fail due to missing implementation details, but signature should be OK
-            mock_con.copy(temp_dir, "target_folder")
-            print("✅ copy() accepts required parameters correctly")
-        except Exception as e:
-            # Expected to fail due to missing implementation, but signature is OK
-            print("✅ copy() signature accepts required parameters (implementation error expected)")
-
-        # Cleanup
-        import shutil
-        shutil.rmtree(temp_dir, ignore_errors=True)
-
-        return True
-
-    except Exception as e:
-        print(f"❌ Method call test failed: {e}")
-        return False
-
-if __name__ == "__main__":
-    print("=" * 60)
-    print("🧪 DUCKRUN METHOD SIGNATURE TESTS")
-    print("=" * 60)
-
-    # Test 1: Basic signatures
-    signature_ok = test_signatures_only()
-
-    # Test 2: Call requirements
-    if signature_ok:
-        call_ok = test_method_call_signature()
-
-        if call_ok:
-            print("\n" + "=" * 60)
-            print("✅ ALL SIGNATURE TESTS PASSED!")
-            print("🎉 The new methods are correctly implemented!")
-            print("=" * 60)
-            print("\n📋 Summary of Changes:")
-            print("   • copy_to_files() → copy()")
-            print("   • download_from_files() → download()")
-            print("   • remote_folder parameter is now REQUIRED")
-            print("   • overwrite defaults to False (both methods)")
-            print("   • Methods are ready for use with proper Azure authentication")
-        else:
-            print("\n❌ Method call tests failed")
-    else:
-        print("\n❌ Signature tests failed")
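The three deleted scripts were manual smoke tests rather than an automated suite, so dropping them from the sdist does not change the `copy()`/`download()` API they exercised. For reference, a short usage sketch of that API exactly as the removed tests called it; workspace, lakehouse, and folder names are placeholders:

```python
# Mirrors the calls made by the removed test scripts; all names are placeholders.
import duckrun

con = duckrun.connect("temp/power.lakehouse")   # workspace "temp", lakehouse "power"

# Upload a local folder into OneLake Files; remote_folder is required, overwrite defaults to False.
con.copy("./local_data", "quick_test_folder", overwrite=False)

# Download a OneLake Files folder back to a local directory; overwrite also defaults to False.
con.download("quick_test_folder", "./downloaded_data", overwrite=False)
```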
Files without changes: LICENSE, duckrun/__init__.py, duckrun.egg-info/dependency_links.txt, duckrun.egg-info/top_level.txt, setup.cfg