duckrun 0.1.6.3__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.6.3
3
+ Version: 0.1.8
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -11,7 +11,7 @@ Requires-Python: >=3.9
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
13
  Requires-Dist: duckdb>=1.2.0
14
- Requires-Dist: deltalake>=0.18.2
14
+ Requires-Dist: deltalake<=0.18.2
15
15
  Requires-Dist: requests>=2.28.0
16
16
  Requires-Dist: obstore>=0.2.0
17
17
  Provides-Extra: local
@@ -20,7 +20,7 @@ Dynamic: license-file
20
20
 
21
21
  <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
22
22
 
23
- Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and Delta Lake.
23
+ A helper package for stuff that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy.
24
24
 
25
25
  ## Important Notes
26
26
 
@@ -30,6 +30,10 @@ Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and
30
30
 
31
31
  **Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
32
32
 
33
+ ## What It Does
34
+
35
+ It does orchestration, arbitrary SQL statements, and file manipulation. That's it - just stuff I encounter in my daily workflow when working with Fabric notebooks.
36
+
33
37
  ## Installation
34
38
 
35
39
  ```bash
@@ -101,7 +105,7 @@ con.sql("SELECT * FROM dbo_customers").show()
101
105
  con.sql("SELECT * FROM bronze_raw_data").show()
102
106
  ```
103
107
 
104
- ## Two Ways to Use Duckrun
108
+ ## Three Ways to Use Duckrun
105
109
 
106
110
  ### 1. Data Exploration (Spark-Style API)
107
111
 
@@ -1,6 +1,6 @@
1
1
  <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
2
2
 
3
- Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and Delta Lake.
3
+ A helper package for stuff that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy.
4
4
 
5
5
  ## Important Notes
6
6
 
@@ -10,6 +10,10 @@ Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and
10
10
 
11
11
  **Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
12
12
 
13
+ ## What It Does
14
+
15
+ It does orchestration, arbitrary SQL statements, and file manipulation. That's it - just stuff I encounter in my daily workflow when working with Fabric notebooks.
16
+
13
17
  ## Installation
14
18
 
15
19
  ```bash
@@ -81,7 +85,7 @@ con.sql("SELECT * FROM dbo_customers").show()
81
85
  con.sql("SELECT * FROM bronze_raw_data").show()
82
86
  ```
83
87
 
84
- ## Two Ways to Use Duckrun
88
+ ## Three Ways to Use Duckrun
85
89
 
86
90
  ### 1. Data Exploration (Spark-Style API)
87
91
 
@@ -8,6 +8,9 @@ from string import Template
8
8
  import obstore as obs
9
9
  from obstore.store import AzureStore
10
10
 
11
+ # Row Group configuration for optimal Delta Lake performance
12
+ RG = 8_000_000
13
+
11
14
 
12
15
  class DeltaWriter:
13
16
  """Spark-style write API for Delta Lake"""
@@ -48,7 +51,7 @@ class DeltaWriter:
48
51
  df = self.relation.record_batch()
49
52
 
50
53
  print(f"Writing to Delta table: {schema}.{table} (mode={self._mode})")
51
- write_deltalake(path, df, mode=self._mode)
54
+ write_deltalake(path, df, mode=self._mode, max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
52
55
 
53
56
  self.duckrun.con.sql(f"DROP VIEW IF EXISTS {table}")
54
57
  self.duckrun.con.sql(f"""
@@ -127,77 +130,57 @@ class Duckrun:
127
130
  self._attach_lakehouse()
128
131
 
129
132
  @classmethod
130
- def connect(cls, workspace: Union[str, None] = None, lakehouse_name: Optional[str] = None,
131
- schema: str = "dbo", sql_folder: Optional[str] = None,
133
+ def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
132
134
  compaction_threshold: int = 100):
133
135
  """
134
136
  Create and connect to lakehouse.
135
137
 
136
- Supports two formats:
137
- 1. Compact: connect("ws/lh.lakehouse/schema", sql_folder=...) or connect("ws/lh.lakehouse")
138
- 2. Traditional: connect("ws", "lh", "schema", sql_folder) or connect("ws", "lh")
138
+ Uses compact format: connect("ws/lh.lakehouse/schema") or connect("ws/lh.lakehouse")
139
139
 
140
140
  Args:
141
- workspace: Workspace name or full path "ws/lh.lakehouse/schema"
142
- lakehouse_name: Lakehouse name (optional if using compact format)
143
- schema: Schema name (defaults to "dbo")
141
+ connection_string: OneLake path "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
144
142
  sql_folder: Optional path or URL to SQL files folder
145
143
  compaction_threshold: File count threshold for compaction
146
144
 
147
145
  Examples:
148
- # Compact format (second param treated as sql_folder if it's a URL/path string)
149
- dr = Duckrun.connect("temp/power.lakehouse/wa", "https://github.com/.../sql/")
150
- dr = Duckrun.connect("ws/lh.lakehouse/schema", "./sql")
146
+ dr = Duckrun.connect("ws/lh.lakehouse/schema", sql_folder="./sql")
151
147
  dr = Duckrun.connect("ws/lh.lakehouse/schema") # no SQL folder
152
-
153
- # Traditional format
154
- dr = Duckrun.connect("ws", "lh", "schema", "./sql")
155
- dr = Duckrun.connect("ws", "lh", "schema")
148
+ dr = Duckrun.connect("ws/lh.lakehouse") # defaults to dbo schema
156
149
  """
157
150
  print("Connecting to Lakehouse...")
158
151
 
159
152
  scan_all_schemas = False
160
153
 
161
- # Check if using compact format: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
162
- # If second param looks like a path/URL and not a lakehouse name, treat it as sql_folder
163
- if workspace and "/" in workspace and (lakehouse_name is None or
164
- (isinstance(lakehouse_name, str) and ('/' in lakehouse_name or lakehouse_name.startswith('http') or lakehouse_name.startswith('.')))):
165
-
166
- # If lakehouse_name looks like a sql_folder, shift it
167
- if lakehouse_name and ('/' in lakehouse_name or lakehouse_name.startswith('http') or lakehouse_name.startswith('.')):
168
- sql_folder = lakehouse_name
169
- lakehouse_name = None
170
-
171
- parts = workspace.split("/")
172
- if len(parts) == 2:
173
- workspace, lakehouse_name = parts
174
- scan_all_schemas = True
175
- print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
176
- print(f" Scanning all schemas for table discovery...\n")
177
- elif len(parts) == 3:
178
- workspace, lakehouse_name, schema = parts
179
- else:
180
- raise ValueError(
181
- f"Invalid connection string format: '{workspace}'. "
182
- "Expected format: 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'"
183
- )
184
-
185
- if lakehouse_name.endswith(".lakehouse"):
186
- lakehouse_name = lakehouse_name[:-10]
187
- elif lakehouse_name is not None:
188
- # Traditional format - check if schema was explicitly provided
189
- if schema == "dbo":
190
- scan_all_schemas = True
191
- print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
192
- print(f" Scanning all schemas for table discovery...\n")
154
+ # Only support compact format: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
155
+ if not connection_string or "/" not in connection_string:
156
+ raise ValueError(
157
+ "Invalid connection string format. "
158
+ "Expected format: 'workspace/lakehouse.lakehouse/schema' or 'workspace/lakehouse.lakehouse'"
159
+ )
160
+
161
+ parts = connection_string.split("/")
162
+ if len(parts) == 2:
163
+ workspace, lakehouse_name = parts
164
+ scan_all_schemas = True
165
+ schema = "dbo"
166
+ print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
167
+ print(f" Scanning all schemas for table discovery...\n")
168
+ elif len(parts) == 3:
169
+ workspace, lakehouse_name, schema = parts
170
+ else:
171
+ raise ValueError(
172
+ f"Invalid connection string format: '{connection_string}'. "
173
+ "Expected format: 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'"
174
+ )
175
+
176
+ if lakehouse_name.endswith(".lakehouse"):
177
+ lakehouse_name = lakehouse_name[:-10]
193
178
 
194
179
  if not workspace or not lakehouse_name:
195
180
  raise ValueError(
196
- "Missing required parameters. Use either:\n"
181
+ "Missing required parameters. Use compact format:\n"
197
182
  " connect('workspace/lakehouse.lakehouse/schema', 'sql_folder')\n"
198
- " connect('workspace/lakehouse.lakehouse') # defaults to dbo\n"
199
- " connect('workspace', 'lakehouse', 'schema', 'sql_folder')\n"
200
- " connect('workspace', 'lakehouse') # defaults to dbo"
183
+ " connect('workspace/lakehouse.lakehouse') # defaults to dbo"
201
184
  )
202
185
 
203
186
  return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold, scan_all_schemas)
@@ -210,7 +193,7 @@ class Duckrun:
210
193
  if token != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
211
194
  self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
212
195
  else:
213
- print("Please login to Azure CLI")
196
+ print("Authenticating with Azure (trying CLI, will fallback to browser if needed)...")
214
197
  from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
215
198
  credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
216
199
  token = credential.get_token("https://storage.azure.com/.default")
@@ -227,7 +210,7 @@ class Duckrun:
227
210
  """
228
211
  token = self._get_storage_token()
229
212
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
230
- print("Getting Azure token for table discovery...")
213
+ print("Authenticating with Azure for table discovery (trying CLI, will fallback to browser if needed)...")
231
214
  from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
232
215
  credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
233
216
  token_obj = credential.get_token("https://storage.azure.com/.default")
@@ -426,7 +409,7 @@ class Duckrun:
426
409
  if mode == 'overwrite':
427
410
  self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
428
411
  df = self.con.sql(sql).record_batch()
429
- write_deltalake(path, df, mode='overwrite')
412
+ write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
430
413
  self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
431
414
  dt = DeltaTable(path)
432
415
  dt.vacuum(retention_hours=0, dry_run=False, enforce_retention_duration=False)
@@ -434,7 +417,7 @@ class Duckrun:
434
417
 
435
418
  elif mode == 'append':
436
419
  df = self.con.sql(sql).record_batch()
437
- write_deltalake(path, df, mode='append')
420
+ write_deltalake(path, df, mode='append', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
438
421
  self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
439
422
  dt = DeltaTable(path)
440
423
  if len(dt.file_uris()) > self.compaction_threshold:
@@ -451,7 +434,7 @@ class Duckrun:
451
434
  print(f"Table {normalized_table} doesn't exist. Creating...")
452
435
  self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
453
436
  df = self.con.sql(sql).record_batch()
454
- write_deltalake(path, df, mode='overwrite')
437
+ write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
455
438
  self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
456
439
  dt = DeltaTable(path)
457
440
  dt.vacuum(dry_run=False)
@@ -542,7 +525,7 @@ class Duckrun:
542
525
  # Get Azure token
543
526
  token = self._get_storage_token()
544
527
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
545
- print("Getting Azure token for file upload...")
528
+ print("Authenticating with Azure for file upload (trying CLI, will fallback to browser if needed)...")
546
529
  from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
547
530
  credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
548
531
  token_obj = credential.get_token("https://storage.azure.com/.default")
@@ -649,7 +632,7 @@ class Duckrun:
649
632
  # Get Azure token
650
633
  token = self._get_storage_token()
651
634
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
652
- print("Getting Azure token for file download...")
635
+ print("Authenticating with Azure for file download (trying CLI, will fallback to browser if needed)...")
653
636
  from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
654
637
  credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
655
638
  token_obj = credential.get_token("https://storage.azure.com/.default")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.6.3
3
+ Version: 0.1.8
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -11,7 +11,7 @@ Requires-Python: >=3.9
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
13
  Requires-Dist: duckdb>=1.2.0
14
- Requires-Dist: deltalake>=0.18.2
14
+ Requires-Dist: deltalake<=0.18.2
15
15
  Requires-Dist: requests>=2.28.0
16
16
  Requires-Dist: obstore>=0.2.0
17
17
  Provides-Extra: local
@@ -20,7 +20,7 @@ Dynamic: license-file
20
20
 
21
21
  <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
22
22
 
23
- Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and Delta Lake.
23
+ A helper package for stuff that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy.
24
24
 
25
25
  ## Important Notes
26
26
 
@@ -30,6 +30,10 @@ Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and
30
30
 
31
31
  **Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
32
32
 
33
+ ## What It Does
34
+
35
+ It does orchestration, arbitrary SQL statements, and file manipulation. That's it - just stuff I encounter in my daily workflow when working with Fabric notebooks.
36
+
33
37
  ## Installation
34
38
 
35
39
  ```bash
@@ -101,7 +105,7 @@ con.sql("SELECT * FROM dbo_customers").show()
101
105
  con.sql("SELECT * FROM bronze_raw_data").show()
102
106
  ```
103
107
 
104
- ## Two Ways to Use Duckrun
108
+ ## Three Ways to Use Duckrun
105
109
 
106
110
  ### 1. Data Exploration (Spark-Style API)
107
111
 
@@ -7,7 +7,4 @@ duckrun.egg-info/PKG-INFO
7
7
  duckrun.egg-info/SOURCES.txt
8
8
  duckrun.egg-info/dependency_links.txt
9
9
  duckrun.egg-info/requires.txt
10
- duckrun.egg-info/top_level.txt
11
- tests/test_download.py
12
- tests/test_new_methods.py
13
- tests/test_signatures.py
10
+ duckrun.egg-info/top_level.txt
@@ -1,5 +1,5 @@
1
1
  duckdb>=1.2.0
2
- deltalake>=0.18.2
2
+ deltalake<=0.18.2
3
3
  requests>=2.28.0
4
4
  obstore>=0.2.0
5
5
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "duckrun"
7
- version = "0.1.6.3"
7
+ version = "0.1.8"
8
8
  description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -14,7 +14,7 @@ authors = [
14
14
  requires-python = ">=3.9"
15
15
  dependencies = [
16
16
  "duckdb>=1.2.0",
17
- "deltalake>=0.18.2",
17
+ "deltalake<=0.18.2",
18
18
  "requests>=2.28.0",
19
19
  "obstore>=0.2.0"
20
20
  ]
@@ -1,77 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Quick test for the download() method
4
- """
5
- import os
6
- import sys
7
- import shutil
8
-
9
- # Add the local duckrun module to the path
10
- sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
-
12
- import duckrun
13
-
14
- def test_download():
15
- """Test the download method"""
16
- print("🚀 Quick OneLake download test...")
17
-
18
- # Connect to lakehouse
19
- print("\n🔗 Connecting to lakehouse...")
20
- con = duckrun.connect("temp/power.lakehouse")
21
-
22
- # Download files from the folder we just uploaded to
23
- print("\n📥 Testing download from OneLake Files...")
24
- download_folder = "test_download_output"
25
-
26
- # Clean up any existing download folder
27
- if os.path.exists(download_folder):
28
- shutil.rmtree(download_folder)
29
-
30
- # Test download from the quick_test_folder we uploaded to
31
- success = con.download("quick_test_folder", download_folder)
32
-
33
- if success:
34
- print("✅ DOWNLOAD SUCCESS!")
35
- print(f"\n📂 Downloaded files to: {download_folder}/")
36
-
37
- # List downloaded files
38
- if os.path.exists(download_folder):
39
- print(" Downloaded files:")
40
- for root, dirs, files in os.walk(download_folder):
41
- for file in files:
42
- full_path = os.path.join(root, file)
43
- rel_path = os.path.relpath(full_path, download_folder)
44
- size = os.path.getsize(full_path)
45
- print(f" - {rel_path} ({size} bytes)")
46
-
47
- # Show content of text files
48
- if file.endswith('.txt'):
49
- print(f"\n📄 Content of {rel_path}:")
50
- try:
51
- with open(full_path, 'r') as f:
52
- content = f.read()
53
- print(f" {content[:200]}...") # First 200 chars
54
- except Exception as e:
55
- print(f" Error reading file: {e}")
56
-
57
- print(f"\n🎯 SUCCESS! The download() method works perfectly!")
58
- print(f" Files were successfully downloaded from OneLake Files to local folder")
59
-
60
- else:
61
- print("❌ Download failed")
62
- print(" Check if files exist in OneLake Files/quick_test_folder/")
63
-
64
- return success
65
-
66
- if __name__ == "__main__":
67
- try:
68
- success = test_download()
69
- if success:
70
- print("\n🎉 Clean API validation complete!")
71
- print(" copy() ✅ - Upload works")
72
- print(" download() ✅ - Download works")
73
- print("\n🚀 Both methods ready for production!")
74
- except Exception as e:
75
- print(f"❌ Error: {e}")
76
- import traceback
77
- traceback.print_exc()
@@ -1,240 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script for new duckrun copy and download_from_files methods
4
- """
5
- import os
6
- import sys
7
- import tempfile
8
- import shutil
9
- from pathlib import Path
10
-
11
- # Add the local duckrun module to the path so we test the local version
12
- sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
13
-
14
- import duckrun
15
-
16
- def create_test_files(test_dir):
17
- """Create some test files for uploading"""
18
- print(f"📁 Creating test files in: {test_dir}")
19
-
20
- # Create main folder
21
- os.makedirs(test_dir, exist_ok=True)
22
-
23
- # Create a CSV file
24
- csv_content = """name,age,city
25
- Alice,25,New York
26
- Bob,30,Los Angeles
27
- Charlie,35,Chicago"""
28
-
29
- with open(os.path.join(test_dir, "people.csv"), "w") as f:
30
- f.write(csv_content)
31
-
32
- # Create a text file
33
- txt_content = "This is a test file created by duckrun test script."
34
- with open(os.path.join(test_dir, "readme.txt"), "w") as f:
35
- f.write(txt_content)
36
-
37
- # Create a subfolder with another file
38
- subfolder = os.path.join(test_dir, "reports")
39
- os.makedirs(subfolder, exist_ok=True)
40
-
41
- report_content = """date,sales,region
42
- 2024-01-01,1000,North
43
- 2024-01-02,1500,South"""
44
-
45
- with open(os.path.join(subfolder, "daily_sales.csv"), "w") as f:
46
- f.write(report_content)
47
-
48
- # List created files
49
- print("✅ Created test files:")
50
- for root, dirs, files in os.walk(test_dir):
51
- for file in files:
52
- full_path = os.path.join(root, file)
53
- rel_path = os.path.relpath(full_path, test_dir)
54
- print(f" - {rel_path}")
55
-
56
- return test_dir
57
-
58
- def test_duckrun_methods():
59
- """Test the new copy and download_from_files methods"""
60
- print("=" * 60)
61
- print("🧪 TESTING DUCKRUN NEW METHODS")
62
- print("=" * 60)
63
-
64
- # Create temporary directories for testing
65
- temp_dir = tempfile.mkdtemp(prefix="duckrun_test_")
66
- test_upload_dir = os.path.join(temp_dir, "upload_test")
67
- test_download_dir = os.path.join(temp_dir, "download_test")
68
-
69
- try:
70
- # Step 1: Create test files
71
- print("\n🔧 Step 1: Creating test files...")
72
- create_test_files(test_upload_dir)
73
-
74
- # Step 2: Connect to lakehouse
75
- print("\n🔧 Step 2: Connecting to lakehouse...")
76
- try:
77
- con = duckrun.connect("temp/power.lakehouse")
78
- print("✅ Connected successfully!")
79
- except Exception as e:
80
- print(f"❌ Connection failed: {e}")
81
- print("This might be expected if not authenticated with Azure CLI")
82
- return False
83
-
84
- # Step 3: Test copy method (upload)
85
- print("\n🔧 Step 3: Testing copy method...")
86
- try:
87
- # Test the new copy method with mandatory remote_folder
88
- success = con.copy(test_upload_dir, "test_upload_folder", overwrite=False)
89
- print(f"Upload result: {success}")
90
-
91
- if success:
92
- print("✅ Copy method test passed!")
93
- else:
94
- print("⚠ Copy method completed with some issues")
95
-
96
- except Exception as e:
97
- print(f"❌ Copy method failed: {e}")
98
- return False
99
-
100
- # Step 4: Test download method
101
- print("\n🔧 Step 4: Testing download method...")
102
- try:
103
- success = con.download("test_upload_folder", test_download_dir, overwrite=False)
104
- print(f"Download result: {success}")
105
-
106
- if success:
107
- print("✅ Download method test passed!")
108
-
109
- # Verify downloaded files
110
- if os.path.exists(test_download_dir):
111
- print("📂 Downloaded files verification:")
112
- for root, dirs, files in os.walk(test_download_dir):
113
- for file in files:
114
- full_path = os.path.join(root, file)
115
- rel_path = os.path.relpath(full_path, test_download_dir)
116
- print(f" - {rel_path}")
117
- else:
118
- print("⚠ Download method completed with some issues")
119
-
120
- except Exception as e:
121
- print(f"❌ Download method failed: {e}")
122
- return False
123
-
124
- # Step 5: Test method signatures and parameters
125
- print("\n🔧 Step 5: Testing method signatures...")
126
-
127
- # Test that copy method requires remote_folder (should fail without it)
128
- try:
129
- # This should raise a TypeError since remote_folder is now mandatory
130
- con.copy(test_upload_dir) # Missing required remote_folder parameter
131
- print("❌ copy() should require remote_folder parameter!")
132
- return False
133
- except TypeError as e:
134
- print("✅ copy() correctly requires remote_folder parameter")
135
-
136
- # Test default overwrite=False behavior
137
- print("✅ Both methods default to overwrite=False")
138
-
139
- print("\n" + "=" * 60)
140
- print("✅ ALL TESTS PASSED!")
141
- print("🎉 New methods are working correctly!")
142
- print("=" * 60)
143
- return True
144
-
145
- except Exception as e:
146
- print(f"\n❌ Unexpected error during testing: {e}")
147
- return False
148
-
149
- finally:
150
- # Cleanup temporary files
151
- print(f"\n🧹 Cleaning up temporary files: {temp_dir}")
152
- try:
153
- shutil.rmtree(temp_dir)
154
- print("✅ Cleanup complete")
155
- except Exception as e:
156
- print(f"⚠ Cleanup warning: {e}")
157
-
158
- def test_method_imports():
159
- """Test that methods can be imported and have correct signatures"""
160
- print("\n🔧 Testing method availability and signatures...")
161
-
162
- try:
163
- # Test that we can import duckrun
164
- import duckrun
165
- print("✅ duckrun module imported successfully")
166
-
167
- # Create a connection object to test methods exist
168
- # We'll catch any auth errors since we're just testing signatures
169
- try:
170
- con = duckrun.connect("temp/power.lakehouse")
171
-
172
- # Test that copy method exists and has correct signature
173
- assert hasattr(con, 'copy'), "copy method not found"
174
- print("✅ copy method exists")
175
-
176
- # Test that download method exists
177
- assert hasattr(con, 'download'), "download method not found"
178
- print("✅ download method exists")
179
-
180
- # Test method signatures using inspect
181
- import inspect
182
-
183
- copy_sig = inspect.signature(con.copy)
184
- print(f"✅ copy signature: {copy_sig}")
185
-
186
- download_sig = inspect.signature(con.download)
187
- print(f"✅ download signature: {download_sig}")
188
-
189
- # Verify copy method requires remote_folder (no default)
190
- copy_params = copy_sig.parameters
191
- assert 'remote_folder' in copy_params, "remote_folder parameter missing"
192
- assert copy_params['remote_folder'].default == inspect.Parameter.empty, "remote_folder should not have default value"
193
- print("✅ copy method correctly requires remote_folder parameter")
194
-
195
- # Verify overwrite defaults to False
196
- assert copy_params['overwrite'].default == False, "copy overwrite should default to False"
197
- download_params = download_sig.parameters
198
- assert download_params['overwrite'].default == False, "download overwrite should default to False"
199
- print("✅ Both methods correctly default overwrite=False")
200
-
201
- return True
202
-
203
- except Exception as auth_error:
204
- print(f"⚠ Authentication issue (expected): {auth_error}")
205
- print("✅ This is normal if Azure CLI is not configured")
206
- return True
207
-
208
- except Exception as e:
209
- print(f"❌ Import/signature test failed: {e}")
210
- return False
211
-
212
- if __name__ == "__main__":
213
- print("🚀 Starting duckrun method tests...")
214
-
215
- # Test 1: Method imports and signatures
216
- print("\n" + "=" * 60)
217
- print("TEST 1: Method Availability & Signatures")
218
- print("=" * 60)
219
-
220
- signature_ok = test_method_imports()
221
-
222
- if signature_ok:
223
- print("\n✅ Signature tests passed!")
224
-
225
- # Test 2: Full functionality (requires Azure auth)
226
- print("\n" + "=" * 60)
227
- print("TEST 2: Full Functionality (requires Azure CLI auth)")
228
- print("=" * 60)
229
-
230
- functionality_ok = test_duckrun_methods()
231
-
232
- if functionality_ok:
233
- print("\n🎉 ALL TESTS COMPLETED SUCCESSFULLY!")
234
- print("The new copy() and download() methods are ready to use!")
235
- else:
236
- print("\n⚠ Functionality tests had issues (likely due to authentication)")
237
- print("But the methods are correctly implemented and should work with proper Azure auth")
238
- else:
239
- print("\n❌ Signature tests failed - there may be issues with the implementation")
240
- sys.exit(1)
@@ -1,162 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Simple test for duckrun method signatures (no auth required)
4
- """
5
- import os
6
- import sys
7
- import inspect
8
-
9
- # Add the local duckrun module to the path
10
- sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
11
-
12
- def test_signatures_only():
13
- """Test method signatures without authentication"""
14
- print("🔧 Testing duckrun method signatures (no auth required)...")
15
-
16
- try:
17
- # Import the Duckrun class directly to avoid connection
18
- from duckrun.core import Duckrun
19
- print("✅ Duckrun class imported successfully")
20
-
21
- # Check that methods exist on the class
22
- assert hasattr(Duckrun, 'copy'), "copy method not found"
23
- print("✅ copy method exists")
24
-
25
- assert hasattr(Duckrun, 'download'), "download method not found"
26
- print("✅ download method exists")
27
-
28
- # Get method signatures
29
- copy_sig = inspect.signature(Duckrun.copy)
30
- download_sig = inspect.signature(Duckrun.download)
31
-
32
- print(f"\n📋 Method Signatures:")
33
- print(f" copy{copy_sig}")
34
- print(f" download{download_sig}")
35
-
36
- # Verify copy method parameters
37
- copy_params = copy_sig.parameters
38
-
39
- # Check required parameters exist
40
- required_params = ['self', 'local_folder', 'remote_folder']
41
- for param in required_params:
42
- assert param in copy_params, f"Missing required parameter: {param}"
43
- print(f"✅ copy method has all required parameters: {required_params}")
44
-
45
- # Check that remote_folder has no default (is required)
46
- remote_folder_param = copy_params['remote_folder']
47
- assert remote_folder_param.default == inspect.Parameter.empty, "remote_folder should be required (no default)"
48
- print("✅ remote_folder parameter is correctly required (no default)")
49
-
50
- # Check overwrite defaults to False
51
- overwrite_param = copy_params.get('overwrite')
52
- assert overwrite_param is not None, "overwrite parameter missing"
53
- assert overwrite_param.default == False, f"overwrite should default to False, got {overwrite_param.default}"
54
- print("✅ copy method overwrite parameter defaults to False")
55
-
56
- # Verify download method parameters
57
- download_params = download_sig.parameters
58
- download_overwrite = download_params.get('overwrite')
59
- assert download_overwrite is not None, "download overwrite parameter missing"
60
- assert download_overwrite.default == False, f"download overwrite should default to False, got {download_overwrite.default}"
61
- print("✅ download method overwrite parameter defaults to False")
62
-
63
- # Test parameter types (if available)
64
- print("\n📋 Parameter Details:")
65
- for name, param in copy_params.items():
66
- if name != 'self':
67
- default_str = f" = {param.default}" if param.default != inspect.Parameter.empty else " (required)"
68
- print(f" copy.{name}{default_str}")
69
-
70
- print()
71
- for name, param in download_params.items():
72
- if name != 'self':
73
- default_str = f" = {param.default}" if param.default != inspect.Parameter.empty else " (required)"
74
- print(f" download.{name}{default_str}")
75
-
76
- return True
77
-
78
- except Exception as e:
79
- print(f"❌ Test failed: {e}")
80
- import traceback
81
- traceback.print_exc()
82
- return False
83
-
84
- def test_method_call_signature():
85
- """Test that method calls fail appropriately when missing required params"""
86
- print("\n🔧 Testing method call requirements...")
87
-
88
- try:
89
- from duckrun.core import Duckrun
90
- import tempfile
91
- import os
92
-
93
- # Create a temporary directory for testing
94
- temp_dir = tempfile.mkdtemp(prefix="duckrun_test_")
95
-
96
- # Create a mock instance (won't actually connect)
97
- # We'll just test the method signature validation
98
- class MockDuckrun(Duckrun):
99
- def __init__(self):
100
- # Skip the parent __init__ to avoid connection
101
- pass
102
-
103
- mock_con = MockDuckrun()
104
-
105
- # Test that copy method requires remote_folder
106
- try:
107
- # This should fail because remote_folder is required
108
- mock_con.copy(temp_dir) # Missing remote_folder
109
- print("❌ copy() should require remote_folder parameter!")
110
- return False
111
- except TypeError as e:
112
- if "remote_folder" in str(e):
113
- print("✅ copy() correctly requires remote_folder parameter")
114
- else:
115
- print(f"✅ copy() requires parameters (error: {e})")
116
-
117
- # Test that copy method accepts all required parameters
118
- try:
119
- # This might fail due to missing implementation details, but signature should be OK
120
- mock_con.copy(temp_dir, "target_folder")
121
- print("✅ copy() accepts required parameters correctly")
122
- except Exception as e:
123
- # Expected to fail due to missing implementation, but signature is OK
124
- print("✅ copy() signature accepts required parameters (implementation error expected)")
125
-
126
- # Cleanup
127
- import shutil
128
- shutil.rmtree(temp_dir, ignore_errors=True)
129
-
130
- return True
131
-
132
- except Exception as e:
133
- print(f"❌ Method call test failed: {e}")
134
- return False
135
-
136
- if __name__ == "__main__":
137
- print("=" * 60)
138
- print("🧪 DUCKRUN METHOD SIGNATURE TESTS")
139
- print("=" * 60)
140
-
141
- # Test 1: Basic signatures
142
- signature_ok = test_signatures_only()
143
-
144
- # Test 2: Call requirements
145
- if signature_ok:
146
- call_ok = test_method_call_signature()
147
-
148
- if call_ok:
149
- print("\n" + "=" * 60)
150
- print("✅ ALL SIGNATURE TESTS PASSED!")
151
- print("🎉 The new methods are correctly implemented!")
152
- print("=" * 60)
153
- print("\n📋 Summary of Changes:")
154
- print(" • copy_to_files() → copy()")
155
- print(" • download_from_files() → download()")
156
- print(" • remote_folder parameter is now REQUIRED")
157
- print(" • overwrite defaults to False (both methods)")
158
- print(" • Methods are ready for use with proper Azure authentication")
159
- else:
160
- print("\n❌ Method call tests failed")
161
- else:
162
- print("\n❌ Signature tests failed")
File without changes
File without changes
File without changes