duckrun 0.1.6.2__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -127,77 +127,57 @@ class Duckrun:
          self._attach_lakehouse()
 
      @classmethod
-     def connect(cls, workspace: Union[str, None] = None, lakehouse_name: Optional[str] = None,
-                 schema: str = "dbo", sql_folder: Optional[str] = None,
+     def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
                  compaction_threshold: int = 100):
          """
          Create and connect to lakehouse.
 
-         Supports two formats:
-         1. Compact: connect("ws/lh.lakehouse/schema", sql_folder=...) or connect("ws/lh.lakehouse")
-         2. Traditional: connect("ws", "lh", "schema", sql_folder) or connect("ws", "lh")
+         Uses compact format: connect("ws/lh.lakehouse/schema") or connect("ws/lh.lakehouse")
 
          Args:
-             workspace: Workspace name or full path "ws/lh.lakehouse/schema"
-             lakehouse_name: Lakehouse name (optional if using compact format)
-             schema: Schema name (defaults to "dbo")
+             connection_string: OneLake path "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
              sql_folder: Optional path or URL to SQL files folder
              compaction_threshold: File count threshold for compaction
 
          Examples:
-             # Compact format (second param treated as sql_folder if it's a URL/path string)
-             dr = Duckrun.connect("temp/power.lakehouse/wa", "https://github.com/.../sql/")
-             dr = Duckrun.connect("ws/lh.lakehouse/schema", "./sql")
+             dr = Duckrun.connect("ws/lh.lakehouse/schema", sql_folder="./sql")
              dr = Duckrun.connect("ws/lh.lakehouse/schema")  # no SQL folder
-
-             # Traditional format
-             dr = Duckrun.connect("ws", "lh", "schema", "./sql")
-             dr = Duckrun.connect("ws", "lh", "schema")
+             dr = Duckrun.connect("ws/lh.lakehouse")  # defaults to dbo schema
          """
          print("Connecting to Lakehouse...")
 
          scan_all_schemas = False
 
-         # Check if using compact format: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
-         # If second param looks like a path/URL and not a lakehouse name, treat it as sql_folder
-         if workspace and "/" in workspace and (lakehouse_name is None or
-             (isinstance(lakehouse_name, str) and ('/' in lakehouse_name or lakehouse_name.startswith('http') or lakehouse_name.startswith('.')))):
-
-             # If lakehouse_name looks like a sql_folder, shift it
-             if lakehouse_name and ('/' in lakehouse_name or lakehouse_name.startswith('http') or lakehouse_name.startswith('.')):
-                 sql_folder = lakehouse_name
-                 lakehouse_name = None
-
-             parts = workspace.split("/")
-             if len(parts) == 2:
-                 workspace, lakehouse_name = parts
-                 scan_all_schemas = True
-                 print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
-                 print(f"   Scanning all schemas for table discovery...\n")
-             elif len(parts) == 3:
-                 workspace, lakehouse_name, schema = parts
-             else:
-                 raise ValueError(
-                     f"Invalid connection string format: '{workspace}'. "
-                     "Expected format: 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'"
-                 )
-
-             if lakehouse_name.endswith(".lakehouse"):
-                 lakehouse_name = lakehouse_name[:-10]
-         elif lakehouse_name is not None:
-             # Traditional format - check if schema was explicitly provided
-             if schema == "dbo":
-                 scan_all_schemas = True
-                 print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
-                 print(f"   Scanning all schemas for table discovery...\n")
+         # Only support compact format: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
+         if not connection_string or "/" not in connection_string:
+             raise ValueError(
+                 "Invalid connection string format. "
+                 "Expected format: 'workspace/lakehouse.lakehouse/schema' or 'workspace/lakehouse.lakehouse'"
+             )
+
+         parts = connection_string.split("/")
+         if len(parts) == 2:
+             workspace, lakehouse_name = parts
+             scan_all_schemas = True
+             schema = "dbo"
+             print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
+             print(f"   Scanning all schemas for table discovery...\n")
+         elif len(parts) == 3:
+             workspace, lakehouse_name, schema = parts
+         else:
+             raise ValueError(
+                 f"Invalid connection string format: '{connection_string}'. "
+                 "Expected format: 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'"
+             )
+
+         if lakehouse_name.endswith(".lakehouse"):
+             lakehouse_name = lakehouse_name[:-10]
 
          if not workspace or not lakehouse_name:
              raise ValueError(
-                 "Missing required parameters. Use either:\n"
+                 "Missing required parameters. Use compact format:\n"
                  "  connect('workspace/lakehouse.lakehouse/schema', 'sql_folder')\n"
-                 "  connect('workspace/lakehouse.lakehouse')  # defaults to dbo\n"
-                 "  connect('workspace', 'lakehouse', 'schema', 'sql_folder')\n"
-                 "  connect('workspace', 'lakehouse')  # defaults to dbo"
+                 "  connect('workspace/lakehouse.lakehouse')  # defaults to dbo"
              )
 
          return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold, scan_all_schemas)
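
The refactor above removes the traditional multi-argument form entirely; only the compact connection string remains. For reference, a minimal standalone sketch of the parsing rule the new `connect` enforces (the helper name is hypothetical and only illustrates the split/strip behavior):

```python
# Hypothetical helper mirroring the parsing rule in the new connect(); not part of duckrun's API.
def parse_connection_string(connection_string: str):
    parts = connection_string.split("/")
    if len(parts) == 2:        # "ws/lh.lakehouse" -> schema defaults to "dbo"
        workspace, lakehouse, schema = parts[0], parts[1], "dbo"
    elif len(parts) == 3:      # "ws/lh.lakehouse/schema"
        workspace, lakehouse, schema = parts
    else:
        raise ValueError("Expected 'workspace/lakehouse.lakehouse' or 'workspace/lakehouse.lakehouse/schema'")
    if lakehouse.endswith(".lakehouse"):
        lakehouse = lakehouse[:-len(".lakehouse")]  # strip the ".lakehouse" suffix
    return workspace, lakehouse, schema

assert parse_connection_string("Analytics/Sales.lakehouse/dbo") == ("Analytics", "Sales", "dbo")
assert parse_connection_string("Analytics/Sales.lakehouse") == ("Analytics", "Sales", "dbo")
```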
@@ -210,7 +190,7 @@ class Duckrun:
          if token != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
              self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
          else:
-             print("Please login to Azure CLI")
+             print("Authenticating with Azure (trying CLI, will fall back to browser if needed)...")
              from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
              credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
              token = credential.get_token("https://storage.azure.com/.default")
@@ -227,7 +207,7 @@ class Duckrun:
          """
          token = self._get_storage_token()
          if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-             print("Getting Azure token for table discovery...")
+             print("Authenticating with Azure for table discovery (trying CLI, will fall back to browser if needed)...")
              from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
              credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
              token_obj = credential.get_token("https://storage.azure.com/.default")
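
The message changes in both hunks above describe the same azure-identity pattern: try the local Azure CLI token first and fall back to an interactive browser login only if that fails. A minimal standalone sketch of the chain, using only the calls already shown in this diff:

```python
# Minimal sketch of the credential chain used above; requires `pip install azure-identity`.
from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential

# Each credential is tried in order; the browser flow only runs when the CLI token is unavailable.
credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
access_token = credential.get_token("https://storage.azure.com/.default")  # Azure Storage / OneLake scope
print(access_token.token[:20], "...")  # bearer token string; duckrun stores it in AZURE_STORAGE_TOKEN
```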
@@ -506,6 +486,246 @@ class Duckrun:
          print('='*60)
          return True
 
+     def copy(self, local_folder: str, remote_folder: str,
+              file_extensions: Optional[List[str]] = None,
+              overwrite: bool = False) -> bool:
+         """
+         Copy files from a local folder to the OneLake Files section.
+
+         Args:
+             local_folder: Path to local folder containing files to upload
+             remote_folder: Target subfolder path in OneLake Files (e.g., "reports/daily") - REQUIRED
+             file_extensions: Optional list of file extensions to filter (e.g., ['.csv', '.parquet'])
+             overwrite: Whether to overwrite existing files (default: False)
+
+         Returns:
+             True if all files uploaded successfully, False otherwise
+
+         Examples:
+             # Upload all files from local folder to a target folder
+             dr.copy("./local_data", "uploaded_data")
+
+             # Upload only CSV files to a specific subfolder
+             dr.copy("./reports", "daily_reports", ['.csv'])
+
+             # Upload with overwrite enabled
+             dr.copy("./backup", "backups", overwrite=True)
+         """
+         if not os.path.exists(local_folder):
+             print(f"❌ Local folder not found: {local_folder}")
+             return False
+
+         if not os.path.isdir(local_folder):
+             print(f"❌ Path is not a directory: {local_folder}")
+             return False
+
+         # Get Azure token
+         token = self._get_storage_token()
+         if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
+             print("Authenticating with Azure for file upload (trying CLI, will fall back to browser if needed)...")
+             from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
+             credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
+             token_obj = credential.get_token("https://storage.azure.com/.default")
+             token = token_obj.token
+             os.environ["AZURE_STORAGE_TOKEN"] = token
+
+         # Setup OneLake Files URL (not Tables)
+         files_base_url = f'abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
+         store = AzureStore.from_url(files_base_url, bearer_token=token)
+
+         # Collect files to upload
+         files_to_upload = []
+         for root, dirs, files in os.walk(local_folder):
+             for file in files:
+                 local_file_path = os.path.join(root, file)
+
+                 # Filter by extensions if specified
+                 if file_extensions:
+                     _, ext = os.path.splitext(file)
+                     if ext.lower() not in [e.lower() for e in file_extensions]:
+                         continue
+
+                 # Calculate relative path from local_folder
+                 rel_path = os.path.relpath(local_file_path, local_folder)
+
+                 # Build remote path in OneLake Files (remote_folder is now mandatory)
+                 remote_path = f"{remote_folder.strip('/')}/{rel_path}".replace("\\", "/")
+
+                 files_to_upload.append((local_file_path, remote_path))
+
+         if not files_to_upload:
+             print(f"No files found to upload in {local_folder}")
+             if file_extensions:
+                 print(f"   (filtered by extensions: {file_extensions})")
+             return True
+
+         print(f"📁 Uploading {len(files_to_upload)} files from '{local_folder}' to OneLake Files...")
+         print(f"   Target folder: {remote_folder}")
+
+         uploaded_count = 0
+         failed_count = 0
+
+         for local_path, remote_path in files_to_upload:
+             try:
+                 # Check if file exists (if not overwriting)
+                 if not overwrite:
+                     try:
+                         obs.head(store, remote_path)
+                         print(f"  ⏭ Skipped (exists): {remote_path}")
+                         continue
+                     except Exception:
+                         # File doesn't exist, proceed with upload
+                         pass
+
+                 # Read local file
+                 with open(local_path, 'rb') as f:
+                     file_data = f.read()
+
+                 # Upload to OneLake Files
+                 obs.put(store, remote_path, file_data)
+
+                 file_size = len(file_data)
+                 size_mb = file_size / (1024 * 1024) if file_size > 1024*1024 else file_size / 1024
+                 size_unit = "MB" if file_size > 1024*1024 else "KB"
+
+                 print(f"  ✓ Uploaded: {local_path} → {remote_path} ({size_mb:.1f} {size_unit})")
+                 uploaded_count += 1
+
+             except Exception as e:
+                 print(f"  ❌ Failed: {local_path} → {remote_path} | Error: {str(e)[:100]}")
+                 failed_count += 1
+
+         print(f"\n{'='*60}")
+         if failed_count == 0:
+             print(f"✅ Successfully uploaded all {uploaded_count} files to OneLake Files")
+         else:
+             print(f"⚠ Uploaded {uploaded_count} files, {failed_count} failed")
+         print(f"{'='*60}")
+
+         return failed_count == 0
+
+     def download(self, remote_folder: str = "", local_folder: str = "./downloaded_files",
+                  file_extensions: Optional[List[str]] = None,
+                  overwrite: bool = False) -> bool:
+         """
+         Download files from the OneLake Files section to a local folder.
+
+         Args:
+             remote_folder: Optional subfolder path in OneLake Files to download from
+             local_folder: Local folder path to download files to (default: "./downloaded_files")
+             file_extensions: Optional list of file extensions to filter (e.g., ['.csv', '.parquet'])
+             overwrite: Whether to overwrite existing local files (default: False)
+
+         Returns:
+             True if all files downloaded successfully, False otherwise
+
+         Examples:
+             # Download all files from OneLake Files root
+             dr.download()
+
+             # Download only CSV files from a specific subfolder
+             dr.download("daily_reports", "./reports", ['.csv'])
+         """
+         # Get Azure token
+         token = self._get_storage_token()
+         if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
+             print("Authenticating with Azure for file download (trying CLI, will fall back to browser if needed)...")
+             from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
+             credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
+             token_obj = credential.get_token("https://storage.azure.com/.default")
+             token = token_obj.token
+             os.environ["AZURE_STORAGE_TOKEN"] = token
+
+         # Setup OneLake Files URL (not Tables)
+         files_base_url = f'abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
+         store = AzureStore.from_url(files_base_url, bearer_token=token)
+
+         # Create local directory
+         os.makedirs(local_folder, exist_ok=True)
+
+         # List files in OneLake Files
+         print(f"📁 Discovering files in OneLake Files...")
+         if remote_folder:
+             print(f"   Source folder: {remote_folder}")
+             prefix = f"{remote_folder.strip('/')}/"
+         else:
+             prefix = ""
+
+         try:
+             list_stream = obs.list(store, prefix=prefix)
+             files_to_download = []
+
+             for batch in list_stream:
+                 for obj in batch:
+                     remote_path = obj["path"]
+
+                     # Filter by extensions if specified
+                     if file_extensions:
+                         _, ext = os.path.splitext(remote_path)
+                         if ext.lower() not in [e.lower() for e in file_extensions]:
+                             continue
+
+                     # Calculate local path
+                     if remote_folder:
+                         rel_path = os.path.relpath(remote_path, remote_folder.strip('/'))
+                     else:
+                         rel_path = remote_path
+
+                     local_path = os.path.join(local_folder, rel_path).replace('/', os.sep)
+                     files_to_download.append((remote_path, local_path))
+
+             if not files_to_download:
+                 print(f"No files found to download")
+                 if file_extensions:
+                     print(f"   (filtered by extensions: {file_extensions})")
+                 return True
+
+             print(f"📥 Downloading {len(files_to_download)} files to '{local_folder}'...")
+
+             downloaded_count = 0
+             failed_count = 0
+
+             for remote_path, local_path in files_to_download:
+                 try:
+                     # Check if local file exists (if not overwriting)
+                     if not overwrite and os.path.exists(local_path):
+                         print(f"  ⏭ Skipped (exists): {local_path}")
+                         continue
+
+                     # Ensure local directory exists
+                     os.makedirs(os.path.dirname(local_path), exist_ok=True)
+
+                     # Download file
+                     data = obs.get(store, remote_path).bytes()
+
+                     # Write to local file
+                     with open(local_path, 'wb') as f:
+                         f.write(data)
+
+                     file_size = len(data)
+                     size_mb = file_size / (1024 * 1024) if file_size > 1024*1024 else file_size / 1024
+                     size_unit = "MB" if file_size > 1024*1024 else "KB"
+
+                     print(f"  ✓ Downloaded: {remote_path} → {local_path} ({size_mb:.1f} {size_unit})")
+                     downloaded_count += 1
+
+                 except Exception as e:
+                     print(f"  ❌ Failed: {remote_path} → {local_path} | Error: {str(e)[:100]}")
+                     failed_count += 1
+
+             print(f"\n{'='*60}")
+             if failed_count == 0:
+                 print(f"✅ Successfully downloaded all {downloaded_count} files from OneLake Files")
+             else:
+                 print(f"⚠ Downloaded {downloaded_count} files, {failed_count} failed")
+             print(f"{'='*60}")
+
+             return failed_count == 0
+
+         except Exception as e:
+             print(f"❌ Error listing files from OneLake: {e}")
+             return False
+
      def sql(self, query: str):
          """
          Execute raw SQL query with Spark-style write API.
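
For context, a hedged sketch of how the two new methods would be called from a notebook once this wheel is installed (workspace, schema, and folder names below are placeholders):

```python
# Illustrative round trip with the new helpers; assumes Azure credentials are available.
import duckrun

con = duckrun.connect("Analytics/Sales.lakehouse/dbo")

# Upload every CSV under ./exports into the Files/landing folder, skipping files that already exist
ok = con.copy("./exports", "landing", ['.csv'])

# Pull the same folder back down, overwriting any stale local copies
if ok:
    con.download("landing", "./roundtrip", ['.csv'], overwrite=True)
```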
duckrun-0.1.6.2.dist-info/METADATA → duckrun-0.1.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: duckrun
- Version: 0.1.6.2
+ Version: 0.1.7
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
  Author: mim
  License: MIT
@@ -20,7 +20,7 @@ Dynamic: license-file
 
  <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
 
- Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and Delta Lake.
+ A helper package for the things that make my life easier when working with Fabric Python notebooks. Just the stuff that actually made sense to me, nothing fancy.
 
  ## Important Notes
 
@@ -30,6 +30,10 @@ Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and
 
  **Why no spaces?** Duckrun uses simple name-based paths instead of GUIDs. This keeps the code clean and readable, which is perfect for data engineering workspaces where naming conventions are already well-established. Just use underscores or hyphens instead: `my_workspace` or `my-lakehouse`.
 
+ ## What It Does
+
+ It handles orchestration, arbitrary SQL statements, and file manipulation. That's it, just the stuff I run into in my daily workflow when working with Fabric notebooks.
+
  ## Installation
 
  ```bash
@@ -58,6 +62,10 @@ con.sql("SELECT * FROM my_table LIMIT 10").show()
 
  # Write to Delta tables (Spark-style API)
  con.sql("SELECT * FROM source").write.mode("overwrite").saveAsTable("target")
+
+ # Upload/download files to/from OneLake Files
+ con.copy("./local_folder", "target_folder")    # Upload files
+ con.download("target_folder", "./downloaded")  # Download files
  ```
 
  That's it! No `sql_folder` needed for data exploration.
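
One detail worth keeping in mind from the new method signatures: both `copy()` and `download()` return a boolean, so the quick-start calls above can branch on success. A small hedged sketch (paths are placeholders):

```python
# copy()/download() return True only when every file transferred successfully.
if not con.copy("./local_folder", "target_folder"):
    raise RuntimeError("Some files failed to upload to OneLake Files")
```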
@@ -97,7 +105,7 @@ con.sql("SELECT * FROM dbo_customers").show()
  con.sql("SELECT * FROM bronze_raw_data").show()
  ```
 
- ## Two Ways to Use Duckrun
+ ## Three Ways to Use Duckrun
  ### 1. Data Exploration (Spark-Style API)
 
 
@@ -127,7 +135,38 @@ con.sql("SELECT * FROM new_orders").write.mode("append").saveAsTable("orders")
 
  **Note:** `.format("delta")` is optional - Delta is the default format!
 
- ### 2. Pipeline Orchestration
+ ### 2. File Management (OneLake Files)
+
+ Upload and download files to and from the OneLake Files section (not Delta tables):
+
+ ```python
+ con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
+
+ # Upload files to OneLake Files (remote_folder is required)
+ con.copy("./local_data", "uploaded_data")
+
+ # Upload only specific file types
+ con.copy("./reports", "daily_reports", ['.csv', '.parquet'])
+
+ # Upload with overwrite enabled (default is False for safety)
+ con.copy("./backup", "backups", overwrite=True)
+
+ # Download files from OneLake Files
+ con.download("uploaded_data", "./downloaded")
+
+ # Download only CSV files from a specific folder
+ con.download("daily_reports", "./reports", ['.csv'])
+ ```
+
+ **Key Features:**
+ - ✅ **Files go to the OneLake Files section** (not Delta Tables)
+ - ✅ **`remote_folder` parameter is required** for uploads (prevents accidental uploads)
+ - ✅ **`overwrite=False` by default** (safer - prevents accidental overwrites)
+ - ✅ **File extension filtering** (e.g., only `.csv` or `.parquet` files)
+ - ✅ **Preserves folder structure** during upload/download
+ - ✅ **Progress reporting** with file sizes and upload status
+
+ ### 3. Pipeline Orchestration
 
  For production workflows with reusable SQL and Python tasks:
 
@@ -286,6 +325,63 @@ con = duckrun.connect(
  )
  ```
 
+ ## File Management API Reference
+
+ ### `copy(local_folder, remote_folder, file_extensions=None, overwrite=False)`
+
+ Upload files from a local folder to the OneLake Files section.
+
+ **Parameters:**
+ - `local_folder` (str): Path to local folder containing files to upload
+ - `remote_folder` (str): **Required** target folder path in OneLake Files
+ - `file_extensions` (list, optional): Filter by file extensions (e.g., `['.csv', '.parquet']`)
+ - `overwrite` (bool, optional): Whether to overwrite existing files (default: False)
+
+ **Returns:** `True` if all files uploaded successfully, `False` otherwise
+
+ **Examples:**
+ ```python
+ # Upload all files to a target folder
+ con.copy("./data", "processed_data")
+
+ # Upload only CSV and Parquet files
+ con.copy("./reports", "monthly_reports", ['.csv', '.parquet'])
+
+ # Upload with overwrite enabled
+ con.copy("./backup", "daily_backup", overwrite=True)
+ ```
+
+ ### `download(remote_folder="", local_folder="./downloaded_files", file_extensions=None, overwrite=False)`
+
+ Download files from the OneLake Files section to a local folder.
+
+ **Parameters:**
+ - `remote_folder` (str, optional): Source folder path in OneLake Files (default: root)
+ - `local_folder` (str, optional): Local destination folder (default: "./downloaded_files")
+ - `file_extensions` (list, optional): Filter by file extensions (e.g., `['.csv', '.json']`)
+ - `overwrite` (bool, optional): Whether to overwrite existing local files (default: False)
+
+ **Returns:** `True` if all files downloaded successfully, `False` otherwise
+
+ **Examples:**
+ ```python
+ # Download all files from OneLake Files root
+ con.download()
+
+ # Download from a specific folder
+ con.download("processed_data", "./local_data")
+
+ # Download only JSON files
+ con.download("config", "./configs", ['.json'])
+ ```
+
+ **Important Notes:**
+ - Files are uploaded/downloaded to/from the **OneLake Files section**, not Delta Tables
+ - The `remote_folder` parameter is **required** for uploads to prevent accidental uploads
+ - Both methods default to `overwrite=False` for safety
+ - Folder structure is preserved during upload/download operations
+ - Progress is reported with file names, sizes, and upload/download status
+
  ## Complete Example
 
  ```python
@@ -294,7 +390,10 @@ import duckrun
  # Connect (specify schema for best performance)
  con = duckrun.connect("Analytics/Sales.lakehouse/dbo", sql_folder="./sql")
 
- # Pipeline with mixed tasks
+ # 1. Upload raw data files to OneLake Files
+ con.copy("./raw_data", "raw_uploads", ['.csv', '.json'])
+
+ # 2. Pipeline with mixed tasks
  pipeline = [
      # Download raw data (Python)
      ('fetch_api_data', ('https://api.example.com/sales', 'raw')),
@@ -309,20 +408,30 @@ pipeline = [
      ('sales_history', 'append')
  ]
 
- # Run
+ # Run pipeline
  success = con.run(pipeline)
 
- # Explore results
+ # 3. Explore results using DuckDB
  con.sql("SELECT * FROM regional_summary").show()
 
- # Export to new table
+ # 4. Export to new Delta table
  con.sql("""
      SELECT region, SUM(total) as grand_total
      FROM regional_summary
      GROUP BY region
  """).write.mode("overwrite").saveAsTable("region_totals")
+
+ # 5. Download processed files for external systems
+ con.download("processed_reports", "./exports", ['.csv'])
  ```
 
+ **This example demonstrates:**
+ - 📁 **File uploads** to the OneLake Files section
+ - 🔄 **Pipeline orchestration** with SQL and Python tasks
+ - ⚡ **Fast data exploration** with DuckDB
+ - 💾 **Delta table creation** with Spark-style API
+ - 📤 **File downloads** from the OneLake Files section
+
  ## How It Works
 
  1. **Connection**: Duckrun connects to your Fabric lakehouse using OneLake and Azure authentication
duckrun-0.1.7.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
+ duckrun/core.py,sha256=PzeY1WJVhAGTOuN5Yf86oNhKpK_zw6GYdylZ_BdSJfg,32982
+ duckrun-0.1.7.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+ duckrun-0.1.7.dist-info/METADATA,sha256=BIsqAq6Z1JwSv7RwJ6wthzTC7xKSDeigZfVom5RJH0s,13847
+ duckrun-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ duckrun-0.1.7.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+ duckrun-0.1.7.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
- duckrun/core.py,sha256=_18GjaaT_CqhtivyDQuLIQx5UUuUIZNBMK9nBQgavXc,23180
3
- duckrun-0.1.6.2.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
4
- duckrun-0.1.6.2.dist-info/METADATA,sha256=dYy1d8V2yq2JwqkLXwJC8iBLMP6UbbFm9ZGHsBJLGuY,9497
5
- duckrun-0.1.6.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- duckrun-0.1.6.2.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
7
- duckrun-0.1.6.2.dist-info/RECORD,,