duckrun 0.2.11.dev0__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/auth.py CHANGED
@@ -20,7 +20,6 @@ def get_token() -> Optional[str]:
     # Check if we already have a cached token
     token_env = os.environ.get("AZURE_STORAGE_TOKEN")
     if token_env and token_env != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-        print("✅ Using existing Azure Storage token")
        return token_env
 
     print("🔐 Starting Azure authentication...")
@@ -38,21 +37,16 @@ def get_token() -> Optional[str]:
     except Exception as e:
         print(f"⚠️ Fabric notebook authentication failed: {e}")
 
-    # Detect environment type for fallback authentication
+    # Try local/VS Code authentication (Azure CLI + browser)
+    print("🖥️ Trying local authentication (Azure CLI + browser fallback)...")
+    token = _get_local_token()
+    if token:
+        return token
+
+    # If local auth failed, fall back to device code flow
+    print("🔐 Falling back to device code flow for remote/headless environment...")
     try:
-        # Check if we're in Google Colab first
-        try:
-            import google.colab
-            print("🚀 Google Colab detected - using device code flow")
-            return _get_device_code_token()
-        except ImportError:
-            pass
-
-        # For all other environments (including VS Code), try Azure CLI first
-        # This includes local development, VS Code notebooks, etc.
-        print("🖥️ Local/VS Code environment detected - trying Azure CLI first, then browser fallback")
-        return _get_local_token()
-
+        return _get_device_code_token()
     except Exception as e:
         print(f"❌ Authentication failed: {e}")
         print("💡 Try refreshing and running again, or check your Azure permissions")
duckrun/core.py CHANGED
@@ -133,11 +133,8 @@ class Duckrun:
 
         # Check if it's a workspace-only connection (no "/" means workspace name only)
         if "/" not in connection_string:
-            print(f"Connecting to workspace '{connection_string}' for management operations...")
             return WorkspaceConnection(connection_string)
 
-        print("Connecting to Lakehouse...")
-
         scan_all_schemas = False
 
         # Parse lakehouse connection string: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
@@ -195,17 +192,14 @@ class Duckrun:
         guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
 
         if guid_pattern.match(workspace_name) and guid_pattern.match(lakehouse_name):
-            print(f"✅ Names are already GUIDs: workspace={workspace_name}, lakehouse={lakehouse_name}")
             return workspace_name, lakehouse_name
 
         # Optimization: If workspace name has no spaces, use both names directly (old behavior)
         # Note: Lakehouse names cannot contain spaces in Microsoft Fabric, only workspace names can
         if " " not in workspace_name:
-            print(f"✅ Using names directly (workspace has no spaces): workspace={workspace_name}, lakehouse={lakehouse_name}")
             return workspace_name, lakehouse_name
 
         # Workspace name contains spaces - need to resolve both to GUIDs for proper ABFSS URLs
-        print(f"🔍 Resolving '{workspace_name}' workspace and '{lakehouse_name}' lakehouse to GUIDs (workspace has spaces)...")
 
         try:
             # Get authentication token using enhanced auth system
@@ -242,7 +236,6 @@ class Duckrun:
             if not lakehouse_id:
                 raise ValueError(f"Lakehouse '{lakehouse_name}' not found in workspace '{workspace_name}'")
 
-            print(f"✅ Resolved: {workspace_name} → {workspace_id}, {lakehouse_name} → {lakehouse_id}")
             return workspace_id, lakehouse_id
 
         except Exception as e:
@@ -388,7 +381,6 @@ class Duckrun:
                     tables_found.append((schema_name, table_name))
         else:
             # Scan specific schema only
-            print(f"🔍 Discovering tables in schema '{self.schema}'...")
             schema_path = f"{base_path}{self.schema}/"
             result = obs.list_with_delimiter(store, prefix=schema_path)
 
@@ -407,27 +399,10 @@ class Duckrun:
             tables = self._discover_tables_fast()
 
             if not tables:
-                if self.scan_all_schemas:
-                    print(f"No Delta tables found in {self.lakehouse_name}/Tables/")
-                else:
-                    print(f"No Delta tables found in {self.lakehouse_name}/Tables/{self.schema}/")
                 return
 
-            # Group tables by schema for display
-            schema_tables = {}
-            for schema_name, table_name in tables:
-                if schema_name not in schema_tables:
-                    schema_tables[schema_name] = []
-                schema_tables[schema_name].append(table_name)
-
-            # Display tables by schema
-            print(f"\n📊 Found {len(tables)} tables:")
-            for schema_name in sorted(schema_tables.keys()):
-                table_list = sorted(schema_tables[schema_name])
-                print(f" {schema_name}: {', '.join(table_list)}")
-
-            attached_count = 0
-            skipped_tables = []
+            # Collect table names for display
+            table_names = []
 
             for schema_name, table_name in tables:
                 try:
@@ -435,28 +410,25 @@ class Duckrun:
                         # Create proper schema.table structure in DuckDB
                         self.con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
                         view_name = f"{schema_name}.{table_name}"
+                        table_names.append(view_name)
                     else:
                         # Single schema mode - use just table name
                         view_name = table_name
+                        table_names.append(table_name)
 
                     self.con.sql(f"""
                         CREATE OR REPLACE VIEW {view_name}
                         AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
                     """)
-                    attached_count += 1
                 except Exception as e:
-                    skipped_tables.append(f"{schema_name}.{table_name}")
                     continue
 
-            print(f"\n{'='*60}")
-            print(f"✅ Ready - {attached_count}/{len(tables)} tables available")
-            if skipped_tables:
-                print(f"⚠ Skipped {len(skipped_tables)} tables: {', '.join(skipped_tables[:3])}{'...' if len(skipped_tables) > 3 else ''}")
-            print(f"{'='*60}\n")
+            # Print discovered tables as comma-separated list
+            if table_names:
+                print(", ".join(table_names))
 
         except Exception as e:
             print(f"❌ Error attaching lakehouse: {e}")
-            print("Continuing without pre-attached tables.")
 
     def _register_lookup_functions(self):
         """
@@ -599,7 +571,6 @@ class Duckrun:
             self.con.create_function("get_lakehouse_name", get_lakehouse_name)
             self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name)
             self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name)
-            print("✅ Registered lookup functions: get_workspace_name, get_lakehouse_name, get_workspace_id_from_name, get_lakehouse_id_from_name")
         except Exception as e:
             print(f"⚠️ Warning: Could not register lookup functions: {e}")
 
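For context on the `_register_lookup_functions` hunk above: duckrun registers these lookups as DuckDB scalar UDFs via `con.create_function`, with the caching and NULL-on-error semantics the README describes. A minimal, self-contained sketch of that pattern, assuming duckdb>=1.2.2; `resolve_workspace_name` and the sample GUID are hypothetical stand-ins for duckrun's Fabric REST API lookup:

```python
import duckdb
from duckdb.typing import VARCHAR
from typing import Optional

_FAKE_WORKSPACES = {"11111111-2222-3333-4444-555555555555": "Analytics"}
_cache: dict = {}

def resolve_workspace_name(workspace_id: str) -> str:
    # Hypothetical stand-in for the Fabric REST API call duckrun performs.
    return _FAKE_WORKSPACES[workspace_id]  # raises KeyError for unknown ids

def get_workspace_name(workspace_id: str) -> Optional[str]:
    # Cache results to avoid repeated API calls; store None on failure so
    # the SQL function yields NULL instead of raising an error.
    if workspace_id not in _cache:
        try:
            _cache[workspace_id] = resolve_workspace_name(workspace_id)
        except Exception:
            _cache[workspace_id] = None
    return _cache[workspace_id]

con = duckdb.connect()
con.create_function("get_workspace_name", get_workspace_name, [VARCHAR], VARCHAR)
print(con.sql("SELECT get_workspace_name('11111111-2222-3333-4444-555555555555')").fetchall())
```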
duckrun-0.2.13.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.11.dev0
+Version: 0.2.13
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -10,7 +10,7 @@ Project-URL: Issues, https://github.com/djouallah/duckrun/issues
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: duckdb>=1.2.0
+Requires-Dist: duckdb>=1.2.2
 Requires-Dist: deltalake<=0.18.2
 Requires-Dist: requests>=2.28.0
 Requires-Dist: obstore>=0.2.0
@@ -20,7 +20,7 @@ Dynamic: license-file
 
 <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
 
-A helper package for stuff that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy
+A helper package for working with Microsoft Fabric lakehouses - orchestration, SQL queries, and file management powered by DuckDB.
 
 ## Important Notes
 
@@ -28,7 +28,6 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
 - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
 - **Workspace names with spaces are fully supported!** ✅
 
-
 **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
 
 ## What It Does
@@ -40,12 +39,15 @@ It does orchestration, arbitrary SQL statements, and file manipulation. That's i
 ```bash
 pip install duckrun
 ```
-for local usage, Note: When running locally, your internet speed will be the main bottleneck.
+
+For local usage (requires Azure CLI or interactive browser auth):
 
 ```bash
 pip install duckrun[local]
 ```
 
+Note: When running locally, your internet speed will be the main bottleneck.
+
 ## Quick Start
 
 ### Simple Example for New Users
@@ -163,9 +165,6 @@ con.sql("""
     GROUP BY customer_id
 """).write.mode("overwrite").saveAsTable("customer_totals")
 
-# Append mode
-con.sql("SELECT * FROM new_orders").write.mode("append").saveAsTable("orders")
-
 # Schema evolution and partitioning (exact Spark API compatibility)
 con.sql("""
     SELECT
@@ -324,6 +323,73 @@ pipeline = [
 
 ## Advanced Features
 
+### SQL Lookup Functions
+
+Duckrun automatically registers helper functions that allow you to resolve workspace and lakehouse names from GUIDs directly in SQL queries. These are especially useful when working with storage logs or audit data that contains workspace/lakehouse IDs.
+
+**Available Functions:**
+
+```python
+con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
+
+# ID → Name lookups (most common use case)
+con.sql("""
+    SELECT
+        workspace_id,
+        get_workspace_name(workspace_id) as workspace_name,
+        lakehouse_id,
+        get_lakehouse_name(workspace_id, lakehouse_id) as lakehouse_name
+    FROM storage_logs
+""").show()
+
+# Name → ID lookups (reverse)
+con.sql("""
+    SELECT
+        workspace_name,
+        get_workspace_id_from_name(workspace_name) as workspace_id,
+        lakehouse_name,
+        get_lakehouse_id_from_name(workspace_id, lakehouse_name) as lakehouse_id
+    FROM configuration_table
+""").show()
+```
+
+**Function Reference:**
+
+- `get_workspace_name(workspace_id)` - Convert workspace GUID to display name
+- `get_lakehouse_name(workspace_id, lakehouse_id)` - Convert lakehouse GUID to display name
+- `get_workspace_id_from_name(workspace_name)` - Convert workspace name to GUID
+- `get_lakehouse_id_from_name(workspace_id, lakehouse_name)` - Convert lakehouse name to GUID
+
+**Features:**
+- ✅ **Automatic Caching**: Results are cached to avoid repeated API calls
+- ✅ **NULL on Error**: Returns `NULL` instead of errors for missing or inaccessible items
+- ✅ **Fabric API Integration**: Resolves names using Microsoft Fabric REST API
+- ✅ **Always Available**: Functions are automatically registered on connection
+
+**Example Use Case:**
+
+```python
+# Enrich OneLake storage logs with friendly names
+con = duckrun.connect("Analytics/Monitoring.lakehouse/dbo")
+
+result = con.sql("""
+    SELECT
+        workspace_id,
+        get_workspace_name(workspace_id) as workspace_name,
+        lakehouse_id,
+        get_lakehouse_name(workspace_id, lakehouse_id) as lakehouse_name,
+        operation_name,
+        COUNT(*) as operation_count,
+        SUM(bytes_transferred) as total_bytes
+    FROM onelake_storage_logs
+    WHERE log_date = CURRENT_DATE
+    GROUP BY ALL
+    ORDER BY workspace_name, lakehouse_name
+""").show()
+```
+
+This makes it easy to create human-readable reports from GUID-based log data!
+
 ### Schema Evolution & Partitioning
 
 Handle evolving schemas and optimize query performance with partitioning:
@@ -467,63 +533,6 @@ con = duckrun.connect(
 )
 ```
 
-## File Management API Reference
-
-### `copy(local_folder, remote_folder, file_extensions=None, overwrite=False)`
-
-Upload files from a local folder to OneLake Files section.
-
-**Parameters:**
-- `local_folder` (str): Path to local folder containing files to upload
-- `remote_folder` (str): **Required** target folder path in OneLake Files
-- `file_extensions` (list, optional): Filter by file extensions (e.g., `['.csv', '.parquet']`)
-- `overwrite` (bool, optional): Whether to overwrite existing files (default: False)
-
-**Returns:** `True` if all files uploaded successfully, `False` otherwise
-
-**Examples:**
-```python
-# Upload all files to a target folder
-con.copy("./data", "processed_data")
-
-# Upload only CSV and Parquet files
-con.copy("./reports", "monthly_reports", ['.csv', '.parquet'])
-
-# Upload with overwrite enabled
-con.copy("./backup", "daily_backup", overwrite=True)
-```
-
-### `download(remote_folder="", local_folder="./downloaded_files", file_extensions=None, overwrite=False)`
-
-Download files from OneLake Files section to a local folder.
-
-**Parameters:**
-- `remote_folder` (str, optional): Source folder path in OneLake Files (default: root)
-- `local_folder` (str, optional): Local destination folder (default: "./downloaded_files")
-- `file_extensions` (list, optional): Filter by file extensions (e.g., `['.csv', '.json']`)
-- `overwrite` (bool, optional): Whether to overwrite existing local files (default: False)
-
-**Returns:** `True` if all files downloaded successfully, `False` otherwise
-
-**Examples:**
-```python
-# Download all files from OneLake Files root
-con.download()
-
-# Download from specific folder
-con.download("processed_data", "./local_data")
-
-# Download only JSON files
-con.download("config", "./configs", ['.json'])
-```
-
-**Important Notes:**
-- Files are uploaded/downloaded to/from the **OneLake Files section**, not Delta Tables
-- The `remote_folder` parameter is **required** for uploads to prevent accidental uploads
-- Both methods default to `overwrite=False` for safety
-- Folder structure is preserved during upload/download operations
-- Progress is reported with file names, sizes, and upload/download status
-
 ## Complete Example
 
 ```python
duckrun-0.2.13.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
+duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
+duckrun/auth.py,sha256=dMqIzozgEQ5v7Uc3Mb_OoFZGmsAq0m-VOoYCVL7rehc,9281
+duckrun/core.py,sha256=C5nnL-MheBfJPcw-Jr8t14jsm2iwMF07cYm8g_AXtFQ,52303
+duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
+duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
+duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
+duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
+duckrun-0.2.13.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.13.dist-info/METADATA,sha256=0r-l8dWnd8KLBGj7cspK53eUdaDeUG-iHsa74rGBaCo,20766
+duckrun-0.2.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.13.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.13.dist-info/RECORD,,
duckrun-0.2.11.dev0.dist-info/RECORD DELETED
@@ -1,14 +0,0 @@
-duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
-duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
-duckrun/core.py,sha256=MlaHOOz9bg3-EDXR3C4pEcp75QsnEcbTOmvsMjomLKc,54279
-duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
-duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
-duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
-duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
-duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
-duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
-duckrun-0.2.11.dev0.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.11.dev0.dist-info/METADATA,sha256=NuW94zw7gizsp_cVPFktiC-I9aMP8O37vrtfMq7cmiI,20629
-duckrun-0.2.11.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.11.dev0.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.11.dev0.dist-info/RECORD,,