duckrun-0.2.10.dev0-py3-none-any.whl → duckrun-0.2.11-py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of duckrun might be problematic. Click here for more details.
- duckrun/core.py +13 -41
- duckrun-0.2.11.dist-info/METADATA +1367 -0
- {duckrun-0.2.10.dev0.dist-info → duckrun-0.2.11.dist-info}/RECORD +6 -6
- duckrun-0.2.10.dev0.dist-info/METADATA +0 -653
- {duckrun-0.2.10.dev0.dist-info → duckrun-0.2.11.dist-info}/WHEEL +0 -0
- {duckrun-0.2.10.dev0.dist-info → duckrun-0.2.11.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.2.10.dev0.dist-info → duckrun-0.2.11.dist-info}/top_level.txt +0 -0
duckrun/core.py
CHANGED
|
@@ -94,6 +94,7 @@ class Duckrun:
|
|
|
94
94
|
pass # Not in Colab, use default transport
|
|
95
95
|
|
|
96
96
|
self._attach_lakehouse()
|
|
97
|
+
self._register_lookup_functions()
|
|
97
98
|
|
|
98
99
|
@classmethod
|
|
99
100
|
def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
|
|
@@ -132,11 +133,8 @@ class Duckrun:
|
|
|
132
133
|
|
|
133
134
|
# Check if it's a workspace-only connection (no "/" means workspace name only)
|
|
134
135
|
if "/" not in connection_string:
|
|
135
|
-
print(f"Connecting to workspace '{connection_string}' for management operations...")
|
|
136
136
|
return WorkspaceConnection(connection_string)
|
|
137
137
|
|
|
138
|
-
print("Connecting to Lakehouse...")
|
|
139
|
-
|
|
140
138
|
scan_all_schemas = False
|
|
141
139
|
|
|
142
140
|
# Parse lakehouse connection string: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
|
|
@@ -194,17 +192,14 @@ class Duckrun:
|
|
|
194
192
|
guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
|
|
195
193
|
|
|
196
194
|
if guid_pattern.match(workspace_name) and guid_pattern.match(lakehouse_name):
|
|
197
|
-
print(f"✅ Names are already GUIDs: workspace={workspace_name}, lakehouse={lakehouse_name}")
|
|
198
195
|
return workspace_name, lakehouse_name
|
|
199
196
|
|
|
200
197
|
# Optimization: If workspace name has no spaces, use both names directly (old behavior)
|
|
201
198
|
# Note: Lakehouse names cannot contain spaces in Microsoft Fabric, only workspace names can
|
|
202
199
|
if " " not in workspace_name:
|
|
203
|
-
print(f"✅ Using names directly (workspace has no spaces): workspace={workspace_name}, lakehouse={lakehouse_name}")
|
|
204
200
|
return workspace_name, lakehouse_name
|
|
205
201
|
|
|
206
202
|
# Workspace name contains spaces - need to resolve both to GUIDs for proper ABFSS URLs
|
|
207
|
-
print(f"🔍 Resolving '{workspace_name}' workspace and '{lakehouse_name}' lakehouse to GUIDs (workspace has spaces)...")
|
|
208
203
|
|
|
209
204
|
try:
|
|
210
205
|
# Get authentication token using enhanced auth system
|
|
@@ -241,7 +236,6 @@ class Duckrun:
|
|
|
241
236
|
if not lakehouse_id:
|
|
242
237
|
raise ValueError(f"Lakehouse '{lakehouse_name}' not found in workspace '{workspace_name}'")
|
|
243
238
|
|
|
244
|
-
print(f"✅ Resolved: {workspace_name} → {workspace_id}, {lakehouse_name} → {lakehouse_id}")
|
|
245
239
|
return workspace_id, lakehouse_id
|
|
246
240
|
|
|
247
241
|
except Exception as e:
|
|
@@ -387,7 +381,6 @@ class Duckrun:
|
|
|
387
381
|
tables_found.append((schema_name, table_name))
|
|
388
382
|
else:
|
|
389
383
|
# Scan specific schema only
|
|
390
|
-
print(f"🔍 Discovering tables in schema '{self.schema}'...")
|
|
391
384
|
schema_path = f"{base_path}{self.schema}/"
|
|
392
385
|
result = obs.list_with_delimiter(store, prefix=schema_path)
|
|
393
386
|
|
|
@@ -406,10 +399,6 @@ class Duckrun:
|
|
|
406
399
|
tables = self._discover_tables_fast()
|
|
407
400
|
|
|
408
401
|
if not tables:
|
|
409
|
-
if self.scan_all_schemas:
|
|
410
|
-
print(f"No Delta tables found in {self.lakehouse_name}/Tables/")
|
|
411
|
-
else:
|
|
412
|
-
print(f"No Delta tables found in {self.lakehouse_name}/Tables/{self.schema}/")
|
|
413
402
|
return
|
|
414
403
|
|
|
415
404
|
# Group tables by schema for display
|
|
@@ -419,12 +408,6 @@ class Duckrun:
|
|
|
419
408
|
schema_tables[schema_name] = []
|
|
420
409
|
schema_tables[schema_name].append(table_name)
|
|
421
410
|
|
|
422
|
-
# Display tables by schema
|
|
423
|
-
print(f"\n📊 Found {len(tables)} tables:")
|
|
424
|
-
for schema_name in sorted(schema_tables.keys()):
|
|
425
|
-
table_list = sorted(schema_tables[schema_name])
|
|
426
|
-
print(f" {schema_name}: {', '.join(table_list)}")
|
|
427
|
-
|
|
428
411
|
attached_count = 0
|
|
429
412
|
skipped_tables = []
|
|
430
413
|
|
|
@@ -446,19 +429,9 @@ class Duckrun:
|
|
|
446
429
|
except Exception as e:
|
|
447
430
|
skipped_tables.append(f"{schema_name}.{table_name}")
|
|
448
431
|
continue
|
|
449
|
-
|
|
450
|
-
print(f"\n{'='*60}")
|
|
451
|
-
print(f"✅ Ready - {attached_count}/{len(tables)} tables available")
|
|
452
|
-
if skipped_tables:
|
|
453
|
-
print(f"⚠ Skipped {len(skipped_tables)} tables: {', '.join(skipped_tables[:3])}{'...' if len(skipped_tables) > 3 else ''}")
|
|
454
|
-
print(f"{'='*60}\n")
|
|
455
432
|
|
|
456
433
|
except Exception as e:
|
|
457
434
|
print(f"❌ Error attaching lakehouse: {e}")
|
|
458
|
-
print("Continuing without pre-attached tables.")
|
|
459
|
-
|
|
460
|
-
# Register lookup functions as DuckDB UDFs
|
|
461
|
-
self._register_lookup_functions()
|
|
462
435
|
|
|
463
436
|
def _register_lookup_functions(self):
|
|
464
437
|
"""
|
|
@@ -488,7 +461,7 @@ class Duckrun:
|
|
|
488
461
|
from .auth import get_fabric_api_token
|
|
489
462
|
token = get_fabric_api_token()
|
|
490
463
|
if not token:
|
|
491
|
-
return
|
|
464
|
+
return None
|
|
492
465
|
|
|
493
466
|
url = "https://api.fabric.microsoft.com/v1/workspaces"
|
|
494
467
|
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
|
|
@@ -503,9 +476,9 @@ class Duckrun:
|
|
|
503
476
|
self._name_cache['workspace_name_to_id'][name] = workspace_id
|
|
504
477
|
return name
|
|
505
478
|
|
|
506
|
-
return
|
|
479
|
+
return None
|
|
507
480
|
except Exception as e:
|
|
508
|
-
return
|
|
481
|
+
return None
|
|
509
482
|
|
|
510
483
|
def get_lakehouse_name(workspace_id: str, lakehouse_id: str) -> str:
|
|
511
484
|
"""Get lakehouse display name from workspace ID and lakehouse ID (GUIDs)"""
|
|
@@ -517,7 +490,7 @@ class Duckrun:
|
|
|
517
490
|
from .auth import get_fabric_api_token
|
|
518
491
|
token = get_fabric_api_token()
|
|
519
492
|
if not token:
|
|
520
|
-
return
|
|
493
|
+
return None
|
|
521
494
|
|
|
522
495
|
url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
|
|
523
496
|
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
|
|
@@ -533,9 +506,9 @@ class Duckrun:
|
|
|
533
506
|
self._name_cache['lakehouse_name_to_id'][lh_cache_key] = lakehouse_id
|
|
534
507
|
return name
|
|
535
508
|
|
|
536
|
-
return
|
|
509
|
+
return None
|
|
537
510
|
except Exception as e:
|
|
538
|
-
return
|
|
511
|
+
return None
|
|
539
512
|
|
|
540
513
|
def get_workspace_id_from_name(workspace_name: str) -> str:
|
|
541
514
|
"""Get workspace ID (GUID) from workspace display name"""
|
|
@@ -546,7 +519,7 @@ class Duckrun:
|
|
|
546
519
|
from .auth import get_fabric_api_token
|
|
547
520
|
token = get_fabric_api_token()
|
|
548
521
|
if not token:
|
|
549
|
-
return
|
|
522
|
+
return None
|
|
550
523
|
|
|
551
524
|
url = "https://api.fabric.microsoft.com/v1/workspaces"
|
|
552
525
|
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
|
|
@@ -561,9 +534,9 @@ class Duckrun:
|
|
|
561
534
|
self._name_cache['workspace_id_to_name'][workspace_id] = workspace_name
|
|
562
535
|
return workspace_id
|
|
563
536
|
|
|
564
|
-
return
|
|
537
|
+
return None
|
|
565
538
|
except Exception as e:
|
|
566
|
-
return
|
|
539
|
+
return None
|
|
567
540
|
|
|
568
541
|
def get_lakehouse_id_from_name(workspace_id: str, lakehouse_name: str) -> str:
|
|
569
542
|
"""Get lakehouse ID (GUID) from workspace ID and lakehouse display name"""
|
|
@@ -575,7 +548,7 @@ class Duckrun:
|
|
|
575
548
|
from .auth import get_fabric_api_token
|
|
576
549
|
token = get_fabric_api_token()
|
|
577
550
|
if not token:
|
|
578
|
-
return
|
|
551
|
+
return None
|
|
579
552
|
|
|
580
553
|
url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
|
|
581
554
|
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
|
|
@@ -591,9 +564,9 @@ class Duckrun:
|
|
|
591
564
|
self._name_cache['lakehouse_id_to_name'][id_cache_key] = lakehouse_name
|
|
592
565
|
return lakehouse_id
|
|
593
566
|
|
|
594
|
-
return
|
|
567
|
+
return None
|
|
595
568
|
except Exception as e:
|
|
596
|
-
return
|
|
569
|
+
return None
|
|
597
570
|
|
|
598
571
|
# Register functions in DuckDB
|
|
599
572
|
try:
|
|
@@ -601,7 +574,6 @@ class Duckrun:
|
|
|
601
574
|
self.con.create_function("get_lakehouse_name", get_lakehouse_name)
|
|
602
575
|
self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name)
|
|
603
576
|
self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name)
|
|
604
|
-
print("✅ Registered lookup functions: get_workspace_name, get_lakehouse_name, get_workspace_id_from_name, get_lakehouse_id_from_name")
|
|
605
577
|
except Exception as e:
|
|
606
578
|
print(f"⚠️ Warning: Could not register lookup functions: {e}")
|
|
607
579
|
|