duckrun 0.2.13.dev0.tar.gz → 0.2.14.dev1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of duckrun has been flagged as potentially problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.13.dev0
+Version: 0.2.14.dev1
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -2,7 +2,7 @@
 
 from duckrun.core import Duckrun
 
-__version__ = "0.2.9.dev5"
+__version__ = "0.2.14.dev1"
 
 # Expose unified connect method at module level
 connect = Duckrun.connect
@@ -20,7 +20,6 @@ def get_token() -> Optional[str]:
     # Check if we already have a cached token
     token_env = os.environ.get("AZURE_STORAGE_TOKEN")
     if token_env and token_env != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-        print("✅ Using existing Azure Storage token")
         return token_env
 
     print("🔐 Starting Azure authentication...")
@@ -77,35 +76,6 @@ def _get_device_code_token() -> Optional[str]:
     return None
 
 
-def _is_databricks() -> bool:
-    """Check if we're running in a Databricks environment"""
-    # Databricks sets specific environment variables
-    return (
-        os.environ.get("DATABRICKS_RUNTIME_VERSION") is not None or
-        os.environ.get("DB_HOME") is not None or
-        "databricks" in os.environ.get("SPARK_HOME", "").lower()
-    )
-
-
-def _get_databricks_token() -> Optional[str]:
-    """Get token using DefaultAzureCredential for Databricks environments"""
-    try:
-        from azure.identity import DefaultAzureCredential
-
-        # DefaultAzureCredential will automatically use Databricks managed identity
-        credential = DefaultAzureCredential()
-        token_obj = credential.get_token("https://storage.azure.com/.default")
-
-        os.environ["AZURE_STORAGE_TOKEN"] = token_obj.token
-        print("✅ Databricks authentication successful!")
-        return token_obj.token
-
-    except Exception as e:
-        print(f"❌ Databricks authentication failed: {e}")
-        print("💡 Make sure your Databricks cluster has the required Azure permissions")
-        return None
-
-
 def _get_local_token() -> Optional[str]:
     """Get token using CLI first, then browser fallback for local environments"""
     # First try Azure CLI directly
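
The two Databricks helpers above are dropped in this release. For readers who relied on them: a minimal sketch of pre-seeding the cached token that get_token() checks for, reusing the same DefaultAzureCredential flow the deleted helper wrapped. The environment variable name and scope string come from the hunks above; treating this as a supported workaround is an assumption.

import os
from azure.identity import DefaultAzureCredential  # pip install azure-identity

# Scope string taken from the removed _get_databricks_token() above.
token = DefaultAzureCredential().get_token("https://storage.azure.com/.default").token

# get_token() returns a cached token immediately when this variable is set
# (see the earlier hunk), so no interactive flow is triggered.
os.environ["AZURE_STORAGE_TOKEN"] = token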
@@ -53,7 +53,8 @@ class Duckrun:
 
     def __init__(self, workspace_id: str, lakehouse_id: str, schema: str = "dbo",
                  sql_folder: Optional[str] = None, compaction_threshold: int = 10,
-                 scan_all_schemas: bool = False, storage_account: str = "onelake"):
+                 scan_all_schemas: bool = False, storage_account: str = "onelake",
+                 token_only: bool = False):
        # Store GUIDs for internal use
        self.workspace_id = workspace_id
        self.lakehouse_id = lakehouse_id
@@ -62,6 +63,7 @@ class Duckrun:
         self.compaction_threshold = compaction_threshold
         self.scan_all_schemas = scan_all_schemas
         self.storage_account = storage_account
+        self.token_only = token_only
 
         # Construct proper ABFSS URLs
         import re
@@ -93,12 +95,19 @@ class Duckrun:
         except ImportError:
             pass # Not in Colab, use default transport
 
-        self._attach_lakehouse()
-        self._register_lookup_functions()
+        # Only attach lakehouse and register functions if not token_only mode
+        if not token_only:
+            self._attach_lakehouse()
+            self._register_lookup_functions()
+        else:
+            # In token_only mode, just create the secret for authentication
+            self._create_onelake_secret()
+            print("✓ Token authenticated (fast mode - tables not listed)")
 
     @classmethod
     def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
-                compaction_threshold: int = 100, storage_account: str = "onelake"):
+                compaction_threshold: int = 100, storage_account: str = "onelake",
+                token_only: bool = False):
         """
         Create and connect to lakehouse or workspace.
 
@@ -112,6 +121,7 @@ class Duckrun:
             sql_folder: Optional path or URL to SQL files folder
             compaction_threshold: File count threshold for compaction
             storage_account: Storage account name (default: "onelake")
+            token_only: If True, only authenticate without listing tables (faster connection)
 
         Examples:
             # Workspace management only (supports spaces in names)
@@ -125,6 +135,9 @@ class Duckrun:
             dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse") # defaults to dbo schema
             dr = Duckrun.connect("workspace/lakehouse.lakehouse", storage_account="xxx-onelake") # custom storage
 
+            # Fast connection without table listing (token only)
+            dr = Duckrun.connect("workspace/lakehouse.lakehouse", token_only=True)
+
         Note:
             Internally resolves friendly names (with spaces) to GUIDs and constructs proper ABFSS URLs:
             "My Workspace/My Lakehouse.lakehouse/schema" becomes
@@ -169,7 +182,7 @@ class Duckrun:
         # Resolve friendly names to GUIDs and construct proper ABFSS path
         workspace_id, lakehouse_id = cls._resolve_names_to_guids(workspace_name, lakehouse_name)
 
-        return cls(workspace_id, lakehouse_id, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account)
+        return cls(workspace_id, lakehouse_id, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account, token_only)
 
     @classmethod
     def _resolve_names_to_guids(cls, workspace_name: str, lakehouse_name: str) -> tuple[str, str]:
@@ -401,15 +414,8 @@ class Duckrun:
             if not tables:
                 return
 
-            # Group tables by schema for display
-            schema_tables = {}
-            for schema_name, table_name in tables:
-                if schema_name not in schema_tables:
-                    schema_tables[schema_name] = []
-                schema_tables[schema_name].append(table_name)
-
-            attached_count = 0
-            skipped_tables = []
+            # Collect table names for display
+            table_names = []
 
             for schema_name, table_name in tables:
                 try:
@@ -417,18 +423,22 @@ class Duckrun:
                         # Create proper schema.table structure in DuckDB
                         self.con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
                         view_name = f"{schema_name}.{table_name}"
+                        table_names.append(view_name)
                     else:
                         # Single schema mode - use just table name
                         view_name = table_name
+                        table_names.append(table_name)
 
                     self.con.sql(f"""
                         CREATE OR REPLACE VIEW {view_name}
                         AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
                     """)
-                    attached_count += 1
                 except Exception as e:
-                    skipped_tables.append(f"{schema_name}.{table_name}")
                     continue
+
+            # Print discovered tables as comma-separated list
+            if table_names:
+                print(", ".join(table_names))
 
         except Exception as e:
             print(f"❌ Error attaching lakehouse: {e}")
570
580
 
571
581
  # Register functions in DuckDB
572
582
  try:
573
- self.con.create_function("get_workspace_name", get_workspace_name)
574
- self.con.create_function("get_lakehouse_name", get_lakehouse_name)
575
- self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name)
576
- self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name)
583
+ self.con.create_function("get_workspace_name", get_workspace_name, null_handling='SPECIAL')
584
+ self.con.create_function("get_lakehouse_name", get_lakehouse_name, null_handling='SPECIAL')
585
+ self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name, null_handling='SPECIAL')
586
+ self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name, null_handling='SPECIAL')
577
587
  except Exception as e:
578
588
  print(f"⚠️ Warning: Could not register lookup functions: {e}")
579
589
 
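
Background on the change above: by default DuckDB skips calling a scalar Python UDF when any argument is NULL and simply returns NULL; null_handling='SPECIAL' passes the NULL through, so the function sees None and can decide the result itself. A minimal standalone illustration (the function and names are made up, not duckrun's):

import duckdb
from duckdb.typing import VARCHAR

def describe(name):
    # With special null handling, NULL arrives here as None instead of
    # the call being skipped with a NULL result.
    return "unknown" if name is None else name.upper()

con = duckdb.connect()
con.create_function("describe", describe, [VARCHAR], VARCHAR, null_handling="special")
print(con.sql("SELECT describe(NULL), describe('sales')").fetchall())
# -> [('unknown', 'SALES')]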
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.13.dev0
+Version: 0.2.14.dev1
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "duckrun"
-version = "0.2.13.dev0"
+version = "0.2.14.dev1"
 description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
 readme = "README.md"
 license = {text = "MIT"}