duckrun 0.2.0-py3-none-any.whl → 0.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -195,21 +195,22 @@ class Duckrun:
 
     def __init__(self, workspace: str, lakehouse_name: str, schema: str = "dbo",
                  sql_folder: Optional[str] = None, compaction_threshold: int = 10,
-                 scan_all_schemas: bool = False):
+                 scan_all_schemas: bool = False, storage_account: str = "onelake"):
         self.workspace = workspace
         self.lakehouse_name = lakehouse_name
         self.schema = schema
         self.sql_folder = sql_folder.strip() if sql_folder else None
         self.compaction_threshold = compaction_threshold
         self.scan_all_schemas = scan_all_schemas
-        self.table_base_url = f'abfss://{workspace}@onelake.dfs.fabric.microsoft.com/{lakehouse_name}.Lakehouse/Tables/'
+        self.storage_account = storage_account
+        self.table_base_url = f'abfss://{workspace}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_name}.Lakehouse/Tables/'
         self.con = duckdb.connect()
         self.con.sql("SET preserve_insertion_order = false")
         self._attach_lakehouse()
 
     @classmethod
     def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
-                compaction_threshold: int = 100):
+                compaction_threshold: int = 100, storage_account: str = "onelake"):
         """
         Create and connect to lakehouse.
 
@@ -219,11 +220,13 @@ class Duckrun:
             connection_string: OneLake path "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
             sql_folder: Optional path or URL to SQL files folder
             compaction_threshold: File count threshold for compaction
+            storage_account: Storage account name (default: "onelake")
 
         Examples:
             dr = Duckrun.connect("ws/lh.lakehouse/schema", sql_folder="./sql")
             dr = Duckrun.connect("ws/lh.lakehouse/schema")  # no SQL folder
             dr = Duckrun.connect("ws/lh.lakehouse")  # defaults to dbo schema
+            dr = Duckrun.connect("ws/lh.lakehouse", storage_account="xxx-onelake")  # custom storage
         """
         print("Connecting to Lakehouse...")
 
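The `storage_account` argument added above defaults to `"onelake"`, so existing callers are unaffected; passing a different value only swaps the host segment of the generated `abfss://` URLs. A minimal usage sketch, assuming `Duckrun` is imported from `duckrun.core` and using placeholder workspace, lakehouse, and account names:

```python
from duckrun.core import Duckrun

# Default host: abfss://ws@onelake.dfs.fabric.microsoft.com/lh.Lakehouse/Tables/
dr = Duckrun.connect("ws/lh.lakehouse/dbo", sql_folder="./sql")

# Custom storage account (placeholder name "my-onelake"):
# abfss://ws@my-onelake.dfs.fabric.microsoft.com/lh.Lakehouse/Tables/
dr_custom = Duckrun.connect("ws/lh.lakehouse/dbo", storage_account="my-onelake")
```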
@@ -261,7 +264,7 @@ class Duckrun:
                 "  connect('workspace/lakehouse.lakehouse')  # defaults to dbo"
             )
 
-        return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold, scan_all_schemas)
+        return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account)
 
     def _get_storage_token(self):
         return os.environ.get("AZURE_STORAGE_TOKEN", "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE")
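`_get_storage_token` only reads `AZURE_STORAGE_TOKEN` and otherwise returns a placeholder, so callers can provision the token themselves before connecting. A sketch of one way to do that, assuming an Azure AD credential flow and the standard Storage scope (both are assumptions, not part of this diff):

```python
import os
from azure.identity import DefaultAzureCredential  # assumed credential flow, not shown in this diff

# Acquire a bearer token for Azure Storage / OneLake and expose it where
# duckrun's _get_storage_token() looks for it.
token = DefaultAzureCredential().get_token("https://storage.azure.com/.default").token
os.environ["AZURE_STORAGE_TOKEN"] = token
```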
@@ -295,7 +298,7 @@ class Duckrun:
         token = token_obj.token
         os.environ["AZURE_STORAGE_TOKEN"] = token
 
-        url = f"abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/"
+        url = f"abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/"
         store = AzureStore.from_url(url, bearer_token=token)
 
         base_path = f"{self.lakehouse_name}.Lakehouse/Tables/"
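The token is then handed to `AzureStore.from_url`, now pointed at the configurable host. A standalone sketch of the URL construction, assuming `AzureStore` comes from the `obstore` package (the import is outside this hunk) and using placeholder names:

```python
import os
from obstore.store import AzureStore  # assumed import path for AzureStore

workspace, lakehouse, storage_account = "ws", "lh", "my-onelake"  # placeholders
token = os.environ["AZURE_STORAGE_TOKEN"]  # set beforehand, as in the sketch above

url = f"abfss://{workspace}@{storage_account}.dfs.fabric.microsoft.com/"
store = AzureStore.from_url(url, bearer_token=token)
base_path = f"{lakehouse}.Lakehouse/Tables/"  # table discovery happens under this prefix
```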
@@ -354,7 +357,13 @@ class Duckrun:
         attached_count = 0
         for schema_name, table_name in tables:
             try:
-                view_name = f"{schema_name}_{table_name}" if self.scan_all_schemas else table_name
+                if self.scan_all_schemas:
+                    # Create proper schema.table structure in DuckDB
+                    self.con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
+                    view_name = f"{schema_name}.{table_name}"
+                else:
+                    # Single schema mode - use just table name
+                    view_name = table_name
 
                 self.con.sql(f"""
                     CREATE OR REPLACE VIEW {view_name}
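With `scan_all_schemas=True`, 0.2.2 creates one DuckDB schema per lakehouse schema and registers views under dotted names instead of the old `schema_table` prefix. A minimal, self-contained sketch of that effect (dummy view bodies; the real views read the Delta tables, whose scan expression lies outside this hunk):

```python
import duckdb

con = duckdb.connect()

# Mirror of the new attach logic: one DuckDB schema per lakehouse schema,
# views registered under dotted names (schema and table names are placeholders).
for schema_name, table_name in [("dbo", "results"), ("aemo", "calendar")]:
    con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
    con.sql(f"CREATE OR REPLACE VIEW {schema_name}.{table_name} AS SELECT 1 AS placeholder")

con.sql("SELECT * FROM aemo.calendar").show()  # dotted access now resolves
```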
@@ -371,7 +380,7 @@ class Duckrun:
             print(f"{'='*60}\n")
 
             if self.scan_all_schemas:
-                print(f"\n💡 Note: Tables are prefixed with schema (e.g., dbo_tablename)")
+                print(f"\n💡 Note: Tables use schema.table format (e.g., aemo.calendar, dbo.results)")
                 print(f"   Default schema for operations: {self.schema}\n")
 
         except Exception as e:
@@ -412,7 +421,8 @@ class Duckrun:
         full_params = {
             'ws': self.workspace,
             'lh': self.lakehouse_name,
-            'schema': self.schema
+            'schema': self.schema,
+            'storage_account': self.storage_account
         }
         if params:
             full_params.update(params)
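`full_params` now also carries `storage_account`, and user-supplied `params` are merged in afterwards, so they can override any of the defaults. A hypothetical illustration of that merge order (the placeholder syntax used when rendering SQL files is not part of this hunk):

```python
# Defaults supplied by duckrun (values are placeholders):
full_params = {"ws": "ws", "lh": "lh", "schema": "dbo", "storage_account": "onelake"}

# Per-task params win on key collisions and may add new keys:
full_params.update({"schema": "aemo", "run_date": "2024-01-01"})
print(full_params["schema"])           # "aemo"
print(full_params["storage_account"])  # "onelake"
```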
@@ -661,7 +671,7 @@ class Duckrun:
         os.environ["AZURE_STORAGE_TOKEN"] = token
 
         # Setup OneLake Files URL (not Tables)
-        files_base_url = f'abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
+        files_base_url = f'abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
         store = AzureStore.from_url(files_base_url, bearer_token=token)
 
         # Collect files to upload
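The Files endpoints follow the same pattern as Tables: only the trailing path segment differs, and both now honour `storage_account`. A small sketch with placeholder names:

```python
workspace, lakehouse, storage_account = "ws", "lh", "my-onelake"  # placeholders
host = f"{storage_account}.dfs.fabric.microsoft.com"

tables_url = f"abfss://{workspace}@{host}/{lakehouse}.Lakehouse/Tables/"
files_url = f"abfss://{workspace}@{host}/{lakehouse}.Lakehouse/Files/"
```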
@@ -768,7 +778,7 @@ class Duckrun:
         os.environ["AZURE_STORAGE_TOKEN"] = token
 
         # Setup OneLake Files URL (not Tables)
-        files_base_url = f'abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
+        files_base_url = f'abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
         store = AzureStore.from_url(files_base_url, bearer_token=token)
 
         # Create local directory
duckrun-0.2.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.0
+Version: 0.2.2
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
duckrun-0.2.2.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
+duckrun/core.py,sha256=VqfTL4fFE-XUXXsDy9VRFEPSQ21dfrkCGH_06C9CLNg,39416
+duckrun-0.2.2.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.2.dist-info/METADATA,sha256=J_Vw7Ps5afPRkofvyo-r7wufizjS431XgXpHdwaKwyo,18339
+duckrun-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.2.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.2.dist-info/RECORD,,
duckrun-0.2.0.dist-info/RECORD REMOVED
@@ -1,7 +0,0 @@
-duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
-duckrun/core.py,sha256=XGw8MaXZdlnrZXHWlMjXDof4AfRC9RYrU_rZQTNOnOc,38677
-duckrun-0.2.0.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.0.dist-info/METADATA,sha256=AjMXnfXohPk1SChQULq8CCBjkFH7oTzqimxnLNjbJuc,18339
-duckrun-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.0.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.0.dist-info/RECORD,,