duckrun 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- duckrun/core.py +20 -10
- {duckrun-0.2.0.dist-info → duckrun-0.2.2.dist-info}/METADATA +1 -1
- duckrun-0.2.2.dist-info/RECORD +7 -0
- duckrun-0.2.0.dist-info/RECORD +0 -7
- {duckrun-0.2.0.dist-info → duckrun-0.2.2.dist-info}/WHEEL +0 -0
- {duckrun-0.2.0.dist-info → duckrun-0.2.2.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.2.0.dist-info → duckrun-0.2.2.dist-info}/top_level.txt +0 -0
duckrun/core.py
CHANGED
@@ -195,21 +195,22 @@ class Duckrun:
|
|
195
195
|
|
196
196
|
def __init__(self, workspace: str, lakehouse_name: str, schema: str = "dbo",
|
197
197
|
sql_folder: Optional[str] = None, compaction_threshold: int = 10,
|
198
|
-
scan_all_schemas: bool = False):
|
198
|
+
scan_all_schemas: bool = False, storage_account: str = "onelake"):
|
199
199
|
self.workspace = workspace
|
200
200
|
self.lakehouse_name = lakehouse_name
|
201
201
|
self.schema = schema
|
202
202
|
self.sql_folder = sql_folder.strip() if sql_folder else None
|
203
203
|
self.compaction_threshold = compaction_threshold
|
204
204
|
self.scan_all_schemas = scan_all_schemas
|
205
|
-
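self.table_base_url = f'abfss://{workspace}@onelake.dfs.fabric.microsoft.com/{lakehouse_name}.Lakehouse/Tables/'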
|
205
|
+
self.storage_account = storage_account
|
206
|
+
self.table_base_url = f'abfss://{workspace}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_name}.Lakehouse/Tables/'
|
206
207
|
self.con = duckdb.connect()
|
207
208
|
self.con.sql("SET preserve_insertion_order = false")
|
208
209
|
self._attach_lakehouse()
|
209
210
|
|
210
211
|
@classmethod
|
211
212
|
def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
|
212
|
-
compaction_threshold: int = 100):
|
213
|
+
compaction_threshold: int = 100, storage_account: str = "onelake"):
|
213
214
|
"""
|
214
215
|
Create and connect to lakehouse.
|
215
216
|
|
@@ -219,11 +220,13 @@ class Duckrun:
|
|
219
220
|
connection_string: OneLake path "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
|
220
221
|
sql_folder: Optional path or URL to SQL files folder
|
221
222
|
compaction_threshold: File count threshold for compaction
|
223
|
+
storage_account: Storage account name (default: "onelake")
|
222
224
|
|
223
225
|
Examples:
|
224
226
|
dr = Duckrun.connect("ws/lh.lakehouse/schema", sql_folder="./sql")
|
225
227
|
dr = Duckrun.connect("ws/lh.lakehouse/schema") # no SQL folder
|
226
228
|
dr = Duckrun.connect("ws/lh.lakehouse") # defaults to dbo schema
|
229
|
+
dr = Duckrun.connect("ws/lh.lakehouse", storage_account="xxx-onelake") # custom storage
|
227
230
|
"""
|
228
231
|
print("Connecting to Lakehouse...")
|
229
232
|
|
@@ -261,7 +264,7 @@ class Duckrun:
|
|
261
264
|
" connect('workspace/lakehouse.lakehouse') # defaults to dbo"
|
262
265
|
)
|
263
266
|
|
264
|
-
return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold, scan_all_schemas)
|
267
|
+
return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account)
|
265
268
|
|
266
269
|
def _get_storage_token(self):
|
267
270
|
return os.environ.get("AZURE_STORAGE_TOKEN", "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE")
|
@@ -295,7 +298,7 @@ class Duckrun:
|
|
295
298
|
token = token_obj.token
|
296
299
|
os.environ["AZURE_STORAGE_TOKEN"] = token
|
297
300
|
|
298
|
-
url = f"abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/"
|
301
|
+
url = f"abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/"
|
299
302
|
store = AzureStore.from_url(url, bearer_token=token)
|
300
303
|
|
301
304
|
base_path = f"{self.lakehouse_name}.Lakehouse/Tables/"
|
@@ -354,7 +357,13 @@ class Duckrun:
|
|
354
357
|
attached_count = 0
|
355
358
|
for schema_name, table_name in tables:
|
356
359
|
try:
|
357
|
-
|
360
|
+
if self.scan_all_schemas:
|
361
|
+
# Create proper schema.table structure in DuckDB
|
362
|
+
self.con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
|
363
|
+
view_name = f"{schema_name}.{table_name}"
|
364
|
+
else:
|
365
|
+
# Single schema mode - use just table name
|
366
|
+
view_name = table_name
|
358
367
|
|
359
368
|
self.con.sql(f"""
|
360
369
|
CREATE OR REPLACE VIEW {view_name}
|
@@ -371,7 +380,7 @@ class Duckrun:
|
|
371
380
|
print(f"{'='*60}\n")
|
372
381
|
|
373
382
|
if self.scan_all_schemas:
|
374
|
-
print(f"\n💡 Note: Tables
|
383
|
+
print(f"\n💡 Note: Tables use schema.table format (e.g., aemo.calendar, dbo.results)")
|
375
384
|
print(f" Default schema for operations: {self.schema}\n")
|
376
385
|
|
377
386
|
except Exception as e:
|
@@ -412,7 +421,8 @@ class Duckrun:
|
|
412
421
|
full_params = {
|
413
422
|
'ws': self.workspace,
|
414
423
|
'lh': self.lakehouse_name,
|
415
|
-
'schema': self.schema
|
424
|
+
'schema': self.schema,
|
425
|
+
'storage_account': self.storage_account
|
416
426
|
}
|
417
427
|
if params:
|
418
428
|
full_params.update(params)
|
@@ -661,7 +671,7 @@ class Duckrun:
|
|
661
671
|
os.environ["AZURE_STORAGE_TOKEN"] = token
|
662
672
|
|
663
673
|
# Setup OneLake Files URL (not Tables)
|
664
|
-
files_base_url = f'abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
|
674
|
+
files_base_url = f'abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
|
665
675
|
store = AzureStore.from_url(files_base_url, bearer_token=token)
|
666
676
|
|
667
677
|
# Collect files to upload
|
@@ -768,7 +778,7 @@ class Duckrun:
|
|
768
778
|
os.environ["AZURE_STORAGE_TOKEN"] = token
|
769
779
|
|
770
780
|
# Setup OneLake Files URL (not Tables)
|
771
|
-
files_base_url = f'abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
|
781
|
+
files_base_url = f'abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Files/'
|
772
782
|
store = AzureStore.from_url(files_base_url, bearer_token=token)
|
773
783
|
|
774
784
|
# Create local directory
|
@@ -0,0 +1,7 @@
|
|
1
|
+
duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
|
2
|
+
duckrun/core.py,sha256=VqfTL4fFE-XUXXsDy9VRFEPSQ21dfrkCGH_06C9CLNg,39416
|
3
|
+
duckrun-0.2.2.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
|
4
|
+
duckrun-0.2.2.dist-info/METADATA,sha256=J_Vw7Ps5afPRkofvyo-r7wufizjS431XgXpHdwaKwyo,18339
|
5
|
+
duckrun-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
+
duckrun-0.2.2.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
|
7
|
+
duckrun-0.2.2.dist-info/RECORD,,
|
duckrun-0.2.0.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
|
2
|
-
duckrun/core.py,sha256=XGw8MaXZdlnrZXHWlMjXDof4AfRC9RYrU_rZQTNOnOc,38677
|
3
|
-
duckrun-0.2.0.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
|
4
|
-
duckrun-0.2.0.dist-info/METADATA,sha256=AjMXnfXohPk1SChQULq8CCBjkFH7oTzqimxnLNjbJuc,18339
|
5
|
-
duckrun-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
-
duckrun-0.2.0.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
|
7
|
-
duckrun-0.2.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|