duckrun 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckrun/core.py +17 -10
- {duckrun-0.1.3.dist-info → duckrun-0.1.4.dist-info}/METADATA +13 -9
- duckrun-0.1.4.dist-info/RECORD +7 -0
- duckrun-0.1.3.dist-info/RECORD +0 -7
- {duckrun-0.1.3.dist-info → duckrun-0.1.4.dist-info}/WHEEL +0 -0
- {duckrun-0.1.3.dist-info → duckrun-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.1.3.dist-info → duckrun-0.1.4.dist-info}/top_level.txt +0 -0
duckrun/core.py
CHANGED
@@ -16,23 +16,21 @@ class Duckrun:
|
|
16
16
|
SQL: ('table_name', 'mode', {params})
|
17
17
|
|
18
18
|
Usage:
|
19
|
+
# For pipelines:
|
19
20
|
dr = Duckrun.connect(workspace, lakehouse, schema, sql_folder)
|
20
|
-
|
21
|
-
pipeline = [
|
22
|
-
('download', (urls, paths, depth)),
|
23
|
-
('staging', 'overwrite', {'run_date': '2024-06-01'}),
|
24
|
-
('transform', 'append')
|
25
|
-
]
|
26
|
-
|
27
21
|
dr.run(pipeline)
|
22
|
+
|
23
|
+
# For data exploration only:
|
24
|
+
dr = Duckrun.connect(workspace, lakehouse, schema)
|
25
|
+
dr.sql("SELECT * FROM table").show()
|
28
26
|
"""
|
29
27
|
|
30
28
|
def __init__(self, workspace: str, lakehouse_name: str, schema: str,
|
31
|
-
sql_folder: str, compaction_threshold: int = 10):
|
29
|
+
sql_folder: Optional[str] = None, compaction_threshold: int = 10):
|
32
30
|
self.workspace = workspace
|
33
31
|
self.lakehouse_name = lakehouse_name
|
34
32
|
self.schema = schema
|
35
|
-
self.sql_folder = sql_folder.strip()
|
33
|
+
self.sql_folder = sql_folder.strip() if sql_folder else None
|
36
34
|
self.compaction_threshold = compaction_threshold
|
37
35
|
self.table_base_url = f'abfss://{workspace}@onelake.dfs.fabric.microsoft.com/{lakehouse_name}.Lakehouse/Tables/'
|
38
36
|
self.con = duckdb.connect()
|
@@ -41,7 +39,7 @@ class Duckrun:
|
|
41
39
|
|
42
40
|
@classmethod
|
43
41
|
def connect(cls, workspace: str, lakehouse_name: str, schema: str,
|
44
|
-
sql_folder: str, compaction_threshold: int =
|
42
|
+
sql_folder: Optional[str] = None, compaction_threshold: int = 100):
|
45
43
|
"""Create and connect to lakehouse"""
|
46
44
|
print("Connecting to Lakehouse...")
|
47
45
|
return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold)
|
@@ -114,6 +112,9 @@ class Duckrun:
|
|
114
112
|
return name.split('__', 1)[0] if '__' in name else name
|
115
113
|
|
116
114
|
def _read_sql_file(self, table_name: str, params: Optional[Dict] = None) -> Optional[str]:
|
115
|
+
if self.sql_folder is None:
|
116
|
+
raise RuntimeError("sql_folder is not configured. Cannot read SQL files.")
|
117
|
+
|
117
118
|
is_url = self.sql_folder.startswith("http")
|
118
119
|
if is_url:
|
119
120
|
url = f"{self.sql_folder.rstrip('/')}/{table_name}.sql".strip()
|
@@ -159,6 +160,9 @@ class Duckrun:
|
|
159
160
|
return content
|
160
161
|
|
161
162
|
def _load_py_function(self, name: str) -> Optional[Callable]:
|
163
|
+
if self.sql_folder is None:
|
164
|
+
raise RuntimeError("sql_folder is not configured. Cannot load Python functions.")
|
165
|
+
|
162
166
|
is_url = self.sql_folder.startswith("http")
|
163
167
|
try:
|
164
168
|
if is_url:
|
@@ -267,6 +271,9 @@ class Duckrun:
|
|
267
271
|
]
|
268
272
|
dr.run(pipeline)
|
269
273
|
"""
|
274
|
+
if self.sql_folder is None:
|
275
|
+
raise RuntimeError("sql_folder is not configured. Cannot run pipelines. Set sql_folder when creating connection.")
|
276
|
+
|
270
277
|
for i, task in enumerate(pipeline, 1):
|
271
278
|
print(f"\n{'='*60}")
|
272
279
|
print(f"Task {i}/{len(pipeline)}: {task[0]}")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: duckrun
|
3
|
-
Version: 0.1.3
|
3
|
+
Version: 0.1.4
|
4
4
|
Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
|
5
5
|
License-Expression: MIT
|
6
6
|
Project-URL: Homepage, https://github.com/djouallah/duckrun
|
@@ -35,14 +35,14 @@ pip install duckrun
|
|
35
35
|
## Quick Start
|
36
36
|
|
37
37
|
```python
|
38
|
-
import duckrun
|
38
|
+
import duckrun
|
39
39
|
|
40
|
-
# Connect to your Fabric lakehouse
|
41
|
-
|
40
|
+
# Connect to your Fabric lakehouse (using `con` pattern)
|
41
|
+
con = duckrun.connect(
|
42
42
|
workspace="my_workspace",
|
43
43
|
lakehouse_name="my_lakehouse",
|
44
44
|
schema="dbo",
|
45
|
-
sql_folder="./sql" # folder containing your .sql and .py files
|
45
|
+
sql_folder="./sql" # optional: folder containing your .sql and .py files (only needed for pipeline tasks)
|
46
46
|
)
|
47
47
|
|
48
48
|
# Define your pipeline
|
@@ -53,9 +53,11 @@ pipeline = [
|
|
53
53
|
]
|
54
54
|
|
55
55
|
# Run it
|
56
|
-
|
56
|
+
con.run(pipeline)
|
57
57
|
```
|
58
58
|
|
59
|
+
Note: the `sql/` folder is optional — if all you want to do is explore data with SQL (for example by calling `con.sql(...)`), you don't need to provide a `sql_folder`.
|
60
|
+
|
59
61
|
## Early Exit
|
60
62
|
|
61
63
|
In a pipeline run, if a task fails, the pipeline will stop without running the subsequent tasks.
|
@@ -138,12 +140,14 @@ Both write to the same `sales` table, but use different SQL files.
|
|
138
140
|
|
139
141
|
```python
|
140
142
|
# Run queries
|
141
|
-
|
143
|
+
con.sql("SELECT * FROM my_table LIMIT 10").show()
|
142
144
|
|
143
145
|
# Get as DataFrame
|
144
|
-
df = lakehouse.sql("SELECT COUNT(*) FROM sales").df()
|
146
|
+
df = con.sql("SELECT COUNT(*) FROM sales").df()
|
145
147
|
```
|
146
148
|
|
149
|
+
Explanation: DuckDB is connected to the lakehouse through `con`, so it is aware of the tables in that lakehouse (including tables created by your pipelines). That means you can query those tables directly with `con.sql(...)` just like any other DuckDB query. If you don't provide a `sql_folder`, you can still use `con.sql(...)` to explore existing tables.
|
150
|
+
|
147
151
|
|
148
152
|
|
149
153
|
## Remote SQL Files
|
@@ -151,7 +155,7 @@ df = lakehouse.sql("SELECT COUNT(*) FROM sales").df()
|
|
151
155
|
You can load SQL/Python files from a URL:
|
152
156
|
|
153
157
|
```python
|
154
|
-
|
158
|
+
con = duckrun.connect(
|
155
159
|
workspace="Analytics",
|
156
160
|
lakehouse_name="Sales",
|
157
161
|
schema="dbo",
|
@@ -0,0 +1,7 @@
|
|
1
|
+
duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
|
2
|
+
duckrun/core.py,sha256=u56bWZDKevbplARgnFdI0wm9BfrIVyAiu3eOIwE5FJc,14259
|
3
|
+
duckrun-0.1.4.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
|
4
|
+
duckrun-0.1.4.dist-info/METADATA,sha256=eoPhYn2zC0s_YyEGdiCe1Gs7iWfKY9vakYm3rZdMrrs,4377
|
5
|
+
duckrun-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
+
duckrun-0.1.4.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
|
7
|
+
duckrun-0.1.4.dist-info/RECORD,,
|
duckrun-0.1.3.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
|
2
|
-
duckrun/core.py,sha256=Ok2IS15NcV6zFuFKFi2GOe1NKREoBQzjwAay-fCNf38,13774
|
3
|
-
duckrun-0.1.3.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
|
4
|
-
duckrun-0.1.3.dist-info/METADATA,sha256=BYek_gAWR_6QdCAJQAV7QnhoSQsaG0aprlMtAce9Z0k,3805
|
5
|
-
duckrun-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
-
duckrun-0.1.3.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
|
7
|
-
duckrun-0.1.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|