duckrun-0.1.3-py3-none-any.whl → duckrun-0.1.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -16,23 +16,21 @@ class Duckrun:
         SQL: ('table_name', 'mode', {params})
 
     Usage:
+        # For pipelines:
         dr = Duckrun.connect(workspace, lakehouse, schema, sql_folder)
-
-        pipeline = [
-            ('download', (urls, paths, depth)),
-            ('staging', 'overwrite', {'run_date': '2024-06-01'}),
-            ('transform', 'append')
-        ]
-
         dr.run(pipeline)
+
+        # For data exploration only:
+        dr = Duckrun.connect(workspace, lakehouse, schema)
+        dr.sql("SELECT * FROM table").show()
     """
 
     def __init__(self, workspace: str, lakehouse_name: str, schema: str,
-                 sql_folder: str, compaction_threshold: int = 10):
+                 sql_folder: Optional[str] = None, compaction_threshold: int = 10):
         self.workspace = workspace
         self.lakehouse_name = lakehouse_name
         self.schema = schema
-        self.sql_folder = sql_folder.strip()
+        self.sql_folder = sql_folder.strip() if sql_folder else None
         self.compaction_threshold = compaction_threshold
         self.table_base_url = f'abfss://{workspace}@onelake.dfs.fabric.microsoft.com/{lakehouse_name}.Lakehouse/Tables/'
         self.con = duckdb.connect()
@@ -41,7 +39,7 @@ class Duckrun:
 
     @classmethod
     def connect(cls, workspace: str, lakehouse_name: str, schema: str,
-                sql_folder: str, compaction_threshold: int = 10):
+                sql_folder: Optional[str] = None, compaction_threshold: int = 100):
         """Create and connect to lakehouse"""
         print("Connecting to Lakehouse...")
         return cls(workspace, lakehouse_name, schema, sql_folder, compaction_threshold)
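Note that this hunk also raises the default `compaction_threshold` in `connect()` from 10 to 100, while the `__init__` default stays at 10. Callers who depended on the old, more aggressive compaction can still pass the value explicitly; a minimal sketch, with placeholder workspace and lakehouse names:

```python
import duckrun

# Pin the pre-0.1.4 threshold explicitly instead of relying on the new default of 100.
con = duckrun.connect(
    workspace="my_workspace",        # placeholder
    lakehouse_name="my_lakehouse",   # placeholder
    schema="dbo",
    sql_folder="./sql",
    compaction_threshold=10,
)
```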
@@ -114,6 +112,9 @@ class Duckrun:
         return name.split('__', 1)[0] if '__' in name else name
 
     def _read_sql_file(self, table_name: str, params: Optional[Dict] = None) -> Optional[str]:
+        if self.sql_folder is None:
+            raise RuntimeError("sql_folder is not configured. Cannot read SQL files.")
+
         is_url = self.sql_folder.startswith("http")
         if is_url:
             url = f"{self.sql_folder.rstrip('/')}/{table_name}.sql".strip()
@@ -159,6 +160,9 @@ class Duckrun:
         return content
 
     def _load_py_function(self, name: str) -> Optional[Callable]:
+        if self.sql_folder is None:
+            raise RuntimeError("sql_folder is not configured. Cannot load Python functions.")
+
         is_url = self.sql_folder.startswith("http")
         try:
             if is_url:
@@ -267,6 +271,9 @@ class Duckrun:
         ]
         dr.run(pipeline)
         """
+        if self.sql_folder is None:
+            raise RuntimeError("sql_folder is not configured. Cannot run pipelines. Set sql_folder when creating connection.")
+
         for i, task in enumerate(pipeline, 1):
             print(f"\n{'='*60}")
             print(f"Task {i}/{len(pipeline)}: {task[0]}")
{duckrun-0.1.3.dist-info → duckrun-0.1.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.1.3
+Version: 0.1.4
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 License-Expression: MIT
 Project-URL: Homepage, https://github.com/djouallah/duckrun
@@ -35,14 +35,14 @@ pip install duckrun
 ## Quick Start
 
 ```python
-import duckrun as dr
+import duckrun
 
-# Connect to your Fabric lakehouse
-lakehouse = dr.connect(
+# Connect to your Fabric lakehouse (using `con` pattern)
+con = duckrun.connect(
     workspace="my_workspace",
     lakehouse_name="my_lakehouse",
     schema="dbo",
-    sql_folder="./sql"  # folder containing your .sql and .py files
+    sql_folder="./sql"  # optional: folder containing your .sql and .py files (only needed for pipeline tasks)
 )
 
 # Define your pipeline
@@ -53,9 +53,11 @@ pipeline = [
 ]
 
 # Run it
-lakehouse.run(pipeline)
+con.run(pipeline)
 ```
 
+Note: the `sql/` folder is optional — if all you want to do is explore data with SQL (for example by calling `con.sql(...)`), you don't need to provide a `sql_folder`.
+
 ## Early Exit
 
 In a pipeline run, if a task fails, the pipeline will stop without running the subsequent tasks.
@@ -138,12 +140,14 @@ Both write to the same `sales` table, but use different SQL files.
 
 ```python
 # Run queries
-lakehouse.sql("SELECT * FROM my_table LIMIT 10").show()
+con.sql("SELECT * FROM my_table LIMIT 10").show()
 
 # Get as DataFrame
-df = lakehouse.sql("SELECT COUNT(*) FROM sales").df()
+df = con.sql("SELECT COUNT(*) FROM sales").df()
 ```
 
+Explanation: DuckDB is connected to the lakehouse through `con`, so it is aware of the tables in that lakehouse (including tables created by your pipelines). That means you can query those tables directly with `con.sql(...)` just like any other DuckDB query. If you don't provide a `sql_folder`, you can still use `con.sql(...)` to explore existing tables.
+
 
 
 ## Remote SQL Files
@@ -151,7 +155,7 @@ df = lakehouse.sql("SELECT COUNT(*) FROM sales").df()
 You can load SQL/Python files from a URL:
 
 ```python
-lakehouse = dr.connect(
+con = duckrun.connect(
     workspace="Analytics",
     lakehouse_name="Sales",
     schema="dbo",
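The new "Explanation" paragraph above is the practical payoff of the `con` pattern: the same object drives the pipeline runner and ad-hoc DuckDB queries over the lakehouse tables. A hedged end-to-end sketch, assuming a `./sql/sales.sql` file exists and reusing the placeholder names from the Quick Start (the `run_date` parameter is purely illustrative):

```python
import duckrun

con = duckrun.connect(
    workspace="my_workspace",       # placeholder
    lakehouse_name="my_lakehouse",  # placeholder
    schema="dbo",
    sql_folder="./sql",             # required here because a pipeline runs below
)

# Build (or rebuild) the sales table from ./sql/sales.sql.
con.run([("sales", "overwrite", {"run_date": "2024-06-01"})])

# The same connection can immediately query the table the pipeline just wrote.
df = con.sql("SELECT COUNT(*) AS row_count FROM sales").df()
print(df)
```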
duckrun-0.1.4.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
+duckrun/core.py,sha256=u56bWZDKevbplARgnFdI0wm9BfrIVyAiu3eOIwE5FJc,14259
+duckrun-0.1.4.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.1.4.dist-info/METADATA,sha256=eoPhYn2zC0s_YyEGdiCe1Gs7iWfKY9vakYm3rZdMrrs,4377
+duckrun-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.1.4.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.1.4.dist-info/RECORD,,
duckrun-0.1.3.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
-duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
-duckrun/core.py,sha256=Ok2IS15NcV6zFuFKFi2GOe1NKREoBQzjwAay-fCNf38,13774
-duckrun-0.1.3.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.1.3.dist-info/METADATA,sha256=BYek_gAWR_6QdCAJQAV7QnhoSQsaG0aprlMtAce9Z0k,3805
-duckrun-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.1.3.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.1.3.dist-info/RECORD,,