duckrun 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -64,36 +64,50 @@ class Duckrun:
64
64
  def _attach_lakehouse(self):
65
65
  self._create_onelake_secret()
66
66
  try:
67
+ # Use expensive list operation but filter for _delta_log folders only
68
+ # This avoids parsing JSON content that causes Iceberg metadata issues
69
+ print(f"Scanning for Delta tables in {self.schema}... (this may take a moment)")
70
+
67
71
  list_tables_query = f"""
68
- SELECT DISTINCT(split_part(file, '_delta_log', 1)) as tables
69
- FROM glob ("abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Tables/*/*/_delta_log/*.json")
72
+ SELECT DISTINCT
73
+ regexp_extract(file, 'Tables/{self.schema}/([^/]+)/_delta_log', 1) as table_name
74
+ FROM glob("abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Tables/{self.schema}/**")
75
+ WHERE file LIKE '%/_delta_log/%'
76
+ AND file NOT LIKE '%/metadata/%'
77
+ AND file NOT LIKE '%/iceberg/%'
78
+ AND regexp_extract(file, 'Tables/{self.schema}/([^/]+)/_delta_log', 1) IS NOT NULL
70
79
  """
80
+
71
81
  list_tables_df = self.con.sql(list_tables_query).df()
72
- list_tables = list_tables_df['tables'].tolist() if not list_tables_df.empty else []
73
-
74
- if not list_tables:
75
- print(f"No Delta tables found in {self.lakehouse_name}.Lakehouse/Tables.")
82
+
83
+ if list_tables_df.empty:
84
+ print(f"No Delta tables found in {self.lakehouse_name}.Lakehouse/Tables/{self.schema}.")
76
85
  return
86
+
87
+ table_names = list_tables_df['table_name'].tolist()
77
88
 
78
- print(f"Found {len(list_tables)} Delta tables. Attaching as views...")
89
+ print(f"Found {len(table_names)} Delta tables. Attaching as views...")
79
90
 
80
- for table_path in list_tables:
81
- parts = table_path.strip("/").split("/")
82
- if len(parts) >= 2:
83
- potential_schema = parts[-2]
84
- table = parts[-1]
85
- if potential_schema == self.schema:
86
- try:
87
- self.con.sql(f"""
88
- CREATE OR REPLACE VIEW {table}
89
- AS SELECT * FROM delta_scan('{self.table_base_url}{self.schema}/{table}');
90
- """)
91
- except Exception as e:
92
- print(f"Error creating view for table {table}: {e}")
91
+ for table in table_names:
92
+ # Skip Iceberg-related folders and empty names
93
+ if not table or table in ('metadata', 'iceberg'):
94
+ continue
95
+
96
+ try:
97
+ self.con.sql(f"""
98
+ CREATE OR REPLACE VIEW {table}
99
+ AS SELECT * FROM delta_scan('{self.table_base_url}{self.schema}/{table}');
100
+ """)
101
+ print(f" ✓ Attached: {table}")
102
+ except Exception as e:
103
+ print(f" Skipped {table}: {str(e)[:100]}")
104
+ continue
105
+
93
106
  print("\nAttached tables (views) in DuckDB:")
94
107
  self.con.sql("SELECT name FROM (SHOW ALL TABLES) WHERE database='memory'").show()
95
108
  except Exception as e:
96
109
  print(f"Error attaching lakehouse: {e}")
110
+ print("Continuing without pre-attached tables.")
97
111
 
98
112
  def _normalize_table_name(self, name: str) -> str:
99
113
  """Extract base table name before first '__'"""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://github.com/djouallah/duckrun
@@ -14,10 +14,16 @@ Requires-Dist: deltalake>=0.18.2
14
14
  Requires-Dist: requests>=2.28.0
15
15
  Dynamic: license-file
16
16
 
17
- # 🦆 Duckrun
17
+
18
+ <img src="duckrun.png" width="400" alt="Duckrun">
18
19
 
19
20
  Simple task runner for Microsoft Fabric Python notebooks, powered by DuckDB and delta-rs.
20
21
 
22
+
23
+ ## Known Limitation
24
+
25
+ Only Lakehouses with schemas are supported. Workspace and lakehouse names must not contain spaces.
26
+
21
27
  ## Installation
22
28
 
23
29
  ```bash
@@ -50,6 +56,10 @@ pipeline = [
50
56
  lakehouse.run(pipeline)
51
57
  ```
52
58
 
59
+ ## Early Exit
60
+
61
+ In a pipeline run, if a task fails, the pipeline will stop without running the subsequent tasks.
62
+
53
63
  ## How It Works
54
64
 
55
65
  Duckrun runs two types of tasks:
@@ -117,8 +127,8 @@ Use `__` to create variants of the same table:
117
127
 
118
128
  ```python
119
129
  pipeline = [
120
- ('sales__initial', 'overwrite', {}), # writes to 'sales' table
121
- ('sales__incremental', 'append', {}), # appends to 'sales' table
130
+ ('sales__initial', 'overwrite'), # writes to 'sales' table
131
+ ('sales__incremental', 'append'), # appends to 'sales' table
122
132
  ]
123
133
  ```
124
134
 
@@ -134,32 +144,7 @@ lakehouse.sql("SELECT * FROM my_table LIMIT 10").show()
134
144
  df = lakehouse.sql("SELECT COUNT(*) FROM sales").df()
135
145
  ```
136
146
 
137
- ## Real-World Example
138
147
 
139
- ```python
140
- import duckrun as dr
141
-
142
- lakehouse = dr.connect(
143
- workspace="Analytics",
144
- lakehouse_name="Sales",
145
- schema="dbo",
146
- sql_folder="./etl"
147
- )
148
-
149
- # Daily pipeline
150
- daily = [
151
- ('download_files', (api_url, local_path)),
152
- ('staging_orders', 'overwrite', {'run_date': '2024-06-01'}),
153
- ('staging_customers', 'overwrite', {'run_date': '2024-06-01'}),
154
- ('fact_sales', 'append'),
155
- ('dim_customer', 'overwrite')
156
- ]
157
-
158
- lakehouse.run(daily)
159
-
160
- # Check results
161
- lakehouse.sql("SELECT COUNT(*) FROM fact_sales").show()
162
- ```
163
148
 
164
149
  ## Remote SQL Files
165
150
 
@@ -0,0 +1,7 @@
1
+ duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
+ duckrun/core.py,sha256=Ok2IS15NcV6zFuFKFi2GOe1NKREoBQzjwAay-fCNf38,13774
3
+ duckrun-0.1.3.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
4
+ duckrun-0.1.3.dist-info/METADATA,sha256=BYek_gAWR_6QdCAJQAV7QnhoSQsaG0aprlMtAce9Z0k,3805
5
+ duckrun-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ duckrun-0.1.3.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
7
+ duckrun-0.1.3.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
- duckrun/core.py,sha256=-Vf2nYwhdsVpTZS9mGBtm8j_HNAcHR7Cj075pida3Yw,13133
3
- duckrun-0.1.1.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
4
- duckrun-0.1.1.dist-info/METADATA,sha256=4KZAURlPgjIDGYW_htE4gdHLi6WX-3gpfrCY0r1sFPE,4114
5
- duckrun-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- duckrun-0.1.1.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
7
- duckrun-0.1.1.dist-info/RECORD,,