duckrun 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -100,8 +100,6 @@ class Duckrun:
100
100
  workspace, lakehouse_name = parts
101
101
  scan_all_schemas = True
102
102
  schema = "dbo"
103
- print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
104
- print(f" Scanning all schemas for table discovery...\n")
105
103
  elif len(parts) == 3:
106
104
  workspace, lakehouse_name, schema = parts
107
105
  else:
@@ -162,16 +160,13 @@ class Duckrun:
162
160
 
163
161
  if self.scan_all_schemas:
164
162
  # Discover all schemas first
165
- print("🔍 Discovering schemas...")
166
163
  schemas_result = obs.list_with_delimiter(store, prefix=base_path)
167
164
  schemas = [
168
165
  prefix.rstrip('/').split('/')[-1]
169
166
  for prefix in schemas_result['common_prefixes']
170
167
  ]
171
- print(f" Found {len(schemas)} schemas: {', '.join(schemas)}\n")
172
168
 
173
169
  # Discover tables in each schema
174
- print("🔍 Discovering tables...")
175
170
  for schema_name in schemas:
176
171
  schema_path = f"{base_path}{schema_name}/"
177
172
  result = obs.list_with_delimiter(store, prefix=schema_path)
@@ -208,9 +203,22 @@ class Duckrun:
208
203
  print(f"No Delta tables found in {self.lakehouse_name}.Lakehouse/Tables/{self.schema}/")
209
204
  return
210
205
 
211
- print(f"\n📊 Found {len(tables)} Delta tables. Attaching as views...\n")
206
+ # Group tables by schema for display
207
+ schema_tables = {}
208
+ for schema_name, table_name in tables:
209
+ if schema_name not in schema_tables:
210
+ schema_tables[schema_name] = []
211
+ schema_tables[schema_name].append(table_name)
212
+
213
+ # Display tables by schema
214
+ print(f"\n📊 Found {len(tables)} tables:")
215
+ for schema_name in sorted(schema_tables.keys()):
216
+ table_list = sorted(schema_tables[schema_name])
217
+ print(f" {schema_name}: {', '.join(table_list)}")
212
218
 
213
219
  attached_count = 0
220
+ skipped_tables = []
221
+
214
222
  for schema_name, table_name in tables:
215
223
  try:
216
224
  if self.scan_all_schemas:
@@ -225,19 +233,16 @@ class Duckrun:
225
233
  CREATE OR REPLACE VIEW {view_name}
226
234
  AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
227
235
  """)
228
- print(f" ✓ Attached: {schema_name}.{table_name} → {view_name}")
229
236
  attached_count += 1
230
237
  except Exception as e:
231
- print(f" ⚠ Skipped {schema_name}.{table_name}: {str(e)[:100]}")
238
+ skipped_tables.append(f"{schema_name}.{table_name}")
232
239
  continue
233
240
 
234
241
  print(f"\n{'='*60}")
235
- print(f"✅ Successfully attached {attached_count}/{len(tables)} tables")
242
+ print(f"✅ Ready - {attached_count}/{len(tables)} tables available")
243
+ if skipped_tables:
244
+ print(f"⚠ Skipped {len(skipped_tables)} tables: {', '.join(skipped_tables[:3])}{'...' if len(skipped_tables) > 3 else ''}")
236
245
  print(f"{'='*60}\n")
237
-
238
- if self.scan_all_schemas:
239
- print(f"\n💡 Note: Tables use schema.table format (e.g., aemo.calendar, dbo.results)")
240
- print(f" Default schema for operations: {self.schema}\n")
241
246
 
242
247
  except Exception as e:
243
248
  print(f"❌ Error attaching lakehouse: {e}")
duckrun/stats.py CHANGED
@@ -21,33 +21,39 @@ def _table_exists(duckrun_instance, schema_name: str, table_name: str) -> bool:
21
21
 
22
22
 
23
23
  def _schema_exists(duckrun_instance, schema_name: str) -> bool:
24
- """Check if a schema exists by trying to show its tables."""
24
+ """Check if a schema exists by querying information_schema."""
25
25
  try:
26
- # For main schema, just show tables
26
+ # For main schema, always exists
27
27
  if schema_name == "main":
28
- query = "SHOW TABLES"
28
+ return True
29
29
  else:
30
- query = f"SHOW TABLES FROM {schema_name}"
31
- duckrun_instance.con.execute(query)
32
- return True
30
+ # Use information_schema which works in DuckDB 1.2.2
31
+ query = f"SELECT 1 FROM information_schema.schemata WHERE schema_name = '{schema_name}' LIMIT 1"
32
+ result = duckrun_instance.con.execute(query).fetchall()
33
+ return len(result) > 0
33
34
  except:
34
35
  return False
35
36
 
36
37
 
37
38
  def _get_existing_tables_in_schema(duckrun_instance, schema_name: str) -> list:
38
- """Get all existing tables in a schema by showing tables, excluding temporary tables."""
39
+ """Get all existing tables in a schema using information_schema, excluding temporary tables."""
39
40
  try:
40
- # For main schema, just show tables
41
+ # For main schema, use SHOW TABLES
41
42
  if schema_name == "main":
42
43
  query = "SHOW TABLES"
44
+ result = duckrun_instance.con.execute(query).fetchall()
45
+ if result:
46
+ tables = [row[0] for row in result]
47
+ filtered_tables = [tbl for tbl in tables if not tbl.startswith('tbl_')]
48
+ return filtered_tables
43
49
  else:
44
- query = f"SHOW TABLES FROM {schema_name}"
45
- result = duckrun_instance.con.execute(query).fetchall()
46
- if result:
47
- # Filter out temporary tables created by stats processing (tbl_0, tbl_1, etc.)
48
- tables = [row[0] for row in result]
49
- filtered_tables = [tbl for tbl in tables if not tbl.startswith('tbl_')]
50
- return filtered_tables
50
+ # Use information_schema which works in DuckDB 1.2.2
51
+ query = f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema_name}'"
52
+ result = duckrun_instance.con.execute(query).fetchall()
53
+ if result:
54
+ tables = [row[0] for row in result]
55
+ filtered_tables = [tbl for tbl in tables if not tbl.startswith('tbl_')]
56
+ return filtered_tables
51
57
  return []
52
58
  except:
53
59
  return []
@@ -218,7 +224,7 @@ def get_stats(duckrun_instance, source: str):
218
224
  WHERE tbl IS NOT NULL
219
225
  GROUP BY tbl
220
226
  ORDER BY total_rows DESC
221
- ''').fetch_arrow_table()
227
+ ''').df()
222
228
 
223
229
  return final_result
224
230
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -0,0 +1,11 @@
1
+ duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
+ duckrun/core.py,sha256=m_9DuSZNZ5DOETnkjNGn8HJBYheCgs_7NewcbM9VECI,16500
3
+ duckrun/files.py,sha256=xba0juMEQPgaznDudmXcwaGH0wv-6aCoHmV_cNF6Y7I,10665
4
+ duckrun/runner.py,sha256=X5g-57OCHQZ7USKpcBbhYGUcZwLQny2x147DLKrV32c,11417
5
+ duckrun/stats.py,sha256=B9UfGOndRNfcB2AhOVjuSqgfmF2x-uRmdmBn3usx_jQ,9881
6
+ duckrun/writer.py,sha256=eWrGtDQTbXi8H3sSt2WucYTdEQUjK97KmQxzCbqAuMs,6221
7
+ duckrun-0.2.4.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
8
+ duckrun-0.2.4.dist-info/METADATA,sha256=2t7-pNzcPCeseXTjp6Bc18_V41MpjDarG0z-2IzY-Lk,18339
9
+ duckrun-0.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
+ duckrun-0.2.4.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
11
+ duckrun-0.2.4.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
- duckrun/core.py,sha256=LN5rc5B3HLimgslZdC8tLKe3rjTl_KD8WxCh1qoJhdM,16443
3
- duckrun/files.py,sha256=xba0juMEQPgaznDudmXcwaGH0wv-6aCoHmV_cNF6Y7I,10665
4
- duckrun/runner.py,sha256=X5g-57OCHQZ7USKpcBbhYGUcZwLQny2x147DLKrV32c,11417
5
- duckrun/stats.py,sha256=jLEkxNo7MjibPMpjMsXyedrJqv9-BAnP1C0L2a7H8Z8,9417
6
- duckrun/writer.py,sha256=eWrGtDQTbXi8H3sSt2WucYTdEQUjK97KmQxzCbqAuMs,6221
7
- duckrun-0.2.3.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
8
- duckrun-0.2.3.dist-info/METADATA,sha256=CpJvtR9l8c9b1AV9-KnjN4fZODE_3oJxS3omz4p-qlc,18339
9
- duckrun-0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
- duckrun-0.2.3.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
11
- duckrun-0.2.3.dist-info/RECORD,,