duckrun 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.2.3 → duckrun-0.2.4}/PKG-INFO +1 -1
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun/core.py +18 -13
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun/stats.py +22 -16
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun.egg-info/PKG-INFO +1 -1
- {duckrun-0.2.3 → duckrun-0.2.4}/pyproject.toml +1 -1
- {duckrun-0.2.3 → duckrun-0.2.4}/LICENSE +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/README.md +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun/__init__.py +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun/files.py +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun/runner.py +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun/writer.py +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.3 → duckrun-0.2.4}/setup.cfg +0 -0
@@ -100,8 +100,6 @@ class Duckrun:
|
|
100
100
|
workspace, lakehouse_name = parts
|
101
101
|
scan_all_schemas = True
|
102
102
|
schema = "dbo"
|
103
|
-
print(f"ℹ️ No schema specified. Using default schema 'dbo' for operations.")
|
104
|
-
print(f" Scanning all schemas for table discovery...\n")
|
105
103
|
elif len(parts) == 3:
|
106
104
|
workspace, lakehouse_name, schema = parts
|
107
105
|
else:
|
@@ -162,16 +160,13 @@ class Duckrun:
|
|
162
160
|
|
163
161
|
if self.scan_all_schemas:
|
164
162
|
# Discover all schemas first
|
165
|
-
print("🔍 Discovering schemas...")
|
166
163
|
schemas_result = obs.list_with_delimiter(store, prefix=base_path)
|
167
164
|
schemas = [
|
168
165
|
prefix.rstrip('/').split('/')[-1]
|
169
166
|
for prefix in schemas_result['common_prefixes']
|
170
167
|
]
|
171
|
-
print(f" Found {len(schemas)} schemas: {', '.join(schemas)}\n")
|
172
168
|
|
173
169
|
# Discover tables in each schema
|
174
|
-
print("🔍 Discovering tables...")
|
175
170
|
for schema_name in schemas:
|
176
171
|
schema_path = f"{base_path}{schema_name}/"
|
177
172
|
result = obs.list_with_delimiter(store, prefix=schema_path)
|
@@ -208,9 +203,22 @@ class Duckrun:
|
|
208
203
|
print(f"No Delta tables found in {self.lakehouse_name}.Lakehouse/Tables/{self.schema}/")
|
209
204
|
return
|
210
205
|
|
211
|
-
|
206
|
+
# Group tables by schema for display
|
207
|
+
schema_tables = {}
|
208
|
+
for schema_name, table_name in tables:
|
209
|
+
if schema_name not in schema_tables:
|
210
|
+
schema_tables[schema_name] = []
|
211
|
+
schema_tables[schema_name].append(table_name)
|
212
|
+
|
213
|
+
# Display tables by schema
|
214
|
+
print(f"\n📊 Found {len(tables)} tables:")
|
215
|
+
for schema_name in sorted(schema_tables.keys()):
|
216
|
+
table_list = sorted(schema_tables[schema_name])
|
217
|
+
print(f" {schema_name}: {', '.join(table_list)}")
|
212
218
|
|
213
219
|
attached_count = 0
|
220
|
+
skipped_tables = []
|
221
|
+
|
214
222
|
for schema_name, table_name in tables:
|
215
223
|
try:
|
216
224
|
if self.scan_all_schemas:
|
@@ -225,19 +233,16 @@ class Duckrun:
|
|
225
233
|
CREATE OR REPLACE VIEW {view_name}
|
226
234
|
AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
|
227
235
|
""")
|
228
|
-
print(f" ✓ Attached: {schema_name}.{table_name} → {view_name}")
|
229
236
|
attached_count += 1
|
230
237
|
except Exception as e:
|
231
|
-
|
238
|
+
skipped_tables.append(f"{schema_name}.{table_name}")
|
232
239
|
continue
|
233
240
|
|
234
241
|
print(f"\n{'='*60}")
|
235
|
-
print(f"✅
|
242
|
+
print(f"✅ Ready - {attached_count}/{len(tables)} tables available")
|
243
|
+
if skipped_tables:
|
244
|
+
print(f"⚠ Skipped {len(skipped_tables)} tables: {', '.join(skipped_tables[:3])}{'...' if len(skipped_tables) > 3 else ''}")
|
236
245
|
print(f"{'='*60}\n")
|
237
|
-
|
238
|
-
if self.scan_all_schemas:
|
239
|
-
print(f"\n💡 Note: Tables use schema.table format (e.g., aemo.calendar, dbo.results)")
|
240
|
-
print(f" Default schema for operations: {self.schema}\n")
|
241
246
|
|
242
247
|
except Exception as e:
|
243
248
|
print(f"❌ Error attaching lakehouse: {e}")
|
@@ -21,33 +21,39 @@ def _table_exists(duckrun_instance, schema_name: str, table_name: str) -> bool:
|
|
21
21
|
|
22
22
|
|
23
23
|
def _schema_exists(duckrun_instance, schema_name: str) -> bool:
|
24
|
-
"""Check if a schema exists by
|
24
|
+
"""Check if a schema exists by querying information_schema."""
|
25
25
|
try:
|
26
|
-
# For main schema,
|
26
|
+
# For main schema, always exists
|
27
27
|
if schema_name == "main":
|
28
|
-
|
28
|
+
return True
|
29
29
|
else:
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
# Use information_schema which works in DuckDB 1.2.2
|
31
|
+
query = f"SELECT 1 FROM information_schema.schemata WHERE schema_name = '{schema_name}' LIMIT 1"
|
32
|
+
result = duckrun_instance.con.execute(query).fetchall()
|
33
|
+
return len(result) > 0
|
33
34
|
except:
|
34
35
|
return False
|
35
36
|
|
36
37
|
|
37
38
|
def _get_existing_tables_in_schema(duckrun_instance, schema_name: str) -> list:
|
38
|
-
"""Get all existing tables in a schema
|
39
|
+
"""Get all existing tables in a schema using information_schema, excluding temporary tables."""
|
39
40
|
try:
|
40
|
-
# For main schema,
|
41
|
+
# For main schema, use SHOW TABLES
|
41
42
|
if schema_name == "main":
|
42
43
|
query = "SHOW TABLES"
|
44
|
+
result = duckrun_instance.con.execute(query).fetchall()
|
45
|
+
if result:
|
46
|
+
tables = [row[0] for row in result]
|
47
|
+
filtered_tables = [tbl for tbl in tables if not tbl.startswith('tbl_')]
|
48
|
+
return filtered_tables
|
43
49
|
else:
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
50
|
+
# Use information_schema which works in DuckDB 1.2.2
|
51
|
+
query = f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema_name}'"
|
52
|
+
result = duckrun_instance.con.execute(query).fetchall()
|
53
|
+
if result:
|
54
|
+
tables = [row[0] for row in result]
|
55
|
+
filtered_tables = [tbl for tbl in tables if not tbl.startswith('tbl_')]
|
56
|
+
return filtered_tables
|
51
57
|
return []
|
52
58
|
except:
|
53
59
|
return []
|
@@ -218,7 +224,7 @@ def get_stats(duckrun_instance, source: str):
|
|
218
224
|
WHERE tbl IS NOT NULL
|
219
225
|
GROUP BY tbl
|
220
226
|
ORDER BY total_rows DESC
|
221
|
-
''').
|
227
|
+
''').df()
|
222
228
|
|
223
229
|
return final_result
|
224
230
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|