duckrun 0.2.13__py3-none-any.whl → 0.2.19.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/stats.py CHANGED
@@ -60,32 +60,89 @@ def _get_existing_tables_in_schema(duckrun_instance, schema_name: str) -> list:
  return []


- def get_stats(duckrun_instance, source: str):
+ def _match_tables_by_pattern(duckrun_instance, pattern: str) -> dict:
+ """Match tables across all schemas using a wildcard pattern.
+ Pattern can be:
+ - '*.summary' - matches 'summary' table in all schemas
+ - '*summary' - matches any table ending with 'summary'
+ - 'schema.*' - matches all tables in 'schema'
+ Returns a dict mapping schema names to lists of matching table names."""
+ import fnmatch
+
+ try:
+ # Query all schemas and tables in one go
+ query = """
+ SELECT table_schema, table_name
+ FROM information_schema.tables
+ WHERE table_schema NOT LIKE 'pg_%'
+ AND table_schema != 'information_schema'
+ AND table_name NOT LIKE 'tbl_%'
+ """
+ result = duckrun_instance.con.execute(query).fetchall()
+
+ matched = {}
+
+ # Check if pattern contains a dot (schema.table pattern)
+ if '.' in pattern:
+ schema_pattern, table_pattern = pattern.split('.', 1)
+ for schema, table in result:
+ if fnmatch.fnmatch(schema, schema_pattern) and fnmatch.fnmatch(table, table_pattern):
+ if schema not in matched:
+ matched[schema] = []
+ matched[schema].append(table)
+ else:
+ # Pattern matches only table names
+ for schema, table in result:
+ if fnmatch.fnmatch(table, pattern):
+ if schema not in matched:
+ matched[schema] = []
+ matched[schema].append(table)
+
+ return matched
+ except:
+ return {}
+
+
+ def get_stats(duckrun_instance, source: str = None, detailed = False):
  """
  Get comprehensive statistics for Delta Lake tables.

  Args:
  duckrun_instance: The Duckrun connection instance
- source: Can be one of:
+ source: Optional. Can be one of:
+ - None: Use all tables in the connection's schema (default)
  - Table name: 'table_name' (uses main schema in DuckDB)
  - Schema.table: 'schema.table_name' (specific table in schema, if multi-schema)
  - Schema only: 'schema' (all tables in schema, if multi-schema)
+ - Wildcard pattern: '*.summary' (matches tables across all schemas)
+ detailed: Optional. Controls the level of detail in statistics:
+ - False (default): Aggregated table-level stats (total rows, file count,
+ row groups, average row group size, file sizes, VORDER status)
+ - True: Row group level statistics with compression details, row group sizes,
+ and parquet metadata

  Returns:
- Arrow table with statistics including total rows, file count, row groups,
- average row group size, file sizes, VORDER status, and timestamp
+ DataFrame with statistics based on detailed parameter:
+ - If detailed=False: Aggregated table-level summary
+ - If detailed=True: Granular file and row group level stats

  Examples:
  con = duckrun.connect("tmp/data.lakehouse/test")

- # Single table in main schema (DuckDB uses 'main', not 'test')
- stats = con.get_stats('price_today')
+ # All tables in the connection's schema (aggregated)
+ stats = con.get_stats()
+
+ # Single table with detailed row group statistics
+ stats_detailed = con.get_stats('price_today', detailed=True)

  # Specific table in different schema (only if multi-schema enabled)
  stats = con.get_stats('aemo.price')

  # All tables in a schema (only if multi-schema enabled)
  stats = con.get_stats('aemo')
+
+ # Wildcard pattern across all schemas (only if multi-schema enabled)
+ stats = con.get_stats('*.summary')
  """
  timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
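Note: the wildcard matching added in this hunk is plain fnmatch over the (schema, table) pairs returned from information_schema. A small self-contained illustration of the pattern semantics described in the docstring (the schema and table names below are made up):

    import fnmatch

    rows = [("aemo", "price"), ("aemo", "summary"), ("sales", "summary")]
    pattern = "*.summary"                      # 'summary' table in every schema
    schema_pat, table_pat = pattern.split(".", 1)

    matched = {}
    for schema, table in rows:
        if fnmatch.fnmatch(schema, schema_pat) and fnmatch.fnmatch(table, table_pat):
            matched.setdefault(schema, []).append(table)

    print(matched)   # {'aemo': ['summary'], 'sales': ['summary']}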
 
@@ -93,8 +150,31 @@ def get_stats(duckrun_instance, source: str):
  duckdb_schema = "main"
  url_schema = duckrun_instance.schema # This is from the connection URL path

+ # If source is not provided, default to all tables in the connection's schema
+ if source is None:
+ source = url_schema
+
+ # Check if source contains wildcard characters
+ if '*' in source or '?' in source:
+ # Wildcard pattern mode - only valid if multi-schema is enabled
+ if not duckrun_instance.scan_all_schemas:
+ raise ValueError(f"Wildcard pattern '{source}' not supported. Connection was made to a specific schema '{url_schema}'. Enable multi-schema mode to use wildcards.")
+
+ matched_tables = _match_tables_by_pattern(duckrun_instance, source)
+
+ if not matched_tables:
+ raise ValueError(f"No tables found matching pattern '{source}'")
+
+ # Flatten the matched tables into a list with schema info
+ tables_with_schemas = []
+ for schema, tables in matched_tables.items():
+ for table in tables:
+ tables_with_schemas.append((schema, table))
+
+ print(f"Found {len(tables_with_schemas)} tables matching pattern '{source}'")
+
  # Parse the source and validate existence
- if '.' in source:
+ elif '.' in source:
  # Format: schema.table - only valid if multi-schema is enabled
  schema_name, table_name = source.split('.', 1)
 
@@ -105,44 +185,45 @@ def get_stats(duckrun_instance, source: str):
  if not _table_exists(duckrun_instance, schema_name, table_name):
  raise ValueError(f"Table '{table_name}' does not exist in schema '{schema_name}'")

- list_tables = [table_name]
+ tables_with_schemas = [(schema_name, table_name)]
  else:
  # Could be just table name or schema name
  if duckrun_instance.scan_all_schemas:
  # Multi-schema mode: DuckDB has actual schemas
  # First check if it's a table in main schema
  if _table_exists(duckrun_instance, duckdb_schema, source):
- list_tables = [source]
- schema_name = duckdb_schema
+ tables_with_schemas = [(duckdb_schema, source)]
  # Otherwise, check if it's a schema name
  elif _schema_exists(duckrun_instance, source):
  schema_name = source
  list_tables = _get_existing_tables_in_schema(duckrun_instance, source)
  if not list_tables:
  raise ValueError(f"Schema '{source}' exists but contains no tables")
+ tables_with_schemas = [(schema_name, tbl) for tbl in list_tables]
  else:
  raise ValueError(f"Neither table '{source}' in main schema nor schema '{source}' exists")
  else:
  # Single-schema mode: tables are in DuckDB's main schema, use URL schema for file paths
  if _table_exists(duckrun_instance, duckdb_schema, source):
  # It's a table name
- list_tables = [source]
- schema_name = url_schema # Use URL schema for file path construction
+ tables_with_schemas = [(url_schema, source)]
  elif source == url_schema:
  # Special case: user asked for stats on the URL schema name - list all tables
  list_tables = _get_existing_tables_in_schema(duckrun_instance, duckdb_schema)
- schema_name = url_schema # Use URL schema for file path construction
  if not list_tables:
  raise ValueError(f"No tables found in schema '{url_schema}'")
+ tables_with_schemas = [(url_schema, tbl) for tbl in list_tables]
  else:
  raise ValueError(f"Table '{source}' does not exist in the current context (schema: {url_schema})")

  # Use the existing connection
  con = duckrun_instance.con

- print(f"Processing {len(list_tables)} tables: {list_tables}")
+ print(f"Processing {len(tables_with_schemas)} tables from {len(set(s for s, t in tables_with_schemas))} schema(s)")

- for idx, tbl in enumerate(list_tables):
+ successful_tables = []
+ for idx, (schema_name, tbl) in enumerate(tables_with_schemas):
+ print(f"[{idx+1}/{len(tables_with_schemas)}] Processing table '{schema_name}.{tbl}'...")
  # Construct lakehouse path using correct ABFSS URL format (no .Lakehouse suffix)
  table_path = f"{duckrun_instance.table_base_url}{schema_name}/{tbl}"
 
@@ -169,8 +250,18 @@ def get_stats(duckrun_instance, source: str):
  print(f"Warning: Could not convert RecordBatch for table '{tbl}': Unexpected type {type(add_actions)}")
  xx = {}

- # Check if VORDER exists
- vorder = 'tags.VORDER' in xx.keys()
+ # Check if VORDER exists - handle both formats:
+ # 1. Flattened format: 'tags.VORDER' or 'tags.vorder' in keys
+ # 2. Nested format: check in 'tags' dict for 'VORDER' or 'vorder'
+ vorder = False
+ if 'tags.VORDER' in xx.keys() or 'tags.vorder' in xx.keys():
+ vorder = True
+ elif 'tags' in xx.keys() and xx['tags']:
+ # Check nested tags dictionary (tags is a list of dicts, one per file)
+ for tag_dict in xx['tags']:
+ if tag_dict and ('VORDER' in tag_dict or 'vorder' in tag_dict):
+ vorder = True
+ break

  # Calculate total size
  total_size = sum(xx['size_bytes']) if xx['size_bytes'] else 0
@@ -185,66 +276,180 @@ def get_stats(duckrun_instance, source: str):
  con.execute(f'''
  CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
  SELECT
+ '{schema_name}' as schema,
  '{tbl}' as tbl,
  'empty' as file_name,
  0 as num_rows,
  0 as num_row_groups,
  0 as size,
  {vorder} as vorder,
+ '' as compression,
  '{timestamp}' as timestamp
  WHERE false
  ''')
  else:
- # Get parquet metadata and create temp table
- con.execute(f'''
- CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
- SELECT
- '{tbl}' as tbl,
- file_name,
- num_rows,
- num_row_groups,
- CEIL({total_size}/(1024*1024)) as size,
- {vorder} as vorder,
- '{timestamp}' as timestamp
- FROM parquet_file_metadata({delta})
- ''')
+ # Get parquet metadata and create temp table with compression info
+ if detailed == True:
+ # Detailed mode: Include ALL parquet_metadata columns
+ con.execute(f'''
+ CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
+ SELECT
+ '{schema_name}' as schema,
+ '{tbl}' as tbl,
+ {vorder} as vorder,
+ pm.*,
+ '{timestamp}' as timestamp
+ FROM parquet_metadata({delta}) pm
+ ''')
+ else:
+ # Aggregated mode: Original summary statistics
+ con.execute(f'''
+ CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
+ SELECT
+ '{schema_name}' as schema,
+ '{tbl}' as tbl,
+ fm.file_name,
+ fm.num_rows,
+ fm.num_row_groups,
+ CEIL({total_size}/(1024*1024)) as size,
+ {vorder} as vorder,
+ COALESCE(STRING_AGG(DISTINCT pm.compression, ', ' ORDER BY pm.compression), 'UNCOMPRESSED') as compression,
+ '{timestamp}' as timestamp
+ FROM parquet_file_metadata({delta}) fm
+ LEFT JOIN parquet_metadata({delta}) pm ON fm.file_name = pm.file_name
+ GROUP BY fm.file_name, fm.num_rows, fm.num_row_groups
+ ''')

  except Exception as e:
- print(f"Warning: Could not process table '{tbl}': {e}")
- # Create empty temp table for failed tables
- con.execute(f'''
- CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
- SELECT
- '{tbl}' as tbl,
- 'error' as file_name,
- 0 as num_rows,
- 0 as num_row_groups,
- 0 as size,
- false as vorder,
- '{timestamp}' as timestamp
- WHERE false
- ''')
+ error_msg = str(e)
+ print(f"Warning: Could not process table '{tbl}' using DeltaTable API: {e}")
+
+ # Fallback: Use DuckDB's delta_scan with filename parameter
+ if "Invalid JSON" in error_msg or "MetadataValue" in error_msg:
+ print(f" Detected JSON parsing issue - falling back to DuckDB delta_scan")
+ else:
+ print(f" Falling back to DuckDB delta_scan")
+
+ try:
+ # First get the list of actual parquet files using delta_scan
+ file_list_result = con.execute(f'''
+ SELECT DISTINCT filename
+ FROM delta_scan('{table_path}', filename=1)
+ ''').fetchall()
+
+ if not file_list_result:
+ # Empty table
+ con.execute(f'''
+ CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
+ SELECT
+ '{schema_name}' as schema,
+ '{tbl}' as tbl,
+ 'empty' as file_name,
+ 0 as num_rows,
+ 0 as num_row_groups,
+ 0 as size,
+ false as vorder,
+ '' as compression,
+ '{timestamp}' as timestamp
+ WHERE false
+ ''')
+ else:
+ # Extract just the filename (not the full path) from delta_scan results
+ # delta_scan returns full ABFSS paths, we need to extract just the filename part
+ filenames = []
+ for row in file_list_result:
+ full_path = row[0]
+ # Extract just the filename from the full ABFSS path
+ if '/' in full_path:
+ filename = full_path.split('/')[-1]
+ else:
+ filename = full_path
+ filenames.append(table_path + "/" + filename)
+
+ # Use parquet_file_metadata to get actual parquet stats with compression
+ if detailed == True:
+ # Detailed mode: Include ALL parquet_metadata columns
+ con.execute(f'''
+ CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
+ SELECT
+ '{schema_name}' as schema,
+ '{tbl}' as tbl,
+ false as vorder,
+ pm.*,
+ '{timestamp}' as timestamp
+ FROM parquet_metadata({filenames}) pm
+ ''')
+ else:
+ # Aggregated mode: Original summary statistics
+ con.execute(f'''
+ CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
+ SELECT
+ '{schema_name}' as schema,
+ '{tbl}' as tbl,
+ fm.file_name,
+ fm.num_rows,
+ fm.num_row_groups,
+ 0 as size,
+ false as vorder,
+ COALESCE(STRING_AGG(DISTINCT pm.compression, ', ' ORDER BY pm.compression), 'UNCOMPRESSED') as compression,
+ '{timestamp}' as timestamp
+ FROM parquet_file_metadata({filenames}) fm
+ LEFT JOIN parquet_metadata({filenames}) pm ON fm.file_name = pm.file_name
+ GROUP BY fm.file_name, fm.num_rows, fm.num_row_groups
+ ''')
+
+ print(f" ✓ Successfully processed '{tbl}' using DuckDB fallback with parquet metadata")
+ except Exception as fallback_error:
+ print(f" ✗ DuckDB fallback also failed for '{tbl}': {fallback_error}")
+ print(f" ⏭️ Skipping table '{tbl}'")
+ continue
+
+ # Mark this table as successfully processed
+ successful_tables.append(idx)
+
+ # Only union tables that were successfully processed
+ if not successful_tables:
+ # No tables were processed successfully - return empty dataframe
+ print("⚠️ No tables could be processed successfully")
+ import pandas as pd
+ if detailed == True:
+ return pd.DataFrame(columns=['schema', 'tbl', 'vorder', 'timestamp'])
+ else:
+ return pd.DataFrame(columns=['schema', 'tbl', 'total_rows', 'num_files', 'num_row_group',
+ 'average_row_group', 'file_size_MB', 'vorder', 'compression', 'timestamp'])

- # Union all temp tables
- union_parts = [f'SELECT * FROM tbl_{i}' for i in range(len(list_tables))]
+ # Union all successfully processed temp tables
+ union_parts = [f'SELECT * FROM tbl_{i}' for i in successful_tables]
  union_query = ' UNION ALL '.join(union_parts)

- # Generate final summary
- final_result = con.execute(f'''
- SELECT
- tbl,
- SUM(num_rows) as total_rows,
- COUNT(*) as num_files,
- SUM(num_row_groups) as num_row_group,
- CAST(CEIL(SUM(num_rows)::DOUBLE / NULLIF(SUM(num_row_groups), 0)) AS INTEGER) as average_row_group,
- MIN(size) as file_size_MB,
- ANY_VALUE(vorder) as vorder,
- ANY_VALUE(timestamp) as timestamp
- FROM ({union_query})
- WHERE tbl IS NOT NULL
- GROUP BY tbl
- ORDER BY total_rows DESC
- ''').df()
+ # Generate final summary based on detailed flag
+ if detailed == True:
+ # Detailed mode: Return ALL parquet_metadata columns
+ final_result = con.execute(f'''
+ SELECT *
+ FROM ({union_query})
+ WHERE tbl IS NOT NULL
+ ORDER BY schema, tbl, file_name, row_group_id, column_id
+ ''').df()
+ else:
+ # Aggregated mode: Original summary statistics
+ final_result = con.execute(f'''
+ SELECT
+ schema,
+ tbl,
+ SUM(num_rows) as total_rows,
+ COUNT(*) as num_files,
+ SUM(num_row_groups) as num_row_group,
+ CAST(CEIL(SUM(num_rows)::DOUBLE / NULLIF(SUM(num_row_groups), 0)) AS INTEGER) as average_row_group,
+ MIN(size) as file_size_MB,
+ ANY_VALUE(vorder) as vorder,
+ STRING_AGG(DISTINCT compression, ', ' ORDER BY compression) as compression,
+ ANY_VALUE(timestamp) as timestamp
+ FROM ({union_query})
+ WHERE tbl IS NOT NULL
+ GROUP BY schema, tbl
+ ORDER BY total_rows DESC
+ ''').df()

  return final_result
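Note: pulled together from the docstring examples in the first hunk, the revised get_stats signature supports the following call patterns (the lakehouse path and table names are illustrative; the schema and wildcard forms require a multi-schema connection):

    import duckrun

    con = duckrun.connect("tmp/data.lakehouse/test")

    stats = con.get_stats()                               # all tables in the connection's schema, aggregated
    detail = con.get_stats('price_today', detailed=True)  # row group level stats for one table
    one = con.get_stats('aemo.price')                     # specific schema.table (multi-schema only)
    wide = con.get_stats('*.summary')                     # wildcard across schemas (multi-schema only)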
 
duckrun/writer.py CHANGED
@@ -3,6 +3,20 @@ Delta Lake writer functionality for duckrun - Spark-style write API
  """
  from deltalake import DeltaTable, write_deltalake, __version__ as deltalake_version

+ # Try to import WriterProperties for Rust engine (available in 0.18.2+)
+ try:
+ from deltalake.writer import WriterProperties
+ _HAS_WRITER_PROPERTIES = True
+ except ImportError:
+ _HAS_WRITER_PROPERTIES = False
+
+ # Try to import PyArrow dataset for old PyArrow engine
+ try:
+ import pyarrow.dataset as ds
+ _HAS_PYARROW_DATASET = True
+ except ImportError:
+ _HAS_PYARROW_DATASET = False
+

  # Row Group configuration for optimal Delta Lake performance
  RG = 8_000_000
@@ -23,12 +37,14 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
  - Has max_rows_per_file/max_rows_per_group/min_rows_per_group for optimization
  - When mergeSchema=True: must set schema_mode='merge' + engine='rust', NO row group params
  - When mergeSchema=False: use row group params, DON'T set engine (pyarrow is default)
+ - COMPRESSION: Defaults to ZSTD via writer_properties (rust) or file_options (pyarrow)

  deltalake 0.20+:
  - Does NOT have 'engine' parameter (everything is rust, pyarrow deprecated)
  - Does NOT have max_rows_per_file (row group optimization removed)
  - When mergeSchema=True: must set schema_mode='merge'
  - When mergeSchema=False: just write normally (no special params)
+ - COMPRESSION: Defaults to ZSTD via writer_properties (rust only)

  Uses version detection for simpler logic.
  """
@@ -50,7 +66,13 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
  # deltalake 0.18.2-0.19.x: must also set engine='rust' for schema merging
  # Do NOT use row group params (they conflict with rust engine)
  args['engine'] = 'rust'
- # For version 0.20+: just schema_mode='merge' is enough, rust is default
+ # Set ZSTD compression for Rust engine
+ if _HAS_WRITER_PROPERTIES:
+ args['writer_properties'] = WriterProperties(compression='ZSTD')
+ else:
+ # Version 0.20+: rust is default, just add compression
+ if _HAS_WRITER_PROPERTIES:
+ args['writer_properties'] = WriterProperties(compression='ZSTD')
  else:
  # Normal write mode (no schema merging)
  if _IS_OLD_DELTALAKE:
@@ -59,7 +81,14 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
  args['max_rows_per_file'] = RG
  args['max_rows_per_group'] = RG
  args['min_rows_per_group'] = RG
- # For version 0.20+: no optimization available (rust by default, no row group params supported)
+ # Set ZSTD compression for PyArrow engine
+ if _HAS_PYARROW_DATASET:
+ args['file_options'] = ds.ParquetFileFormat().make_write_options(compression='ZSTD')
+ else:
+ # Version 0.20+: no optimization available (rust by default, no row group params supported)
+ # Set ZSTD compression for Rust engine
+ if _HAS_WRITER_PROPERTIES:
+ args['writer_properties'] = WriterProperties(compression='ZSTD')

  return args
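Note: the branching in the two hunks above can be condensed into a small self-contained sketch. The merge_schema/old_deltalake parameters stand in for the schema_mode check and the module's _IS_OLD_DELTALAKE flag (defined elsewhere in writer.py); the guarded imports mirror the ones added at the top of the file:

    try:
        from deltalake.writer import WriterProperties   # rust engine (deltalake 0.18.2+)
    except ImportError:
        WriterProperties = None
    try:
        import pyarrow.dataset as ds                    # file options for the pyarrow engine
    except ImportError:
        ds = None

    RG = 8_000_000  # row group target, copied from the module constant above

    def sketch_compression_args(merge_schema: bool, old_deltalake: bool) -> dict:
        args = {}
        if merge_schema:
            args['schema_mode'] = 'merge'
            if old_deltalake:
                args['engine'] = 'rust'                 # 0.18.2-0.19.x needs rust for schema merging
            if WriterProperties is not None:
                args['writer_properties'] = WriterProperties(compression='ZSTD')
        elif old_deltalake:
            # pyarrow engine: keep row group tuning and set ZSTD via parquet file options
            args.update(max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
            if ds is not None:
                args['file_options'] = ds.ParquetFileFormat().make_write_options(compression='ZSTD')
        elif WriterProperties is not None:
            # deltalake 0.20+: rust by default, compression via writer_properties
            args['writer_properties'] = WriterProperties(compression='ZSTD')
        return args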
 
@@ -135,14 +164,14 @@ class DeltaWriter:
  # Prepare info message based on version and settings
  if self._schema_mode == 'merge':
  if _IS_OLD_DELTALAKE:
- engine_info = " (engine=rust, schema_mode=merge)"
+ engine_info = " (engine=rust, schema_mode=merge, compression=ZSTD)"
  else:
- engine_info = " (schema_mode=merge, rust by default)"
+ engine_info = " (schema_mode=merge, rust by default, compression=ZSTD)"
  else:
  if _IS_OLD_DELTALAKE:
- engine_info = " (engine=pyarrow, optimized row groups)"
+ engine_info = " (engine=pyarrow, optimized row groups, compression=ZSTD)"
  else:
- engine_info = " (engine=rust by default)"
+ engine_info = " (engine=rust by default, compression=ZSTD)"

  partition_info = f" partitioned by {self._partition_by}" if self._partition_by else ""
  print(f"Writing to Delta table: {schema}.{table} (mode={self._mode}){engine_info}{partition_info}")
@@ -1,7 +1,7 @@
  Metadata-Version: 2.4
  Name: duckrun
- Version: 0.2.13
- Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
+ Version: 0.2.19.dev5
+ Summary: Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)
  Author: mim
  License: MIT
  Project-URL: Homepage, https://github.com/djouallah/duckrun
@@ -10,7 +10,7 @@ Project-URL: Issues, https://github.com/djouallah/duckrun/issues
  Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: duckdb>=1.2.2
+ Requires-Dist: duckdb>=1.2.0
  Requires-Dist: deltalake<=0.18.2
  Requires-Dist: requests>=2.28.0
  Requires-Dist: obstore>=0.2.0
@@ -0,0 +1,16 @@
+ duckrun/__init__.py,sha256=6l57__ldQ0Yquq1BMFBTW6C0aA5p9gCEZI5te8PORyM,382
+ duckrun/auth.py,sha256=EMaf-L2zeNOjbHOT97xYxfZNfWo4WrwrU1h3vBQTgEc,9624
+ duckrun/core.py,sha256=ZzBtPqPNphyHDXp1vFuUo_fLJSvPdOFVZSACbSC2rJ4,77317
+ duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+ duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+ duckrun/notebook.py,sha256=lzDRBoWZ_lePF-_5BbA1_42BImLZC5yrq6nzlmlKglM,12183
+ duckrun/rle.py,sha256=8iANVvYJc-ZcTmIBusPlqBT3dCVQhTz9g_njsuuOqIs,37517
+ duckrun/runner.py,sha256=NGVyerJA44UP2umRdndfL0fuFM_gdOZmuJUz-PLOFf0,13461
+ duckrun/semantic_model.py,sha256=shRPBN1II60K_PH8JOqke-_3hAwLspcx4Add0VJRwwU,35913
+ duckrun/stats.py,sha256=8Qc9Mimvv7ALbOHw5-UPWrSflFrGrtkCQkB0QYL8jCw,21923
+ duckrun/writer.py,sha256=wIsU77DSj4J7d9_bIhvk6AbC51uUrLW0e6pcSPQOY1c,9424
+ duckrun-0.2.19.dev5.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+ duckrun-0.2.19.dev5.dist-info/METADATA,sha256=Hx_ZCHIYG6eNNXRauNJEbvDGseaPWdoRTP6GLPEK4_w,20807
+ duckrun-0.2.19.dev5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ duckrun-0.2.19.dev5.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+ duckrun-0.2.19.dev5.dist-info/RECORD,,
@@ -1,14 +0,0 @@
- duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
- duckrun/auth.py,sha256=dMqIzozgEQ5v7Uc3Mb_OoFZGmsAq0m-VOoYCVL7rehc,9281
- duckrun/core.py,sha256=C5nnL-MheBfJPcw-Jr8t14jsm2iwMF07cYm8g_AXtFQ,52303
- duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
- duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
- duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
- duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
- duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
- duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
- duckrun-0.2.13.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
- duckrun-0.2.13.dist-info/METADATA,sha256=0r-l8dWnd8KLBGj7cspK53eUdaDeUG-iHsa74rGBaCo,20766
- duckrun-0.2.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- duckrun-0.2.13.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
- duckrun-0.2.13.dist-info/RECORD,,