duckrun 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -11,7 +11,7 @@ Requires-Python: >=3.9
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
13
  Requires-Dist: duckdb>=1.2.0
14
- Requires-Dist: deltalake>=0.18.2
14
+ Requires-Dist: deltalake<=0.18.2
15
15
  Requires-Dist: requests>=2.28.0
16
16
  Requires-Dist: obstore>=0.2.0
17
17
  Provides-Extra: local
@@ -8,6 +8,9 @@ from string import Template
8
8
  import obstore as obs
9
9
  from obstore.store import AzureStore
10
10
 
11
+ # Row Group configuration for optimal Delta Lake performance
12
+ RG = 8_000_000
13
+
11
14
 
12
15
  class DeltaWriter:
13
16
  """Spark-style write API for Delta Lake"""
@@ -48,7 +51,7 @@ class DeltaWriter:
48
51
  df = self.relation.record_batch()
49
52
 
50
53
  print(f"Writing to Delta table: {schema}.{table} (mode={self._mode})")
51
- write_deltalake(path, df, mode=self._mode)
54
+ write_deltalake(path, df, mode=self._mode, max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
52
55
 
53
56
  self.duckrun.con.sql(f"DROP VIEW IF EXISTS {table}")
54
57
  self.duckrun.con.sql(f"""
@@ -406,7 +409,7 @@ class Duckrun:
406
409
  if mode == 'overwrite':
407
410
  self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
408
411
  df = self.con.sql(sql).record_batch()
409
- write_deltalake(path, df, mode='overwrite')
412
+ write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
410
413
  self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
411
414
  dt = DeltaTable(path)
412
415
  dt.vacuum(retention_hours=0, dry_run=False, enforce_retention_duration=False)
@@ -414,7 +417,7 @@ class Duckrun:
414
417
 
415
418
  elif mode == 'append':
416
419
  df = self.con.sql(sql).record_batch()
417
- write_deltalake(path, df, mode='append')
420
+ write_deltalake(path, df, mode='append', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
418
421
  self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
419
422
  dt = DeltaTable(path)
420
423
  if len(dt.file_uris()) > self.compaction_threshold:
@@ -431,7 +434,7 @@ class Duckrun:
431
434
  print(f"Table {normalized_table} doesn't exist. Creating...")
432
435
  self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
433
436
  df = self.con.sql(sql).record_batch()
434
- write_deltalake(path, df, mode='overwrite')
437
+ write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
435
438
  self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
436
439
  dt = DeltaTable(path)
437
440
  dt.vacuum(dry_run=False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -11,7 +11,7 @@ Requires-Python: >=3.9
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
13
  Requires-Dist: duckdb>=1.2.0
14
- Requires-Dist: deltalake>=0.18.2
14
+ Requires-Dist: deltalake<=0.18.2
15
15
  Requires-Dist: requests>=2.28.0
16
16
  Requires-Dist: obstore>=0.2.0
17
17
  Provides-Extra: local
@@ -1,5 +1,5 @@
1
1
  duckdb>=1.2.0
2
- deltalake>=0.18.2
2
+ deltalake<=0.18.2
3
3
  requests>=2.28.0
4
4
  obstore>=0.2.0
5
5
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "duckrun"
7
- version = "0.1.7"
7
+ version = "0.1.8"
8
8
  description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -14,7 +14,7 @@ authors = [
14
14
  requires-python = ">=3.9"
15
15
  dependencies = [
16
16
  "duckdb>=1.2.0",
17
- "deltalake>=0.18.2",
17
+ "deltalake<=0.18.2",
18
18
  "requests>=2.28.0",
19
19
  "obstore>=0.2.0"
20
20
  ]
File without changes
File without changes
File without changes
File without changes