datazone-sdk 6.0.1.dev5__tar.gz → 6.0.1.dev7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/PKG-INFO +1 -1
  2. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/__init__.py +1 -8
  3. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/deltastorage/__init__.py +1 -1
  4. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/deltastorage/table.py +6 -38
  5. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/pyproject.toml +1 -1
  6. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/README.md +0 -0
  7. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/backtesting.py +0 -0
  8. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/caching/__init__.py +0 -0
  9. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/caching/parquet.py +0 -0
  10. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/db/__init__.py +0 -0
  11. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/db/base.py +0 -0
  12. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/db/cached.py +0 -0
  13. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/db/snapshot.py +0 -0
  14. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/db/standard.py +0 -0
  15. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/deltastorage/data_types.py +0 -0
  16. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/deltastorage/generated_columns.py +0 -0
  17. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/deltastorage/schema.py +0 -0
  18. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/deltastorage/slicing.py +0 -0
  19. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/deltastorage/store.py +0 -0
  20. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/testing/__init__.py +0 -0
  21. {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev7}/datazone/testing/database_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datazone-sdk
3
- Version: 6.0.1.dev5
3
+ Version: 6.0.1.dev7
4
4
  Summary: Database and Delta storage client library for working with Delta Lake tables
5
5
  Author: Team Enigma
6
6
  Author-email: enigma@energinet.dk
@@ -1,13 +1,6 @@
1
1
  from datazone.backtesting import backtest
2
2
  from datazone.caching import ParquetCache
3
3
  from datazone.db import DatabaseClient, SnapshotDatabaseClient
4
- from datazone.deltastorage import (
5
- DeltaOverwriteFallbackWarning,
6
- Field,
7
- HyperSlice,
8
- Schema,
9
- Store,
10
- Table,
11
- )
4
+ from datazone.deltastorage import Field, HyperSlice, Schema, Store, Table
12
5
 
13
6
  from . import testing
@@ -1,4 +1,4 @@
1
1
  from .schema import Field, Schema
2
2
  from .slicing import HyperSlice
3
3
  from .store import Store
4
- from .table import DeltaOverwriteFallbackWarning, Table
4
+ from .table import Table
@@ -1,11 +1,9 @@
1
1
  import datetime as dt
2
- import warnings
3
2
  from typing import Any, Optional
4
3
 
5
4
  import deltalake as dl
6
5
  import polars as pl
7
6
  import pyarrow as pa
8
- from deltalake.exceptions import CommitFailedError
9
7
 
10
8
  from .schema import Schema
11
9
  from .slicing import HyperSlice
@@ -48,14 +46,6 @@ def _dnf_to_sql(dnf: list[tuple]) -> str:
48
46
  return " AND ".join(sql_parts)
49
47
 
50
48
 
51
- def _is_arrow_cast_commit_failure(error: CommitFailedError) -> bool:
52
- return "arrow_cast should have been simplified to cast" in str(error)
53
-
54
-
55
- def _is_safe_partition_filter(filter_: tuple) -> bool:
56
- return filter_[1] in ["=", "in"]
57
-
58
-
59
49
  def _empty_to_none(filters: list[tuple]) -> list[tuple] | None:
60
50
  return filters if len(filters) > 0 else None
61
51
 
@@ -77,10 +67,6 @@ def _filter_to_polars_expr(filter_: tuple) -> pl.Expr:
77
67
  raise ValueError(f"Unsupported operation: {op}")
78
68
 
79
69
 
80
- class DeltaOverwriteFallbackWarning(RuntimeWarning):
81
- pass
82
-
83
-
84
70
  class Table:
85
71
  def __init__(
86
72
  self,
@@ -140,9 +126,9 @@ class Table:
140
126
  if hyper_slice is None:
141
127
  hyper_slice = []
142
128
 
143
- # Generated filters are an optimization for partition pruning. Keep them out
144
- # of row filters because partition values may be represented as strings by
145
- # PyArrow even when the Delta schema has a richer logical type.
129
+ # Generated filters are an optimization for partition pruning. Keep all
130
+ # partition filters out of row filters because partition values may be
131
+ # represented differently by PyArrow than by Delta's partition pruning.
146
132
  partition_hyper_slice = self.schema().add_generated_filters(hyper_slice)
147
133
 
148
134
  delta_table = self.delta_table
@@ -157,16 +143,12 @@ class Table:
157
143
  [f for f in hyper_slice if f[0] not in partition_cols]
158
144
  )
159
145
  partition_filters = _empty_to_none(
160
- [
161
- f
162
- for f in partition_hyper_slice
163
- if f[0] in partition_cols and _is_safe_partition_filter(f)
164
- ]
146
+ [f for f in partition_hyper_slice if f[0] in partition_cols]
165
147
  )
166
148
  post_filters = [
167
149
  f
168
150
  for f in hyper_slice
169
- if f[0] in partition_cols and not _is_safe_partition_filter(f)
151
+ if f[0] in partition_cols and f not in partition_hyper_slice
170
152
  ]
171
153
 
172
154
  pyarrow_table_existing_data = delta_table.to_pyarrow_table(
@@ -239,21 +221,7 @@ class Table:
239
221
  else:
240
222
  predicate = _dnf_to_sql(hyper_slice)
241
223
 
242
- try:
243
- self._write_deltalake(data=data, mode="overwrite", predicate=predicate)
244
- except CommitFailedError as error:
245
- if predicate is None or not _is_arrow_cast_commit_failure(error):
246
- raise
247
-
248
- warnings.warn(
249
- "Delta predicate overwrite failed with the DataFusion arrow_cast bug. "
250
- "Falling back to delete followed by append; this is not atomic as a "
251
- "single overwrite commit.",
252
- DeltaOverwriteFallbackWarning,
253
- stacklevel=2,
254
- )
255
- self.delta_table.delete(predicate)
256
- self._write_deltalake(data=data, mode="append", predicate=None)
224
+ self._write_deltalake(data=data, mode="overwrite", predicate=predicate)
257
225
 
258
226
  def append(self, df: pl.DataFrame) -> None:
259
227
  """Append rows to Delta Lake. This will write data to the Delta Lake.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "datazone-sdk"
3
- version = "6.0.1.dev5"
3
+ version = "6.0.1.dev7"
4
4
  description = "Database and Delta storage client library for working with Delta Lake tables"
5
5
  authors = [{ name = "Team Enigma", email = "enigma@energinet.dk" }]
6
6
  requires-python = ">=3.10"