datazone-sdk 6.0.1.dev1__tar.gz → 6.0.1.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/PKG-INFO +1 -1
  2. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/generated_columns.py +12 -7
  3. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/table.py +13 -2
  4. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/pyproject.toml +1 -1
  5. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/README.md +0 -0
  6. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/__init__.py +0 -0
  7. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/backtesting.py +0 -0
  8. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/caching/__init__.py +0 -0
  9. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/caching/parquet.py +0 -0
  10. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/__init__.py +0 -0
  11. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/base.py +0 -0
  12. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/cached.py +0 -0
  13. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/snapshot.py +0 -0
  14. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/standard.py +0 -0
  15. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/__init__.py +0 -0
  16. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/data_types.py +0 -0
  17. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/schema.py +0 -0
  18. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/slicing.py +0 -0
  19. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/store.py +0 -0
  20. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/testing/__init__.py +0 -0
  21. {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/testing/database_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datazone-sdk
3
- Version: 6.0.1.dev1
3
+ Version: 6.0.1.dev3
4
4
  Summary: Database and Delta storage client library for working with Delta Lake tables
5
5
  Author: Team Enigma
6
6
  Author-email: enigma@energinet.dk
@@ -97,18 +97,23 @@ class DateBucket(GeneratedColumn):
97
97
  Returns:
98
98
  tuple[str, dt.date]: List of conditions on the generated column
99
99
  """
100
- timestamp = copy.copy(value)
101
- if self.as_tz is not None:
102
- timestamp = timestamp.astimezone(ZoneInfo(self.as_tz))
103
- date = timestamp.date()
100
+
101
+ def date_from_timestamp(timestamp: dt.datetime) -> dt.date:
102
+ timestamp = copy.copy(timestamp)
103
+ if self.as_tz is not None:
104
+ timestamp = timestamp.astimezone(ZoneInfo(self.as_tz))
105
+ return timestamp.date()
104
106
 
105
107
  match op:
106
108
  case "=":
107
- return [("=", date)]
109
+ return [("=", date_from_timestamp(value))]
108
110
  case ("<" | "<="):
109
- return [("<=", date)]
111
+ return [("<=", date_from_timestamp(value))]
110
112
  case (">" | ">="):
111
- return [(">=", date)]
113
+ return [(">=", date_from_timestamp(value))]
114
+ case "in":
115
+ dates = [date_from_timestamp(timestamp) for timestamp in value]
116
+ return [("in", list(dict.fromkeys(dates)))]
112
117
  case _:
113
118
  # for other operations, we cannot make any
114
119
  # useful filters on the generated column
@@ -4,6 +4,7 @@ from typing import Any, Optional
4
4
  import deltalake as dl
5
5
  import polars as pl
6
6
  import pyarrow as pa
7
+ from deltalake.exceptions import CommitFailedError
7
8
 
8
9
  from .schema import Schema
9
10
  from .slicing import HyperSlice
@@ -46,6 +47,10 @@ def _dnf_to_sql(dnf: list[tuple]) -> str:
46
47
  return " AND ".join(sql_parts)
47
48
 
48
49
 
50
+ def _is_arrow_cast_commit_failure(error: CommitFailedError) -> bool:
51
+ return "arrow_cast should have been simplified to cast" in str(error)
52
+
53
+
49
54
  class Table:
50
55
  def __init__(
51
56
  self,
@@ -178,13 +183,19 @@ class Table:
178
183
  schema = self.schema()
179
184
  data = self._to_writable_pyarrow_table(df=df, schema=schema)
180
185
 
181
- hyper_slice = schema.add_generated_filters(hyper_slice)
182
186
  if len(hyper_slice) == 0:
183
187
  predicate = None
184
188
  else:
185
189
  predicate = _dnf_to_sql(hyper_slice)
186
190
 
187
- self._write_deltalake(data=data, mode="overwrite", predicate=predicate)
191
+ try:
192
+ self._write_deltalake(data=data, mode="overwrite", predicate=predicate)
193
+ except CommitFailedError as error:
194
+ if predicate is None or not _is_arrow_cast_commit_failure(error):
195
+ raise
196
+
197
+ self.delta_table.delete(predicate)
198
+ self._write_deltalake(data=data, mode="append", predicate=None)
188
199
 
189
200
  def append(self, df: pl.DataFrame) -> None:
190
201
  """Append rows to Delta Lake. This will write data to the Delta Lake.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "datazone-sdk"
3
- version = "6.0.1.dev1"
3
+ version = "6.0.1.dev3"
4
4
  description = "Database and Delta storage client library for working with Delta Lake tables"
5
5
  authors = [{ name = "Team Enigma", email = "enigma@energinet.dk" }]
6
6
  requires-python = ">=3.10"