datazone-sdk 6.0.1.dev1__tar.gz → 6.0.1.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/PKG-INFO +1 -1
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/generated_columns.py +12 -7
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/table.py +13 -2
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/pyproject.toml +1 -1
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/README.md +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/backtesting.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/caching/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/caching/parquet.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/base.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/cached.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/snapshot.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/db/standard.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/data_types.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/schema.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/slicing.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/store.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/testing/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/testing/database_client.py +0 -0
{datazone_sdk-6.0.1.dev1 → datazone_sdk-6.0.1.dev3}/datazone/deltastorage/generated_columns.py
RENAMED
|
@@ -97,18 +97,23 @@ class DateBucket(GeneratedColumn):
|
|
|
97
97
|
Returns:
|
|
98
98
|
tuple[str, dt.date]: List of conditions on the generated column
|
|
99
99
|
"""
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
timestamp =
|
|
103
|
-
|
|
100
|
+
|
|
101
|
+
def date_from_timestamp(timestamp: dt.datetime) -> dt.date:
|
|
102
|
+
timestamp = copy.copy(timestamp)
|
|
103
|
+
if self.as_tz is not None:
|
|
104
|
+
timestamp = timestamp.astimezone(ZoneInfo(self.as_tz))
|
|
105
|
+
return timestamp.date()
|
|
104
106
|
|
|
105
107
|
match op:
|
|
106
108
|
case "=":
|
|
107
|
-
return [("=",
|
|
109
|
+
return [("=", date_from_timestamp(value))]
|
|
108
110
|
case ("<" | "<="):
|
|
109
|
-
return [("<=",
|
|
111
|
+
return [("<=", date_from_timestamp(value))]
|
|
110
112
|
case (">" | ">="):
|
|
111
|
-
return [(">=",
|
|
113
|
+
return [(">=", date_from_timestamp(value))]
|
|
114
|
+
case "in":
|
|
115
|
+
dates = [date_from_timestamp(timestamp) for timestamp in value]
|
|
116
|
+
return [("in", list(dict.fromkeys(dates)))]
|
|
112
117
|
case _:
|
|
113
118
|
# for other operations, we cannot make any
|
|
114
119
|
# useful filters on the generated column
|
|
@@ -4,6 +4,7 @@ from typing import Any, Optional
|
|
|
4
4
|
import deltalake as dl
|
|
5
5
|
import polars as pl
|
|
6
6
|
import pyarrow as pa
|
|
7
|
+
from deltalake.exceptions import CommitFailedError
|
|
7
8
|
|
|
8
9
|
from .schema import Schema
|
|
9
10
|
from .slicing import HyperSlice
|
|
@@ -46,6 +47,10 @@ def _dnf_to_sql(dnf: list[tuple]) -> str:
|
|
|
46
47
|
return " AND ".join(sql_parts)
|
|
47
48
|
|
|
48
49
|
|
|
50
|
+
def _is_arrow_cast_commit_failure(error: CommitFailedError) -> bool:
|
|
51
|
+
return "arrow_cast should have been simplified to cast" in str(error)
|
|
52
|
+
|
|
53
|
+
|
|
49
54
|
class Table:
|
|
50
55
|
def __init__(
|
|
51
56
|
self,
|
|
@@ -178,13 +183,19 @@ class Table:
|
|
|
178
183
|
schema = self.schema()
|
|
179
184
|
data = self._to_writable_pyarrow_table(df=df, schema=schema)
|
|
180
185
|
|
|
181
|
-
hyper_slice = schema.add_generated_filters(hyper_slice)
|
|
182
186
|
if len(hyper_slice) == 0:
|
|
183
187
|
predicate = None
|
|
184
188
|
else:
|
|
185
189
|
predicate = _dnf_to_sql(hyper_slice)
|
|
186
190
|
|
|
187
|
-
|
|
191
|
+
try:
|
|
192
|
+
self._write_deltalake(data=data, mode="overwrite", predicate=predicate)
|
|
193
|
+
except CommitFailedError as error:
|
|
194
|
+
if predicate is None or not _is_arrow_cast_commit_failure(error):
|
|
195
|
+
raise
|
|
196
|
+
|
|
197
|
+
self.delta_table.delete(predicate)
|
|
198
|
+
self._write_deltalake(data=data, mode="append", predicate=None)
|
|
188
199
|
|
|
189
200
|
def append(self, df: pl.DataFrame) -> None:
|
|
190
201
|
"""Append rows to Delta Lake. This will write data to the Delta Lake.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "datazone-sdk"
|
|
3
|
-
version = "6.0.1.dev1"
|
|
3
|
+
version = "6.0.1.dev3"
|
|
4
4
|
description = "Database and Delta storage client library for working with Delta Lake tables"
|
|
5
5
|
authors = [{ name = "Team Enigma", email = "enigma@energinet.dk" }]
|
|
6
6
|
requires-python = ">=3.10"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|