datazone-sdk 6.0.1.dev2__tar.gz → 6.0.1.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/PKG-INFO +1 -1
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/__init__.py +8 -1
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/deltastorage/__init__.py +1 -1
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/deltastorage/table.py +25 -1
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/pyproject.toml +1 -1
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/README.md +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/backtesting.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/caching/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/caching/parquet.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/db/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/db/base.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/db/cached.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/db/snapshot.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/db/standard.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/deltastorage/data_types.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/deltastorage/generated_columns.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/deltastorage/schema.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/deltastorage/slicing.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/deltastorage/store.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/testing/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/testing/database_client.py +0 -0
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
from datazone.backtesting import backtest
|
|
2
2
|
from datazone.caching import ParquetCache
|
|
3
3
|
from datazone.db import DatabaseClient, SnapshotDatabaseClient
|
|
4
|
-
from datazone.deltastorage import Field, HyperSlice, Schema, Store, Table
|
|
4
|
+
from datazone.deltastorage import (
|
|
5
|
+
DeltaOverwriteFallbackWarning,
|
|
6
|
+
Field,
|
|
7
|
+
HyperSlice,
|
|
8
|
+
Schema,
|
|
9
|
+
Store,
|
|
10
|
+
Table,
|
|
11
|
+
)
|
|
5
12
|
|
|
6
13
|
from . import testing
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import datetime as dt
|
|
2
|
+
import warnings
|
|
2
3
|
from typing import Any, Optional
|
|
3
4
|
|
|
4
5
|
import deltalake as dl
|
|
5
6
|
import polars as pl
|
|
6
7
|
import pyarrow as pa
|
|
8
|
+
from deltalake.exceptions import CommitFailedError
|
|
7
9
|
|
|
8
10
|
from .schema import Schema
|
|
9
11
|
from .slicing import HyperSlice
|
|
@@ -46,6 +48,14 @@ def _dnf_to_sql(dnf: list[tuple]) -> str:
|
|
|
46
48
|
return " AND ".join(sql_parts)
|
|
47
49
|
|
|
48
50
|
|
|
51
|
+
def _is_arrow_cast_commit_failure(error: CommitFailedError) -> bool:
|
|
52
|
+
return "arrow_cast should have been simplified to cast" in str(error)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class DeltaOverwriteFallbackWarning(RuntimeWarning):
|
|
56
|
+
pass
|
|
57
|
+
|
|
58
|
+
|
|
49
59
|
class Table:
|
|
50
60
|
def __init__(
|
|
51
61
|
self,
|
|
@@ -183,7 +193,21 @@ class Table:
|
|
|
183
193
|
else:
|
|
184
194
|
predicate = _dnf_to_sql(hyper_slice)
|
|
185
195
|
|
|
186
|
-
|
|
196
|
+
try:
|
|
197
|
+
self._write_deltalake(data=data, mode="overwrite", predicate=predicate)
|
|
198
|
+
except CommitFailedError as error:
|
|
199
|
+
if predicate is None or not _is_arrow_cast_commit_failure(error):
|
|
200
|
+
raise
|
|
201
|
+
|
|
202
|
+
warnings.warn(
|
|
203
|
+
"Delta predicate overwrite failed with the DataFusion arrow_cast bug. "
|
|
204
|
+
"Falling back to delete followed by append; this is not atomic as a "
|
|
205
|
+
"single overwrite commit.",
|
|
206
|
+
DeltaOverwriteFallbackWarning,
|
|
207
|
+
stacklevel=2,
|
|
208
|
+
)
|
|
209
|
+
self.delta_table.delete(predicate)
|
|
210
|
+
self._write_deltalake(data=data, mode="append", predicate=None)
|
|
187
211
|
|
|
188
212
|
def append(self, df: pl.DataFrame) -> None:
|
|
189
213
|
"""Append rows to Delta Lake. This will write data to the Delta Lake.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "datazone-sdk"
|
|
3
|
-
version = "6.0.1.dev2"
|
|
3
|
+
version = "6.0.1.dev4"
|
|
4
4
|
description = "Database and Delta storage client library for working with Delta Lake tables"
|
|
5
5
|
authors = [{ name = "Team Enigma", email = "enigma@energinet.dk" }]
|
|
6
6
|
requires-python = ">=3.10"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datazone_sdk-6.0.1.dev2 → datazone_sdk-6.0.1.dev4}/datazone/deltastorage/generated_columns.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|