datazone-sdk 6.0.1.dev5__tar.gz → 6.0.1.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/PKG-INFO +1 -1
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/__init__.py +1 -8
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/deltastorage/__init__.py +1 -1
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/deltastorage/table.py +1 -25
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/pyproject.toml +1 -1
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/README.md +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/backtesting.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/caching/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/caching/parquet.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/db/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/db/base.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/db/cached.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/db/snapshot.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/db/standard.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/deltastorage/data_types.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/deltastorage/generated_columns.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/deltastorage/schema.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/deltastorage/slicing.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/deltastorage/store.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/testing/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/testing/database_client.py +0 -0
|
@@ -1,13 +1,6 @@
|
|
|
1
1
|
from datazone.backtesting import backtest
|
|
2
2
|
from datazone.caching import ParquetCache
|
|
3
3
|
from datazone.db import DatabaseClient, SnapshotDatabaseClient
|
|
4
|
-
from datazone.deltastorage import (
|
|
5
|
-
DeltaOverwriteFallbackWarning,
|
|
6
|
-
Field,
|
|
7
|
-
HyperSlice,
|
|
8
|
-
Schema,
|
|
9
|
-
Store,
|
|
10
|
-
Table,
|
|
11
|
-
)
|
|
4
|
+
from datazone.deltastorage import Field, HyperSlice, Schema, Store, Table
|
|
12
5
|
|
|
13
6
|
from . import testing
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
import datetime as dt
|
|
2
|
-
import warnings
|
|
3
2
|
from typing import Any, Optional
|
|
4
3
|
|
|
5
4
|
import deltalake as dl
|
|
6
5
|
import polars as pl
|
|
7
6
|
import pyarrow as pa
|
|
8
|
-
from deltalake.exceptions import CommitFailedError
|
|
9
7
|
|
|
10
8
|
from .schema import Schema
|
|
11
9
|
from .slicing import HyperSlice
|
|
@@ -48,10 +46,6 @@ def _dnf_to_sql(dnf: list[tuple]) -> str:
|
|
|
48
46
|
return " AND ".join(sql_parts)
|
|
49
47
|
|
|
50
48
|
|
|
51
|
-
def _is_arrow_cast_commit_failure(error: CommitFailedError) -> bool:
|
|
52
|
-
return "arrow_cast should have been simplified to cast" in str(error)
|
|
53
|
-
|
|
54
|
-
|
|
55
49
|
def _is_safe_partition_filter(filter_: tuple) -> bool:
|
|
56
50
|
return filter_[1] in ["=", "in"]
|
|
57
51
|
|
|
@@ -77,10 +71,6 @@ def _filter_to_polars_expr(filter_: tuple) -> pl.Expr:
|
|
|
77
71
|
raise ValueError(f"Unsupported operation: {op}")
|
|
78
72
|
|
|
79
73
|
|
|
80
|
-
class DeltaOverwriteFallbackWarning(RuntimeWarning):
|
|
81
|
-
pass
|
|
82
|
-
|
|
83
|
-
|
|
84
74
|
class Table:
|
|
85
75
|
def __init__(
|
|
86
76
|
self,
|
|
@@ -239,21 +229,7 @@ class Table:
|
|
|
239
229
|
else:
|
|
240
230
|
predicate = _dnf_to_sql(hyper_slice)
|
|
241
231
|
|
|
242
|
-
try:
|
243
|
-
self._write_deltalake(data=data, mode="overwrite", predicate=predicate)
|
|
244
|
-
except CommitFailedError as error:
|
|
245
|
-
if predicate is None or not _is_arrow_cast_commit_failure(error):
|
|
246
|
-
raise
|
|
247
|
-
|
|
248
|
-
warnings.warn(
|
|
249
|
-
"Delta predicate overwrite failed with the DataFusion arrow_cast bug. "
|
|
250
|
-
"Falling back to delete followed by append; this is not atomic as a "
|
|
251
|
-
"single overwrite commit.",
|
|
252
|
-
DeltaOverwriteFallbackWarning,
|
|
253
|
-
stacklevel=2,
|
|
254
|
-
)
|
|
255
|
-
self.delta_table.delete(predicate)
|
|
256
|
-
self._write_deltalake(data=data, mode="append", predicate=None)
|
|
232
|
+
self._write_deltalake(data=data, mode="overwrite", predicate=predicate)
|
|
257
233
|
|
|
258
234
|
def append(self, df: pl.DataFrame) -> None:
|
|
259
235
|
"""Append rows to Delta Lake. This will write data to the Delta Lake.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "datazone-sdk"
|
|
3
|
-
version = "6.0.1.dev5"
|
|
3
|
+
version = "6.0.1.dev6"
|
|
4
4
|
description = "Database and Delta storage client library for working with Delta Lake tables"
|
|
5
5
|
authors = [{ name = "Team Enigma", email = "enigma@energinet.dk" }]
|
|
6
6
|
requires-python = ">=3.10"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datazone_sdk-6.0.1.dev5 → datazone_sdk-6.0.1.dev6}/datazone/deltastorage/generated_columns.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|