datazone-sdk 6.0.1.dev8__tar.gz → 6.0.1.dev9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/PKG-INFO +1 -1
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/generated_columns.py +3 -2
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/table.py +23 -4
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/pyproject.toml +1 -1
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/README.md +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/backtesting.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/caching/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/caching/parquet.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/base.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/cached.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/snapshot.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/standard.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/data_types.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/schema.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/slicing.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/store.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/testing/__init__.py +0 -0
- {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/testing/database_client.py +0 -0
{datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/generated_columns.py
RENAMED
|
@@ -112,8 +112,9 @@ class DateBucket(GeneratedColumn):
|
|
|
112
112
|
case (">" | ">="):
|
|
113
113
|
return [(">=", date_from_timestamp(value))]
|
|
114
114
|
case "in":
|
|
115
|
-
|
|
116
|
-
|
|
115
|
+
# de-duplicate: multiple timestamps can bucket to the same date
|
|
116
|
+
dates = {date_from_timestamp(timestamp) for timestamp in value}
|
|
117
|
+
return [("in", sorted(dates))]
|
|
117
118
|
case _:
|
|
118
119
|
# for other operations, we cannot make any
|
|
119
120
|
# useful filters on the generated column
|
|
{datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/table.py
RENAMED
|
@@ -10,6 +10,16 @@ from .slicing import HyperSlice
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def _sql_literal(value: Any) -> str:
|
|
13
|
+
"""Render a Python value as a *type-correct* SQL literal for Delta predicates.
|
|
14
|
+
|
|
15
|
+
Earlier versions quoted every value as a string (``f"{col} {op} '{val}'"``),
|
|
16
|
+
which worked because the old delta-rs/datafusion implicitly cast the string
|
|
17
|
+
literal to the column type. The upgraded datafusion type-checks predicates
|
|
18
|
+
strictly and rejects comparing a non-string column to a string literal
|
|
19
|
+
(e.g. ``Timestamp(us, "UTC") <= Utf8View``). So numbers are left unquoted,
|
|
20
|
+
booleans become ``TRUE``/``FALSE`` and ``None`` becomes ``NULL``; only real
|
|
21
|
+
strings/dates are quoted (and escaped).
|
|
22
|
+
"""
|
|
13
23
|
if isinstance(value, dt.datetime):
|
|
14
24
|
return f"'{value.isoformat()}'"
|
|
15
25
|
if isinstance(value, dt.date):
|
|
@@ -25,7 +35,12 @@ def _sql_literal(value: Any) -> str:
|
|
|
25
35
|
|
|
26
36
|
|
|
27
37
|
def _dnf_to_sql(dnf: list[tuple]) -> str:
|
|
28
|
-
"""Convert DNF expression to SQL
|
|
38
|
+
"""Convert a hyper slice (DNF expression) to a Delta predicate SQL string.
|
|
39
|
+
|
|
40
|
+
Needed because delta-rs overwrite/delete operations accept a SQL predicate
|
|
41
|
+
string, not the tuple filter format the SDK uses everywhere else. ``in`` is
|
|
42
|
+
expanded to ``OR`` of equalities since the predicate dialect has no ``IN``.
|
|
43
|
+
"""
|
|
29
44
|
if len(dnf) == 0:
|
|
30
45
|
return "1=1"
|
|
31
46
|
|
|
@@ -47,6 +62,13 @@ def _dnf_to_sql(dnf: list[tuple]) -> str:
|
|
|
47
62
|
|
|
48
63
|
|
|
49
64
|
def _filter_to_polars_expr(filter_: tuple) -> pl.Expr:
|
|
65
|
+
"""Convert a single tuple filter to a Polars expression.
|
|
66
|
+
|
|
67
|
+
Needed so the SDK's ``(column, op, value)`` filters can be pushed down as
|
|
68
|
+
predicates to the native Polars Delta reader in ``Table.read``, instead of
|
|
69
|
+
PyArrow filters, whose compute kernels fail on delta-rs ``string_view``
|
|
70
|
+
columns (``ArrowNotImplementedError``).
|
|
71
|
+
"""
|
|
50
72
|
col, op, val = filter_
|
|
51
73
|
if op == "=":
|
|
52
74
|
return pl.col(col) == val
|
|
@@ -130,9 +152,6 @@ class Table:
|
|
|
130
152
|
matching input types (string_view, string_view)`. The native reader
|
|
131
153
|
evaluates predicates in its own engine and avoids this entirely.
|
|
132
154
|
|
|
133
|
-
Any further (post-)filtering that cannot be expressed as a pushdown
|
|
134
|
-
predicate is the responsibility of the caller.
|
|
135
|
-
|
|
136
155
|
Args:
|
|
137
156
|
hyper_slice (HyperSlice): Hyper slice used to filter data.
|
|
138
157
|
columns: Optional list of columns to project.
|
|
{datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/pyproject.toml
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "datazone-sdk"
|
|
3
|
-
version = "6.0.1.dev8"
|
|
3
|
+
version = "6.0.1.dev9"
|
|
4
4
|
description = "Database and Delta storage client library for working with Delta Lake tables"
|
|
5
5
|
authors = [{ name = "Team Enigma", email = "enigma@energinet.dk" }]
|
|
6
6
|
requires-python = ">=3.10"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|