datazone-sdk 6.0.1.dev8__tar.gz → 6.0.1.dev9__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (21)
  1. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/PKG-INFO +1 -1
  2. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/generated_columns.py +3 -2
  3. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/table.py +23 -4
  4. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/pyproject.toml +1 -1
  5. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/README.md +0 -0
  6. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/__init__.py +0 -0
  7. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/backtesting.py +0 -0
  8. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/caching/__init__.py +0 -0
  9. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/caching/parquet.py +0 -0
  10. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/__init__.py +0 -0
  11. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/base.py +0 -0
  12. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/cached.py +0 -0
  13. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/snapshot.py +0 -0
  14. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/db/standard.py +0 -0
  15. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/__init__.py +0 -0
  16. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/data_types.py +0 -0
  17. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/schema.py +0 -0
  18. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/slicing.py +0 -0
  19. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/deltastorage/store.py +0 -0
  20. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/testing/__init__.py +0 -0
  21. {datazone_sdk-6.0.1.dev8 → datazone_sdk-6.0.1.dev9}/datazone/testing/database_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datazone-sdk
3
- Version: 6.0.1.dev8
3
+ Version: 6.0.1.dev9
4
4
  Summary: Database and Delta storage client library for working with Delta Lake tables
5
5
  Author: Team Enigma
6
6
  Author-email: enigma@energinet.dk
@@ -112,8 +112,9 @@ class DateBucket(GeneratedColumn):
112
112
  case (">" | ">="):
113
113
  return [(">=", date_from_timestamp(value))]
114
114
  case "in":
115
- dates = [date_from_timestamp(timestamp) for timestamp in value]
116
- return [("in", list(dict.fromkeys(dates)))]
115
+ # de-duplicate: multiple timestamps can bucket to the same date
116
+ dates = {date_from_timestamp(timestamp) for timestamp in value}
117
+ return [("in", sorted(dates))]
117
118
  case _:
118
119
  # for other operations, we cannot make any
119
120
  # useful filters on the generated column
@@ -10,6 +10,16 @@ from .slicing import HyperSlice
10
10
 
11
11
 
12
12
  def _sql_literal(value: Any) -> str:
13
+ """Render a Python value as a *type-correct* SQL literal for Delta predicates.
14
+
15
+ Earlier versions quoted every value as a string (``f"{col} {op} '{val}'"``),
16
+ which worked because the old delta-rs/datafusion implicitly cast the string
17
+ literal to the column type. The upgraded datafusion type-checks predicates
18
+ strictly and rejects comparing a non-string column to a string literal
19
+ (e.g. ``Timestamp(us, "UTC") <= Utf8View``). So numbers are left unquoted,
20
+ booleans become ``TRUE``/``FALSE`` and ``None`` becomes ``NULL``; only real
21
+ strings/dates are quoted (and escaped).
22
+ """
13
23
  if isinstance(value, dt.datetime):
14
24
  return f"'{value.isoformat()}'"
15
25
  if isinstance(value, dt.date):
@@ -25,7 +35,12 @@ def _sql_literal(value: Any) -> str:
25
35
 
26
36
 
27
37
  def _dnf_to_sql(dnf: list[tuple]) -> str:
28
- """Convert DNF expression to SQL expression."""
38
+ """Convert a hyper slice (DNF expression) to a Delta predicate SQL string.
39
+
40
+ Needed because delta-rs overwrite/delete operations accept a SQL predicate
41
+ string, not the tuple filter format the SDK uses everywhere else. ``in`` is
42
+ expanded to ``OR`` of equalities since the predicate dialect has no ``IN``.
43
+ """
29
44
  if len(dnf) == 0:
30
45
  return "1=1"
31
46
 
@@ -47,6 +62,13 @@ def _dnf_to_sql(dnf: list[tuple]) -> str:
47
62
 
48
63
 
49
64
  def _filter_to_polars_expr(filter_: tuple) -> pl.Expr:
65
+ """Convert a single tuple filter to a Polars expression.
66
+
67
+ Needed so the SDK's ``(column, op, value)`` filters can be pushed down as
68
+ predicates to the native Polars Delta reader in ``Table.read``, instead of
69
+ PyArrow filters, whose compute kernels fail on delta-rs ``string_view``
70
+ columns (``ArrowNotImplementedError``).
71
+ """
50
72
  col, op, val = filter_
51
73
  if op == "=":
52
74
  return pl.col(col) == val
@@ -130,9 +152,6 @@ class Table:
130
152
  matching input types (string_view, string_view)`. The native reader
131
153
  evaluates predicates in its own engine and avoids this entirely.
132
154
 
133
- Any further (post-)filtering that cannot be expressed as a pushdown
134
- predicate is the responsibility of the caller.
135
-
136
155
  Args:
137
156
  hyper_slice (HyperSlice): Hyper slice used to filter data.
138
157
  columns: Optional list of columns to project.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "datazone-sdk"
3
- version = "6.0.1.dev8"
3
+ version = "6.0.1.dev9"
4
4
  description = "Database and Delta storage client library for working with Delta Lake tables"
5
5
  authors = [{ name = "Team Enigma", email = "enigma@energinet.dk" }]
6
6
  requires-python = ">=3.10"