pyvista-zstd 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ """VTK zstandard compression library."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from importlib.metadata import PackageNotFoundError
6
+ from importlib.metadata import version
7
+
8
+ try:
9
+ __version__ = version("pyvista-zstd")
10
+ except PackageNotFoundError: # pragma: no cover
11
+ __version__ = "unknown"
12
+
13
+ from pyvista_zstd.append import AppendReader
14
+ from pyvista_zstd.append import append_arrays
15
+ from pyvista_zstd.append import read_array
16
+ from pyvista_zstd.pyvista_zstd import FILE_VERSION
17
+ from pyvista_zstd.pyvista_zstd import Reader
18
+ from pyvista_zstd.pyvista_zstd import Writer
19
+ from pyvista_zstd.pyvista_zstd import read
20
+ from pyvista_zstd.pyvista_zstd import write
21
+
22
+ __all__ = [
23
+ "FILE_VERSION",
24
+ "AppendReader",
25
+ "Reader",
26
+ "Writer",
27
+ "append_arrays",
28
+ "read",
29
+ "read_array",
30
+ "write",
31
+ ]
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '0.2.4'
22
+ __version_tuple__ = version_tuple = (0, 2, 4)
23
+
24
+ __commit_id__ = commit_id = None
pyvista_zstd/append.py ADDED
@@ -0,0 +1,553 @@
1
+ """
2
+ Incremental append + partial/columnar read for the ``.pv`` container.
3
+
4
+ Motivation
5
+ ----------
6
+ The upstream :class:`pyvista_zstd.Writer` serialises a whole dataset in
7
+ one shot: every array is independently zstd-compressed into a pair of
8
+ frames (``[packed-metadata-frame][raw-array-frame]``), the frames are
9
+ concatenated, and a fixed-width footer table plus a trailing
10
+ ``<Q>`` frame-count is written at the very end of the file.
11
+
12
+ Because the footer lives at the *end* and every body frame is addressed
13
+ by an absolute cumulative-compressed-byte offset, a new block can be
14
+ appended by:
15
+
16
+ 1. truncating the file at the start of the existing footer
17
+ (``frame_ends[-1]``), leaving every previously-committed compressed
18
+ frame byte-for-byte untouched,
19
+ 2. writing the new block's compressed frames after it,
20
+ 3. writing a *new* footer that covers the old frames (offsets unchanged)
21
+ plus the new ones, then the new ``<Q>`` count.
22
+
23
+ This is the idea behind :func:`append_arrays`. Every previously-written
24
+ frame other than the root dataset-metadata and file-metadata frames is byte-copied verbatim, never decompressed or re-compressed.
25
+
26
+ On-disk layout (unchanged from upstream)
27
+ -----------------------------------------
28
+ ::
29
+
30
+ [frame 0 ........ frame N-1] # body: 2 frames per array
31
+ [<QQ> x N: (cum_compressed_end, decompressed_size)] # footer table
32
+ [<Q>: N] # trailing frame count
33
+
34
+ Appended arrays are recorded three ways so they round-trip through the
35
+ *unmodified* upstream reader:
36
+
37
+ * their two frames are added to the body and footer,
38
+ * their names are added to the file-metadata ``frame_names`` list, and
39
+ * they are registered as ``field_data`` arrays on the root dataset's
40
+ :class:`~pyvista_zstd.pyvista_zstd.DataSetMetadata` (under the
41
+ ``…__field_data`` suffix) so :py:attr:`Reader.available_field_arrays`
42
+ and :meth:`Reader.read` surface them without any reader change.
43
+
44
+ Crash safety
45
+ ------------
46
+ The append is staged into a sibling temp file (a byte-copy of the body
47
+ prefix + new frames + new footer) and then :func:`os.replace`-d over the
48
+ original. ``os.replace`` is atomic on POSIX and Windows, so an
49
+ interrupted append either leaves the original fully intact or completes;
50
+ it can never leave a half-written footer that destroys committed blocks.
51
+
52
+ Partial / columnar read
53
+ -----------------------
54
+ :func:`read_array` (and :class:`AppendReader`) decompress exactly the
55
+ two frames of one named block — never the rest of the file — so a single
56
+ array can be loaded back without touching any other block.
57
+ """
58
+
59
+ from __future__ import annotations
60
+
61
+ import json
62
+ import os
63
+ from pathlib import Path
64
+ import struct
65
+ from typing import IO
66
+ from typing import TYPE_CHECKING
67
+
68
+ import numpy as np
69
+ import zstandard as zstd
70
+
71
+ from pyvista_zstd.pyvista_zstd import _FILTER_NONE
72
+ from pyvista_zstd.pyvista_zstd import _FILTER_SHUFFLE
73
+ from pyvista_zstd.pyvista_zstd import DS_METADATA_KEY
74
+ from pyvista_zstd.pyvista_zstd import FIELD_DATA_SUFFIX
75
+ from pyvista_zstd.pyvista_zstd import FILE_METADATA_KEY
76
+ from pyvista_zstd.pyvista_zstd import FILE_VERSION
77
+ from pyvista_zstd.pyvista_zstd import MULTIBLOCK_METADATA_KEY
78
+ from pyvista_zstd.pyvista_zstd import SUPPORTED_READ_SUFFIXES
79
+ from pyvista_zstd.pyvista_zstd import UID_N_CHAR
80
+ from pyvista_zstd.pyvista_zstd import ArrayInfo
81
+ from pyvista_zstd.pyvista_zstd import DataSetMetadata
82
+ from pyvista_zstd.pyvista_zstd import ZstdFileMetadata
83
+ from pyvista_zstd.pyvista_zstd import _pack_array_metadata
84
+ from pyvista_zstd.pyvista_zstd import _reconstruct_array
85
+ from pyvista_zstd.pyvista_zstd import _resolve_shuffle
86
+ from pyvista_zstd.pyvista_zstd import _shuffle_bytes
87
+
88
+ if TYPE_CHECKING: # pragma: no cover - typing only
89
+ from collections.abc import Iterable
90
+
91
+ from numpy.typing import NDArray
92
+
93
+ from pyvista_zstd.pyvista_zstd import ShuffleSpec
94
+
95
+ __all__ = [
96
+ "AppendReader",
97
+ "append_arrays",
98
+ "read_array",
99
+ ]
100
+
101
+ # Mirror the upstream Reader's sanity bound on the trailing frame count.
102
+ _MAX_FRAMES = 1_000_000
103
+
104
+
105
+ def _read_footer(f: IO[bytes]) -> tuple[int, list[tuple[int, int]]]:
106
+ """
107
+ Return ``(num_frames, frame_meta)`` from an open ``"rb"`` handle.
108
+
109
+ ``frame_meta[i]`` is ``(cumulative_compressed_end, decompressed_size)``
110
+ exactly as written by :meth:`pyvista_zstd.Writer.write`.
111
+ """
112
+ f.seek(-8, os.SEEK_END)
113
+ num_frames = struct.unpack("<Q", f.read(8))[0]
114
+ if num_frames == 0 or num_frames > _MAX_FRAMES:
115
+ msg = "Bad number of frames. File may be corrupted."
116
+ raise RuntimeError(msg)
117
+ f.seek(-(8 + num_frames * UID_N_CHAR), os.SEEK_END)
118
+ raw = f.read(num_frames * UID_N_CHAR)
119
+ frame_meta = [struct.unpack("<QQ", raw[i * UID_N_CHAR : (i + 1) * UID_N_CHAR]) for i in range(num_frames)]
120
+ return num_frames, frame_meta
121
+
122
+
123
+ def _frame_bounds(frame_meta: list[tuple[int, int]]) -> tuple[list[int], list[int]]:
124
+ """Return per-frame ``(starts, ends)`` of compressed byte ranges."""
125
+ ends = [end for end, _ in frame_meta]
126
+ starts = [0, *ends[:-1]]
127
+ return starts, ends
128
+
129
+
130
def _decompress_two_blobs(
    meta_blob: bytes,
    data_blob: bytes,
    decompressed_sizes: tuple[int, int],
) -> tuple[str, NDArray]:
    """
    Rebuild one array from its metadata frame and its data frame.

    Both compressed frames are handed to zstd in a single batched call,
    together with their expected decompressed sizes packed as ``<QQ``, and
    the two resulting segments are passed on to ``_reconstruct_array``.
    """
    decompressor = zstd.ZstdDecompressor()
    size_table = struct.pack("<QQ", *decompressed_sizes)
    frames = [meta_blob, data_blob]
    segments = decompressor.multi_decompress_to_buffer(
        frames,
        decompressed_sizes=size_table,
        threads=0,
    )
    meta_segment = segments[0]
    data_segment = segments[1]
    return _reconstruct_array(meta_segment, data_segment)
143
+
144
+
145
+ def _read_frame_pair(f: IO[bytes], starts: list[int], ends: list[int], ai: int) -> tuple[bytes, bytes]:
146
+ """Seek-read the two compressed frames of array index ``ai`` from ``f``."""
147
+ mi, di = ai * 2, ai * 2 + 1
148
+ f.seek(starts[mi])
149
+ meta_blob = f.read(ends[mi] - starts[mi])
150
+ f.seek(starts[di])
151
+ data_blob = f.read(ends[di] - starts[di])
152
+ return meta_blob, data_blob
153
+
154
+
155
def _load_file_meta_and_root_ds(
    f: IO[bytes],
    frame_meta: list[tuple[int, int]],
) -> tuple[ZstdFileMetadata, str, DataSetMetadata]:
    """
    Decode the file-metadata array and the root dataset-metadata array.

    ``f`` must be an open, seekable ``"rb"`` handle. Only the frame pairs of
    those two metadata arrays are read and decompressed; no other frame of
    the body is touched.

    Returns ``(file_meta, root_ds_id, root_ds_meta)``.

    Raises ``NotImplementedError`` for MultiBlock files and ``RuntimeError``
    when no dataset-metadata frame is listed in the file metadata.
    """
    starts, ends = _frame_bounds(frame_meta)

    # The file metadata is stored positionally as the very last array.
    last_array = len(frame_meta) // 2 - 1
    blobs = _read_frame_pair(f, starts, ends, last_array)
    tail_sizes = (frame_meta[-2][1], frame_meta[-1][1])
    _, file_meta_arr = _decompress_two_blobs(*blobs, tail_sizes)
    file_meta = ZstdFileMetadata.from_json(file_meta_arr.tobytes().decode("utf-8"))

    names = file_meta.frame_names

    # The multiblock key shares the dataset-metadata suffix, so it has to be
    # ruled out first; a MultiBlock file has no single root dataset to extend.
    for fname in names:
        if fname.endswith(MULTIBLOCK_METADATA_KEY):
            msg = "appending to MultiBlock .pv files is not supported."
            raise NotImplementedError(msg)

    # The first frame name carrying the dataset-metadata suffix is the root.
    root_idx = next(
        (idx for idx, fname in enumerate(names) if fname.endswith(DS_METADATA_KEY)),
        None,
    )
    if root_idx is None:  # pragma: no cover - malformed file
        msg = "No dataset metadata frame found; cannot append."
        raise RuntimeError(msg)

    ds_id = names[root_idx][:UID_N_CHAR]
    blobs = _read_frame_pair(f, starts, ends, root_idx)
    root_sizes = (frame_meta[root_idx * 2][1], frame_meta[root_idx * 2 + 1][1])
    _, ds_arr = _decompress_two_blobs(*blobs, root_sizes)
    return file_meta, ds_id, DataSetMetadata.from_array(ds_arr)
200
+
201
+
202
def _compress_frames(payloads: list[bytes], *, level: int) -> list[bytes]:
    """
    Turn every payload into its own standalone zstd frame.

    The payloads are compressed in one batched call at ``level``; the result
    is returned as a list of ``bytes`` objects in input order.
    """
    compressor = zstd.ZstdCompressor(level=level, threads=0)
    collection = compressor.multi_compress_to_buffer(payloads, threads=0)
    frames: list[bytes] = []
    for idx in range(len(collection)):
        frames.append(collection[idx].tobytes())
    return frames
207
+
208
+
209
def append_arrays(  # noqa: C901, PLR0912, PLR0915
    filename: Path | str,
    arrays: dict[str, NDArray],
    *,
    level: int | None = None,
    shuffle: ShuffleSpec = False,
) -> None:
    """
    Append named arrays to an existing ``.pv`` file.

    Each array is stored as an independently-zstd-compressed
    ``field_data`` block. Previously-written array frames are **never**
    decompressed or re-compressed: they are byte-copied verbatim into a
    staged sibling temp file. Only the footer and the file/dataset
    metadata frames are regenerated, and the whole update is committed
    atomically by replacing the original file with the staged one.

    Parameters
    ----------
    filename : pathlib.Path | str
        Path to an existing ``.pv`` file (written by
        :func:`pyvista_zstd.write` or a previous :func:`append_arrays`).
    arrays : dict[str, numpy.ndarray]
        Mapping of ``name -> array``. Names must not already exist as a
        field array in the file. An empty mapping is a no-op: the file is
        not even opened.
    level : int, optional
        zstd compression level for the new blocks. Defaults to the
        file's recorded ``compression_level`` so appended blocks match
        the original. The recorded ``compression_level`` in the file
        metadata is not changed by this argument.
    shuffle : {"auto", True, False}, default: False
        Optionally apply the reversible byte-shuffle pre-filter to the
        appended blocks (see :func:`pyvista_zstd.write`). Disabled by
        default; ``"auto"`` shuffles a multibyte floating-point array only
        when a quick trial compression shows it shrinks the data. When any
        appended block is shuffled the file is promoted to at least the
        current ``FILE_VERSION``; already-written frames are untouched and
        keep their own per-block encoding.

    Raises
    ------
    ValueError
        If ``filename`` does not end in a supported suffix, or if a name in
        ``arrays`` already exists as a field array in the file.
    NotImplementedError
        If the file is a MultiBlock container (raised while loading the
        metadata).
    RuntimeError
        If the footer or metadata is malformed, or the source file turns
        out to be truncated while kept frames are copied.

    Notes
    -----
    The appended blocks become visible as ``field_data`` arrays:
    :meth:`pyvista_zstd.Reader.read` returns them in ``grid.field_data``
    and :py:attr:`pyvista_zstd.Reader.available_field_arrays` lists them.
    Use :func:`read_array` / :class:`AppendReader` to read one block back
    without decompressing the rest of the file.

    """
    path = Path(filename)
    if path.suffix not in SUPPORTED_READ_SUFFIXES:
        msg = f"Filename must end in one of {SUPPORTED_READ_SUFFIXES}, not '{path.suffix}'"
        raise ValueError(msg)
    if not arrays:
        return

    # First pass over the file: footer + the two metadata arrays only.
    with path.open("rb") as f:
        _num_frames, frame_meta = _read_footer(f)
        file_meta, ds_id, ds_meta = _load_file_meta_and_root_ds(f, frame_meta)

    level = file_meta.compression_level if level is None else int(level)

    # Validate names against the existing field arrays.
    existing_fields = set(ds_meta.field_data_keys)
    for name in arrays:
        if name in existing_fields:
            msg = (
                f"field array {name!r} already exists in {path.name}; append_arrays does not overwrite existing blocks."
            )
            raise ValueError(msg)

    # ------------------------------------------------------------------
    # The file-metadata frame and the root dataset-metadata frame must be
    # rewritten (they grow), so we drop their *old* frames and re-emit
    # them at the tail. Their old compressed bytes are simply not copied
    # into the new body prefix.
    #
    # frame_names ordering invariant (upstream): per array there are two
    # frames; the file-metadata array is always last. The root
    # __ds_metadata array sits somewhere in the body. We rebuild the
    # frame_names list with the new field arrays inserted *before* the
    # ds-metadata + file-metadata tail so the upstream reader's
    # name->index mapping stays consistent.
    # ------------------------------------------------------------------
    starts, ends = _frame_bounds(frame_meta)
    frame_names = list(file_meta.frame_names)

    # Upstream invariant: there are ``n_arrays = num_frames // 2`` arrays
    # (2 frames each), but ``frame_names`` records only the first
    # ``n_arrays - 1`` of them — the trailing file-metadata array's name
    # is deliberately *omitted* (``Writer.write`` snapshots ``frame_names``
    # before appending the file-metadata array). The reader recovers the
    # file-metadata frame positionally (last two frames), so we MUST keep
    # this exclusion to stay byte/reader-compatible.
    n_arrays = len(frame_meta) // 2
    if len(frame_names) != n_arrays - 1:  # pragma: no cover - malformed file
        msg = f"frame_names length {len(frame_names)} != n_arrays-1 {n_arrays - 1}; cannot append."
        raise RuntimeError(msg)
    file_meta_array_idx = n_arrays - 1  # positional; name not in frame_names

    # Index of the root ds-metadata array (in array units). A match is
    # guaranteed here because the metadata loader above already raised if
    # no frame name carries the dataset-metadata suffix.
    root_ds_array_idx = next(i for i, n in enumerate(frame_names) if n.endswith(DS_METADATA_KEY))

    # Keep all body arrays EXCEPT the root ds-meta and the file-meta,
    # whose compressed bytes we will re-emit. Everything else is copied
    # verbatim (byte-for-byte) from the original body.
    rewritten = {root_ds_array_idx, file_meta_array_idx}
    kept_array_indices = [i for i in range(n_arrays) if i not in rewritten]

    # New body = concatenation of kept compressed frame-pairs, in order
    # (copied verbatim by offset, never decompressed), followed by new
    # field-array frame-pairs, then the regenerated ds-meta frame-pair
    # and file-meta frame-pair. We record per-frame ``(orig_start,
    # orig_end, decompressed_size)`` for the kept frames so they can be
    # streamed straight from the source file into the temp file.
    kept_frame_specs: list[tuple[int, int, int]] = []  # (start, end, decomp)
    new_frame_names: list[str] = []
    for ai in kept_array_indices:
        mi, di = ai * 2, ai * 2 + 1
        kept_frame_specs.append((starts[mi], ends[mi], frame_meta[mi][1]))
        kept_frame_specs.append((starts[di], ends[di], frame_meta[di][1]))
        new_frame_names.append(frame_names[ai])

    # New field-data arrays: build one (metadata, data) payload pair each.
    new_field_info: dict[str, ArrayInfo] = {}
    new_payloads: list[bytes] = []
    new_payload_decomp: list[int] = []
    new_payload_names: list[str] = []
    any_shuffled = False
    for name, raw in arrays.items():
        arr = np.ascontiguousarray(raw)
        frame_name = f"{ds_id}{name}{FIELD_DATA_SUFFIX}"
        filter_id = _FILTER_SHUFFLE if _resolve_shuffle(arr, shuffle, level) else _FILTER_NONE
        any_shuffled = any_shuffled or filter_id != _FILTER_NONE
        meta_payload = _pack_array_metadata(frame_name, arr, filter_id)
        if filter_id == _FILTER_SHUFFLE:
            data_payload = _shuffle_bytes(arr).tobytes()
        else:
            data_payload = arr.ravel().view(np.uint8).tobytes()
        new_payloads.append(meta_payload)
        new_payloads.append(data_payload)
        # The footer stores each frame's *decompressed* size, i.e. the raw
        # payload length before compression.
        new_payload_decomp.append(len(meta_payload))
        new_payload_decomp.append(len(data_payload))
        new_payload_names.append(frame_name)
        new_field_info[name] = ArrayInfo(shape=tuple(int(s) for s in arr.shape), dtype=str(arr.dtype))

    # Regenerate root dataset metadata with merged field_data_keys.
    merged_field_keys = dict(ds_meta.field_data_keys)
    merged_field_keys.update(new_field_info)
    ds_meta_new = _with_field_keys(ds_meta, merged_field_keys)
    ds_meta_arr = ds_meta_new.to_array()
    ds_meta_frame_name = f"{ds_id}{DS_METADATA_KEY}"
    ds_meta_payloads = [
        _pack_array_metadata(ds_meta_frame_name, ds_meta_arr),
        ds_meta_arr.ravel().view(np.uint8).tobytes(),
    ]

    # Regenerate file metadata. Final on-disk array order is:
    # [kept arrays ...][new field arrays ...][ds-meta][file-meta]
    # ``frame_names`` records every array EXCEPT the trailing file-meta
    # (upstream invariant — see above), so it ends at ds-meta.
    final_frame_names = [
        *new_frame_names,
        *new_payload_names,
        ds_meta_frame_name,
    ]
    # Promote the file version if any appended block uses a byte-filter, so an
    # older reader cleanly refuses it. Kept frames retain their own per-block
    # encoding (recorded in their metadata frames), so a previously-unfiltered
    # file gaining a shuffled block stays internally consistent.
    new_version = max(file_meta.file_version, FILE_VERSION) if any_shuffled else file_meta.file_version
    file_meta_new = ZstdFileMetadata(
        frame_names=final_frame_names,
        compression_level=file_meta.compression_level,
        compression=file_meta.compression,
        file_version=new_version,
    )
    file_meta_arr = file_meta_new.to_array()
    file_meta_payloads = [
        _pack_array_metadata(FILE_METADATA_KEY, file_meta_arr),
        file_meta_arr.ravel().view(np.uint8).tobytes(),
    ]

    # Compress the regenerated/new payloads (kept frames are reused as-is).
    # ``to_compress_decomp`` must stay index-aligned with ``to_compress``.
    to_compress = [*new_payloads, *ds_meta_payloads, *file_meta_payloads]
    to_compress_decomp = [
        *new_payload_decomp,
        len(ds_meta_payloads[0]),
        len(ds_meta_payloads[1]),
        len(file_meta_payloads[0]),
        len(file_meta_payloads[1]),
    ]
    compressed = _compress_frames(to_compress, level=level)

    # Stream-assemble the new file: kept frames copied verbatim from the
    # source by offset, then the freshly-compressed new/metadata frames,
    # then the regenerated footer. Cumulative compressed offsets are
    # tracked as we write so the footer table stays exact.
    tmp_path = path.with_suffix(path.suffix + ".append.tmp")
    copy_chunk = 8 * 1024 * 1024
    try:
        with path.open("rb") as src, tmp_path.open("wb") as out:
            offset = 0
            footer_entries: list[tuple[int, int]] = []

            # 1) kept frames — verbatim offset copy, no decompression.
            for start, end, dsz in kept_frame_specs:
                src.seek(start)
                remaining = end - start
                while remaining > 0:
                    chunk = src.read(min(copy_chunk, remaining))
                    if not chunk:  # pragma: no cover - truncated source
                        msg = "Source .pv truncated while copying kept frames."
                        raise RuntimeError(msg)
                    out.write(chunk)
                    remaining -= len(chunk)
                offset += end - start
                footer_entries.append((offset, dsz))

            # 2) new + regenerated-metadata frames.
            for part, dsz in zip(compressed, to_compress_decomp, strict=True):
                out.write(part)
                offset += len(part)
                footer_entries.append((offset, dsz))

            # 3) footer table + trailing frame count.
            out.writelines(struct.pack("<QQ", off, dsz) for off, dsz in footer_entries)
            out.write(struct.pack("<Q", len(footer_entries)))
            out.flush()
            os.fsync(out.fileno())
        # Atomic commit: replace the original only once the staged file is
        # fully written + fsync'd. ``Path.replace`` is atomic on POSIX and
        # Windows, so an interrupted append cannot corrupt committed blocks.
        tmp_path.replace(path)
    finally:
        # After a successful replace the temp path no longer exists, so this
        # only removes a partially-written staging file left by a failure.
        if tmp_path.exists():  # pragma: no cover - cleanup on failure
            tmp_path.unlink()
444
+
445
+
446
def _with_field_keys(meta: DataSetMetadata, field_keys: dict[str, ArrayInfo]) -> DataSetMetadata:
    """
    Build a new ``DataSetMetadata`` equal to ``meta`` but with new field keys.

    The instance is not mutated; instead its JSON image is decoded, the
    ``field_data_keys`` entry is swapped for a serialised form of
    ``field_keys``, and the result is parsed back through ``from_json``.
    """
    payload = json.loads(meta.to_json())
    serialised = {}
    for key, info in field_keys.items():
        serialised[key] = {"shape": list(info.shape), "dtype": info.dtype}
    payload["field_data_keys"] = serialised
    compact = json.dumps(payload, separators=(",", ":"))
    return DataSetMetadata.from_json(compact)
457
+
458
+
459
def read_array(filename: Path | str, name: str) -> NDArray:
    """
    Load one field array from a ``.pv`` file without reading the others.

    This is a convenience wrapper that opens the file with
    :class:`AppendReader` and asks it for the single named block, so only
    that block's two frames (plus the metadata frames needed to locate it)
    are decompressed.

    Parameters
    ----------
    filename : pathlib.Path | str
        Path to a ``.pv`` file.
    name : str
        Field-array name (the key passed to :func:`append_arrays`, or any
        field-data key present in the file).

    Returns
    -------
    numpy.ndarray
        The stored array, bit-exact.

    """
    reader = AppendReader(filename)
    return reader.read_array(name)
482
+
483
+
484
class AppendReader:
    """
    Partial reader for ``.pv`` field-data blocks.

    Opens a ``.pv`` file and exposes its field arrays for *individual*
    read-back, decompressing only the requested block's two frames. The
    footer + a small set of metadata frames are read on construction so
    repeated single-block reads are cheap.

    Parameters
    ----------
    filename : pathlib.Path | str
        Path to a ``.pv`` file.

    Raises
    ------
    ValueError
        If ``filename`` does not end in a supported suffix.

    Examples
    --------
    >>> from pyvista_zstd.append import AppendReader
    >>> r = AppendReader("data.pv")  # doctest: +SKIP
    >>> r.field_array_names  # doctest: +SKIP
    ['col_0000', 'col_0001']
    >>> col = r.read_array("col_0001")  # doctest: +SKIP

    """

    def __init__(self, filename: Path | str) -> None:
        """Open ``filename`` and read its footer + metadata frames."""
        self._path = Path(filename)
        if self._path.suffix not in SUPPORTED_READ_SUFFIXES:
            msg = f"Filename must end in one of {SUPPORTED_READ_SUFFIXES}, not '{self._path.suffix}'"
            raise ValueError(msg)
        with self._path.open("rb") as f:
            self._num_frames, self._frame_meta = _read_footer(f)
            self._file_meta, self._ds_id, self._ds_meta = _load_file_meta_and_root_ds(f, self._frame_meta)
        # Per-frame compressed byte ranges, cached for later seeks.
        self._starts, self._ends = _frame_bounds(self._frame_meta)
        # frame name -> array index (each array owns frames 2*i and 2*i + 1)
        self._name_to_idx = {n: i for i, n in enumerate(self._file_meta.frame_names)}

    @property
    def field_array_names(self) -> list[str]:
        """Names of field-data arrays available for partial read."""
        return list(self._ds_meta.field_data_keys)

    @property
    def field_array_info(self) -> dict[str, ArrayInfo]:
        """Mapping ``name -> ArrayInfo(shape, dtype)`` for field arrays (a copy)."""
        return dict(self._ds_meta.field_data_keys)

    def __contains__(self, name: str) -> bool:
        """Return whether field array ``name`` is present."""
        return name in self._ds_meta.field_data_keys

    def _array_index(self, name: str) -> int:
        """Resolve field array ``name`` to its array index, or raise ``KeyError``."""
        if name not in self._ds_meta.field_data_keys:
            msg = f"field array {name!r} not found; available: {sorted(self._ds_meta.field_data_keys)}"
            raise KeyError(msg)
        frame_name = f"{self._ds_id}{name}{FIELD_DATA_SUFFIX}"
        ai = self._name_to_idx.get(frame_name)
        if ai is None:  # pragma: no cover - metadata/name desync
            msg = f"frame for field array {name!r} not found in frame index."
            raise KeyError(msg)
        return ai

    def _read_index(self, f: IO[bytes], ai: int) -> NDArray:
        """Read and decompress array index ``ai`` through the open handle ``f``."""
        mi, di = ai * 2, ai * 2 + 1
        meta_blob, data_blob = _read_frame_pair(f, self._starts, self._ends, ai)
        sizes = (self._frame_meta[mi][1], self._frame_meta[di][1])
        _, arr = _decompress_two_blobs(meta_blob, data_blob, sizes)
        return arr

    def read_array(self, name: str) -> NDArray:
        """
        Decompress and return the single field array ``name``.

        Raises
        ------
        KeyError
            If ``name`` is not a field array of the file.

        """
        ai = self._array_index(name)
        with self._path.open("rb") as f:
            return self._read_index(f, ai)

    def read_arrays(self, names: Iterable[str]) -> dict[str, NDArray]:
        """
        Decompress and return several field arrays by name.

        All names are validated before any I/O happens, and every requested
        block is then read through one shared file handle instead of
        reopening the file per array. An empty ``names`` returns ``{}``
        without opening the file.

        Raises
        ------
        KeyError
            If any requested name is not a field array of the file.

        """
        indices = {n: self._array_index(n) for n in names}
        if not indices:
            return {}
        with self._path.open("rb") as f:
            return {n: self._read_index(f, ai) for n, ai in indices.items()}