pypff 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. pypff-1.0.0/.coverage +0 -0
  2. pypff-1.0.0/.git +1 -0
  3. pypff-1.0.0/.github/workflows/egg.yml +23 -0
  4. pypff-1.0.0/.gitignore +5 -0
  5. pypff-1.0.0/CLAUDE.md +136 -0
  6. pypff-1.0.0/LICENSE +21 -0
  7. pypff-1.0.0/PKG-INFO +253 -0
  8. pypff-1.0.0/README.md +206 -0
  9. pypff-1.0.0/docs/zarr_v3_spec.md +259 -0
  10. pypff-1.0.0/example/example-data/collect_complete +1 -0
  11. pypff-1.0.0/example/example-data/daq_config.json +13 -0
  12. pypff-1.0.0/example/example-data/data_config.json +10 -0
  13. pypff-1.0.0/example/example-data/hk.pff +73 -0
  14. pypff-1.0.0/example/example-data/hp_stdout_192.168.1.100 +57 -0
  15. pypff-1.0.0/example/example-data/log.txt +125 -0
  16. pypff-1.0.0/example/example-data/obs_config.json +29 -0
  17. pypff-1.0.0/example/example-data/quabo_config_192.168.3.248.json +311 -0
  18. pypff-1.0.0/example/example-data/quabo_config_192.168.3.249.json +311 -0
  19. pypff-1.0.0/example/example-data/quabo_config_192.168.3.250.json +12 -0
  20. pypff-1.0.0/example/example-data/quabo_config_192.168.3.251.json +12 -0
  21. pypff-1.0.0/example/example-data/quabo_config_debug.json +326 -0
  22. pypff-1.0.0/example/example-data/quabo_ph_baseline.json +266 -0
  23. pypff-1.0.0/example/example-data/quabo_uids.json +34 -0
  24. pypff-1.0.0/example/example-data/recording_ended +1 -0
  25. pypff-1.0.0/example/example-data/run_complete +1 -0
  26. pypff-1.0.0/example/example-data/start_2023-06-08T04:30:29Z.dp_img16.bpp_2.module_1.seqno_0.pff +0 -0
  27. pypff-1.0.0/example/example-data/start_2023-08-02T00:39:53Z.dp_ph256.bpp_2.module_254.seqno_0.pff +0 -0
  28. pypff-1.0.0/example/example-data/sw_info.json +6 -0
  29. pypff-1.0.0/example/notebook/data-check.ipynb +202 -0
  30. pypff-1.0.0/example/notebook/start_2025-08-02T05:31:36Z.dp_ph256.bpp_2.module_254.seqno_0.pff +0 -0
  31. pypff-1.0.0/example/pypff_example.py +38 -0
  32. pypff-1.0.0/example/pypff_io2_demo.ipynb +1651 -0
  33. pypff-1.0.0/pyproject.toml +112 -0
  34. pypff-1.0.0/src/ci/Dockerfile.ci +59 -0
  35. pypff-1.0.0/src/ci/legacy_tests/config-data/daq_config.json +13 -0
  36. pypff-1.0.0/src/ci/legacy_tests/config-data/data_config.json +20 -0
  37. pypff-1.0.0/src/ci/legacy_tests/config-data/network_config.json +33 -0
  38. pypff-1.0.0/src/ci/legacy_tests/config-data/obs_config.json +30 -0
  39. pypff-1.0.0/src/ci/legacy_tests/hk-data/hk.pff +215 -0
  40. pypff-1.0.0/src/ci/legacy_tests/sci-data/start_2025-08-02T04-31-52Z.dp_ph256.bpp_2.module_254.seqno_0.pff +0 -0
  41. pypff-1.0.0/src/ci/legacy_tests/test_pypff.py +90 -0
  42. pypff-1.0.0/src/ci/test_io2_new_features.py +87 -0
  43. pypff-1.0.0/src/ci/test_io2_vectorization.py +50 -0
  44. pypff-1.0.0/src/ci/tier1_unit/test_models.py +60 -0
  45. pypff-1.0.0/src/ci/tier1_unit/test_utils.py +49 -0
  46. pypff-1.0.0/src/ci/tier2_logic/test_io.py +67 -0
  47. pypff-1.0.0/src/ci/tier2_logic/test_io2_comprehensive.py +183 -0
  48. pypff-1.0.0/src/ci/tier2_logic/test_io2_streaming.py +373 -0
  49. pypff-1.0.0/src/ci/tier2_logic/test_io_comparison.py +85 -0
  50. pypff-1.0.0/src/ci/tier2_logic/test_io_parsing_methods.py +91 -0
  51. pypff-1.0.0/src/ci/tier2_logic/test_zarr_roundtrip.py +453 -0
  52. pypff-1.0.0/src/pypff/__init__.py +8 -0
  53. pypff-1.0.0/src/pypff/_cli/__init__.py +0 -0
  54. pypff-1.0.0/src/pypff/_cli/profile.py +54 -0
  55. pypff-1.0.0/src/pypff/_cli/root.py +19 -0
  56. pypff-1.0.0/src/pypff/_cli/test.py +57 -0
  57. pypff-1.0.0/src/pypff/_cli/zarr.py +46 -0
  58. pypff-1.0.0/src/pypff/cli.py +55 -0
  59. pypff-1.0.0/src/pypff/io.py +336 -0
  60. pypff-1.0.0/src/pypff/io2.py +1234 -0
  61. pypff-1.0.0/src/pypff/models.py +438 -0
  62. pypff-1.0.0/src/pypff/pixelmap.py +74 -0
  63. pypff-1.0.0/src/pypff/pixelmap_maroc2phys_bga.py +1156 -0
  64. pypff-1.0.0/src/pypff/pixelmap_maroc2phys_qfp.py +1156 -0
  65. pypff-1.0.0/src/pypff/pixelmap_phys2maroc_bga.py +1060 -0
  66. pypff-1.0.0/src/pypff/pixelmap_phys2maroc_qfp.py +1060 -0
  67. pypff-1.0.0/src/pypff/profiling.py +192 -0
  68. pypff-1.0.0/src/pypff/util/__init__.py +0 -0
  69. pypff-1.0.0/src/pypff/util/cli.py +160 -0
  70. pypff-1.0.0/src/pypff/utils.py +61 -0
  71. pypff-1.0.0/src/pypff/zarr/__init__.py +518 -0
  72. pypff-1.0.0/src/pypff/zarr/_reader.py +231 -0
  73. pypff-1.0.0/uv.lock +2051 -0
pypff-1.0.0/.coverage ADDED
Binary file
pypff-1.0.0/.git ADDED
@@ -0,0 +1 @@
1
+ gitdir: ../.git/modules/pypff
pypff-1.0.0/.github/workflows/egg.yml ADDED
@@ -0,0 +1,23 @@
1
+ name: pypff-CI
2
+
3
+ on:
4
+   push:
5
+     branches: ["dev", "opt", "master"]
6
+
7
+ jobs:
8
+   pypff-test:
9
+     name: pypff-test
10
+     runs-on: ubuntu-latest
11
+
12
+     steps:
13
+       - name: clone pypff
14
+         uses: actions/checkout@v4
15
+
16
+       - name: Install uv
17
+         uses: astral-sh/setup-uv@v5
18
+
19
+       - name: Install dependencies
20
+         run: uv sync --all-extras
21
+
22
+       - name: run test
23
+         run: uv run pypff test all --cov
pypff-1.0.0/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ __pycache__
2
+ .DS_Store
3
+ *.egg-info/
4
+ **/.ipynb_checkpoints**
5
+ **__pycache__**
pypff-1.0.0/CLAUDE.md ADDED
@@ -0,0 +1,136 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ `pypff` is a Python I/O library for PanoSETI File Format (PFF) data files — binary science data produced by the PanoSETI telescope array. It provides zero-copy mmap-based access, Pydantic-validated headers, and a high-level run discovery API.
8
+
9
+ ## Commands
10
+
11
+ All commands assume `uv sync` has been run first.
12
+
13
+ ```bash
14
+ uv sync # Install all dependencies
15
+ uv run pypff test all # Run full test suite (unit + logic + legacy)
16
+ uv run pypff test unit # Tier 1 unit tests only
17
+ uv run pypff test logic # Tier 2 logic/IO tests only
18
+ uv run pypff test legacy # Legacy integration tests only
19
+ uv run pypff test all --lint # Also run Ruff + MyPy
20
+ uv run pypff test all --cov # With coverage reporting
21
+ ```
22
+
23
+ Run a single test file directly:
24
+ ```bash
25
+ uv run pytest src/ci/tier1_unit/test_utils.py
26
+ uv run pytest src/ci/tier2_logic/test_io.py::test_name
27
+ ```
28
+
29
+ Lint manually:
30
+ ```bash
31
+ uv run ruff check .
32
+ uv run mypy src
33
+ ```
34
+
35
+ ## Architecture
36
+
37
+ ### Two-Layer I/O Design
38
+
39
+ **Legacy layer (`src/pypff/io.py`):** Original byte-offset based reader. Uses hardcoded byte positions (`loc_arr`) to extract metadata fields from fixed-format PFF JSON headers. Exposes `datapff`, `hkpff`, `qconfig`. Kept for backward compatibility.
40
+
41
+ **Modern layer (`src/pypff/io2.py`):** `PFFSequence` and `PanosetiRun` — the primary API. `PFFSequence` wraps one or more sequential `.pff` files for a single data product, providing:
42
+ - Streaming-by-default API: `for img in seq` and `for batch in seq.iter_batches(size=256)` are zero-copy within a single file.
43
+ - `iter_byte_range(file_idx, byte_start, byte_end, batch_size)` for distributed chunked reads (Nextflow/Dask workers); see the partitioning sketch at the end of this subsection.
44
+ - `read_images(indices)` for sorted-with-inverse-permutation random access; `read_images_range(start, count)` for sequential bulk.
45
+ - `seq[i]` returns a zero-copy view; `seq[start:stop:step]` uses `read_images`.
46
+ - Single-pass metadata extraction via NumPy composite structured dtype (`get_metadata_arrays`). Supports virtual `unix_t_ns` key.
47
+ - `timestamps(indices=None)` returns cached `int64` ns array; `timestamps(as_datetime=True)` returns a zero-copy `datetime64[ns]` view for matplotlib/pandas. `timestamp_at(i)` and `seek_time(ns)` use a two-level binary search (file bounds, then within-file).
48
+ - LRU-bounded mmap handles (`_MmapLRU`, default capacity 16). `PFFSequence` is a context manager.
49
+ - Pickle-safe for multiprocessing (handles dropped on `__getstate__`, lazily reopened).
50
+ - `get_frame(i)` returns `(dict, ndarray_view)` by default; `get_frame_validated(i)` returns `(QuaboHeader|ModuleHeader, ndarray_view)`.
51
+
52
+ `PanosetiRun` is a lazy-loaded directory scanner over a `.pffd` run directory. It discovers and groups `.pff` files by data product and exposes typed config loading via Pydantic models.
53
+
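+ As a rough illustration of the distributed read path, the helper below splits one file into naive byte ranges for parallel workers. It is not part of pypff, and it assumes (as the feature descriptions suggest) that `iter_byte_range` takes care of frame alignment for arbitrary byte bounds.
+
+ ```python
+ import os
+
+ def byte_partitions(pff_path: str, n_workers: int) -> list[tuple[int, int]]:
+     """Illustrative helper: contiguous (byte_start, byte_end) ranges, one per worker."""
+     size = os.path.getsize(pff_path)
+     step = max(1, size // n_workers)
+     bounds = [(i * step, min((i + 1) * step, size)) for i in range(n_workers)]
+     bounds[-1] = (bounds[-1][0], size)  # last worker absorbs the remainder
+     return bounds
+
+ # Each worker then streams only its slice of file 0:
+ # for batch in seq.iter_byte_range(file_idx=0, byte_start=lo, byte_end=hi, batch_size=256):
+ #     process(batch)
+ ```
+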
54
+ ### Models (`src/pypff/models.py`)
55
+
56
+ Pydantic v2 models validate all PFF headers and PANOSETI config JSON files. `BaseStrictModel` (`extra='forbid'`) is the base for all models to catch config key typos. Key models: `PFFHeader`, `QuaboHeader`, `ModuleHeader`, `FrameConfig`.
57
+
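+ The strict-base pattern, in miniature (the `ExampleConfig` fields below are invented for illustration; the real schemas live in `models.py`):
+
+ ```python
+ from pydantic import BaseModel, ConfigDict
+
+ class BaseStrictModel(BaseModel):
+     model_config = ConfigDict(extra="forbid")  # unknown keys raise ValidationError
+
+ class ExampleConfig(BaseStrictModel):  # hypothetical model, for illustration only
+     run_name: str
+     integration_time_us: int
+
+ ExampleConfig(run_name="obs1", integration_time_us=20)      # ok
+ # ExampleConfig(run_name="obs1", integration_time_uss=20)   # typo'd key -> ValidationError
+ ```
+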
58
+ ### Timing
59
+
60
+ All timestamps are **`int64` nanoseconds since the Unix epoch** — never Python floats. Float64 has only ~15–16 significant digits; a Unix timestamp in nanoseconds is ~19 digits, so `float(ns) / 1e9` loses nanosecond precision at the point of division.
61
+
62
+ - `timestamps()` → `np.ndarray[int64]` — for arithmetic, diffs, and storage.
63
+ - `timestamps(as_datetime=True)` → `np.ndarray[datetime64[ns]]` — zero-copy view of the same `int64` data; natively understood by matplotlib date axes, pandas, and xarray. Use for display only.
64
+ - `timestamp_at(i)` → `int` — single frame, nanoseconds.
65
+
66
+ **Integer-space epoch rule:** when converting to float seconds for plotting, subtract the reference epoch first in integer space, then divide — the resulting relative values are small so float64 retains sub-nanosecond resolution:
67
+ ```python
68
+ # CORRECT — subtract first (int64), then divide (small values, no precision loss)
69
+ rel_s = (times_ns - t0_ns) / 1e9
70
+
71
+ # WRONG — 1.7e18 ÷ 1e9 ≈ 1.7e9 s, only ~6 decimal digits remain after the decimal
72
+ times_ns / 1e9 - t0_s
73
+ ```
74
+
75
+ `utils.get_precise_time_ns()` reconciles `tv_sec`/`tv_usec` (system clock) against `pkt_nsec`/`pkt_tai` (quabo hardware clock) using 10-bit TAI counter wraparound logic.
76
+
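+ The wraparound idea, sketched below (not the actual implementation: the helper name is made up, and it ignores the TAI-UTC offset and edge handling):
+
+ ```python
+ def reconcile_ns_sketch(tv_sec: int, tv_usec: int, pkt_tai: int, pkt_nsec: int) -> int:
+     """Pick the wrap cycle of the 10-bit pkt_tai counter closest to the system clock."""
+     WRAP = 1024                       # 10-bit seconds counter wraps every 1024 s
+     sys_s = tv_sec + tv_usec * 1e-6   # coarse system-clock time (float is fine for comparison)
+     base = tv_sec - (tv_sec % WRAP)   # start of the wrap cycle containing tv_sec
+     candidates = (base - WRAP + pkt_tai, base + pkt_tai, base + WRAP + pkt_tai)
+     hw_sec = min(candidates, key=lambda s: abs(s - sys_s))
+     return hw_sec * 1_000_000_000 + int(pkt_nsec)
+ ```
+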
77
+ ### Pixel Maps
78
+
79
+ `src/pypff/pixelmap.py` and the four `pixelmap_*.py` files provide MAROC↔physical pixel coordinate conversions for BGA and QFP package variants. These are static lookup tables.
80
+
81
+ ### Zarr v3 Conversion (`src/pypff/zarr/`, optional extra `pypff[zarr]`)
82
+
83
+ Install with `uv sync --extra zarr`. The zarr extra requires zarr-python ≥ 3, xarray, and dask.
84
+
85
+ **Conversion:**
86
+ ```bash
87
+ uv run pypff zarr <obs.pffd> <out_dir> # CLI
88
+ ```
89
+ ```python
90
+ from pypff import PanosetiRun
91
+ from pypff.zarr import convert_run
92
+ stores = convert_run(PanosetiRun("obs.pffd"), "out/")
93
+ ```
94
+
95
+ **Reading:**
96
+ ```python
97
+ from pypff.zarr import PanosetiZarrRun
98
+ zrun = PanosetiZarrRun("out/")
99
+ store = zrun.get_product("dp_ph256.bpp_2.module_254")
100
+ store.timestamps() # int64 ns
101
+ store.to_dataset() # xarray.Dataset — images + unix_t_ns + all header fields
102
+ ```
103
+
104
+ **Key design decisions (see [`docs/zarr_v3_spec.md`](docs/zarr_v3_spec.md) for the full spec):**
105
+ - **Flat root layout**: all arrays (`images`, `unix_t_ns`, header fields) live at the zarr store root — no sub-groups. `xr.open_zarr(store)` surfaces every variable automatically. Sub-groups are invisible to `xr.open_zarr` without `group=`, so the hierarchical layout was rejected.
106
+ - **Module-level header naming**: `quabo_0.pkt_num` → `quabo_0_pkt_num` (dots replaced by underscores) for xarray compatibility.
107
+ - **Discoverability attrs**: `header_fields` and `quabo_fields` lists in root attrs let consumers separate image columns from metadata columns without re-deriving naming conventions.
108
+ - **No consolidated metadata**: `zarr.consolidate_metadata()` is not called by default — it is non-spec for Zarr v3 and emits `ZarrUserWarning`. Use `xr.open_zarr(store, consolidated=False)`.
109
+ - **Sidecar bundle**: `convert_run` writes a sibling `<run>.panoseti-meta/` directory with all non-PFF ancillary files (configs/, logs/, hk.pff, sentinels/). Run configs are also embedded in each store's root attrs under `run_configs`.
110
+
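+ A short reading sketch that leans on the flat layout and the discoverability attrs above (the store path and attr contents are illustrative):
+
+ ```python
+ import xarray as xr
+
+ ds = xr.open_zarr("out/dp_ph256.bpp_2.module_254.zarr", consolidated=False)
+ header_cols = list(ds.attrs.get("header_fields", []))  # e.g. ["pkt_num", "pkt_nsec", ...]
+ quabo_cols = list(ds.attrs.get("quabo_fields", []))    # e.g. ["quabo_0_pkt_num", ...]
+
+ images = ds["images"]                   # (time, H, W)
+ headers = ds[header_cols + quabo_cols]  # per-frame metadata as its own Dataset
+ ```
+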
111
+ **Key files:**
112
+ - `src/pypff/zarr/__init__.py` — `ZarrWriter` protocol, `ZarrPythonWriter`, `PFFToZarrConverter`, `convert_run`
113
+ - `src/pypff/zarr/_reader.py` — `PanosetiZarrStore`, `PanosetiZarrRun`, `open_zarr_run`
114
+ - `docs/zarr_v3_spec.md` — full store layout specification
115
+
116
+ ### CLI (`src/pypff/cli.py`, `src/pypff/_cli/`)
117
+
118
+ Built with `typer`. Entry point is `pypff` (defined in `pyproject.toml`). Sub-commands live in `src/pypff/_cli/`. The `test` sub-command (`_cli/test.py`) delegates to `pytest` via `subprocess`. The `zarr` sub-command (`_cli/zarr.py`) wraps `convert_run`.
119
+
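+ The delegation pattern looks roughly like this (a standalone sketch, not the actual `_cli/test.py`; the command name, option, and test path are illustrative):
+
+ ```python
+ import subprocess
+ import sys
+
+ import typer
+
+ app = typer.Typer()
+
+ @app.command()
+ def unit(cov: bool = typer.Option(False, "--cov", help="Collect coverage")) -> None:
+     """Run tier-1 unit tests by shelling out to pytest."""
+     cmd = [sys.executable, "-m", "pytest", "src/ci/tier1_unit"]
+     if cov:
+         cmd.append("--cov=pypff")
+     raise typer.Exit(code=subprocess.call(cmd))
+ ```
+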
120
+ ### Test Structure
121
+
122
+ ```
123
+ src/ci/
124
+   tier1_unit/    # Fast unit tests (models, utils)
125
+   tier2_logic/   # IO correctness, slicing, concurrency
126
+   legacy_tests/  # Original test suite against sample .pff files
127
+ ```
128
+
129
+ Test data for legacy tests lives alongside the tests in `src/ci/legacy_tests/{hk-data,sci-data,config-data}/`.
130
+
131
+ ## Key Conventions
132
+
133
+ - `ruff` is the linter/formatter; `mypy` runs in strict mode with pydantic plugin.
134
+ - The ruff line-length limit is 100, but E501 is ignored, so line length is not hard-enforced; strictness comes primarily from mypy's strict mode.
135
+ - `orjson` (not `json`) is used for all JSON parsing in the hot path — it's faster and handles bytes directly.
136
+ - Data product names follow the pattern `dp_<type>.bpp_<bits>.module_<id>`, e.g. `dp_img16.bpp_2.module_1`.
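+ For reference, the pattern can be unpacked with a regular expression (illustrative only):
+
+ ```python
+ import re
+
+ name = "dp_img16.bpp_2.module_1"
+ m = re.fullmatch(r"dp_(?P<type>\w+)\.bpp_(?P<bpp>\d+)\.module_(?P<module>\d+)", name)
+ assert m is not None
+ dp_type, bpp, module = m["type"], int(m["bpp"]), int(m["module"])  # 'img16', 2, 1
+ ```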
pypff-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 PANOSETI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
pypff-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,253 @@
1
+ Metadata-Version: 2.4
2
+ Name: pypff
3
+ Version: 1.0.0
4
+ Summary: High-performance PanoSETI File Format (PFF) I/O library
5
+ Project-URL: Homepage, https://github.com/panoseti/pypff
6
+ Project-URL: Repository, https://github.com/panoseti/pypff
7
+ Project-URL: Issues, https://github.com/panoseti/pypff/issues
8
+ Author-email: Wei Liu <liuwei_berkeley@berkeley.edu>, Nicolas Rault-Wang <nraultwang@berkeley.edu>
9
+ License-File: LICENSE
10
+ Keywords: astronomy,dask,high-performance,io,panoseti,pff,xarray,zarr
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Classifier: Topic :: Scientific/Engineering :: Astronomy
19
+ Requires-Python: >=3.11
20
+ Requires-Dist: astropy
21
+ Requires-Dist: dask[distributed]>=2024.1
22
+ Requires-Dist: matplotlib
23
+ Requires-Dist: numcodecs>=0.12
24
+ Requires-Dist: numpy>=1.24.0
25
+ Requires-Dist: orjson>=3.9.0
26
+ Requires-Dist: plotly
27
+ Requires-Dist: pydantic>=2.5.0
28
+ Requires-Dist: rich>=13.0.0
29
+ Requires-Dist: scipy
30
+ Requires-Dist: tqdm
31
+ Requires-Dist: typer>=0.9.0
32
+ Requires-Dist: xarray>=2024.1
33
+ Requires-Dist: zarr>=3.0
34
+ Provides-Extra: dev
35
+ Requires-Dist: dask; extra == 'dev'
36
+ Requires-Dist: hatchling; extra == 'dev'
37
+ Requires-Dist: mypy; extra == 'dev'
38
+ Requires-Dist: pytest-asyncio; extra == 'dev'
39
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
40
+ Requires-Dist: ruff; extra == 'dev'
41
+ Provides-Extra: zarr
42
+ Requires-Dist: dask[distributed]>=2024.1; extra == 'zarr'
43
+ Requires-Dist: numcodecs>=0.12; extra == 'zarr'
44
+ Requires-Dist: xarray>=2024.1; extra == 'zarr'
45
+ Requires-Dist: zarr>=3.0; extra == 'zarr'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # pypff - High-performance PanoSETI I/O Library
49
+
50
+ [![pypff-CI](https://github.com/panoseti/pypff/actions/workflows/egg.yml/badge.svg)](https://github.com/panoseti/pypff/actions)
51
+ [![Version](https://img.shields.io/badge/version-1.0.0-blue)](https://github.com/panoseti/pypff)
52
+ [![Python](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
53
+ [![Coverage](https://img.shields.io/badge/coverage-65%25-green)](https://github.com/panoseti/pypff)
54
+ [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
55
+
56
+ A high-performance Python package for reading and analyzing data files generated by PanoSETI (PanoSETI File Format - PFF).
57
+
58
+ ## Features
59
+ - **Streaming by default:** `iter_batches(size=256)` and `__iter__` yield zero-copy views without materializing the full sequence into RAM — safe for Jupyter and HPC pipelines alike.
60
+ - **Distributed chunked reads:** `iter_byte_range(file_idx, byte_start, byte_end)` lets Dask/Nextflow workers parse frame-aligned byte ranges in parallel without coordination.
61
+ - **Zero-copy random access:** `seq[i]` returns a strided mmap view; slicing and `read_images(indices)` use a sort + inverse-permutation for disk locality.
62
+ - **Single-pass metadata:** `get_metadata_arrays(keys)` extracts any number of header fields in one `np.frombuffer` pass per file via a composite NumPy structured dtype. Supports virtual `unix_t_ns` key.
63
+ - **Nanosecond-precise timestamps:** `timestamps()` returns `int64` ns (no float precision loss); `timestamps(as_datetime=True)` returns a zero-copy `datetime64[ns]` view for matplotlib and pandas.
64
+ - **PFF → Zarr v3 conversion:** `pypff[zarr]` optional extra converts any `.pffd` run to Zarr v3 stores readable by xarray, dask, numpy, TensorStore, and Julia — lossless, compressed, HPC/ML-ready. See [Zarr v3 spec](docs/zarr_v3_spec.md).
65
+ - **Bounded resources:** LRU mmap handle cache (default 16 files); `PFFSequence` is a context manager.
66
+ - **Multiprocessing-safe:** pickle-compatible — file handles are dropped on serialisation and lazily reopened in workers.
67
+ - **Pydantic validation:** strict schema validation for all PFF headers and PANOSETI config files.
68
+ - **Run discovery:** `PanosetiRun` lazily scans a `.pffd` directory and exposes typed configs, housekeeping, and data products.
69
+
70
+ ## Installation
71
+ The package uses `uv` for dependency management.
72
+
73
+ ```bash
74
+ cd pypff
75
+ uv sync # core library
76
+ uv sync --extra zarr # + Zarr v3 conversion (zarr-python, xarray, dask)
77
+ ```
78
+
79
+ ## Quick Start
80
+
81
+ ### Run discovery
82
+ ```python
83
+ from pypff import PanosetiRun
84
+
85
+ run = PanosetiRun("path/to/run_directory.pffd")
86
+ run.show() # pretty-print structure
87
+
88
+ print(run.list_products()) # ['dp_img16.bpp_2.module_1', ...]
89
+ seq = run.get_product("dp_img16.bpp_2.module_1")
90
+
91
+ obs_cfg = run.get_config("obs_config") # returns Pydantic model
92
+ data_cfg = run.get_config("data_config")
93
+ hk = run.get_hk() # dict[device, dict[field, np.ndarray]]
94
+ ```
95
+
96
+ ### Streaming (memory-bounded)
97
+ ```python
98
+ # Iterate frame-by-frame (zero-copy views into mmap)
99
+ for img in seq:
100
+     process(img)
101
+
102
+ # Batch iteration — never holds more than one batch in RAM
103
+ for batch in seq.iter_batches(size=256):
104
+     batch # shape (256, H, W), dtype matches file
105
+
106
+ # With timestamps and headers in one pass
107
+ for batch, ts in seq.iter_batches(size=256, with_timestamps=True):
108
+     ts # int64 nanoseconds, shape (256,)
109
+
110
+ # Distributed byte-range reads (Dask / Nextflow workers)
111
+ for batch in seq.iter_byte_range(file_idx=0, byte_start=0, byte_end=size):
112
+     ...
113
+ ```
114
+
115
+ ### Random access and slicing
116
+ ```python
117
+ import numpy as np
+
+ img = seq[42] # zero-copy view, shape (H, W)
118
+ imgs = seq[0:100:2] # every other frame, shape (50, H, W)
119
+ imgs = seq.read_images(np.array([5, 0, 3])) # unsorted — sorted internally for locality
120
+ imgs = seq.read_images_range(start=0, count=500)
121
+ ```
122
+
123
+ ### Timestamps — always `int64` nanoseconds
124
+ ```python
125
+ # Full cached array — int64 ns, no float precision loss
126
+ ts = seq.timestamps() # np.ndarray[int64]
127
+
128
+ # Zero-copy datetime64[ns] view for matplotlib / pandas
129
+ ts_dt = seq.timestamps(as_datetime=True) # np.ndarray[datetime64[ns]]
130
+
131
+ # Indexed subset
132
+ ts_sub = seq.timestamps(indices=np.arange(0, len(seq), 100))
133
+
134
+ # Single frame
135
+ t_ns = seq.timestamp_at(42) # int, nanoseconds
136
+
137
+ # Time-based navigation
138
+ idx = seq.seek_time(t_ns + 1_000_000_000) # 1 s later
139
+
140
+ # Arithmetic rule: subtract epoch in integer space before dividing
141
+ rel_s = (ts - ts[0]) / 1e9 # CORRECT — small values, no precision loss
142
+ # ts / 1e9 - t0_s # WRONG — loses ns precision at ~1.7e9 s
143
+ ```
144
+
145
+ ### Metadata extraction (single pass)
146
+ ```python
147
+ # One np.frombuffer call per file regardless of number of keys
148
+ meta = seq.get_metadata_arrays(["pkt_num", "tv_sec", "unix_t_ns"])
149
+ meta["unix_t_ns"] # int64 ns, same as timestamps()
150
+ meta["pkt_num"] # int64
151
+
152
+ # All fields at once
153
+ all_meta = seq.get_all_metadata()
154
+ ```
155
+
156
+ ### Headers and Pydantic validation
157
+ ```python
158
+ # Fast path — raw dict, no Pydantic overhead
159
+ header, img = seq.get_frame(0)
160
+ header["pkt_num"] # quabo file
161
+ header["quabo_0"]["pkt_tai"] # module file
162
+
163
+ # Validated path — full Pydantic model
164
+ from pypff.models import ModuleHeader
165
+ hdr, img = seq.get_frame_validated(0)
166
+ assert isinstance(hdr, ModuleHeader)
167
+ ```
168
+
169
+ ### Context manager and multiprocessing
170
+ ```python
171
+ # Deterministic handle cleanup
172
+ with run.get_product("dp_ph256.bpp_2.module_254") as seq:
173
+     data = seq.read_images_range(0, 1000)
174
+
175
+ # PFFSequence is pickle-safe — handles are dropped on serialisation
176
+ import concurrent.futures
+
+ def frame_sum(s, i):          # named top-level function; a lambda can't be pickled to worker processes
+     return s[i].sum()
+
+ with concurrent.futures.ProcessPoolExecutor() as ex:
+     futures = [ex.submit(frame_sum, seq, i) for i in range(len(seq))]
179
+ ```
180
+
181
+ ## PFF → Zarr v3 conversion (`pypff[zarr]`)
182
+
183
+ Convert a `.pffd` observation run to Zarr v3 stores readable by xarray, dask,
184
+ numpy, TensorStore, Julia, and Rust. See [docs/zarr_v3_spec.md](docs/zarr_v3_spec.md)
185
+ for the full layout specification.
186
+
187
+ ### Write
188
+
189
+ ```python
190
+ from pypff.io2 import PanosetiRun
191
+ from pypff.zarr import convert_run
192
+
193
+ run = PanosetiRun("path/to/obs.pffd")
194
+ stores = convert_run(run, "output/L0_zarr")
195
+ # → one .zarr per (data_product, module)
196
+ # → one .panoseti-meta/ sidecar bundle (configs, logs, hk.pff)
197
+ ```
198
+
199
+ Or via the CLI:
200
+
201
+ ```bash
202
+ uv run pypff zarr path/to/obs.pffd output/L0_zarr
203
+ ```
204
+
205
+ ### Read
206
+
207
+ ```python
208
+ from pypff.zarr import PanosetiZarrRun
209
+ import xarray as xr
210
+
211
+ # High-level wrapper (mirrors PanosetiRun)
212
+ zrun = PanosetiZarrRun("output/L0_zarr")
213
+ store = zrun.get_product("dp_ph256.bpp_2.module_254")
214
+ ts = store.timestamps() # int64 ns array
215
+ ds = store.to_dataset() # xarray.Dataset
216
+ cfgs = zrun.configs # parsed run configs
217
+
218
+ # Or open directly with xarray — all arrays visible as variables
219
+ ds = xr.open_zarr(str(stores[0]), consolidated=False)
220
+ # ds.images (T, H, W) int16 / uint16
221
+ # ds.unix_t_ns (T,) int64 — nanosecond timestamps
222
+ # ds.pkt_num (T,) uint32 ─┐ per-frame header fields
223
+ # ds.pkt_nsec (T,) uint32 │ (single-level: ph256)
224
+ # ds.quabo_num (T,) uint8 ─┘
225
+ # ds.quabo_0_pkt_num (T,) uint32 ─┐ module-level headers
226
+ # … ─┘ (img16, ph1024)
227
+ ```
228
+
229
+ ### Zarr store layout
230
+
231
+ All arrays live at the **root** of each store (no sub-groups), so
232
+ `xr.open_zarr(store)` surfaces every variable — images, timestamps, and all
233
+ header fields — as a single Dataset aligned on the shared `time` dimension.
234
+ Logical grouping of headers is expressed via `header_fields` and `quabo_fields`
235
+ root attributes. Full specification: [docs/zarr_v3_spec.md](docs/zarr_v3_spec.md).
236
+
237
+ ## Testing
238
+ Run the test suite via the built-in CLI:
239
+
240
+ ```bash
241
+ uv run pypff test all
242
+ ```
243
+
244
+ The test suite includes:
245
+ - **Tier 1 (Unit):** Basic logic and timing tests.
246
+ - **Tier 2 (Logic):** Higher-level I/O, slicing, and concurrency tests.
247
+ - **Legacy Integration:** The original `pypff` test suite using provided sample data.
248
+
249
+ ## Dockerized CI
250
+ Build the CI environment:
251
+ ```bash
252
+ docker build -t pypff-ci -f src/ci/Dockerfile.ci .
253
+ ```
pypff-1.0.0/README.md ADDED
@@ -0,0 +1,206 @@
1
+ # pypff - High-performance PanoSETI I/O Library
2
+
3
+ [![pypff-CI](https://github.com/panoseti/pypff/actions/workflows/egg.yml/badge.svg)](https://github.com/panoseti/pypff/actions)
4
+ [![Version](https://img.shields.io/badge/version-1.0.0-blue)](https://github.com/panoseti/pypff)
5
+ [![Python](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
6
+ [![Coverage](https://img.shields.io/badge/coverage-65%25-green)](https://github.com/panoseti/pypff)
7
+ [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
8
+
9
+ A high-performance Python package for reading and analyzing data files generated by PanoSETI (PanoSETI File Format - PFF).
10
+
11
+ ## Features
12
+ - **Streaming by default:** `iter_batches(size=256)` and `__iter__` yield zero-copy views without materializing the full sequence into RAM — safe for Jupyter and HPC pipelines alike.
13
+ - **Distributed chunked reads:** `iter_byte_range(file_idx, byte_start, byte_end)` lets Dask/Nextflow workers parse frame-aligned byte ranges in parallel without coordination.
14
+ - **Zero-copy random access:** `seq[i]` returns a strided mmap view; slicing and `read_images(indices)` use a sort + inverse-permutation for disk locality.
15
+ - **Single-pass metadata:** `get_metadata_arrays(keys)` extracts any number of header fields in one `np.frombuffer` pass per file via a composite NumPy structured dtype. Supports virtual `unix_t_ns` key.
16
+ - **Nanosecond-precise timestamps:** `timestamps()` returns `int64` ns (no float precision loss); `timestamps(as_datetime=True)` returns a zero-copy `datetime64[ns]` view for matplotlib and pandas.
17
+ - **PFF → Zarr v3 conversion:** `pypff[zarr]` optional extra converts any `.pffd` run to Zarr v3 stores readable by xarray, dask, numpy, TensorStore, and Julia — lossless, compressed, HPC/ML-ready. See [Zarr v3 spec](docs/zarr_v3_spec.md).
18
+ - **Bounded resources:** LRU mmap handle cache (default 16 files); `PFFSequence` is a context manager.
19
+ - **Multiprocessing-safe:** pickle-compatible — file handles are dropped on serialisation and lazily reopened in workers.
20
+ - **Pydantic validation:** strict schema validation for all PFF headers and PANOSETI config files.
21
+ - **Run discovery:** `PanosetiRun` lazily scans a `.pffd` directory and exposes typed configs, housekeeping, and data products.
22
+
23
+ ## Installation
24
+ The package uses `uv` for dependency management.
25
+
26
+ ```bash
27
+ cd pypff
28
+ uv sync # core library
29
+ uv sync --extra zarr # + Zarr v3 conversion (zarr-python, xarray, dask)
30
+ ```
31
+
32
+ ## Quick Start
33
+
34
+ ### Run discovery
35
+ ```python
36
+ from pypff import PanosetiRun
37
+
38
+ run = PanosetiRun("path/to/run_directory.pffd")
39
+ run.show() # pretty-print structure
40
+
41
+ print(run.list_products()) # ['dp_img16.bpp_2.module_1', ...]
42
+ seq = run.get_product("dp_img16.bpp_2.module_1")
43
+
44
+ obs_cfg = run.get_config("obs_config") # returns Pydantic model
45
+ data_cfg = run.get_config("data_config")
46
+ hk = run.get_hk() # dict[device, dict[field, np.ndarray]]
47
+ ```
48
+
49
+ ### Streaming (memory-bounded)
50
+ ```python
51
+ # Iterate frame-by-frame (zero-copy views into mmap)
52
+ for img in seq:
53
+     process(img)
54
+
55
+ # Batch iteration — never holds more than one batch in RAM
56
+ for batch in seq.iter_batches(size=256):
57
+     batch # shape (256, H, W), dtype matches file
58
+
59
+ # With timestamps and headers in one pass
60
+ for batch, ts in seq.iter_batches(size=256, with_timestamps=True):
61
+     ts # int64 nanoseconds, shape (256,)
62
+
63
+ # Distributed byte-range reads (Dask / Nextflow workers)
64
+ for batch in seq.iter_byte_range(file_idx=0, byte_start=0, byte_end=size):
65
+     ...
66
+ ```
67
+
68
+ ### Random access and slicing
69
+ ```python
70
+ import numpy as np
+
+ img = seq[42] # zero-copy view, shape (H, W)
71
+ imgs = seq[0:100:2] # every other frame, shape (50, H, W)
72
+ imgs = seq.read_images(np.array([5, 0, 3])) # unsorted — sorted internally for locality
73
+ imgs = seq.read_images_range(start=0, count=500)
74
+ ```
75
+
76
+ ### Timestamps — always `int64` nanoseconds
77
+ ```python
78
+ # Full cached array — int64 ns, no float precision loss
79
+ ts = seq.timestamps() # np.ndarray[int64]
80
+
81
+ # Zero-copy datetime64[ns] view for matplotlib / pandas
82
+ ts_dt = seq.timestamps(as_datetime=True) # np.ndarray[datetime64[ns]]
83
+
84
+ # Indexed subset
85
+ ts_sub = seq.timestamps(indices=np.arange(0, len(seq), 100))
86
+
87
+ # Single frame
88
+ t_ns = seq.timestamp_at(42) # int, nanoseconds
89
+
90
+ # Time-based navigation
91
+ idx = seq.seek_time(t_ns + 1_000_000_000) # 1 s later
92
+
93
+ # Arithmetic rule: subtract epoch in integer space before dividing
94
+ rel_s = (ts - ts[0]) / 1e9 # CORRECT — small values, no precision loss
95
+ # ts / 1e9 - t0_s # WRONG — loses ns precision at ~1.7e9 s
96
+ ```
97
+
98
+ ### Metadata extraction (single pass)
99
+ ```python
100
+ # One np.frombuffer call per file regardless of number of keys
101
+ meta = seq.get_metadata_arrays(["pkt_num", "tv_sec", "unix_t_ns"])
102
+ meta["unix_t_ns"] # int64 ns, same as timestamps()
103
+ meta["pkt_num"] # int64
104
+
105
+ # All fields at once
106
+ all_meta = seq.get_all_metadata()
107
+ ```
108
+
109
+ ### Headers and Pydantic validation
110
+ ```python
111
+ # Fast path — raw dict, no Pydantic overhead
112
+ header, img = seq.get_frame(0)
113
+ header["pkt_num"] # quabo file
114
+ header["quabo_0"]["pkt_tai"] # module file
115
+
116
+ # Validated path — full Pydantic model
117
+ from pypff.models import ModuleHeader
118
+ hdr, img = seq.get_frame_validated(0)
119
+ assert isinstance(hdr, ModuleHeader)
120
+ ```
121
+
122
+ ### Context manager and multiprocessing
123
+ ```python
124
+ # Deterministic handle cleanup
125
+ with run.get_product("dp_ph256.bpp_2.module_254") as seq:
126
+     data = seq.read_images_range(0, 1000)
127
+
128
+ # PFFSequence is pickle-safe — handles are dropped on serialisation
129
+ import concurrent.futures
+
+ def frame_sum(s, i):          # named top-level function; a lambda can't be pickled to worker processes
+     return s[i].sum()
+
+ with concurrent.futures.ProcessPoolExecutor() as ex:
+     futures = [ex.submit(frame_sum, seq, i) for i in range(len(seq))]
132
+ ```
133
+
134
+ ## PFF → Zarr v3 conversion (`pypff[zarr]`)
135
+
136
+ Convert a `.pffd` observation run to Zarr v3 stores readable by xarray, dask,
137
+ numpy, TensorStore, Julia, and Rust. See [docs/zarr_v3_spec.md](docs/zarr_v3_spec.md)
138
+ for the full layout specification.
139
+
140
+ ### Write
141
+
142
+ ```python
143
+ from pypff.io2 import PanosetiRun
144
+ from pypff.zarr import convert_run
145
+
146
+ run = PanosetiRun("path/to/obs.pffd")
147
+ stores = convert_run(run, "output/L0_zarr")
148
+ # → one .zarr per (data_product, module)
149
+ # → one .panoseti-meta/ sidecar bundle (configs, logs, hk.pff)
150
+ ```
151
+
152
+ Or via the CLI:
153
+
154
+ ```bash
155
+ uv run pypff zarr path/to/obs.pffd output/L0_zarr
156
+ ```
157
+
158
+ ### Read
159
+
160
+ ```python
161
+ from pypff.zarr import PanosetiZarrRun
162
+ import xarray as xr
163
+
164
+ # High-level wrapper (mirrors PanosetiRun)
165
+ zrun = PanosetiZarrRun("output/L0_zarr")
166
+ store = zrun.get_product("dp_ph256.bpp_2.module_254")
167
+ ts = store.timestamps() # int64 ns array
168
+ ds = store.to_dataset() # xarray.Dataset
169
+ cfgs = zrun.configs # parsed run configs
170
+
171
+ # Or open directly with xarray — all arrays visible as variables
172
+ ds = xr.open_zarr(str(stores[0]), consolidated=False)
173
+ # ds.images (T, H, W) int16 / uint16
174
+ # ds.unix_t_ns (T,) int64 — nanosecond timestamps
175
+ # ds.pkt_num (T,) uint32 ─┐ per-frame header fields
176
+ # ds.pkt_nsec (T,) uint32 │ (single-level: ph256)
177
+ # ds.quabo_num (T,) uint8 ─┘
178
+ # ds.quabo_0_pkt_num (T,) uint32 ─┐ module-level headers
179
+ # … ─┘ (img16, ph1024)
180
+ ```
181
+
182
+ ### Zarr store layout
183
+
184
+ All arrays live at the **root** of each store (no sub-groups), so
185
+ `xr.open_zarr(store)` surfaces every variable — images, timestamps, and all
186
+ header fields — as a single Dataset aligned on the shared `time` dimension.
187
+ Logical grouping of headers is expressed via `header_fields` and `quabo_fields`
188
+ root attributes. Full specification: [docs/zarr_v3_spec.md](docs/zarr_v3_spec.md).
189
+
190
+ ## Testing
191
+ Run the test suite via the built-in CLI:
192
+
193
+ ```bash
194
+ uv run pypff test all
195
+ ```
196
+
197
+ The test suite includes:
198
+ - **Tier 1 (Unit):** Basic logic and timing tests.
199
+ - **Tier 2 (Logic):** Higher-level I/O, slicing, and concurrency tests.
200
+ - **Legacy Integration:** The original `pypff` test suite using provided sample data.
201
+
202
+ ## Dockerized CI
203
+ Build the CI environment:
204
+ ```bash
205
+ docker build -t pypff-ci -f src/ci/Dockerfile.ci .
206
+ ```