msw-io 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.4
2
+ Name: msw-io
3
+ Version: 1.0.2
4
+ Summary: Murine Shift Work session data IO: file codec, namespace utilities, and session readers.
5
+ Author-email: "Lars B. Rollik" <lars@rollik.me>
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.11
8
+ Requires-Dist: acquisition-namespace>=1.2.2
9
+ Requires-Dist: numpy
10
+ Requires-Dist: pandas
11
+ Requires-Dist: pydantic>=2.0
12
+ Requires-Dist: pyyaml
13
+ Requires-Dist: ttl-barcoder>=0.4.1
14
+ Provides-Extra: dev
15
+ Requires-Dist: commitizen; extra == 'dev'
16
+ Requires-Dist: mypy; extra == 'dev'
17
+ Requires-Dist: pre-commit; extra == 'dev'
18
+ Requires-Dist: pyarrow; extra == 'dev'
19
+ Requires-Dist: pytest-cov; extra == 'dev'
20
+ Requires-Dist: pytest>=8; extra == 'dev'
21
+ Requires-Dist: types-pyyaml; extra == 'dev'
22
+ Provides-Extra: docs
23
+ Requires-Dist: mkdocs-material; extra == 'docs'
24
+ Requires-Dist: mkdocstrings[python]; extra == 'docs'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # msw-io
28
+
29
+ [![PyPI](https://img.shields.io/pypi/v/msw-io.svg)](https://pypi.org/project/msw-io)
30
+
31
+ Murine Shift Work session data IO: file codec, namespace utilities, and session readers.
32
+
33
+ Provides a lean, installable library for reading and writing MSW session data without
34
+ requiring the full `murineshiftwork` acquisition stack. Install it in analysis
35
+ environments where you only need to load sessions, not run them.
36
+
37
+ ## Key features
38
+
39
+ - **Session readers** - load MSW session data (JSONL, PKL, YAML) into structured `MswSession` models
40
+ - **Namespace utilities** - build and parse MSW session paths from the canonical `subject__datetime__task` spec
41
+ - **IO codec** - save and load trial data with numpy/tuple encoding
42
+ - **Standalone** - no dependency on the `murineshiftwork` acquisition stack
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ pip install msw-io
48
+ ```
49
+
50
+ ## Quick start
51
+
52
+ ```python
53
+ from murineshiftwork.readers import load_session
54
+
55
+ session = load_session("/data/mouse_01/session__20260514_143022_123456__gonogo")
56
+ print(session.subject, session.task, session.n_trials)
57
+ ```
58
+
59
+ Load an entire acquisition (all sessions in a container directory):
60
+
61
+ ```python
62
+ from murineshiftwork.readers import load_acquisition
63
+
64
+ sessions = load_acquisition("/data/mouse_01/session__20260514_143022_123456__session_gonogo")
65
+ for s in sessions:
66
+ print(s.basename, s.is_complete)
67
+ ```
68
+
69
+ Generate session paths for a new recording:
70
+
71
+ ```python
72
+ from murineshiftwork.namespace import generate_session_paths
73
+
74
+ paths = generate_session_paths("mouse_01", "gonogo", "/data", printout=False)
75
+ print(paths["session_folder"])
76
+ ```
77
+
78
+ ## Documentation
79
+
80
+ Full documentation including API reference: <https://murineshiftwork.github.io/msw-io>
@@ -0,0 +1,20 @@
1
+ murineshiftwork/_version.py,sha256=913Ea4P4y2ZD15ebE66os1ChbBKutOXqQ54_tjABoS0,520
2
+ murineshiftwork/io/__init__.py,sha256=DYoxKuTRF2ZKI6FrvKnwGKXkjbjKbh7waug_3xUukls,4602
3
+ murineshiftwork/namespace/__init__.py,sha256=merw31cRsjycjHJGpfJBvuMckAwkaLHyZ1A2GZ0Yixs,847
4
+ murineshiftwork/namespace/manifest.py,sha256=hapuQTQXzatd2XbcWQDjmpFZy7rClvlrA5Of1amP7Io,4778
5
+ murineshiftwork/namespace/msw_files.py,sha256=nXNtRvtgRFzsoYc-YZLj23H0TsPcVfMWxV_cvRx1TXU,1774
6
+ murineshiftwork/namespace/namespace.msw.yaml,sha256=L0twEJZKssZvQhJWYw3u21g6JOHr_ruryzPdqyJHrT4,771
7
+ murineshiftwork/namespace/paths.py,sha256=iI6w5EPvdS9P-yHnN7YPF5zM3DpN9KTbuiC14MA8UCc,7394
8
+ murineshiftwork/namespace/spec.py,sha256=Rn9QzVel-AWAqtsyutc7pRDY616T9GE7rOvMAv2CWlg,282
9
+ murineshiftwork/readers/__init__.py,sha256=oJkm1N5_jzMnACAMH4_1CBgd4Nx2FDCcBsuSGIiU0gU,372
10
+ murineshiftwork/readers/alignment.py,sha256=7T3ZgNmEoManzWVUlNSS4gahA5o2TA5fHJAAkSKa4HA,10842
11
+ murineshiftwork/readers/batch.py,sha256=z42sQfc3b9RvgwqqB5rBS2EO6kCKX-O0JFThwxt_OyY,6166
12
+ murineshiftwork/readers/files.py,sha256=qa0oEyNBrpkQFxwrlYe_RUibuZMu-xTulWB7qIkj1ZQ,3379
13
+ murineshiftwork/readers/models.py,sha256=fdVJHWrqsVqobzIbbIXZ8r44OxsXqjLVkY3DUnUEmbI,4271
14
+ murineshiftwork/readers/namespace.py,sha256=G8A7oH6QQ7Udik16XtB3AeD4w0H_ymWRBqKK0_VpNOM,4506
15
+ murineshiftwork/readers/session.py,sha256=lvfl2SkfRyQ_5KFoZZCAYJ490TAjhp3SxvDKGPWhiY8,7923
16
+ murineshiftwork/readers/validate.py,sha256=RpWCa8xQXC8HNhUA04t4PRvzna7hc3BZVU1QHyIcrP8,2770
17
+ msw_io-1.0.2.dist-info/METADATA,sha256=u9tCh1xNvJ7mccyVbAm6FRs8WUwjqwOu3c5TP1dFm7o,2567
18
+ msw_io-1.0.2.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
19
+ msw_io-1.0.2.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ msw_io-1.0.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
File without changes
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '1.0.2'
22
+ __version_tuple__ = version_tuple = (1, 0, 2)
23
+
24
+ __commit_id__ = commit_id = None
@@ -0,0 +1,134 @@
1
+ """JSONL trial data codec for MSW sessions (MSW_FILE_VERSION 1.0.0).
2
+
3
+ Format: newline-delimited JSON. First line is a version header.
4
+ Numpy arrays are serialised as plain lists; on load they remain lists,
5
+ which is compatible with all downstream DataFrame operations. Floats are rounded to 4 decimal places on save, so values are not bit-exact after a round-trip.
6
+
7
+ Tuple preservation
8
+ ------------------
9
+ JSON has no tuple type: both lists and tuples serialise as arrays. To preserve
10
+ Python tuples across the round-trip, tuples are encoded as {"__tuple__": [...]}
11
+ and decoded back to tuples on load. This keeps the computational interface
12
+ identical between JSONL and legacy pkl sessions (e.g. block_type_values).
13
+ """
14
+
15
+ import json
16
+ import logging
17
+ from pathlib import Path
18
+
19
+ import numpy as np
20
+
21
+ MSW_FILE_VERSION = "1.0.0"
22
+
23
+
24
+ def _encode_tuples(obj):
25
+ """Recursively replace tuples with {"__tuple__": [...]} before JSON encoding."""
26
+ if isinstance(obj, tuple):
27
+ return {"__tuple__": [_encode_tuples(v) for v in obj]}
28
+ if isinstance(obj, dict):
29
+ return {k: _encode_tuples(v) for k, v in obj.items()}
30
+ if isinstance(obj, list):
31
+ return [_encode_tuples(v) for v in obj]
32
+ return obj
33
+
34
+
35
+ def _decode_tuples(obj):
36
+ """Recursively restore {"__tuple__": [...]} sentinels to Python tuples."""
37
+ if isinstance(obj, dict):
38
+ if "__tuple__" in obj and len(obj) == 1:
39
+ return tuple(_decode_tuples(v) for v in obj["__tuple__"])
40
+ return {k: _decode_tuples(v) for k, v in obj.items()}
41
+ if isinstance(obj, list):
42
+ return [_decode_tuples(v) for v in obj]
43
+ return obj
44
+
45
+
46
+ _FLOAT_DECIMALS = 4
47
+
48
+
49
+ def _round_floats(obj):
50
+ """Recursively round all floats to _FLOAT_DECIMALS decimal places."""
51
+ if isinstance(obj, float):
52
+ return round(obj, _FLOAT_DECIMALS)
53
+ if isinstance(obj, dict):
54
+ return {k: _round_floats(v) for k, v in obj.items()}
55
+ if isinstance(obj, list):
56
+ return [_round_floats(v) for v in obj]
57
+ if isinstance(obj, tuple):
58
+ return tuple(_round_floats(v) for v in obj)
59
+ if isinstance(obj, np.ndarray):
60
+ if np.issubdtype(obj.dtype, np.floating):
61
+ return obj.round(_FLOAT_DECIMALS).tolist()
62
+ return obj.tolist()
63
+ if isinstance(obj, np.floating):
64
+ return round(float(obj), _FLOAT_DECIMALS)
65
+ if isinstance(obj, np.integer):
66
+ return int(obj)
67
+ if isinstance(obj, np.bool_):
68
+ return bool(obj)
69
+ return obj
70
+
71
+
72
+ class _NumpyEncoder(json.JSONEncoder):
73
+ def default(self, obj):
74
+ if isinstance(obj, np.ndarray):
75
+ if np.issubdtype(obj.dtype, np.floating):
76
+ return obj.round(_FLOAT_DECIMALS).tolist()
77
+ return obj.tolist()
78
+ if isinstance(obj, np.integer):
79
+ return int(obj)
80
+ if isinstance(obj, np.floating):
81
+ return round(float(obj), _FLOAT_DECIMALS)
82
+ if isinstance(obj, np.bool_):
83
+ return bool(obj)
84
+ return super().default(obj)
85
+
86
+
87
def save_trial_data(trial_data_list: list, filepath: str | Path) -> None:
    """Write trial dicts to a JSONL file, replacing any existing file.

    Line one is the ``{"_msw_version": ...}`` header; each following line is a
    single trial serialised as JSON. Before serialisation tuples are wrapped
    as ``{"__tuple__": [...]}`` (so ``load_trial_data`` can restore them),
    floats are rounded, and numpy values are converted to plain Python types.

    Args:
        trial_data_list: Per-trial dicts as produced by the task.
        filepath: Output file; missing parent directories are created.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    with destination.open("w") as handle:
        header = {"_msw_version": MSW_FILE_VERSION}
        handle.write(json.dumps(header) + "\n")
        for trial in trial_data_list:
            prepared = _round_floats(_encode_tuples(trial))
            handle.write(json.dumps(prepared, cls=_NumpyEncoder) + "\n")

    logging.debug(f"Saved {len(trial_data_list)} trials to {destination}")
109
+
110
+
111
def load_trial_data(filepath) -> list:
    """Read the trial dicts stored in a JSONL file made by ``save_trial_data``.

    Blank lines are ignored, any line whose JSON value contains the
    ``"_msw_version"`` key is treated as a header and dropped, and
    ``{"__tuple__": [...]}`` sentinels are converted back to tuples.

    Args:
        filepath: Location of the ``.jsonl`` file.

    Returns:
        The per-trial dicts, in file order, with tuples restored.
    """
    with Path(filepath).open("r") as handle:
        stripped_lines = (raw_line.strip() for raw_line in handle)
        records = (json.loads(text) for text in stripped_lines if text)
        return [
            _decode_tuples(record)
            for record in records
            if "_msw_version" not in record
        ]
@@ -0,0 +1,16 @@
1
+ from murineshiftwork.namespace.msw_files import is_msw_file as is_msw_file
2
+ from murineshiftwork.namespace.msw_files import msw_artifact as msw_artifact
3
+ from murineshiftwork.namespace.msw_files import msw_file as msw_file
4
+ from murineshiftwork.namespace.paths import (
5
+ CURRENT_NAMESPACE_VERSION as CURRENT_NAMESPACE_VERSION,
6
+ )
7
+ from murineshiftwork.namespace.paths import NAMESPACE_LEGACY as NAMESPACE_LEGACY
8
+ from murineshiftwork.namespace.paths import NAMESPACE_V1 as NAMESPACE_V1
9
+ from murineshiftwork.namespace.paths import build_data_paths as build_data_paths
10
+ from murineshiftwork.namespace.paths import (
11
+ generate_session_paths as generate_session_paths,
12
+ )
13
+ from murineshiftwork.namespace.paths import get_msw_builder as get_msw_builder
14
+ from murineshiftwork.namespace.paths import (
15
+ parse_session_basename as parse_session_basename,
16
+ )
@@ -0,0 +1,163 @@
1
+ """Acquisition and session manifest writers.
2
+
3
+ Manifests are YAML files written progressively during a session:
4
+ acquisition_manifest.yaml : inside the acquisition dir; lists sessions
5
+ session_manifest.yaml : inside the session dir; lists subprotocols (opto) or empty
6
+
7
+ All write operations are atomic (write temp file, rename).
8
+ """
9
+
10
+ from datetime import UTC, datetime
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ import yaml
15
+
16
+
17
+ def _now_iso() -> str:
18
+ return datetime.now(UTC).isoformat(timespec="seconds")
19
+
20
+
21
def _read_yaml(path: Path) -> dict:
    """Parse the YAML file at ``path``; an empty document yields ``{}``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding and matches what ``_write_yaml`` emits.
    """
    return yaml.safe_load(path.read_text(encoding="utf-8")) or {}


def _write_yaml(path: Path, data: dict) -> None:
    """Write ``data`` to ``path`` as YAML via a temp file and rename.

    The content is first written to a sibling ``*.yaml.tmp`` file and then
    moved over ``path`` with ``Path.replace``, so readers never observe a
    half-written manifest. If anything fails, the temp file is removed and
    the exception is re-raised.

    Args:
        path: Destination manifest path.
        data: Mapping to serialise (key order is preserved).
    """
    tmp = path.with_suffix(".yaml.tmp")
    try:
        # allow_unicode=True emits non-ASCII characters verbatim, so the
        # stream must be UTF-8; a locale default such as cp1252 could raise
        # UnicodeEncodeError or produce a file that is not valid YAML.
        with tmp.open("w", encoding="utf-8") as f:
            yaml.dump(
                data, f, default_flow_style=False, allow_unicode=True, sort_keys=False
            )
        tmp.replace(path)
    except BaseException:
        # Do not leave a stale temp file next to the manifest on failure.
        tmp.unlink(missing_ok=True)
        raise
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Acquisition manifest
36
+
37
+
38
+ def init_acquisition_manifest(
39
+ acquisition_folder: str | Path, acquisition_name: str
40
+ ) -> None:
41
+ """Create acquisition_manifest.yaml if it does not exist."""
42
+ p = Path(acquisition_folder) / "acquisition_manifest.yaml"
43
+ if p.exists():
44
+ return
45
+ _write_yaml(
46
+ p,
47
+ {
48
+ "msw_manifest_version": 1,
49
+ "type": "acquisition",
50
+ "acquisition_name": acquisition_name,
51
+ "sessions": [],
52
+ },
53
+ )
54
+
55
+
56
def append_session_to_acquisition(
    acquisition_folder: str | Path,
    session_basename: str,
    started_at: str | None = None,
) -> None:
    """Register a session with ``status="running"`` in the acquisition manifest.

    Intended to be called at TaskProcess init. A missing manifest is replaced
    by a minimal skeleton; a session whose basename is already listed is not
    added again. The manifest is rewritten in either case.

    Args:
        acquisition_folder: Directory that holds ``acquisition_manifest.yaml``.
        session_basename: Basename identifying the session entry.
        started_at: Start timestamp; the current UTC time is used when falsy.
    """
    manifest_path = Path(acquisition_folder) / "acquisition_manifest.yaml"
    data: dict[str, Any]
    if manifest_path.exists():
        data = _read_yaml(manifest_path)
    else:
        data = {"msw_manifest_version": 1, "type": "acquisition", "sessions": []}

    sessions = data.setdefault("sessions", [])
    for entry in sessions:
        if entry.get("basename") == session_basename:
            break
    else:
        # No existing entry carries this basename: record a new running session.
        sessions.append(
            {
                "basename": session_basename,
                "started_at": started_at or _now_iso(),
                "ended_at": None,
                "status": "running",
            }
        )
    _write_yaml(manifest_path, data)
79
+
80
+
81
+ def finalize_session_in_acquisition(
82
+ acquisition_folder: str | Path,
83
+ session_basename: str,
84
+ status: str = "complete",
85
+ ended_at: str | None = None,
86
+ ) -> None:
87
+ """Set status and ended_at. Call at TaskProcess exit."""
88
+ p = Path(acquisition_folder) / "acquisition_manifest.yaml"
89
+ if not p.exists():
90
+ return
91
+ data = _read_yaml(p)
92
+ for s in data.get("sessions", []):
93
+ if s.get("basename") == session_basename:
94
+ s["status"] = status
95
+ s["ended_at"] = ended_at or _now_iso()
96
+ break
97
+ _write_yaml(p, data)
98
+
99
+
100
+ # ---------------------------------------------------------------------------
101
+ # Session manifest
102
+
103
+
104
+ def init_session_manifest(session_folder: str | Path, session_basename: str) -> None:
105
+ """Write session_manifest.yaml with empty subprotocols. Call at session start."""
106
+ p = Path(session_folder) / "session_manifest.yaml"
107
+ if p.exists():
108
+ return
109
+ _write_yaml(
110
+ p,
111
+ {
112
+ "msw_manifest_version": 1,
113
+ "type": "session",
114
+ "session_basename": session_basename,
115
+ "subprotocols": [],
116
+ },
117
+ )
118
+
119
+
120
def append_subprotocol(
    session_folder: str | Path,
    name: str,
    filename: str,
    barcode_start: int | None = None,
) -> None:
    """Register a subprotocol with ``status="running"`` in the session manifest.

    Intended to be called before each opto protocol. A missing manifest is
    replaced by a minimal skeleton; a subprotocol whose name is already listed
    is not added again. The manifest is rewritten in either case.

    Args:
        session_folder: Directory that holds ``session_manifest.yaml``.
        name: Subprotocol name, used as the entry's identity.
        filename: Stored under the entry's ``file`` key.
        barcode_start: Stored under the entry's ``barcode_start`` key.
    """
    manifest_path = Path(session_folder) / "session_manifest.yaml"
    data: dict[str, Any]
    if manifest_path.exists():
        data = _read_yaml(manifest_path)
    else:
        data = {"msw_manifest_version": 1, "type": "session", "subprotocols": []}

    subprotocols = data.setdefault("subprotocols", [])
    for entry in subprotocols:
        if entry.get("name") == name:
            break
    else:
        # No existing entry carries this name: record a new running subprotocol.
        subprotocols.append(
            {
                "name": name,
                "file": filename,
                "barcode_start": barcode_start,
                "barcode_end": None,
                "status": "running",
            }
        )
    _write_yaml(manifest_path, data)
145
+
146
+
147
+ def finalize_subprotocol(
148
+ session_folder: str | Path,
149
+ name: str,
150
+ barcode_end: int | None = None,
151
+ status: str = "complete",
152
+ ) -> None:
153
+ """Set barcode_end and status. Call in finally block after each opto protocol."""
154
+ p = Path(session_folder) / "session_manifest.yaml"
155
+ if not p.exists():
156
+ return
157
+ data = _read_yaml(p)
158
+ for sp in data.get("subprotocols", []):
159
+ if sp.get("name") == name:
160
+ sp["barcode_end"] = barcode_end
161
+ sp["status"] = status
162
+ break
163
+ _write_yaml(p, data)
@@ -0,0 +1,57 @@
1
+ """MSW session file naming: the .msw. artifact namespace.
2
+
3
+ Session-derived files follow the pattern defined in namespace.msw.yaml:
4
+ {session_basename}.msw.{artifact}
5
+
6
+ Use msw_file() for non-task callers; TaskRunner.get_path() for task code
7
+ that already has self available.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+
14
+
15
def msw_file(session_file_path: str | Path, artifact: str) -> Path:
    """Build the path of a session-derived ``.msw.{artifact}`` file.

    The final component of ``session_file_path`` is parsed at the ``session``
    level of the MSW namespace builder; the extracted values, together with
    ``artifact``, are used to build the ``file``-level name, which is placed
    in the same parent directory.

    Args:
        session_file_path: Base session path (session_paths["session_file_path"]).
        artifact: Artifact name suffix (e.g. "session.yaml", "df.jsonl", "log").
    """
    from murineshiftwork.namespace.paths import get_msw_builder

    session_path = Path(session_file_path)
    builder = get_msw_builder()

    fields = builder.extract_level_values("session", session_path.name)
    fields["artifact"] = artifact
    directory = session_path.parent
    return directory / builder.build_path("file", fields)
29
+
30
+
31
def is_msw_file(path: str | Path) -> bool:
    """Tell whether the filename of ``path`` parses as an MSW ``.msw.`` file.

    The check succeeds when the namespace builder can extract ``file``-level
    values from the name; a ``ValueError`` from the builder means no match.
    """
    from murineshiftwork.namespace.paths import get_msw_builder

    try:
        get_msw_builder().extract_level_values("file", Path(path).name)
    except ValueError:
        return False
    return True
40
+
41
+
42
def msw_artifact(path: str | Path) -> str:
    """Return the artifact part of an MSW filename.

    E.g. "subject__dt__task.msw.session.yaml" -> "session.yaml"

    Raises:
        ValueError: If the path is not an MSW file.
    """
    from murineshiftwork.namespace.paths import get_msw_builder

    try:
        builder = get_msw_builder()
        level_values = builder.extract_level_values("file", Path(path).name)
        return level_values["artifact"]
    except ValueError:
        # Replace the builder's error with one that names the offending path.
        raise ValueError(f"Not an MSW file: {path!r}") from None
@@ -0,0 +1,25 @@
1
+ version: "3.0"
2
+ description: "MSW acquisition namespace: subject > session > acquisition > file."
3
+ hierarchy:
4
+ - subject
5
+ - session
6
+ - acquisition
7
+ - file
8
+ optional_levels: []
9
+ levels:
10
+ subject:
11
+ template: "{subject}"
12
+ regex: "(?P<subject>[\\w\\-]+)"
13
+ optional_fields: []
14
+ session:
15
+ template: "{subject}__{datetime}__{task}"
16
+ regex: "(?P<subject>[\\w\\-]+)__(?P<datetime>\\d{8}_\\d{6}(?:_\\d{6})?)__(?P<task>[\\w\\-]+)"
17
+ optional_fields: []
18
+ acquisition:
19
+ template: "{subject}__{datetime}__{task}"
20
+ regex: "(?P<subject>[\\w\\-]+)__(?P<datetime>\\d{8}_\\d{6}(?:_\\d{6})?)__(?P<task>[\\w\\-]+)"
21
+ optional_fields: []
22
+ file:
23
+ template: "{session}.msw.{artifact}"
24
+ regex: "(?P<session>.+)\\.msw\\.(?P<artifact>.+)"
25
+ optional_fields: []