acquisition-namespace 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
+ from acquisition_namespace.spec import (
6
+ NamespaceBuilder,
7
+ NamespaceLevelSpec,
8
+ NamespaceSpec,
9
+ )
10
+
11
# Look up the version string of the installed distribution via
# importlib.metadata.
try:
    __version__ = version("acquisition_namespace")
except PackageNotFoundError:  # pragma: no cover
    # The metadata lookup raised PackageNotFoundError, so no version could be
    # determined; fall back to a placeholder string.
    __version__ = "unknown"

# Names exported by ``from acquisition_namespace import *``; all three are
# imported from acquisition_namespace.spec above.
__all__ = [
    "NamespaceBuilder",
    "NamespaceLevelSpec",
    "NamespaceSpec",
]
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '1.2.0'
22
+ __version_tuple__ = version_tuple = (1, 2, 0)
23
+
24
+ __commit_id__ = commit_id = None
File without changes
@@ -0,0 +1,299 @@
1
+ """Namespace spec: Pydantic models + YAML-backed NamespaceBuilder.
2
+
3
+ Load a spec file and build / validate hierarchical acquisition paths:
4
+
5
+ builder = NamespaceBuilder.from_yaml("my_namespace.yaml")
6
+ basename = builder.build_path("session", {"subject": "mouse_01", ...})
7
+ parts = builder.extract_level_values("session", basename)
8
+
9
+ The spec YAML defines a hierarchy of levels, each with a ``template``
10
+ (Python format-string) and a ``regex`` (named capture groups). Higher
11
+ levels may reference lower-level names in their template; the builder
12
+ resolves them automatically.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import logging
19
+ import re
20
+ import string
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ import yaml
25
+ from pydantic import BaseModel, field_validator
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Pydantic models
29
+
30
+
31
class NamespaceLevelSpec(BaseModel):
    # Specification of a single namespace level.
    #
    # template:        format-string from which the level's segment is built.
    # regex:           pattern the level's segment is matched against; it is
    #                  checked at validation time to be a compilable regex.
    # optional_fields: list of field names; defaults to an empty list.
    template: str
    regex: str
    optional_fields: list[str] = []

    @field_validator("regex")
    @classmethod
    def _check_regex(cls, v: str) -> str:
        # Reject patterns that ``re`` cannot compile, reporting them as a
        # ValueError so the failure surfaces through model validation.
        try:
            re.compile(v)
        except re.error as err:
            raise ValueError(f"Invalid regex {v!r}: {err}") from err
        else:
            return v
44
+
45
+
46
class NamespaceSpec(BaseModel):
    # Top-level namespace specification.
    #
    # version:         spec version string.
    # description:     free text; defaults to "".
    # hierarchy:       list of level names.
    # optional_levels: list of level names; defaults to an empty list.
    # levels:          per-level specs keyed by level name; must contain an
    #                  entry for every name listed in ``hierarchy``.
    version: str
    description: str = ""
    hierarchy: list[str]
    optional_levels: list[str] = []
    levels: dict[str, NamespaceLevelSpec]

    @field_validator("levels")
    @classmethod
    def _all_hierarchy_levels_present(cls, v: dict, info: Any) -> dict:
        # ``hierarchy`` is absent from the already-validated data when it
        # is not available; in that case there is nothing to cross-check.
        validated = info.data or {}
        if "hierarchy" not in validated:
            return v

        absent = [name for name in validated["hierarchy"] if name not in v]
        if absent:
            raise ValueError(
                f"Hierarchy level(s) {absent} have no entry in 'levels'"
            )
        return v
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Helper
67
+
68
+
69
+ def _template_fields(template: str) -> list[str]:
70
+ return [t[1] for t in string.Formatter().parse(template) if t[1]]
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # NamespaceBuilder
75
+
76
+
77
+ class NamespaceBuilder:
78
+ """Build and validate hierarchical acquisition paths from a YAML spec.
79
+
80
+ Each level in the hierarchy has a template (for construction) and a regex
81
+ (for parsing/validation). Higher levels may reference lower-level names
82
+ in their template; the builder resolves them recursively.
83
+
84
+ Typical usage::
85
+
86
+ b = NamespaceBuilder.from_yaml("my_namespace.yaml")
87
+
88
+ # Build a path segment for a given level
89
+ name = b.build_path("session", {"subject": "m01", "datetime": "20260101"})
90
+
91
+ # Build the full directory path from root to a level
92
+ path = b.generate_path("session", values)
93
+
94
+ # Parse an existing path back into its component values
95
+ parts = b.validate_path(path, stop_at="session")
96
+
97
+ # Extract values from a single level's string
98
+ parts = b.extract_level_values("session", name)
99
+ """
100
+
101
+ def __init__(self, spec: NamespaceSpec) -> None:
102
+ self.spec = spec
103
+ self.hierarchy: list[str] = spec.hierarchy
104
+ self.optional_levels: list[str] = spec.optional_levels
105
+ self._compiled: dict[str, re.Pattern] = {
106
+ name: re.compile(level.regex) for name, level in spec.levels.items()
107
+ }
108
+
109
+ # ------------------------------------------------------------------
110
+ # Construction
111
+
112
+ @classmethod
113
+ def from_yaml(cls, config_path: str | Path) -> NamespaceBuilder:
114
+ """Load a :class:`NamespaceSpec` from *config_path* and return a builder."""
115
+ path = Path(config_path)
116
+ with path.open() as f:
117
+ data = yaml.safe_load(f)
118
+ spec = NamespaceSpec.model_validate(data)
119
+ logging.debug("Loaded NamespaceSpec v%s from %s", spec.version, path)
120
+ return cls(spec)
121
+
122
+ @classmethod
123
+ def from_dict(cls, data: dict) -> NamespaceBuilder:
124
+ """Build from a plain dict (e.g. after :meth:`to_dict`)."""
125
+ return cls(NamespaceSpec.model_validate(data))
126
+
127
+ # ------------------------------------------------------------------
128
+ # Serialisation
129
+
130
+ def to_dict(self) -> dict[str, Any]:
131
+ return self.spec.model_dump()
132
+
133
+ def __str__(self) -> str:
134
+ return f"NamespaceBuilder({json.dumps(self.to_dict())})"
135
+
136
+ def __repr__(self) -> str:
137
+ return f"NamespaceBuilder({self.to_dict()})"
138
+
139
+ def write_yaml(self, path: str | Path) -> None:
140
+ """Serialise the spec back to a YAML file."""
141
+ with Path(path).open("w") as f:
142
+ yaml.dump(
143
+ self.spec.model_dump(),
144
+ f,
145
+ default_flow_style=False,
146
+ allow_unicode=True,
147
+ sort_keys=False,
148
+ )
149
+ logging.info("NamespaceSpec written to %s", path)
150
+
151
+ # ------------------------------------------------------------------
152
+ # Path building
153
+
154
+ def _build_one(
155
+ self, level_name: str, values: dict[str, str], parts: dict[str, str]
156
+ ) -> str:
157
+ if level_name in parts:
158
+ return parts[level_name]
159
+ level = self.spec.levels[level_name]
160
+ fields = _template_fields(level.template)
161
+ for field in fields:
162
+ if field in self.hierarchy and field not in parts and field != level_name:
163
+ parts[field] = self._build_one(field, values, parts)
164
+ elif field not in values and field not in parts:
165
+ raise ValueError(
166
+ f"Missing value for field '{field}' in level '{level_name}'"
167
+ )
168
+ fmt = {k: parts.get(k, values.get(k, "")) for k in fields}
169
+ result = level.template.format(**fmt)
170
+ parts[level_name] = result
171
+ return result
172
+
173
+ def build_path(self, level: str, values: dict[str, str]) -> str:
174
+ """Return the path segment string for *level* constructed from *values*.
175
+
176
+ Parent levels referenced in the template are resolved automatically.
177
+ """
178
+ if level not in self.spec.levels:
179
+ raise ValueError(f"Unknown level: {level!r}")
180
+ return self._build_one(level, values, {})
181
+
182
+ def generate_path(
183
+ self,
184
+ level: str,
185
+ values: dict[str, str],
186
+ include_optional_levels: bool = True,
187
+ level_overrides: dict[str, str] | None = None,
188
+ ) -> str:
189
+ """Return the full filesystem path from root up to (and including) *level*.
190
+
191
+ Joins each hierarchy level with :func:`pathlib.Path` so the result
192
+ uses the platform separator.
193
+
194
+ Parameters
195
+ ----------
196
+ level_overrides:
197
+ Pre-built segment strings keyed by level name. When a level
198
+ appears here its value is used verbatim instead of being
199
+ constructed from *values*. The segment is still recorded in the
200
+ internal parts dict so higher levels can reference it in their
201
+ templates. Use this when a level's basename comes from an
202
+ external system (e.g. an OE acquisition name) and cannot be
203
+ reconstructed from the current session's values.
204
+ """
205
+ if level not in self.hierarchy:
206
+ raise ValueError(f"Unknown level: {level!r}")
207
+ overrides = level_overrides or {}
208
+ parts: dict[str, str] = {}
209
+ segments: list[str] = []
210
+ for name in self.hierarchy:
211
+ if name in self.optional_levels and not include_optional_levels:
212
+ continue
213
+ if name in overrides:
214
+ segment = overrides[name]
215
+ parts[name] = segment
216
+ else:
217
+ segment = self._build_one(name, values, parts)
218
+ segments.append(segment)
219
+ if name == level:
220
+ break
221
+ return str(Path(*segments))
222
+
223
+ # ------------------------------------------------------------------
224
+ # Parsing / validation
225
+
226
+ def _match_level(
227
+ self, level_name: str, segment: str, known_values: dict[str, str]
228
+ ) -> dict[str, str]:
229
+ level = self.spec.levels[level_name]
230
+ pattern = level.regex
231
+ for k, v in known_values.items():
232
+ if v is not None:
233
+ pattern = pattern.replace("{" + k + "}", re.escape(str(v)))
234
+ m = re.match(pattern, segment.strip())
235
+ if not m:
236
+ raise ValueError(
237
+ f"Segment {segment!r} did not match regex for level {level_name!r}"
238
+ )
239
+ return m.groupdict()
240
+
241
+ def validate_path_level(
242
+ self, level: str, segment: str, known_values: dict[str, str]
243
+ ) -> dict[str, str]:
244
+ """Match *segment* against the regex for *level*, return captured groups."""
245
+ return self._match_level(level, segment, known_values)
246
+
247
+ def validate_path(
248
+ self, path: str | Path, stop_at: str | None = None
249
+ ) -> dict[str, str]:
250
+ """Walk *path* level by level and return all captured values.
251
+
252
+ Parameters
253
+ ----------
254
+ path:
255
+ Filesystem path to validate (may be absolute or relative).
256
+ stop_at:
257
+ Stop after matching this hierarchy level. If ``None``, walks
258
+ the entire hierarchy.
259
+
260
+ Raises
261
+ ------
262
+ ValueError
263
+ If any segment does not match the expected regex.
264
+ """
265
+ if stop_at and stop_at not in self.hierarchy:
266
+ raise ValueError(f"stop_at level {stop_at!r} is not in hierarchy")
267
+ max_depth = (
268
+ self.hierarchy.index(stop_at) + 1 if stop_at else len(self.hierarchy)
269
+ )
270
+ segments = Path(path).parts
271
+ result: dict[str, str] = {}
272
+ for i, (segment, level_name) in enumerate(
273
+ zip(segments, self.hierarchy, strict=False)
274
+ ):
275
+ if i >= max_depth:
276
+ break
277
+ result.update(self._match_level(level_name, segment, result))
278
+ if level_name == stop_at:
279
+ break
280
+ return result
281
+
282
+ def extract_level_values(self, level: str, name: str) -> dict[str, str]:
283
+ """Parse *name* as a *level* segment and return template-field values.
284
+
285
+ Unlike :meth:`validate_path` (which walks a directory path), this
286
+ matches a single string against a single level's regex.
287
+
288
+ Raises
289
+ ------
290
+ ValueError
291
+ If *level* is not in the hierarchy, or *name* does not match.
292
+ """
293
+ if level not in self.hierarchy:
294
+ raise ValueError(f"Unknown level: {level!r}")
295
+ match = self._compiled[level].match(name.strip())
296
+ if not match:
297
+ raise ValueError(f"Name {name!r} does not match regex for level {level!r}")
298
+ fields = _template_fields(self.spec.levels[level].template)
299
+ return {f: match.groupdict().get(f, "") for f in fields}
@@ -0,0 +1,124 @@
1
+ Metadata-Version: 2.4
2
+ Name: acquisition-namespace
3
+ Version: 1.2.0
4
+ Summary: YAML-driven hierarchical path namespace builder for acquisition data pipelines.
5
+ Project-URL: Repository, https://github.com/murineshiftwork/acquisition-namespace
6
+ Project-URL: Issue Tracker, https://github.com/murineshiftwork/acquisition-namespace/issues
7
+ Author-email: "Lars B. Rollik" <L.B.Rollik@protonmail.com>
8
+ License: Copyright (c) [[ year ]] [[ author_name ]]
9
+ All rights reserved.
10
+
11
+ No license is granted to use, copy, modify, merge, publish, distribute,
12
+ sublicense, or sell copies of this software or its documentation without
13
+ explicit written permission from the copyright holder.
14
+
15
+ Replace this file with your chosen open-source license before publishing.
16
+ Preferred default for research code: GNU GPL v3.0 (retains authorship rights,
17
+ requires attribution and source disclosure for derivatives).
18
+ Permissive alternatives: MIT or BSD-3-Clause (allow commercial use without
19
+ source disclosure — use only if explicitly intended).
20
+ License-File: LICENSE
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: pydantic>=2
23
+ Requires-Dist: pyyaml
24
+ Provides-Extra: dev
25
+ Requires-Dist: commitizen; extra == 'dev'
26
+ Requires-Dist: mkdocs-material; extra == 'dev'
27
+ Requires-Dist: mypy; extra == 'dev'
28
+ Requires-Dist: pre-commit; extra == 'dev'
29
+ Requires-Dist: pytest-cov; extra == 'dev'
30
+ Requires-Dist: pytest>=8; extra == 'dev'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # Acquisition Namespace
34
+
35
+ YAML-driven hierarchical path namespace builder for acquisition data pipelines.
36
+
37
+ Define your session directory layout once in a YAML spec; the library builds,
38
+ parses, and validates paths at every level of the hierarchy — with zero
39
+ hard-coded separators or string constants in your application code.
40
+
41
+ ## Installation
42
+
43
+ ```sh
44
+ pip install acquisition-namespace
45
+ ```
46
+
47
+ Or with uv:
48
+
49
+ ```sh
50
+ uv add acquisition-namespace
51
+ ```
52
+
53
+ ## Quick start
54
+
55
+ ```python
56
+ from acquisition_namespace import NamespaceBuilder
57
+
58
+ builder = NamespaceBuilder.from_yaml("my_namespace.yaml")
59
+
60
+ # Build the session basename from component values
61
+ name = builder.build_path("session", {
62
+ "subject": "mouse_01",
63
+ "datetime": "20260524_143022_123456",
64
+ "task": "sequence",
65
+ })
66
+ # → "mouse_01__20260524_143022_123456__sequence"
67
+
68
+ # Build the full directory path from root to the session level
69
+ path = builder.generate_path("session", {...})
70
+ # → "mouse_01/mouse_01__20260524_143022_123456__sequence"
71
+
72
+ # Parse a session basename back into its fields
73
+ parts = builder.extract_level_values("session", name)
74
+ # → {"subject": "mouse_01", "datetime": "...", "task": "sequence"}
75
+ ```
76
+
77
+ ### Spec YAML format
78
+
79
+ ```yaml
80
+ version: "1.0"
81
+ description: "My acquisition namespace."
82
+ hierarchy:
83
+ - subject
84
+ - session
85
+ - file
86
+ optional_levels: []
87
+ levels:
88
+ subject:
89
+ template: "{subject}"
90
+ regex: "(?P<subject>[\\w\\-]+)"
91
+ optional_fields: []
92
+ session:
93
+ template: "{subject}__{datetime}__{task}"
94
+ regex: "(?P<subject>[\\w\\-]+)__(?P<datetime>\\d{8}_\\d{6}(?:_\\d{6})?)__(?P<task>[\\w\\-]+)"
95
+ optional_fields: []
96
+ file:
97
+ template: "{session}.{suffix}.{extension}"
98
+ regex: "(?P<session>.+)\\.(?P<suffix>\\w+)\\.(?P<extension>\\w+)"
99
+ optional_fields: []
100
+ ```
101
+
102
+ A level's template may reference other hierarchy levels by name (e.g. `{session}` in
103
+ the `file` template); the builder resolves them automatically.
104
+
105
+ ## Development setup
106
+
107
+ ```sh
108
+ git clone https://github.com/murineshiftwork/acquisition-namespace.git
109
+ cd acquisition-namespace
110
+ uv sync --group dev
111
+ uv run pre-commit install --hook-type pre-commit --hook-type commit-msg
112
+ uv run pytest
113
+ ```
114
+
115
+ ## Release workflow
116
+
117
+ 1. Work on a `feature/` or `fix/` branch, committing with `cz commit`
118
+ 2. Open a PR — CI (lint + tests + secrets scan) must pass before merge
119
+ 3. Merge to main → version bump and tag are created automatically
120
+ 4. Tag triggers release: GitHub release + PyPI publish
121
+
122
+ ## License
123
+
124
+ See [LICENSE](LICENSE).
@@ -0,0 +1,8 @@
1
+ acquisition_namespace/__init__.py,sha256=Q21K4tvN47d_PDsZNEuEj-kW9SRfGfgoKtG8AsVVVWI,426
2
+ acquisition_namespace/_version.py,sha256=SewWHxQ2iszz6imNAB6v_XpUebEg78bh18SmbYEYay8,520
3
+ acquisition_namespace/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ acquisition_namespace/spec.py,sha256=AwMyE52RSViQVPdaK80Z1rHPojubfvsBcFx-kcLT-Uo,10758
5
+ acquisition_namespace-1.2.0.dist-info/METADATA,sha256=FSnXdT5H7qTg-EGcNY_SRt2s3OooIfOyaR9nIfAFcKM,3949
6
+ acquisition_namespace-1.2.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
7
+ acquisition_namespace-1.2.0.dist-info/licenses/LICENSE,sha256=lhb_GzCtwM_yLxMjdJvJCl96US9uZLjtOvBAfA3w1H0,610
8
+ acquisition_namespace-1.2.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,12 @@
1
+ Copyright (c) [[ year ]] [[ author_name ]]
2
+ All rights reserved.
3
+
4
+ No license is granted to use, copy, modify, merge, publish, distribute,
5
+ sublicense, or sell copies of this software or its documentation without
6
+ explicit written permission from the copyright holder.
7
+
8
+ Replace this file with your chosen open-source license before publishing.
9
+ Preferred default for research code: GNU GPL v3.0 (retains authorship rights,
10
+ requires attribution and source disclosure for derivatives).
11
+ Permissive alternatives: MIT or BSD-3-Clause (allow commercial use without
12
+ source disclosure — use only if explicitly intended).