samplekit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
samplekit/sample.py ADDED
@@ -0,0 +1,320 @@
1
+ """Sample — container for scientific properties with structured I/O."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import yaml
9
+
10
+ from .property import Property
11
+ from .table import Table
12
+
13
+
14
+ # ════════════════════════════════════════════════════════════
15
+ # YAML configuration — compact leaf dicts, block nested dicts
16
+ # ════════════════════════════════════════════════════════════
17
+
18
+ class _RootDict(dict):
19
+ """Marker so the dumper always uses block style for the root mapping."""
20
+ pass
21
+
22
+
23
class _Dumper(yaml.SafeDumper):
    """SafeDumper subclass that holds this module's custom representers.

    Registering representers on a private subclass keeps them from
    affecting ``yaml.SafeDumper`` itself.
    """
25
+
26
+
27
+ def _is_leaf(v: Any) -> bool:
28
+ if isinstance(v, (int, float, str, bool)) or v is None:
29
+ return True
30
+ return isinstance(v, list) and all(
31
+ isinstance(x, (int, float, str, bool)) or x is None for x in v
32
+ )
33
+
34
+
35
def _represent_root(dumper, data):
    """Emit the root mapping in block style regardless of its contents."""
    return dumper.represent_mapping(
        'tag:yaml.org,2002:map', data, flow_style=False)


def _represent_dict(dumper, data):
    """Emit non-empty leaf-only dicts in flow style, all others in block style."""
    compact = bool(data and all(_is_leaf(item) for item in data.values()))
    return dumper.represent_mapping(
        'tag:yaml.org,2002:map', data, flow_style=compact)


_Dumper.add_representer(_RootDict, _represent_root)
_Dumper.add_representer(dict, _represent_dict)
41
+
42
+ def _repr_float(d: yaml.Dumper, v: float) -> yaml.Node:
43
+ if v != v: return d.represent_scalar('tag:yaml.org,2002:float', '.nan')
44
+ if v == float('inf'): return d.represent_scalar('tag:yaml.org,2002:float', '.inf')
45
+ if v == -float('inf'): return d.represent_scalar('tag:yaml.org,2002:float', '-.inf')
46
+ if v == int(v):
47
+ return d.represent_int(int(v))
48
+ s = f"{v:.6e}" if (v != 0 and (abs(v) < 0.001 or abs(v) >= 1e7)) else f"{v:.10g}"
49
+ return d.represent_scalar('tag:yaml.org,2002:float', s)
50
+
51
# Route every float through the compact formatter defined above.
_Dumper.add_representer(data_type=float, representer=_repr_float)
52
+
53
+
54
+ def _parse_frontmatter(content: str) -> tuple[dict, str]:
55
+ """Split ``---`` YAML frontmatter and Markdown body."""
56
+ if not content.startswith("---"):
57
+ return {}, content
58
+ end = content.find("\n---", 3)
59
+ if end == -1:
60
+ return {}, content
61
+ yaml_data = yaml.safe_load(content[4:end]) or {}
62
+ body = content[end + 4:].lstrip("\n")
63
+ return yaml_data, body
64
+
65
+
66
class Sample:
    """
    Named container of Properties and Tables.

    Subclass and define properties in __init__. Properties and Tables
    are auto-registered when assigned as instance attributes.

    Examples
    --------
    >>> class MySample(Sample):
    ...     def __init__(self, name=None, filepath=None):
    ...         super().__init__(name, filepath)
    ...         self.temperature = Property(value=25.0, unit="°C", symbol_math="T")
    ...         self.pressure = Property(value=101.3, unit="kPa", symbol_math="P")
    ...
    ...     def template(self, style="math"):
    ...         from .report import properties_table
    ...         return properties_table(self, ["temperature", "pressure"], style=style)
    >>>
    >>> sample = MySample("EXP_001")
    >>> sample.save("sample.md")
    >>> loaded = MySample.load("sample.md")
    """

    def __init__(self, name: str | None = None, filepath: str | Path | None = None):
        """Create an empty sample.

        Parameters
        ----------
        name : str, optional
            Sample name. Defaults to the stem of *filepath*, or
            ``"Unnamed"`` when no filepath is given either.
        filepath : str or Path, optional
            File this sample is associated with (used by ``save`` and by
            the automatic hydration of subclasses).
        """
        # Use object.__setattr__ to bypass our custom __setattr__
        object.__setattr__(self, '_props', {})
        object.__setattr__(self, '_tables', {})
        object.__setattr__(self, '_order', [])
        fp = Path(filepath) if filepath else None
        object.__setattr__(self, '_filepath', fp)
        n = name if name is not None else (fp.stem if fp else "Unnamed")
        object.__setattr__(self, 'name', n)
        object.__setattr__(self, '_hydrating', False)

    def _auto_hydrate(self):
        """Load data from filepath if the file exists."""
        fp = object.__getattribute__(self, '_filepath')
        if fp is not None and fp.exists():
            object.__setattr__(self, '_hydrating', True)
            try:
                self._hydrate_from_file(fp)
            finally:
                object.__setattr__(self, '_hydrating', False)

    def __init_subclass__(cls, **kwargs):
        """Wrap each subclass ``__init__`` so it hydrates after construction.

        Hydration must run after the subclass has declared its Properties
        and Tables, so it is triggered once the full ``__init__`` chain has
        returned.
        """
        super().__init_subclass__(**kwargs)
        original_init = cls.__init__

        def _wrapped_init(self, *args, **kw):
            original_init(self, *args, **kw)
            # Only the wrapper of the most-derived class hydrates, so a
            # chain of wrapped __init__ methods reads the file once.
            if type(self) is cls and not object.__getattribute__(self, '_hydrating'):
                self._auto_hydrate()

        cls.__init__ = _wrapped_init

    # ── Auto-registration ───────────────────────────────

    def __setattr__(self, name: str, value):
        """Set an attribute and register it if it is a Property or Table.

        Names starting with ``_`` and the attribute ``name`` are stored
        without registration.
        """
        object.__setattr__(self, name, value)
        if name.startswith('_') or name == 'name':
            return

        props = object.__getattribute__(self, '_props')
        tables = object.__getattribute__(self, '_tables')
        order = object.__getattribute__(self, '_order')

        if isinstance(value, Property):
            # A name lives in exactly one registry: drop a Table that was
            # previously registered under it so lookups are not stale.
            tables.pop(name, None)
            props[name] = value
            value._name = name
            value._parent = self
            if value.symbol is None:
                value.symbol = name
            if value.symbol_math is None:
                value.symbol_math = value.symbol
            if name not in order:
                order.append(name)
            value._wire_dependencies()
        elif isinstance(value, Table):
            # Same rule in the other direction.
            props.pop(name, None)
            tables[name] = value
            value._name = name
            value._parent = self
            if name not in order:
                order.append(name)

    # ── Access ──────────────────────────────────────────

    @property
    def props(self) -> dict[str, Property]:
        """All registered Properties (a copy, in registration order)."""
        return dict(object.__getattribute__(self, '_props'))

    @property
    def tables(self) -> dict[str, Table]:
        """All registered Tables (a copy, in registration order)."""
        return dict(object.__getattribute__(self, '_tables'))

    def __getitem__(self, key: str) -> Property | Table:
        """Access property or table by name.

        Raises
        ------
        KeyError
            If *key* is neither a registered Property nor a Table.
        """
        props = object.__getattribute__(self, '_props')
        if key in props:
            return props[key]
        tables = object.__getattribute__(self, '_tables')
        if key in tables:
            return tables[key]
        raise KeyError(key)

    def __contains__(self, key: str) -> bool:
        """Return True if *key* names a registered Property or Table."""
        props = object.__getattribute__(self, '_props')
        tables = object.__getattribute__(self, '_tables')
        return key in props or key in tables

    # ── Template ────────────────────────────────────────

    def template(self, style: str = "math") -> str:
        """Override in subclass for custom markdown layout.

        Parameters
        ----------
        style : "math" or "text"

        Returns
        -------
        str
            Markdown body; the base implementation returns ``""``.
        """
        return ""

    # ── I/O ─────────────────────────────────────────────

    def _build_yaml_data(self) -> dict:
        """Build the complete YAML dict for this sample.

        ``name`` comes first, followed by Properties and Tables in
        registration order. Properties whose ``to_yaml()`` returns None
        and Tables whose ``to_yaml()`` is falsy are left out.
        """
        data: dict[str, Any] = _RootDict(name=self.name)

        props = object.__getattribute__(self, '_props')
        tables = object.__getattribute__(self, '_tables')
        order = object.__getattribute__(self, '_order')

        for key in order:
            if key in props:
                serialized = props[key].to_yaml()
                if serialized is not None:
                    data[key] = serialized
            elif key in tables:
                serialized = tables[key].to_yaml()
                if serialized:
                    data[key] = serialized

        return data

    def _is_reserved_key(self, key) -> bool:
        """Return True if *key* must not become a dynamic attribute.

        Reserved keys are non-strings, private names (they would overwrite
        internal state such as ``_props``), ``name``, and anything defined
        on the class (methods and read-only properties).
        """
        if not isinstance(key, str):
            return True
        if key.startswith('_') or key == 'name':
            return True
        return hasattr(type(self), key)

    def _hydrate_from_yaml(self, yaml_data: dict):
        """Populate registered Properties and Tables from YAML data.

        Keys that match a registered Property or Table update it in place.
        Unknown keys create a dynamic Table (when the value is a dict with
        a ``_rows`` entry) or a dynamic Property. Unknown keys that are
        reserved (see ``_is_reserved_key``) are skipped with a warning.
        """
        import warnings

        props = object.__getattribute__(self, '_props')
        tables = object.__getattribute__(self, '_tables')

        for key, raw in yaml_data.items():
            if key in props:
                props[key].from_yaml(raw)
                continue
            if key in tables and isinstance(raw, dict):
                tables[key].from_yaml(raw)
                continue
            if self._is_reserved_key(key):
                # Setting these would corrupt internal state, shadow a
                # method, or fail on a read-only property.
                warnings.warn(
                    f"Ignoring reserved key {key!r} in sample data",
                    stacklevel=2,
                )
                continue
            if isinstance(raw, dict) and "_rows" in raw:
                # Detected an unregistered table → create dynamically
                new_item = Table()
            else:
                # Unknown key → create a dynamic Property
                new_item = Property()
            new_item.from_yaml(raw)
            setattr(self, key, new_item)

    def _hydrate_from_file(self, filepath: Path):
        """Read and hydrate from a Markdown file.

        A ``name`` entry in the frontmatter replaces the current name;
        the Markdown body is ignored.
        """
        content = filepath.read_text(encoding="utf-8")
        yaml_data, _body = _parse_frontmatter(content)
        name = yaml_data.pop("name", None)
        if name is not None:
            object.__setattr__(self, 'name', name)
        self._hydrate_from_yaml(yaml_data)

    def save(self, filepath: str | Path | None = None, style: str = "math") -> Path:
        """Save to markdown file with YAML frontmatter.

        Parameters
        ----------
        filepath : path, optional
            Defaults to the filepath used at construction.
        style : "math" or "text"
            Controls math rendering in the body.

        Returns
        -------
        Path
            The path written; it also becomes the sample's filepath.

        Raises
        ------
        ValueError
            If no filepath was given here or at construction.
        """
        fp = Path(filepath) if filepath else self._filepath
        if fp is None:
            raise ValueError("No filepath specified")

        yaml_data = self._build_yaml_data()
        yaml_str = yaml.dump(
            yaml_data,
            Dumper=_Dumper,
            sort_keys=False,
            allow_unicode=True,
            default_flow_style=False,
        )

        body = self.template(style=style)

        content = f"---\n{yaml_str}---\n"
        if body:
            content += f"\n{body}\n"

        fp.parent.mkdir(parents=True, exist_ok=True)
        fp.write_text(content, encoding="utf-8")
        self._filepath = fp
        return fp

    @classmethod
    def load(cls, filepath: str | Path) -> Sample:
        """Load from a markdown file with YAML frontmatter.

        Creates an instance of *cls* (calling its __init__ to define
        properties), then hydrates from the YAML data via _auto_hydrate.
        If the file does not exist the instance is returned unhydrated.
        """
        fp = Path(filepath)
        instance = cls(filepath=fp)
        # Subclasses hydrate inside the __init__ wrapper installed by
        # __init_subclass__. The base class has no such wrapper, so it
        # must be hydrated here or Sample.load() would return an empty
        # sample.
        if cls is Sample:
            instance._auto_hydrate()
        return instance

    # ── Converters (delegated to samplekit.converters) ──

    def __getattr__(self, name: str):
        """Resolve unknown public attributes to ``converters.sample_<name>``.

        Only called when normal lookup fails. The returned callable passes
        this sample as the first argument to the converter function.
        """
        if name.startswith('_'):
            raise AttributeError(name)
        from . import converters
        fn = getattr(converters, f"sample_{name}", None)
        if fn is not None:
            return lambda *args, **kw: fn(self, *args, **kw)
        raise AttributeError(f"'{type(self).__name__}' has no attribute {name!r}")

    # ── Display ─────────────────────────────────────────

    def __str__(self):
        """Multi-line summary: name, valued properties and table sizes."""
        lines = [f"Sample: {self.name}"]
        props = object.__getattribute__(self, '_props')
        tables = object.__getattribute__(self, '_tables')
        order = object.__getattribute__(self, '_order')

        for key in order:
            if key in props:
                prop = props[key]
                if prop.value is not None:
                    lines.append(f"  {key}: {prop.text}")
            elif key in tables:
                table = tables[key]
                lines.append(f"  {key}: {len(table)} rows × {len(table.columns)} columns")

        return "\n".join(lines)

    def __repr__(self):
        props = object.__getattribute__(self, '_props')
        tables = object.__getattribute__(self, '_tables')
        return (f"<Sample '{self.name}' with "
                f"{len(props)} properties, {len(tables)} tables>")
@@ -0,0 +1,190 @@
1
+ """SampleList — collection of samples with filtering, sorting, and conversion."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Callable
8
+
9
+ from .sample import Sample
10
+
11
+
12
class SampleList:
    """
    Collection of Sample objects loaded from a directory, file list, or built
    programmatically.

    Parameters
    ----------
    source : list[Sample] | list[Path|str] | Path | str | None
        - None: empty collection
        - list[Sample]: direct objects
        - list[Path|str]: paths to .md files
        - Path/str: directory to glob for .md files
    sample_class : type
        Sample subclass for loading from paths (default: Sample).
    pattern : str
        Glob pattern when loading from a directory (default: "*.md").

    Raises
    ------
    TypeError
        If *source*, or an item of a source list, has an unsupported type.

    Examples
    --------
    >>> samples = SampleList("data/", sample_class=MySample)
    >>> high = samples.filter(lambda s: s["temperature"].value > 30)
    >>> sorted_s = high.sort("temperature")
    >>> df = sorted_s.to_dataframe()
    """

    def __init__(
        self,
        source=None,
        sample_class: type = Sample,
        pattern: str = "*.md",
    ):
        self._samples: list[Sample] = []
        self._sample_class = sample_class

        if source is None:
            return
        if isinstance(source, (str, Path)):
            self._load_directory(Path(source), pattern)
        elif isinstance(source, list):
            for item in source:
                if isinstance(item, Sample):
                    self._samples.append(item)
                elif isinstance(item, (str, Path)):
                    self._load_file(Path(item))
                else:
                    # Reject unknown items instead of dropping them silently.
                    raise TypeError(
                        f"Unsupported item type in source list: {type(item)}"
                    )
        else:
            raise TypeError(f"Unsupported source type: {type(source)}")

    def _load_directory(self, directory: Path, pattern: str):
        """Load every file in *directory* matching *pattern*, in sorted order."""
        for f in sorted(directory.glob(pattern)):
            self._load_file(f)

    def _load_file(self, filepath: Path):
        """Load one file with ``sample_class.load``.

        Best effort: a failure is reported on stderr and the file is
        skipped, so one bad file does not abort a bulk load.
        """
        try:
            self._samples.append(self._sample_class.load(filepath))
        except Exception as e:
            print(f"Warning: {filepath}: {e}", file=sys.stderr)

    # ── Mutation ────────────────────────────────────────

    def append(self, sample: Sample | str | Path):
        """Add a sample (object or file path)."""
        if isinstance(sample, Sample):
            self._samples.append(sample)
        else:
            self._load_file(Path(sample))

    # ── Filtering & sorting ─────────────────────────────

    def filter(self, func: Callable[[Sample], bool]) -> SampleList:
        """Return a new SampleList with samples matching the predicate."""
        result = SampleList(sample_class=self._sample_class)
        result._samples = [s for s in self._samples if func(s)]
        return result

    def sort(
        self,
        key: str | Callable | list,
        reverse: bool | list[bool] = False,
    ) -> SampleList:
        """Return a sorted SampleList.

        Parameters
        ----------
        key : str, callable, or list
            - str: property name (sorts by value; samples without the
              property or with a None value sort first)
            - callable: sort key function
            - list: multi-key stable sort (applied in reverse order)
        reverse : bool or list[bool]
            A single flag applies to every key. A list must have one flag
            per key; with a single key only its first element is used.

        Raises
        ------
        ValueError
            If *key* and *reverse* are lists of different lengths.
        """
        result = SampleList(sample_class=self._sample_class)

        if isinstance(key, list):
            if isinstance(reverse, list):
                if len(reverse) != len(key):
                    # zip() would silently drop the leading sort keys.
                    raise ValueError(
                        f"reverse has {len(reverse)} entries "
                        f"but key has {len(key)}"
                    )
                flags = reverse
            else:
                flags = [reverse] * len(key)

            # Stable multi-key sort: apply the least significant key first.
            samples = list(self._samples)
            for k, rev in zip(reversed(key), reversed(flags)):
                samples = sorted(samples, key=self._make_sort_key(k), reverse=rev)
            result._samples = samples
        else:
            if isinstance(reverse, list):
                rev = reverse[0] if reverse else False
            else:
                rev = reverse
            result._samples = sorted(
                self._samples, key=self._make_sort_key(key), reverse=rev)

        return result

    @staticmethod
    def _make_sort_key(key) -> Callable:
        """Turn a property name into a sort-key function.

        Callables are returned unchanged. For a property name the key is
        ``(has_value, value)``: samples lacking the property (or holding
        None) compare before all others, and their placeholder is never
        compared with a real value, so non-numeric values sort without a
        TypeError.
        """
        if not isinstance(key, str):
            return key

        prop_name = key

        def _key(s: Sample):
            try:
                v = s[prop_name].value
            except (KeyError, AttributeError):
                v = None
            return (False, 0) if v is None else (True, v)

        return _key

    # ── Access ──────────────────────────────────────────

    def __getitem__(self, index):
        """Index by position (int), slice (new SampleList) or sample name (str).

        Raises
        ------
        KeyError
            If no sample has the given name.
        TypeError
            If *index* is not an int, slice or str.
        """
        if isinstance(index, int):
            return self._samples[index]
        if isinstance(index, slice):
            result = SampleList(sample_class=self._sample_class)
            result._samples = self._samples[index]
            return result
        if isinstance(index, str):
            for s in self._samples:
                if s.name == index:
                    return s
            raise KeyError(index)
        raise TypeError(f"Invalid index type: {type(index)}")

    def __len__(self) -> int:
        return len(self._samples)

    def __iter__(self):
        return iter(self._samples)

    def __bool__(self) -> bool:
        return len(self._samples) > 0

    # ── Converters (delegated to samplekit.converters) ──

    def __getattr__(self, name: str):
        """Resolve unknown public attributes to ``converters.samplelist_<name>``.

        Only called when normal lookup fails. The returned callable passes
        this list as the first argument to the converter function.
        """
        if name.startswith('_'):
            raise AttributeError(name)
        from . import converters
        fn = getattr(converters, f"samplelist_{name}", None)
        if fn is not None:
            return lambda *args, **kw: fn(self, *args, **kw)
        raise AttributeError(f"'{type(self).__name__}' has no attribute {name!r}")

    # ── I/O ─────────────────────────────────────────────

    def save_all(
        self,
        directory: str | Path,
        style: str = "math",
        overwrite: bool = False,
    ) -> list[Path]:
        """Save all samples to a directory as individual .md files.

        Each sample is written to ``<directory>/<name>.md``.

        Parameters
        ----------
        directory : str or Path
            Target directory; created if missing.
        style : "math" or "text"
            Passed through to ``Sample.save``.
        overwrite : bool
            Allow replacing existing files.

        Returns
        -------
        list[Path]
            Paths written, in sample order.

        Raises
        ------
        FileExistsError
            If *overwrite* is False and a target file exists or two
            samples map to the same file. Raised before anything is
            written.
        """
        directory = Path(directory)
        directory.mkdir(parents=True, exist_ok=True)
        targets = [(s, directory / f"{s.name}.md") for s in self._samples]

        if not overwrite:
            # Validate everything up front so a collision cannot leave the
            # directory half written.
            seen = set()
            for _, fp in targets:
                if fp in seen or fp.exists():
                    raise FileExistsError(f"{fp} already exists")
                seen.add(fp)

        paths = []
        for s, fp in targets:
            s.save(fp, style=style)
            paths.append(fp)
        return paths

    # ── Display ─────────────────────────────────────────

    def __repr__(self):
        return f"<SampleList with {len(self._samples)} samples>"