legend-pydataobj 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ """
2
+ Implements a LEGEND Data Object representing an n-dimensional array of fixed
3
+ size and corresponding utilities.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ from typing import Any
8
+
9
+ import numpy
10
+
11
+ from .array import Array
12
+
13
+
14
class FixedSizeArray(Array):
    """An array whose elements are themselves arrays of a fixed size.

    The shape along every axis > 0 is guaranteed: for example, an array of
    vectors will always have length 3 on axis 1, and that never changes from
    application to application. Some platforms use this data type for
    optimized memory handling. Nothing that sophisticated happens here: the
    identification is stored only for LGDO validity, i.e. for now this class
    is an alias for :class:`.Array` that keeps track of the datatype name.
    """

    def __init__(
        self,
        nda: numpy.ndarray = None,
        shape: tuple[int, ...] = (),
        dtype: numpy.dtype = None,
        fill_val: int | float = None,
        attrs: dict[str, Any] = None,
    ) -> None:
        """Forward every argument, unchanged, to the :class:`.Array` constructor.

        See Also
        --------
        :class:`.Array`
        """
        super().__init__(
            nda=nda,
            shape=shape,
            dtype=dtype,
            fill_val=fill_val,
            attrs=attrs,
        )

    def datatype_name(self) -> str:
        """Return the datatype name that distinguishes this class from Array."""
        return "fixedsize_array"
lgdo/types/lgdo.py ADDED
@@ -0,0 +1,51 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any
5
+
6
+
7
class LGDO(ABC):
    """Abstract base class representing a LEGEND Data Object (LGDO)."""

    @abstractmethod
    def __init__(self, attrs: dict[str, Any] | None = None) -> None:
        """Store a copy of `attrs` and ensure it carries a valid ``datatype``.

        Parameters
        ----------
        attrs
            user attributes to attach to this object. The mapping is
            copied, so the caller's dictionary is never modified.

        Raises
        ------
        ValueError
            if `attrs` already holds a ``datatype`` entry that differs from
            the string returned by :meth:`form_datatype`.
        """
        self.attrs = dict(attrs) if attrs is not None else {}

        # Fill in the datatype when the caller did not supply one; otherwise
        # the caller's value is kept and must agree with the class datatype.
        expected = self.form_datatype()
        declared = self.attrs.setdefault("datatype", expected)
        if declared != expected:
            raise ValueError(
                f"datatype attribute ({declared}) does "
                f"not match class datatype ({expected})!"
            )

    @abstractmethod
    def datatype_name(self) -> str:
        """The name for this LGDO's datatype attribute."""

    @abstractmethod
    def form_datatype(self) -> str:
        """Return this LGDO's datatype attribute string."""

    def getattrs(self, datatype: bool = False) -> dict:
        """Return a copy of the LGDO attributes dictionary.

        Parameters
        ----------
        datatype
            if ``False``, leave the ``datatype`` attribute out of the
            returned dictionary.
        """
        if datatype:
            return dict(self.attrs)
        return {key: val for key, val in self.attrs.items() if key != "datatype"}

    def __str__(self) -> str:
        """Use the ``repr`` form as the printable form."""
        return repr(self)

    def __repr__(self) -> str:
        """Return ``ClassName(attrs={...})``."""
        return f"{self.__class__.__name__}(attrs={self.attrs!r})"
lgdo/types/scalar.py ADDED
@@ -0,0 +1,59 @@
1
+ """Implements a LEGEND Data Object representing a scalar and corresponding utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import Any
7
+
8
+ import numpy as np
9
+
10
+ from .. import lgdo_utils as utils
11
+ from .lgdo import LGDO
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+
16
class Scalar(LGDO):
    """Holds just a scalar value and some attributes (datatype, units, ...)."""

    # TODO: do scalars need proper numpy dtypes?

    def __init__(
        self, value: int | float, attrs: dict[str, Any] | None = None
    ) -> None:
        """
        Parameters
        ----------
        value
            the value for this scalar.
        attrs
            a set of user attributes to be carried along with this LGDO.

        Raises
        ------
        ValueError
            if `value` is not a scalar according to :func:`numpy.isscalar`.
        """
        if not np.isscalar(value):
            raise ValueError("cannot instantiate a Scalar with a non-scalar value")

        # value must be assigned before the base constructor runs, because
        # the datatype string is derived from it via datatype_name()
        self.value = value
        super().__init__(attrs)

    def datatype_name(self) -> str:
        """Return the datatype name derived from the stored value."""
        # NOTE(review): the attribute is returned without being called; if a
        # value ever exposes ``datatype_name`` as a method this yields the
        # bound method rather than a string -- confirm intent.
        if hasattr(self.value, "datatype_name"):
            return self.value.datatype_name
        return utils.get_element_type(self.value)

    def form_datatype(self) -> str:
        """Return the datatype attribute string (same as the datatype name)."""
        return self.datatype_name()

    def __eq__(self, other: Scalar) -> bool:
        """Two scalars are equal when both value and attributes match.

        Comparison with anything that is not a :class:`Scalar` is ``False``.
        """
        if not isinstance(other, Scalar):
            return False
        # compare against the *other* object's attributes (comparing
        # self.attrs with itself would always succeed)
        return self.value == other.value and self.attrs == other.attrs

    def __str__(self) -> str:
        """Return the value followed by the attributes (datatype omitted)."""
        attrs = self.getattrs()
        return f"{str(self.value)} with attrs={repr(attrs)}"

    def __repr__(self) -> str:
        """Return ``ClassName(value=..., attrs=...)``."""
        return (
            self.__class__.__name__
            + f"(value={repr(self.value)}, attrs={repr(self.attrs)})"
        )
lgdo/types/struct.py ADDED
@@ -0,0 +1,108 @@
1
+ """
2
+ Implements a LEGEND Data Object representing a struct and corresponding
3
+ utilities.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import logging
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+
12
+ from .lgdo import LGDO
13
+
14
+ log = logging.getLogger(__name__)
15
+
16
+
17
class Struct(LGDO, dict):
    """A dictionary of LGDO's with an optional set of attributes.

    After instantiation, add fields using :meth:`add_field` to keep the
    datatype updated, or call :meth:`update_datatype` after adding.
    """

    # TODO: overload setattr to require add_field for setting?

    def __init__(
        self,
        obj_dict: dict[str, LGDO] | None = None,
        attrs: dict[str, Any] | None = None,
    ) -> None:
        """
        Parameters
        ----------
        obj_dict
            instantiate this Struct using the supplied named LGDO's. Note: no
            copy is performed, the objects are used directly.
        attrs
            a set of user attributes to be carried along with this LGDO.
        """
        # fields must be in place before the base constructor runs, because
        # the datatype string it stores is built from the current keys
        if obj_dict is not None:
            self.update(obj_dict)

        super().__init__(attrs)

    def datatype_name(self) -> str:
        """Return the datatype name of this LGDO type."""
        return "struct"

    def form_datatype(self) -> str:
        """Return ``<datatype_name>{key1,key2,...}`` built from the current keys."""
        return self.datatype_name() + "{" + ",".join(str(k) for k in self.keys()) + "}"

    def update_datatype(self) -> None:
        """Recompute the ``datatype`` attribute from the current fields."""
        self.attrs["datatype"] = self.form_datatype()

    def add_field(self, name: str | int, obj: LGDO) -> None:
        """Add a field to the struct and refresh the ``datatype`` attribute."""
        self[name] = obj
        self.update_datatype()

    def remove_field(self, name: str | int, delete: bool = False) -> None:
        """Remove a field from the struct and refresh the ``datatype`` attribute.

        Parameters
        ----------
        name
            name of the field to be removed.
        delete
            if ``True``, delete the field object by calling :any:`del`.

        Raises
        ------
        KeyError
            if no field called `name` exists.
        """
        if delete:
            del self[name]
        else:
            self.pop(name)
        self.update_datatype()

    def __str__(self) -> str:
        """Convert to string (e.g. for printing)."""
        # The context manager restores the NumPy print options even when
        # formatting one of the fields raises.
        with np.printoptions(threshold=8):
            string = "{\n"
            for k, v in self.items():
                text = str(v)
                if "\n" in text:
                    # multi-line values start on their own line
                    rv = text.replace("\n", "\n ")
                    string += f" '{k}':\n {rv},\n"
                else:
                    string += f" '{k}': {text},\n"
            string += "}"

            attrs = self.getattrs()
            if attrs:
                string += f" with attrs={attrs}"

        return string

    def __repr__(self) -> str:
        """Return a single-line ``ClassName(dict=..., attrs=...)`` string."""
        # Abbreviated array output while the fields are formatted; options
        # are restored on exit, including on error.
        with np.printoptions(threshold=5, edgeitems=2, linewidth=100):
            out = (
                self.__class__.__name__
                + "(dict="
                + dict.__repr__(self)
                + f", attrs={repr(self.attrs)})"
            )
        # collapse newlines and runs of whitespace into single spaces
        return " ".join(out.split())
lgdo/types/table.py ADDED
@@ -0,0 +1,349 @@
1
+ """
2
+ Implements a LEGEND Data Object representing a special struct of arrays of
3
+ equal length and corresponding utilities.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import logging
8
+ import re
9
+ from typing import Any
10
+
11
+ import numexpr as ne
12
+ import numpy as np
13
+ import pandas as pd
14
+ from pandas.io.formats import format as fmt
15
+
16
+ from .array import Array
17
+ from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
18
+ from .lgdo import LGDO
19
+ from .struct import Struct
20
+ from .vectorofvectors import VectorOfVectors
21
+
22
+ log = logging.getLogger(__name__)
23
+
24
+
25
class Table(Struct):
    """A special struct of arrays or subtable columns of equal length.

    Holds onto an internal read/write location ``loc`` that is useful in
    managing table I/O using functions like :meth:`push_row`, :meth:`is_full`,
    and :meth:`clear`.

    Note
    ----
    If you write to a table and don't fill it up to its total size, be sure to
    resize it before passing to data processing functions, as they will call
    :meth:`__len__` to access valid data, which returns the ``size`` attribute.
    """

    # TODO: overload getattr to allow access to fields as object attributes?

    def __init__(
        self,
        size: int | None = None,
        col_dict: dict[str, LGDO] | None = None,
        attrs: dict[str, Any] | None = None,
    ) -> None:
        r"""
        Parameters
        ----------
        size
            sets the number of rows in the table. :class:`.Array`\ s in
            `col_dict` will be resized to match size if both are not ``None``.
            If `size` is left as ``None``, the number of table rows is
            determined from the length of the first array in `col_dict`. If
            neither is provided, a default length of 1024 is used.
        col_dict
            instantiate this table using the supplied named array-like LGDO's.
            Note 1: no copy is performed, the objects are used directly.
            Note 2: if `size` is not ``None``, all arrays will be resized to
            match it. Note 3: if the arrays have different lengths, all will
            be resized to match the length of the first array.
        attrs
            A set of user attributes to be carried along with this LGDO.

        Notes
        -----
        the :attr:`loc` attribute is initialized to 0.
        """
        super().__init__(obj_dict=col_dict, attrs=attrs)

        if col_dict is not None and len(col_dict) > 0:
            # Size comes from the columns (or from `size` when given). Only
            # warn about mismatching column lengths when the caller did not
            # explicitly request a size.
            self.resize(new_size=size, do_warn=size is None)
        else:
            # no columns: just record the size (default to 1024)
            self.size = size if size is not None else 1024

        # always start at loc=0
        self.loc = 0

    def datatype_name(self) -> str:
        """Return the datatype name of this LGDO type."""
        return "table"

    def __len__(self) -> int:
        """Provides ``__len__`` for this array-like class."""
        return self.size

    def resize(self, new_size: int | None = None, do_warn: bool = False) -> None:
        """Resize every column to `new_size` and record it as the table size.

        Parameters
        ----------
        new_size
            the new number of rows. If ``None``, the length of the first
            column is used.
        do_warn
            if ``True``, log a warning for each column whose length differs
            from the target size.
        """
        for field, obj in self.items():
            if new_size is None:
                new_size = len(obj)
            elif len(obj) != new_size:
                if do_warn:
                    log.warning(
                        "warning: resizing field %s with size %s != %s",
                        field,
                        len(obj),
                        new_size,
                    )
                obj.resize(new_size)
        self.size = new_size

    def push_row(self) -> None:
        """Advance the read/write location by one row."""
        self.loc += 1

    def is_full(self) -> bool:
        """Return ``True`` when the read/write location has reached the size."""
        return self.loc >= self.size

    def clear(self) -> None:
        """Reset the read/write location to the first row."""
        self.loc = 0

    def add_field(
        self, name: str, obj: LGDO, use_obj_size: bool = False, do_warn=True
    ) -> None:
        """Add a field (column) to the table.

        Use the name "field" here to match the terminology used in
        :class:`.Struct`.

        Parameters
        ----------
        name
            the name for the field in the table.
        obj
            the object to be added to the table.
        use_obj_size
            if ``True``, resize the table to match the length of `obj`.
        do_warn
            print or don't print useful info. Passed to :meth:`resize` when
            `use_obj_size` is ``True``.

        Raises
        ------
        TypeError
            if `obj` does not define ``__len__``.
        """
        if not hasattr(obj, "__len__"):
            raise TypeError("cannot add field of type", type(obj).__name__)

        super().add_field(name, obj)

        # check / update sizes
        if self.size != len(obj):
            new_size = len(obj) if use_obj_size else self.size
            # warnings are only forwarded when the existing columns are the
            # ones being resized, as documented above
            self.resize(new_size=new_size, do_warn=do_warn and use_obj_size)

    def add_column(
        self, name: str, obj: LGDO, use_obj_size: bool = False, do_warn: bool = True
    ) -> None:
        """Alias for :meth:`.add_field` using table terminology 'column'."""
        self.add_field(name, obj, use_obj_size=use_obj_size, do_warn=do_warn)

    def remove_column(self, name: str, delete: bool = False) -> None:
        """Alias for :meth:`.remove_field` using table terminology 'column'."""
        super().remove_field(name, delete)

    def join(
        self, other_table: Table, cols: list[str] | None = None, do_warn: bool = True
    ) -> None:
        """Add the columns of another table to this table.

        Notes
        -----
        Following the join, both tables have access to `other_table`'s fields
        (but `other_table` doesn't have access to this table's fields). No
        memory is allocated in this process. `other_table` can go out of scope
        and this table will retain access to the joined data.

        Parameters
        ----------
        other_table
            the table whose columns are to be joined into this table.
        cols
            a list of names of columns from `other_table` to be joined into
            this table.
        do_warn
            set to ``False`` to turn off warnings associated with mismatched
            `loc` parameter or :meth:`add_column` warnings.
        """
        if other_table.loc != self.loc and do_warn:
            log.warning(f"other_table.loc ({other_table.loc}) != self.loc({self.loc})")
        if cols is None:
            cols = other_table.keys()
        for name in cols:
            self.add_column(name, other_table[name], do_warn=do_warn)

    def get_dataframe(
        self, cols: list[str] | None = None, copy: bool = False, prefix: str = ""
    ) -> pd.DataFrame:
        """Get a :class:`pandas.DataFrame` from the data in the table.

        Notes
        -----
        The requested data must be array-like, with the ``nda`` attribute.
        :class:`.VectorOfVectors` columns are converted with ``to_aoesa()``
        first. Nested tables are expanded recursively with all of their
        columns; `cols` and `copy` are not forwarded to them.

        Parameters
        ----------
        cols
            a list of column names specifying the subset of the table's columns
            to be added to the dataframe.
        copy
            forwarded to the :class:`pandas.DataFrame` constructor. Column
            data is inserted through ``nda.tolist()``.
        prefix
            The prefix to be added to the column names. Used when recursively
            getting the dataframe of a Table inside this Table.

        Raises
        ------
        ValueError
            if a requested column has no ``nda`` attribute.
        """
        df = pd.DataFrame(copy=copy)
        if cols is None:
            cols = self.keys()
        for col in cols:
            if isinstance(self[col], Table):
                sub_df = self[col].get_dataframe(prefix=f"{prefix}{col}_")
                df = sub_df if df.empty else df.join(sub_df)
                continue

            if isinstance(self[col], VectorOfVectors):
                column = self[col].to_aoesa()
            else:
                column = self[col]

            if not hasattr(column, "nda"):
                raise ValueError(f"column {col} does not have an nda")
            df[prefix + str(col)] = column.nda.tolist()

        return df

    def eval(self, expr_config: dict) -> Table:
        """Apply column operations to the table and return a new table holding
        the resulting columns.

        Currently defers all the job to :meth:`numexpr.evaluate`. This
        might change in the future.

        Parameters
        ----------
        expr_config
            dictionary that configures expressions according the following
            specification:

            .. code-block:: js

                {
                    "O1": {
                        "expression": "p1 + p2 * a**2",
                        "parameters": {
                            "p1": "2",
                            "p2": "3"
                        }
                    },
                    "O2": {
                        "expression": "O1 - b"
                    }
                    // ...
                }

            where:

            - ``expression`` is an expression string supported by
              :meth:`numexpr.evaluate` (see also `here
              <https://numexpr.readthedocs.io/projects/NumExpr3/en/latest/index.html>`_
              for documentation). Note: because of internal limitations,
              reduction operations must appear the last in the stack.
            - ``parameters`` is a dictionary of function parameters. Passed to
              :meth:`numexpr.evaluate` as `local_dict` argument.

        Raises
        ------
        TypeError
            if an expression refers to a :class:`.VectorOfVectors` column.
        ValueError
            if an expression evaluates to data with more than two dimensions.

        Warning
        -------
        Blocks in `expr_config` must be ordered according to mutual dependency.
        """
        out_tbl = Table(size=self.size)
        for out_var, spec in expr_config.items():
            in_vars = {}
            # Find all valid python variables in expression
            # (e.g. "a*b+sin(Cool)" --> ['a', 'b', 'sin', 'Cool'])
            for elem in re.findall(r"\s*[A-Za-z_]\w*\s*", spec["expression"]):
                elem = elem.strip()
                if elem in self:
                    # the variable is a column of this table
                    column = self[elem]
                elif elem in out_tbl:
                    # the variable was produced by an earlier expression
                    column = out_tbl[elem]
                else:
                    # anything else (e.g. a function name like sin) is ignored
                    continue

                if isinstance(column, Array):
                    # hand the bare ndarray over for evaluation
                    column = column.nda
                elif isinstance(column, VectorOfVectors):
                    # No vector of vectors support yet
                    raise TypeError("Data of type VectorOfVectors not supported (yet)")
                in_vars[elem] = column

            if "parameters" in spec:
                # parameters take precedence over same-named columns
                local_dict = dict(in_vars, **spec["parameters"])
            else:
                local_dict = in_vars
            out_data = ne.evaluate(f"{spec['expression']}", local_dict=local_dict)

            # pick the LGDO type from the dimensionality of the result
            ndim = len(np.shape(out_data))
            if ndim <= 1:
                # a single scalar (e.g. a reduction) or one scalar per row
                out_data = Array(nda=out_data)
            elif ndim == 2:
                # one fixed-length array per row
                out_data = ArrayOfEqualSizedArrays(nda=out_data)
            else:
                # higher order data (e.g. matrix product of
                # ArrayOfEqualSizedArrays) not supported yet
                raise ValueError(
                    f"Calculation resulted in {ndim - 1}-D row which is not supported yet"
                )

            out_tbl.add_column(out_var, out_data)

        return out_tbl

    def __str__(self):
        """Render the table as a dataframe followed by column and table attributes."""
        opts = fmt.get_dataframe_repr_params()
        opts["show_dimensions"] = False
        opts["index"] = False

        try:
            string = self.get_dataframe().to_string(**opts)
        except ValueError:
            string = "Cannot print Table with VectorOfVectors yet!"

        string += "\n"
        for k, v in self.items():
            attrs = v.getattrs()
            if attrs:
                string += f"\nwith attrs['{k}']={attrs}"

        attrs = self.getattrs()
        if attrs:
            string += f"\nwith attrs={attrs}"

        return string