legend-pydataobj 1.11.8__py3-none-any.whl → 1.11.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lgdo/types/array.py CHANGED
@@ -6,7 +6,7 @@ corresponding utilities.
6
6
  from __future__ import annotations
7
7
 
8
8
  import logging
9
- from collections.abc import Collection, Iterator
9
+ from collections.abc import Iterator
10
10
  from typing import Any
11
11
 
12
12
  import awkward as ak
@@ -17,12 +17,12 @@ import pint_pandas # noqa: F401
17
17
 
18
18
  from .. import utils
19
19
  from ..units import default_units_registry as u
20
- from .lgdo import LGDOCollection
20
+ from .lgdo import LGDO
21
21
 
22
22
  log = logging.getLogger(__name__)
23
23
 
24
24
 
25
- class Array(LGDOCollection):
25
+ class Array(LGDO):
26
26
  r"""Holds an :class:`numpy.ndarray` and attributes.
27
27
 
28
28
  :class:`Array` (and the other various array types) holds an `nda` instead
@@ -78,7 +78,11 @@ class Array(LGDOCollection):
78
78
  elif isinstance(nda, Array):
79
79
  nda = nda.nda
80
80
 
81
+ elif not isinstance(nda, np.ndarray):
82
+ nda = np.array(nda)
83
+
81
84
  self.nda = nda
85
+ self.dtype = self.nda.dtype
82
86
 
83
87
  super().__init__(attrs)
84
88
 
@@ -92,91 +96,18 @@ class Array(LGDOCollection):
92
96
  return dt + "<" + nd + ">{" + et + "}"
93
97
 
94
98
  def __len__(self) -> int:
95
- return self._size
96
-
97
- @property
98
- def nda(self):
99
- return self._nda[: self._size, ...] if self._nda.shape != () else self._nda
100
-
101
- @nda.setter
102
- def nda(self, value):
103
- self._nda = value if isinstance(value, np.ndarray) else np.array(value)
104
- self._size = len(self._nda) if self._nda.shape != () else 0
105
-
106
- @property
107
- def dtype(self):
108
- return self._nda.dtype
109
-
110
- @property
111
- def shape(self):
112
- return (len(self),) + self._nda.shape[1:]
113
-
114
- def reserve_capacity(self, capacity: int) -> None:
115
- "Set size (number of rows) of internal memory buffer"
116
- if capacity < len(self):
117
- msg = "Cannot reduce capacity below Array length"
118
- raise ValueError(msg)
119
- self._nda.resize((capacity,) + self._nda.shape[1:], refcheck=False)
120
-
121
- def get_capacity(self) -> int:
122
- "Get capacity (i.e. max size before memory must be re-allocated)"
123
- return len(self._nda)
124
-
125
- def trim_capacity(self) -> None:
126
- "Set capacity to be minimum needed to support Array size"
127
- self.reserve_capacity(np.prod(self.shape))
128
-
129
- def resize(self, new_size: int | Collection[int], trim=False) -> None:
130
- """Set size of Array in rows. Only change capacity if it must be
131
- increased to accommodate new rows; in this case double capacity.
132
- If trim is True, capacity will be set to match size. If new_size
133
- is an int, do not change size of inner dimensions.
134
-
135
- If new_size is a collection, internal memory will be re-allocated, so
136
- this should be done only rarely!"""
137
-
138
- if isinstance(new_size, Collection):
139
- self._size = new_size[0]
140
- self._nda.resize(new_size)
141
- else:
142
- self._size = new_size
143
-
144
- if trim and new_size != self.get_capacity:
145
- self.reserve_capacity(new_size)
146
-
147
- # If capacity is not big enough, set to next power of 2 big enough
148
- if new_size > self.get_capacity():
149
- self.reserve_capacity(int(2 ** (np.ceil(np.log2(new_size)))))
99
+ return len(self.nda)
100
+
101
+ def resize(self, new_size: int) -> None:
102
+ new_shape = (new_size,) + self.nda.shape[1:]
103
+ return self.nda.resize(new_shape, refcheck=True)
150
104
 
151
105
  def append(self, value: np.ndarray) -> None:
152
- "Append value to end of array (with copy)"
153
- self.insert(len(self), value)
106
+ self.resize(len(self) + 1)
107
+ self.nda[-1] = value
154
108
 
155
109
  def insert(self, i: int, value: int | float) -> None:
156
- "Insert value into row i (with copy)"
157
- if i > len(self):
158
- msg = f"index {i} is out of bounds for array with size {len(self)}"
159
- raise IndexError(msg)
160
-
161
- value = np.array(value)
162
- if value.shape == self.shape[1:]:
163
- self.resize(len(self) + 1)
164
- self[i + 1 :] = self[i:-1]
165
- self[i] = value
166
- elif value.shape[1:] == self.shape[1:]:
167
- self.resize(len(self) + len(value))
168
- self[i + len(value) :] = self[i : -len(value)]
169
- self[i : i + len(value)] = value
170
- else:
171
- msg = f"Could not insert value with shape {value.shape} into Array with shape {self.shape}"
172
- raise ValueError(msg)
173
-
174
- def replace(self, i: int, value: int | float) -> None:
175
- "Replace value at row i"
176
- if i >= len(self):
177
- msg = f"index {i} is out of bounds for array with size {len(self)}"
178
- raise IndexError(msg)
179
- self[i] = value
110
+ self.nda = np.insert(self.nda, i, value)
180
111
 
181
112
  def __getitem__(self, key):
182
113
  return self.nda[key]
lgdo/types/encoded.py CHANGED
@@ -11,12 +11,12 @@ from numpy.typing import NDArray
11
11
 
12
12
  from .. import utils
13
13
  from .array import Array
14
- from .lgdo import LGDOCollection
14
+ from .lgdo import LGDO
15
15
  from .scalar import Scalar
16
16
  from .vectorofvectors import VectorOfVectors
17
17
 
18
18
 
19
- class VectorOfEncodedVectors(LGDOCollection):
19
+ class VectorOfEncodedVectors(LGDO):
20
20
  """An array of variable-length encoded arrays.
21
21
 
22
22
  Used to represent an encoded :class:`.VectorOfVectors`. In addition to an
@@ -92,17 +92,6 @@ class VectorOfEncodedVectors(LGDOCollection):
92
92
 
93
93
  return False
94
94
 
95
- def reserve_capacity(self, *capacity: int) -> None:
96
- self.encoded_data.reserve_capacity(*capacity)
97
- self.decoded_size.reserve_capacity(capacity[0])
98
-
99
- def get_capacity(self) -> tuple:
100
- return (self.decoded_size.get_capacity, *self.encoded_data.get_capacity())
101
-
102
- def trim_capacity(self) -> None:
103
- self.encoded_data.trim_capacity()
104
- self.decoded_size.trim_capacity()
105
-
106
95
  def resize(self, new_size: int) -> None:
107
96
  """Resize vector along the first axis.
108
97
 
@@ -113,6 +102,21 @@ class VectorOfEncodedVectors(LGDOCollection):
113
102
  self.encoded_data.resize(new_size)
114
103
  self.decoded_size.resize(new_size)
115
104
 
105
+ def append(self, value: tuple[NDArray, int]) -> None:
106
+ """Append a 1D encoded vector at the end.
107
+
108
+ Parameters
109
+ ----------
110
+ value
111
+ a tuple holding the encoded array and its decoded size.
112
+
113
+ See Also
114
+ --------
115
+ .VectorOfVectors.append
116
+ """
117
+ self.encoded_data.append(value[0])
118
+ self.decoded_size.append(value[1])
119
+
116
120
  def insert(self, i: int, value: tuple[NDArray, int]) -> None:
117
121
  """Insert an encoded vector at index `i`.
118
122
 
@@ -278,7 +282,7 @@ class VectorOfEncodedVectors(LGDOCollection):
278
282
  raise ValueError(msg)
279
283
 
280
284
 
281
- class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
285
+ class ArrayOfEncodedEqualSizedArrays(LGDO):
282
286
  """An array of encoded arrays with equal decoded size.
283
287
 
284
288
  Used to represent an encoded :class:`.ArrayOfEqualSizedArrays`. In addition
@@ -345,23 +349,14 @@ class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
345
349
 
346
350
  return False
347
351
 
348
- def reserve_capacity(self, *capacity: int) -> None:
349
- self.encoded_data.reserve_capacity(capacity)
350
-
351
- def get_capacity(self) -> tuple:
352
- return self.encoded_data.get_capacity()
353
-
354
- def trim_capacity(self) -> None:
355
- self.encoded_data.trim_capacity()
356
-
357
- def resize(self, new_size: int, trim: bool = False) -> None:
352
+ def resize(self, new_size: int) -> None:
358
353
  """Resize array along the first axis.
359
354
 
360
355
  See Also
361
356
  --------
362
357
  .VectorOfVectors.resize
363
358
  """
364
- self.encoded_data.resize(new_size, trim)
359
+ self.encoded_data.resize(new_size)
365
360
 
366
361
  def append(self, value: NDArray) -> None:
367
362
  """Append a 1D encoded array at the end.
lgdo/types/histogram.py CHANGED
@@ -424,7 +424,7 @@ class Histogram(Struct):
424
424
  dict.__setitem__(self, name, obj)
425
425
  else:
426
426
  msg = "histogram fields cannot be mutated "
427
- raise AttributeError(msg)
427
+ raise TypeError(msg)
428
428
 
429
429
  def __getattr__(self, name: str) -> None:
430
430
  # do not allow for new attributes on this
lgdo/types/lgdo.py CHANGED
@@ -92,53 +92,3 @@ class LGDO(ABC):
92
92
 
93
93
  def __repr__(self) -> str:
94
94
  return self.__class__.__name__ + f"(attrs={self.attrs!r})"
95
-
96
-
97
- class LGDOCollection(LGDO):
98
- """Abstract base class representing a LEGEND Collection Object (LGDO).
99
- This defines the interface for classes used as table columns.
100
- """
101
-
102
- @abstractmethod
103
- def __init__(self, attrs: dict[str, Any] | None = None) -> None:
104
- super().__init__(attrs)
105
-
106
- @abstractmethod
107
- def __len__(self) -> int:
108
- """Provides ``__len__`` for this array-like class."""
109
-
110
- @abstractmethod
111
- def reserve_capacity(self, capacity: int) -> None:
112
- """Reserve capacity (in rows) for later use. Internal memory buffers
113
- will have enough entries to store this many rows.
114
- """
115
-
116
- @abstractmethod
117
- def get_capacity(self) -> int:
118
- "get reserved capacity of internal memory buffers in rows"
119
-
120
- @abstractmethod
121
- def trim_capacity(self) -> None:
122
- """set capacity to only what is required to store current contents
123
- of LGDOCollection
124
- """
125
-
126
- @abstractmethod
127
- def resize(self, new_size: int, trim: bool = False) -> None:
128
- """Return this LGDO's datatype attribute string."""
129
-
130
- def append(self, val) -> None:
131
- "append val to end of LGDOCollection"
132
- self.insert(len(self), val)
133
-
134
- @abstractmethod
135
- def insert(self, i: int, val) -> None:
136
- "insert val into LGDOCollection at position i"
137
-
138
- @abstractmethod
139
- def replace(self, i: int, val) -> None:
140
- "replace item at position i with val in LGDOCollection"
141
-
142
- def clear(self, trim: bool = False) -> None:
143
- "set size of LGDOCollection to zero"
144
- self.resize(0, trim=trim)
lgdo/types/table.py CHANGED
@@ -19,7 +19,7 @@ from pandas.io.formats import format as fmt
19
19
 
20
20
  from .array import Array
21
21
  from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
22
- from .lgdo import LGDO, LGDOCollection
22
+ from .lgdo import LGDO
23
23
  from .scalar import Scalar
24
24
  from .struct import Struct
25
25
  from .vectorofvectors import VectorOfVectors
@@ -27,9 +27,13 @@ from .vectorofvectors import VectorOfVectors
27
27
  log = logging.getLogger(__name__)
28
28
 
29
29
 
30
- class Table(Struct, LGDOCollection):
30
+ class Table(Struct):
31
31
  """A special struct of arrays or subtable columns of equal length.
32
32
 
33
+ Holds onto an internal read/write location ``loc`` that is useful in
34
+ managing table I/O using functions like :meth:`push_row`, :meth:`is_full`,
35
+ and :meth:`clear`.
36
+
33
37
  Note
34
38
  ----
35
39
  If you write to a table and don't fill it up to its total size, be sure to
@@ -45,7 +49,7 @@ class Table(Struct, LGDOCollection):
45
49
 
46
50
  def __init__(
47
51
  self,
48
- col_dict: Mapping[str, LGDOCollection] | pd.DataFrame | ak.Array | None = None,
52
+ col_dict: Mapping[str, LGDO] | pd.DataFrame | ak.Array | None = None,
49
53
  size: int | None = None,
50
54
  attrs: Mapping[str, Any] | None = None,
51
55
  ) -> None:
@@ -61,7 +65,7 @@ class Table(Struct, LGDOCollection):
61
65
  col_dict
62
66
  instantiate this table using the supplied mapping of column names
63
67
  and array-like objects. Supported input types are: mapping of
64
- strings to LGDOCollections, :class:`pd.DataFrame` and :class:`ak.Array`.
68
+ strings to LGDOs, :class:`pd.DataFrame` and :class:`ak.Array`.
65
69
  Note 1: no copy is performed, the objects are used directly (unless
66
70
  :class:`ak.Array` is provided). Note 2: if `size` is not ``None``,
67
71
  all arrays will be resized to match it. Note 3: if the arrays have
@@ -81,8 +85,7 @@ class Table(Struct, LGDOCollection):
81
85
  col_dict = _ak_to_lgdo_or_col_dict(col_dict)
82
86
 
83
87
  # call Struct constructor
84
- Struct.__init__(self, obj_dict=col_dict)
85
- LGDOCollection.__init__(self, attrs=attrs)
88
+ super().__init__(obj_dict=col_dict, attrs=attrs)
86
89
 
87
90
  # if col_dict is not empty, set size according to it
88
91
  # if size is also supplied, resize all fields to match it
@@ -90,10 +93,13 @@ class Table(Struct, LGDOCollection):
90
93
  if col_dict is not None and len(col_dict) > 0:
91
94
  self.resize(new_size=size, do_warn=(size is None))
92
95
 
93
- # if no col_dict, just set the size
96
+ # if no col_dict, just set the size (default to 1024)
94
97
  else:
95
98
  self.size = size if size is not None else None
96
99
 
100
+ # always start at loc=0
101
+ self.loc = 0
102
+
97
103
  def datatype_name(self) -> str:
98
104
  return "table"
99
105
 
@@ -101,31 +107,7 @@ class Table(Struct, LGDOCollection):
101
107
  """Provides ``__len__`` for this array-like class."""
102
108
  return self.size
103
109
 
104
- def reserve_capacity(self, capacity: int | list) -> None:
105
- "Set size (number of rows) of internal memory buffer"
106
- if isinstance(capacity, int):
107
- for obj in self.values():
108
- obj.reserve_capacity(capacity)
109
- else:
110
- if len(capacity) != len(self.keys()):
111
- msg = "List of capacities must have same length as number of keys"
112
- raise ValueError(msg)
113
-
114
- for obj, cap in zip(self.values(), capacity):
115
- obj.reserve_capacity(cap)
116
-
117
- def get_capacity(self) -> int:
118
- "Get list of capacities for each key"
119
- return [v.get_capacity() for v in self.values()]
120
-
121
- def trim_capacity(self) -> int:
122
- "Set capacity to be minimum needed to support Array size"
123
- for v in self.values():
124
- v.trim_capacity()
125
-
126
- def resize(
127
- self, new_size: int | None = None, do_warn: bool = False, trim: bool = False
128
- ) -> None:
110
+ def resize(self, new_size: int | None = None, do_warn: bool = False) -> None:
129
111
  # if new_size = None, use the size from the first field
130
112
  for field, obj in self.items():
131
113
  if new_size is None:
@@ -137,20 +119,21 @@ class Table(Struct, LGDOCollection):
137
119
  f"with size {len(obj)} != {new_size}"
138
120
  )
139
121
  if isinstance(obj, Table):
140
- obj.resize(new_size, trim)
122
+ obj.resize(new_size)
141
123
  else:
142
- obj.resize(new_size, trim)
124
+ obj.resize(new_size)
143
125
  self.size = new_size
144
126
 
145
- def insert(self, i: int, vals: dict) -> None:
146
- "Insert vals into table at row i. Vals is a mapping from table key to val"
147
- for k, ar in self.items():
148
- ar.insert(i, vals[k])
149
- self.size += 1
127
+ def push_row(self) -> None:
128
+ self.loc += 1
150
129
 
151
- def add_field(
152
- self, name: str, obj: LGDOCollection, use_obj_size: bool = False
153
- ) -> None:
130
+ def is_full(self) -> bool:
131
+ return self.loc >= self.size
132
+
133
+ def clear(self) -> None:
134
+ self.loc = 0
135
+
136
+ def add_field(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None:
154
137
  """Add a field (column) to the table.
155
138
 
156
139
  Use the name "field" here to match the terminology used in
@@ -187,9 +170,7 @@ class Table(Struct, LGDOCollection):
187
170
  new_size = len(obj) if use_obj_size else self.size
188
171
  self.resize(new_size=new_size)
189
172
 
190
- def add_column(
191
- self, name: str, obj: LGDOCollection, use_obj_size: bool = False
192
- ) -> None:
173
+ def add_column(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None:
193
174
  """Alias for :meth:`.add_field` using table terminology 'column'."""
194
175
  self.add_field(name, obj, use_obj_size=use_obj_size)
195
176
 
@@ -220,10 +201,8 @@ class Table(Struct, LGDOCollection):
220
201
  set to ``False`` to turn off warnings associated with mismatched
221
202
  `loc` parameter or :meth:`add_column` warnings.
222
203
  """
223
- if len(other_table) != len(self) and do_warn:
224
- log.warning(
225
- f"len(other_table) ({len(other_table)}) != len(self) ({len(self)})"
226
- )
204
+ if other_table.loc != self.loc and do_warn:
205
+ log.warning(f"other_table.loc ({other_table.loc}) != self.loc({self.loc})")
227
206
  if cols is None:
228
207
  cols = other_table.keys()
229
208
  for name in cols: