reciprocalspaceship 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of reciprocalspaceship might be problematic. Click here for more details.

Files changed (41) hide show
  1. reciprocalspaceship/VERSION +1 -1
  2. reciprocalspaceship/__init__.py +1 -0
  3. reciprocalspaceship/algorithms/scale_merged_intensities.py +8 -7
  4. reciprocalspaceship/commandline/mtzdump.py +0 -1
  5. reciprocalspaceship/dataset.py +7 -1
  6. reciprocalspaceship/decorators.py +2 -2
  7. reciprocalspaceship/dtypes/__init__.py +16 -14
  8. reciprocalspaceship/dtypes/base.py +21 -266
  9. reciprocalspaceship/dtypes/floating.py +691 -0
  10. reciprocalspaceship/dtypes/integer.py +537 -0
  11. reciprocalspaceship/dtypes/internals.py +1365 -0
  12. reciprocalspaceship/io/__init__.py +7 -1
  13. reciprocalspaceship/io/crystfel.py +568 -234
  14. reciprocalspaceship/io/mtz.py +25 -0
  15. reciprocalspaceship/stats/completeness.py +0 -1
  16. reciprocalspaceship/utils/__init__.py +6 -1
  17. reciprocalspaceship/utils/asu.py +6 -0
  18. reciprocalspaceship/utils/cell.py +5 -0
  19. reciprocalspaceship/utils/stats.py +5 -7
  20. reciprocalspaceship/utils/structurefactors.py +5 -0
  21. reciprocalspaceship/utils/units.py +14 -4
  22. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/METADATA +26 -28
  23. reciprocalspaceship-1.0.2.dist-info/RECORD +58 -0
  24. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/WHEEL +1 -1
  25. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/entry_points.txt +0 -1
  26. tests/test_dataseries.py +1 -1
  27. tests/test_dataset_preserve_attributes.py +3 -9
  28. reciprocalspaceship/dtypes/anomalousdifference.py +0 -25
  29. reciprocalspaceship/dtypes/batch.py +0 -25
  30. reciprocalspaceship/dtypes/hklindex.py +0 -23
  31. reciprocalspaceship/dtypes/intensity.py +0 -47
  32. reciprocalspaceship/dtypes/m_isym.py +0 -25
  33. reciprocalspaceship/dtypes/mtzint.py +0 -23
  34. reciprocalspaceship/dtypes/mtzreal.py +0 -25
  35. reciprocalspaceship/dtypes/phase.py +0 -50
  36. reciprocalspaceship/dtypes/stddev.py +0 -69
  37. reciprocalspaceship/dtypes/structurefactor.py +0 -72
  38. reciprocalspaceship/dtypes/weight.py +0 -25
  39. reciprocalspaceship-1.0.0.dist-info/RECORD +0 -66
  40. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/LICENSE +0 -0
  41. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1365 @@
1
+ # BSD 3-Clause License
2
+ #
3
+ # Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
4
+ # All rights reserved.
5
+ #
6
+ # Copyright (c) 2011-2023, Open source contributors.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions are met:
10
+ #
11
+ # * Redistributions of source code must retain the above copyright notice, this
12
+ # list of conditions and the following disclaimer.
13
+ #
14
+ # * Redistributions in binary form must reproduce the above copyright notice,
15
+ # this list of conditions and the following disclaimer in the documentation
16
+ # and/or other materials provided with the distribution.
17
+ #
18
+ # * Neither the name of the copyright holder nor the names of its
19
+ # contributors may be used to endorse or promote products derived from
20
+ # this software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
26
+ # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30
+ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+
33
+ from __future__ import annotations
34
+
35
+ import numbers
36
+ import warnings
37
+ from functools import wraps
38
+ from typing import Any, Sequence
39
+
40
+ import numpy as np
41
+ from pandas._libs import lib
42
+ from pandas._libs import missing as libmissing
43
+ from pandas._typing import ArrayLike, NpDtype, PositionalIndexer, Scalar, Shape, type_t
44
+ from pandas.compat import IS64, is_platform_windows
45
+ from pandas.compat.numpy import function as nv
46
+ from pandas.core import arraylike, missing, nanops
47
+ from pandas.core.algorithms import factorize_array, isin, take
48
+ from pandas.core.array_algos import masked_reductions
49
+ from pandas.core.array_algos.quantile import quantile_with_mask
50
+ from pandas.core.array_algos.take import take_nd
51
+ from pandas.core.arraylike import OpsMixin
52
+ from pandas.core.arrays import ExtensionArray
53
+ from pandas.core.dtypes.base import ExtensionDtype
54
+ from pandas.core.dtypes.common import (
55
+ is_bool,
56
+ is_bool_dtype,
57
+ is_dict_like,
58
+ is_dtype_equal,
59
+ is_float,
60
+ is_float_dtype,
61
+ is_integer,
62
+ is_integer_dtype,
63
+ is_list_like,
64
+ is_numeric_dtype,
65
+ is_object_dtype,
66
+ is_scalar,
67
+ is_string_dtype,
68
+ pandas_dtype,
69
+ )
70
+ from pandas.core.dtypes.generic import ABCSeries
71
+ from pandas.core.dtypes.inference import is_array_like
72
+ from pandas.core.dtypes.missing import array_equivalent, isna, notna
73
+ from pandas.core.indexers import check_array_indexer
74
+ from pandas.core.ops import invalid_comparison
75
+ from pandas.errors import AbstractMethodError
76
+ from pandas.util._decorators import cache_readonly, doc
77
+ from pandas.util._validators import validate_fillna_kwargs
78
+
79
+ # GH221: Handle import due to pandas change
80
+ try:
81
+ from pandas.core.ops import maybe_dispatch_ufunc_to_dunder_op
82
+ except ImportError:
83
+ from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
84
+
85
+
86
class BaseMaskedDtype(ExtensionDtype):
    """
    Base class for the dtypes used by BaseMaskedArray subclasses.

    Concrete dtypes are expected to provide ``name`` and ``type``; the
    numpy-level properties below are derived from ``type``.
    """

    name: str
    base = None
    type: type

    # Scalar reported for missing entries.
    na_value = libmissing.NA

    @cache_readonly
    def numpy_dtype(self) -> np.dtype:
        """Return the numpy dtype built from ``self.type``."""
        scalar_type = self.type
        return np.dtype(scalar_type)

    @cache_readonly
    def kind(self) -> str:
        """Return the kind code of the underlying numpy dtype."""
        underlying = self.numpy_dtype
        return underlying.kind

    @cache_readonly
    def itemsize(self) -> int:
        """Return the number of bytes per element of the numpy dtype."""
        underlying = self.numpy_dtype
        return underlying.itemsize

    @classmethod
    def construct_array_type(cls) -> type_t[BaseMaskedArray]:
        """
        Return the array type associated with this dtype.

        Not implemented on the base class.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError
121
+
122
+
123
+ class BaseMaskedArray(OpsMixin, ExtensionArray):
124
+ """
125
+ Base class for masked arrays (which use _data and _mask to store the data).
126
+
127
+ numpy based
128
+ """
129
+
130
+ # The value used to fill '_data' to avoid upcasting
131
+ _internal_fill_value: Scalar
132
+ # our underlying data and mask are each ndarrays
133
+ _data: np.ndarray
134
+ _mask: np.ndarray
135
+
136
+ # Fill values used for any/all
137
+ _truthy_value = Scalar # bool(_truthy_value) = True
138
+ _falsey_value = Scalar # bool(_falsey_value) = False
139
+
140
+ @classmethod
141
+ def _simple_new(cls, values, mask):
142
+ result = BaseMaskedArray.__new__(cls)
143
+ result._data = values
144
+ result._mask = mask
145
+ return result
146
+
147
+ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
148
+ # values is supposed to already be validated in the subclass
149
+ if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
150
+ raise TypeError(
151
+ "mask should be boolean numpy array. Use "
152
+ "the 'pd.array' function instead"
153
+ )
154
+ if values.shape != mask.shape:
155
+ raise ValueError("values.shape must match mask.shape")
156
+
157
+ if copy:
158
+ values = values.copy()
159
+ mask = mask.copy()
160
+
161
+ self._data = values
162
+ self._mask = mask
163
+
164
@property
def dtype(self) -> BaseMaskedDtype:
    """
    Dtype of this array.

    Not implemented on the base class: accessing it here always raises
    AbstractMethodError.
    """
    raise AbstractMethodError(self)
167
+
168
+ def __getitem__(
169
+ self: BaseMaskedArrayT, item: PositionalIndexer
170
+ ) -> BaseMaskedArrayT | Any:
171
+ item = check_array_indexer(self, item)
172
+
173
+ newmask = self._mask[item]
174
+ if is_bool(newmask):
175
+ # This is a scalar indexing
176
+ if newmask:
177
+ return self.dtype.na_value
178
+ return self._data[item]
179
+
180
+ return type(self)(self._data[item], newmask)
181
+
182
+ @doc(ExtensionArray.fillna)
183
+ def fillna(
184
+ self: BaseMaskedArrayT, value=None, method=None, limit=None, copy=True
185
+ ) -> BaseMaskedArrayT:
186
+ value, method = validate_fillna_kwargs(value, method)
187
+
188
+ mask = self._mask
189
+
190
+ if is_array_like(value):
191
+ if len(value) != len(self):
192
+ raise ValueError(
193
+ f"Length of 'value' does not match. Got ({len(value)}) "
194
+ f" expected {len(self)}"
195
+ )
196
+ value = value[mask]
197
+
198
+ if mask.any():
199
+ if method is not None:
200
+ func = missing.get_fill_func(method, ndim=self.ndim)
201
+ new_values, new_mask = func(
202
+ self._data.copy().T,
203
+ limit=limit,
204
+ mask=mask.copy().T,
205
+ )
206
+ return type(self)(new_values.T, new_mask.view(np.bool_).T)
207
+ else:
208
+ # fill with value
209
+ if copy:
210
+ new_values = self.copy()
211
+ else:
212
+ new_values = self[:]
213
+ new_values[mask] = value
214
+ else:
215
+ if copy:
216
+ new_values = self.copy()
217
+ else:
218
+ new_values = self[:]
219
+ return new_values
220
+
221
+ def _pad_or_backfill(self, *, method, limit=None, limit_area=None, copy=True):
222
+ mask = self._mask
223
+
224
+ if mask.any():
225
+ func = missing.get_fill_func(method, ndim=self.ndim)
226
+
227
+ npvalues = self._data.T
228
+ new_mask = mask.T
229
+ if copy:
230
+ npvalues = npvalues.copy()
231
+ new_mask = new_mask.copy()
232
+ func(npvalues, limit=limit, mask=new_mask)
233
+
234
+ if limit_area is not None and not mask.all():
235
+ mask = mask.T
236
+ neg_mask = ~mask
237
+ first = neg_mask.argmax()
238
+ last = len(neg_mask) - neg_mask[::-1].argmax() - 1
239
+ if limit_area == "inside":
240
+ new_mask[:first] |= mask[:first]
241
+ new_mask[last + 1 :] |= mask[last + 1 :]
242
+ elif limit_area == "outside":
243
+ new_mask[first + 1 : last] |= mask[first + 1 : last]
244
+
245
+ if copy:
246
+ return self._simple_new(npvalues.T, new_mask.T)
247
+ else:
248
+ return self
249
+ else:
250
+ if copy:
251
+ new_values = self.copy()
252
+ else:
253
+ new_values = self
254
+ return new_values
255
+
256
def _coerce_to_array(self, values) -> tuple[np.ndarray, np.ndarray]:
    """
    Convert ``values`` into a ``(data, mask)`` pair of ndarrays.

    Not implemented on the base class: always raises AbstractMethodError.
    ``__setitem__`` calls this to prepare the value being assigned.
    """
    raise AbstractMethodError(self)
258
+
259
+ def __setitem__(self, key, value) -> None:
260
+ _is_scalar = is_scalar(value)
261
+ if _is_scalar:
262
+ value = [value]
263
+ value, mask = self._coerce_to_array(value)
264
+
265
+ if _is_scalar:
266
+ value = value[0]
267
+ mask = mask[0]
268
+
269
+ key = check_array_indexer(self, key)
270
+ self._data[key] = value
271
+ self._mask[key] = mask
272
+
273
+ def __iter__(self):
274
+ if self.ndim == 1:
275
+ for i in range(len(self)):
276
+ if self._mask[i]:
277
+ yield self.dtype.na_value
278
+ else:
279
+ yield self._data[i]
280
+ else:
281
+ for i in range(len(self)):
282
+ yield self[i]
283
+
284
def __len__(self) -> int:
    """Return the length of the underlying data array."""
    return len(self._data)
286
+
287
@property
def shape(self) -> Shape:
    """Shape of the underlying data array."""
    return self._data.shape
290
+
291
@property
def ndim(self) -> int:
    """Number of dimensions of the underlying data array."""
    return self._data.ndim
294
+
295
def swapaxes(self: BaseMaskedArrayT, axis1, axis2) -> BaseMaskedArrayT:
    """Return a new array with ``axis1`` and ``axis2`` interchanged in both data and mask."""
    cls = type(self)
    return cls(
        self._data.swapaxes(axis1, axis2),
        self._mask.swapaxes(axis1, axis2),
    )
299
+
300
def delete(self: BaseMaskedArrayT, loc, axis: int = 0) -> BaseMaskedArrayT:
    """Return a new array with the entries at ``loc`` removed from both data and mask."""
    cls = type(self)
    return cls(
        np.delete(self._data, loc, axis=axis),
        np.delete(self._mask, loc, axis=axis),
    )
304
+
305
def reshape(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
    """Return a new array whose data and mask are reshaped with the given arguments."""
    cls = type(self)
    return cls(
        self._data.reshape(*args, **kwargs),
        self._mask.reshape(*args, **kwargs),
    )
309
+
310
def ravel(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
    """Return a flattened array; data and mask are ravelled with the same arguments."""
    # TODO: need to make sure we have the same order for data/mask
    cls = type(self)
    return cls(
        self._data.ravel(*args, **kwargs),
        self._mask.ravel(*args, **kwargs),
    )
315
+
316
+ @property
317
+ def T(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
318
+ return type(self)(self._data.T, self._mask.T)
319
+
320
+ def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
321
+ return type(self)(~self._data, self._mask.copy())
322
+
323
def to_numpy(
    self,
    dtype: npt.DTypeLike | None = None,
    copy: bool = False,
    na_value: Scalar = lib.no_default,
) -> np.ndarray:
    """
    Convert to a NumPy Array.

    By default converts to an object-dtype NumPy array. Specify the `dtype` and
    `na_value` keywords to customize the conversion.

    Parameters
    ----------
    dtype : dtype, default object
        The numpy dtype to convert to.
    copy : bool, default False
        Whether to ensure that the returned value is not a view on
        the array. Note that ``copy=False`` does not *ensure* that
        ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
        a copy is made, even if not strictly necessary. A no-copy result
        is typically only possible when no missing values are present and
        `dtype` is the equivalent numpy dtype.
    na_value : scalar, optional
        Scalar missing value indicator to use in numpy array. Defaults
        to the native missing value indicator of this array (pd.NA).

    Returns
    -------
    numpy.ndarray

    Raises
    ------
    ValueError
        If missing values are present, `dtype` is neither object nor a
        string dtype, and `na_value` is left at pd.NA.

    Examples
    --------
    An object-dtype is the default result

    >>> a = pd.array([True, False, pd.NA], dtype="boolean")
    >>> a.to_numpy()
    array([True, False, <NA>], dtype=object)

    When no missing values are present, an equivalent dtype can be used.

    >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
    array([ True, False])
    >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
    array([1, 2])

    However, requesting such dtype will raise a ValueError if
    missing values are present and the default missing value :attr:`NA`
    is used.

    >>> a = pd.array([True, False, pd.NA], dtype="boolean")
    >>> a
    <BooleanArray>
    [True, False, <NA>]
    Length: 3, dtype: boolean

    >>> a.to_numpy(dtype="bool")
    Traceback (most recent call last):
    ...
    ValueError: cannot convert to 'bool'-dtype NumPy array with missing values. Specify an appropriate 'na_value' for this dtype.

    Specify a valid `na_value` instead

    >>> a.to_numpy(dtype="bool", na_value=False)
    array([ True, False, False])
    """
    if na_value is lib.no_default:
        na_value = libmissing.NA
    if dtype is None:
        dtype = object
    if self._hasna:
        # pd.NA can only be stored in object or string arrays.
        if (
            not is_object_dtype(dtype)
            and not is_string_dtype(dtype)
            and na_value is libmissing.NA
        ):
            raise ValueError(
                f"cannot convert to '{dtype}'-dtype NumPy array "
                "with missing values. Specify an appropriate 'na_value' "
                "for this dtype."
            )
        # don't pass copy to astype -> always need a copy since we are mutating
        data = self._data.astype(dtype)
        data[self._mask] = na_value
    else:
        data = self._data.astype(dtype, copy=copy)
    return data
410
+
411
+ def astype(self, dtype, copy: bool = True) -> ArrayLike:
412
+ dtype = pandas_dtype(dtype)
413
+
414
+ if is_dtype_equal(dtype, self.dtype):
415
+ if copy:
416
+ return self.copy()
417
+ return self
418
+
419
+ # if we are astyping to another nullable masked dtype, we can fastpath
420
+ if isinstance(dtype, BaseMaskedDtype):
421
+ # TODO deal with NaNs for MTZFloatArray case
422
+ data = self._data.astype(dtype.numpy_dtype, copy=copy)
423
+ # mask is copied depending on whether the data was copied, and
424
+ # not directly depending on the `copy` keyword
425
+ mask = self._mask if data is self._data else self._mask.copy()
426
+ cls = dtype.construct_array_type()
427
+ return cls(data, mask, copy=False)
428
+
429
+ if isinstance(dtype, ExtensionDtype):
430
+ eacls = dtype.construct_array_type()
431
+ return eacls._from_sequence(self, dtype=dtype, copy=copy)
432
+
433
+ raise NotImplementedError("subclass must implement astype to np.dtype")
434
+
435
+ __array_priority__ = 1000 # higher than ndarray so ops dispatch to us
436
+
437
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
    """
    Array interface: return the values as a NumPy array.

    Delegates to ``to_numpy(dtype=dtype)``. With ``dtype=None`` that yields
    an object array, which preserves the scalar values.
    """
    return self.to_numpy(dtype=dtype)
443
+
444
+ _HANDLED_TYPES: tuple[type, ...]
445
+
446
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    # NumPy ufunc protocol hook.
    #
    # For MaskedArray inputs, we apply the ufunc to ._data
    # and mask the result. Returns NotImplemented when any input or ``out``
    # operand is of a type this array does not handle.

    out = kwargs.get("out", ())

    for x in inputs + out:
        if not isinstance(x, self._HANDLED_TYPES + (BaseMaskedArray,)):
            return NotImplemented

    # for binary ops, use our custom dunder methods
    result = maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if result is not NotImplemented:
        return result

    if "out" in kwargs:
        # e.g. test_ufunc_with_out
        return arraylike.dispatch_ufunc_with_out(
            self, ufunc, method, *inputs, **kwargs
        )

    if method == "reduce":
        result = arraylike.dispatch_reduction_ufunc(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

    # Combined mask: an output slot is missing if any masked input is
    # missing there. The ufunc itself runs on the raw data arrays.
    mask = np.zeros(len(self), dtype=bool)
    inputs2 = []
    for x in inputs:
        if isinstance(x, BaseMaskedArray):
            mask |= x._mask
            inputs2.append(x._data)
        else:
            inputs2.append(x)

    def reconstruct(x):
        # Wrap a raw ndarray result back into a masked array, picking the
        # wrapper from the result dtype. ``x`` is treated as an array here;
        # NOTE(review): a scalar result from a "reduce" call is returned
        # directly further below rather than passed through this helper.
        from pandas.core.arrays import BooleanArray

        if is_bool_dtype(x.dtype):
            m = mask.copy()
            return BooleanArray(x, m)
        elif is_integer_dtype(x.dtype):
            m = mask.copy()
            return type(self)(x, m)
        elif is_float_dtype(x.dtype):
            m = mask.copy()
            if x.dtype == np.float16:
                # reached in e.g. np.sqrt on BooleanArray
                # we don't support float16
                x = x.astype(np.float32)
            return type(self)(x, m)
        else:
            # Any other dtype: returned as a plain ndarray with NaN written
            # at the masked positions.
            x[mask] = np.nan
        return x

    result = getattr(ufunc, method)(*inputs2, **kwargs)
    if ufunc.nout > 1:
        # e.g. np.divmod
        return tuple(reconstruct(x) for x in result)
    elif method == "reduce":
        # e.g. np.add.reduce; test_ufunc_reduce_raises
        # Any missing entry makes the whole reduction missing.
        if self._mask.any():
            return self._na_value
        return result
    else:
        return reconstruct(result)
518
+
519
def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.

    The data and mask are handed to ``pyarrow.array`` together with the
    requested ``type``. pyarrow is imported lazily, inside this method.
    """
    import pyarrow as pa

    return pa.array(self._data, mask=self._mask, type=type)
526
+
527
@property
def _hasna(self) -> bool:
    """True if any entry of the mask is set, i.e. any value is missing."""
    # Note: this is expensive right now! The hope is that we can
    # make this faster by having an optional mask, but not have to change
    # source code using it..

    # error: Incompatible return value type (got "bool_", expected "bool")
    return self._mask.any()  # type: ignore[return-value]
535
+
536
def _cmp_method(self, other, op) -> BooleanArray:
    """
    Element-wise comparison of this array with ``other`` using ``op``.

    Parameters
    ----------
    other : scalar, list-like or BaseMaskedArray
        Right-hand operand. List-likes must be 1-d and of the same length.
    op : callable
        Comparison operator; its ``__name__`` selects the ndarray dunder
        method that is invoked on the data.

    Returns
    -------
    BooleanArray
        Masked wherever this array or a masked ``other`` is missing, and
        everywhere when ``other`` is pd.NA.

    Raises
    ------
    NotImplementedError
        If a list-like ``other`` has more than one dimension.
    ValueError
        If a list-like ``other`` has a different length.
    """
    from pandas.core.arrays import BooleanArray

    mask = None

    if isinstance(other, BaseMaskedArray):
        other, mask = other._data, other._mask

    elif is_list_like(other):
        other = np.asarray(other)
        if other.ndim > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")

    if other is libmissing.NA:
        # numpy does not handle pd.NA well as "other" scalar (it returns
        # a scalar False instead of an array)
        # This may be fixed by NA.__array_ufunc__. Revisit this check
        # once that's implemented.
        result = np.zeros(self._data.shape, dtype="bool")
        mask = np.ones(self._data.shape, dtype="bool")
    else:
        with warnings.catch_warnings():
            # numpy may show a FutureWarning:
            #     elementwise comparison failed; returning scalar instead,
            #     but in the future will perform elementwise comparison
            # before returning NotImplemented. We fall back to the correct
            # behavior today, so that should be fine to ignore.
            warnings.filterwarnings("ignore", "elementwise", FutureWarning)
            with np.errstate(all="ignore"):
                method = getattr(self._data, f"__{op.__name__}__")
                result = method(other)

            if result is NotImplemented:
                result = invalid_comparison(self._data, other, op)

    # nans propagate
    if mask is None:
        mask = self._mask.copy()
    else:
        mask = self._mask | mask

    return BooleanArray(result, mask, copy=False)
580
+
581
+ def _maybe_mask_result(self, result, mask):
582
+ """
583
+ Parameters
584
+ ----------
585
+ result : array-like
586
+ mask : array-like bool
587
+ """
588
+ if isinstance(result, tuple):
589
+ # i.e. divmod
590
+ div, mod = result
591
+ return (
592
+ self._maybe_mask_result(div, mask),
593
+ self._maybe_mask_result(mod, mask),
594
+ )
595
+
596
+ if result.dtype.kind == "f":
597
+ from pandas.core.arrays import FloatingArray
598
+
599
+ return FloatingArray(result, mask, copy=False)
600
+
601
+ elif result.dtype.kind == "b":
602
+ from pandas.core.arrays import BooleanArray
603
+
604
+ return BooleanArray(result, mask, copy=False)
605
+
606
+ elif lib.is_np_dtype(result.dtype, "m") and is_supported_unit(
607
+ get_unit_from_dtype(result.dtype)
608
+ ):
609
+ # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
610
+ from pandas.core.arrays import TimedeltaArray
611
+
612
+ result[mask] = result.dtype.type("NaT")
613
+
614
+ if not isinstance(result, TimedeltaArray):
615
+ return TimedeltaArray._simple_new(result, dtype=result.dtype)
616
+
617
+ return result
618
+
619
+ elif result.dtype.kind in "iu":
620
+ from pandas.core.arrays import IntegerArray
621
+
622
+ return IntegerArray(result, mask, copy=False)
623
+
624
+ else:
625
+ result[mask] = np.nan
626
+ return result
627
+
628
def isna(self) -> np.ndarray:
    """Return a copy of the boolean mask; True marks a missing value."""
    return self._mask.copy()
630
+
631
@property
def _na_value(self):
    """Missing-value scalar for this array, taken from ``self.dtype.na_value``."""
    return self.dtype.na_value
634
+
635
@property
def nbytes(self) -> int:
    """Total bytes held by the data array plus the mask array."""
    return self._data.nbytes + self._mask.nbytes
638
+
639
+ @classmethod
640
+ def _concat_same_type(
641
+ cls: type[BaseMaskedArrayT],
642
+ to_concat: Sequence[BaseMaskedArrayT],
643
+ axis: int = 0,
644
+ ) -> BaseMaskedArrayT:
645
+ data = np.concatenate([x._data for x in to_concat], axis=axis)
646
+ mask = np.concatenate([x._mask for x in to_concat], axis=axis)
647
+ return cls(data, mask)
648
+
649
def take(
    self: BaseMaskedArrayT,
    indexer,
    *,
    allow_fill: bool = False,
    fill_value: Scalar | None = None,
    axis: int = 0,
) -> BaseMaskedArrayT:
    """
    Take elements by position.

    Parameters
    ----------
    indexer : sequence of int
        Positions to take. When ``allow_fill`` is True and ``fill_value``
        is not missing, entries equal to -1 are filled with ``fill_value``.
    allow_fill : bool, default False
        Passed through to the module-level ``take`` helper.
    fill_value : scalar, optional
        Value for filled positions; a missing ``fill_value`` leaves those
        positions masked.
    axis : int, default 0
        Axis along which to take.

    Returns
    -------
    Same type as ``self``.
    """
    # A missing fill_value is replaced by ``_internal_fill_value`` for the
    # data array, to avoid upcasting
    data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value
    result = take(
        self._data,
        indexer,
        fill_value=data_fill_value,
        allow_fill=allow_fill,
        axis=axis,
    )

    mask = take(
        self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis
    )

    # if we are filling
    # we only fill where the indexer is null
    # not existing missing values
    # TODO(jreback) what if we have a non-na float as a fill value?
    if allow_fill and notna(fill_value):
        fill_mask = np.asarray(indexer) == -1
        result[fill_mask] = fill_value
        # Filled slots were masked by the take above (fill_value=True), so
        # XOR clears exactly those and leaves the other mask bits alone.
        mask = mask ^ fill_mask

    return type(self)(result, mask, copy=False)
682
+
683
# error: Return type "BooleanArray" of "isin" incompatible with return type
# "ndarray" in supertype "ExtensionArray"
def isin(self, values) -> BooleanArray:  # type: ignore[override]
    """
    Test each element for membership in ``values``.

    Returns a BooleanArray with no missing entries: a missing element of
    this array is reported as True only if ``values`` is object-dtype and
    contains this dtype's ``na_value`` (compared by identity).
    """
    from pandas.core.arrays import BooleanArray

    # algorithms.isin will eventually convert values to an ndarray, so no extra
    # cost to doing it here first
    values_arr = np.asarray(values)
    result = isin(self._data, values_arr)

    if self._hasna:
        values_have_NA = is_object_dtype(values_arr.dtype) and any(
            val is self.dtype.na_value for val in values_arr
        )

        # For now, NA does not propagate so set result according to presence of NA,
        # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
        result[self._mask] = values_have_NA

    # The result mask is all False: the output never contains NA.
    mask = np.zeros(self._data.shape, dtype=bool)
    return BooleanArray(result, mask, copy=False)
704
+
705
def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Return a new array of the same type holding copies of the data and mask."""
    return type(self)(self._data.copy(), self._mask.copy(), copy=False)
710
+
711
@doc(ExtensionArray.factorize)
def factorize(
    self, use_na_sentinel: int = -1, na_sentinel: int = -1
) -> tuple[np.ndarray, ExtensionArray]:
    # No docstring here on purpose: the @doc decorator supplies it.
    #
    # Both keyword spellings are accepted; whenever they differ the value of
    # ``na_sentinel`` wins.
    # NOTE(review): with the defaults, a caller passing
    # ``use_na_sentinel=False`` is overridden by ``na_sentinel=-1`` here, so
    # that request appears to be ignored -- confirm against the pandas
    # versions this file targets.
    # Give na_sentinel precedence
    if use_na_sentinel != na_sentinel:
        use_na_sentinel = na_sentinel

    arr = self._data
    mask = self._mask

    codes, uniques = factorize_array(arr, use_na_sentinel, mask=mask)

    # the hashtables don't handle all different types of bits
    uniques = uniques.astype(self.dtype.numpy_dtype, copy=False)
    # The uniques are wrapped with an all-False mask (no missing entries).
    uniques_ea = type(self)(uniques, np.zeros(len(uniques), dtype=bool))
    return codes, uniques_ea
728
+
729
def value_counts(self, dropna: bool = True) -> Series:
    """
    Returns a Series containing counts of each unique value.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of missing values.

    Returns
    -------
    counts : Series
        Built as an ``rs.DataSeries`` with an object-dtype index of the
        unique values. With ``dropna=False`` a final entry keyed by the
        dtype's ``na_value`` holds the number of missing values.

    See Also
    --------
    Series.value_counts
    """
    from pandas import Index

    import reciprocalspaceship as rs

    # compute counts on the data with no nans
    data = self._data[~self._mask]
    value_counts = Index(data).value_counts()
    array = value_counts.values

    # TODO(extension)
    # if we allow Index to hold an ExtensionArray
    # this is easier
    index = value_counts.index.astype(object)

    # if we want nans, count the mask
    if not dropna:
        # TODO(extension)
        # appending to an Index *always* infers
        # w/o passing the dtype
        array = np.append(array, [self._mask.sum()])
        index = Index(
            np.concatenate(
                [index.values, np.array([self.dtype.na_value], dtype=object)]
            ),
            dtype=object,
        )

    return rs.DataSeries(array, index=index)
774
+
775
+ @doc(ExtensionArray.equals)
776
+ def equals(self, other) -> bool:
777
+ if type(self) != type(other):
778
+ return False
779
+ if other.dtype != self.dtype:
780
+ return False
781
+
782
+ # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT
783
+ # equal.
784
+ if not np.array_equal(self._mask, other._mask):
785
+ return False
786
+
787
+ left = self._data[~self._mask]
788
+ right = other._data[~other._mask]
789
+ return array_equivalent(left, right, dtype_equal=True)
790
+
791
def _quantile(
    self: BaseMaskedArrayT, qs: npt.NDArray[np.float64], interpolation: str
) -> BaseMaskedArrayT:
    """
    Dispatch to quantile_with_mask, needed because we do not have
    _from_factorized.

    Parameters
    ----------
    qs : ndarray of float64
        Quantiles to compute; passed through to ``quantile_with_mask``.
    interpolation : str
        Interpolation method; passed through to ``quantile_with_mask``.

    Returns
    -------
    Array of this type, or a float64 ndarray when rebuilding this type from
    the result raises TypeError.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    # quantile_with_mask is given 2-d inputs, so lift the 1-d data and mask.
    mask = np.atleast_2d(np.asarray(self.isna()))
    npvalues: np.ndarray = np.atleast_2d(np.asarray(self))

    res = quantile_with_mask(
        npvalues,
        mask=mask,
        fill_value=self.dtype.na_value,
        qs=qs,
        interpolation=interpolation,
    )
    assert res.ndim == 2
    assert res.shape[0] == 1
    res = res[0]
    try:
        out = type(self)._from_sequence(res, dtype=self.dtype)
    except TypeError:
        # GH#42626: not able to safely cast Int64
        # for floating point output
        out = np.asarray(res, dtype=np.float64)
    return out
822
+
823
def _reduce(
    self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
):
    """
    Apply the reduction called ``name`` to this array.

    Parameters
    ----------
    name : str
        Reduction name. The names listed in the set below are dispatched to
        the method of the same name on this array; any other name is looked
        up as ``nan<name>`` in ``nanops``.
    skipna : bool, default True
        Passed through to the reduction.
    keepdims : bool, default False
        If True, return a length-1 masked array instead of a scalar.
    **kwargs
        Forwarded to the reduction; ``axis`` is popped for the nanops path.

    Returns
    -------
    Scalar (``pd.NA`` for a missing result), or a length-1 array when
    ``keepdims`` is True.
    """
    if name in {"any", "all", "min", "max", "sum", "prod", "mean", "var", "std"}:
        result = getattr(self, name)(skipna=skipna, **kwargs)
    else:
        # median, skew, kurt, sem
        data = self._data
        mask = self._mask
        op = getattr(nanops, f"nan{name}")
        axis = kwargs.pop("axis", None)
        result = op(data, axis=axis, skipna=skipna, mask=mask, **kwargs)

    if keepdims:
        if isna(result):
            # Missing result: build a fully-masked length-1 array.
            return self._wrap_na_result(name=name, axis=0, mask_size=(1,))
        else:
            result = result.reshape(1)
            mask = np.zeros(1, dtype=bool)
            return self._maybe_mask_result(result, mask)

    if isna(result):
        return libmissing.NA
    else:
        return result
848
+
849
+ def _wrap_reduction_result(self, name: str, result, skipna, **kwargs):
850
+ if isinstance(result, np.ndarray):
851
+ axis = kwargs["axis"]
852
+ if skipna:
853
+ # we only retain mask for all-NA rows/columns
854
+ mask = self._mask.all(axis=axis)
855
+ else:
856
+ mask = self._mask.any(axis=axis)
857
+
858
+ return self._maybe_mask_result(result, mask)
859
+ return result
860
+
861
def _wrap_na_result(self, *, name, axis, mask_size):
    """
    Build a fully-masked result array for a reduction that produced NA.

    Parameters
    ----------
    name : str
        Reduction name; used to choose the numpy dtype of the result.
    axis : int
        Not used in this method's body.
    mask_size : tuple of int
        Shape of the all-True mask.

    Returns
    -------
    The value returned by ``_maybe_mask_result`` for a one-element
    placeholder array and the all-True mask.
    """
    mask = np.ones(mask_size, dtype=bool)

    float_dtyp = "float32" if self.dtype == "Float32" else "float64"
    if name in ["mean", "median", "var", "std", "skew", "kurt"]:
        np_dtype = float_dtyp
    elif name in ["min", "max"] or self.dtype.itemsize == 8:
        np_dtype = self.dtype.numpy_dtype.name
    else:
        # Remaining reductions on narrower dtypes: the result width depends
        # on the platform.
        is_windows_or_32bit = is_platform_windows() or not IS64
        int_dtyp = "int32" if is_windows_or_32bit else "int64"
        uint_dtyp = "uint32" if is_windows_or_32bit else "uint64"
        np_dtype = {"b": int_dtyp, "i": int_dtyp, "u": uint_dtyp, "f": float_dtyp}[
            self.dtype.kind
        ]

    # Placeholder payload; every position is masked.
    value = np.array([1], dtype=np_dtype)
    return self._maybe_mask_result(value, mask=mask)
879
+
880
def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Sum the unmasked values.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.sum``.
    min_count : int, default 0
        Forwarded to ``masked_reductions.sum``.
    axis : int or None, default 0
        Axis to reduce over.
    **kwargs
        NumPy-compatibility keywords, checked by ``nv.validate_sum``.
        ``out`` is tolerated only when it is None.

    Raises
    ------
    NotImplementedError
        If a non-None ``out`` is supplied.
    """
    nv.validate_sum((), kwargs)

    # np.sum forwards ``out=None``; drop it so it never reaches the wrapper.
    if kwargs.pop("out", None) is not None:
        raise NotImplementedError

    reduce_opts = {"skipna": skipna, "min_count": min_count, "axis": axis}
    total = masked_reductions.sum(self._data, self._mask, **reduce_opts)
    return self._wrap_reduction_result(
        "sum", total, skipna=skipna, axis=axis, **kwargs
    )
900
+
901
def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Multiply the unmasked values together.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.prod``.
    min_count : int, default 0
        Forwarded to ``masked_reductions.prod``.
    axis : int or None, default 0
        Axis to reduce over.
    **kwargs
        NumPy-compatibility keywords, checked by ``nv.validate_prod`` and
        then passed on to ``_wrap_reduction_result``.
    """
    nv.validate_prod((), kwargs)
    reduce_opts = {"skipna": skipna, "min_count": min_count, "axis": axis}
    product = masked_reductions.prod(self._data, self._mask, **reduce_opts)
    return self._wrap_reduction_result(
        "prod", product, skipna=skipna, axis=axis, **kwargs
    )
913
+
914
def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
    """
    Average the unmasked values.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.mean``.
    axis : int or None, default 0
        Axis to reduce over.
    **kwargs
        NumPy-compatibility keywords, checked by ``nv.validate_mean``; they
        are not forwarded any further.
    """
    nv.validate_mean((), kwargs)
    reduce_opts = {"skipna": skipna, "axis": axis}
    average = masked_reductions.mean(self._data, self._mask, **reduce_opts)
    return self._wrap_reduction_result("mean", average, **reduce_opts)
923
+
924
def var(
    self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
):
    """
    Variance of the unmasked values.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.var``.
    axis : int or None, default 0
        Axis to reduce over.
    ddof : int, default 1
        Delta degrees of freedom, forwarded to ``masked_reductions.var``.
    **kwargs
        NumPy-compatibility keywords, checked by
        ``nv.validate_stat_ddof_func``; they are not forwarded any further.
    """
    nv.validate_stat_ddof_func((), kwargs, fname="var")
    reduce_opts = {"skipna": skipna, "axis": axis}
    variance = masked_reductions.var(
        self._data, self._mask, ddof=ddof, **reduce_opts
    )
    return self._wrap_reduction_result("var", variance, **reduce_opts)
936
+
937
def std(
    self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
):
    """
    Standard deviation of the unmasked values.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.std``.
    axis : int or None, default 0
        Axis to reduce over.
    ddof : int, default 1
        Delta degrees of freedom, forwarded to ``masked_reductions.std``.
    **kwargs
        NumPy-compatibility keywords, checked by
        ``nv.validate_stat_ddof_func``; they are not forwarded any further.
    """
    nv.validate_stat_ddof_func((), kwargs, fname="std")
    reduce_opts = {"skipna": skipna, "axis": axis}
    deviation = masked_reductions.std(
        self._data, self._mask, ddof=ddof, **reduce_opts
    )
    return self._wrap_reduction_result("std", deviation, **reduce_opts)
949
+
950
+ def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
951
+ nv.validate_min((), kwargs)
952
+ return masked_reductions.min(
953
+ self._data,
954
+ self._mask,
955
+ skipna=skipna,
956
+ axis=axis,
957
+ )
958
+
959
+ def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
960
+ nv.validate_max((), kwargs)
961
+ return masked_reductions.max(
962
+ self._data,
963
+ self._mask,
964
+ skipna=skipna,
965
+ axis=axis,
966
+ )
967
+
968
def map(self, mapper, na_action=None):
    """
    Map values using an input mapping or function.

    Parameters
    ----------
    mapper : dict-like or callable
        Dict-likes are turned into a lookup: a ``dict`` subclass defining
        ``__missing__`` becomes a function, any other dict-like becomes a
        ``DataSeries``. Callables are applied element by element.
    na_action : {None, "ignore"}, default None
        With a series lookup, "ignore" drops entries whose index is NA.
        With a callable, "ignore" skips elements flagged by ``isna``.

    Returns
    -------
    np.ndarray or array-like
        For a series lookup, the output of ``take_nd``. For a callable, an
        ndarray cast according to the *input* values: "float32" for float
        input, "object" if the input holds NA, "int32" otherwise. An empty
        input with a callable returns a copy of the numpy values.

    Raises
    ------
    ValueError
        If ``na_action`` is neither None nor "ignore".
    """
    raw = self.to_numpy()
    if na_action not in (None, "ignore"):
        msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
        raise ValueError(msg)

    if is_dict_like(mapper):
        if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
            # A dict subclass with a default-value hook must be consulted
            # key by key, so wrap it in a function (GH #15999).
            defaulting_dict = mapper

            def _lookup(key):
                if isinstance(key, float) and np.isnan(key):
                    key = np.nan
                return defaulting_dict[key]

            mapper = _lookup
        else:
            # No default hook: a series gives an efficient index lookup.
            # An empty mapper is given the dtype of the numpy values.
            from reciprocalspaceship import DataSeries

            series_opts = {} if len(mapper) else {"dtype": raw.dtype}
            mapper = DataSeries(mapper, **series_opts)

    if isinstance(mapper, ABCSeries):
        lookup = mapper[mapper.index.notna()] if na_action == "ignore" else mapper
        # Positions of each value in the lookup index, then gather.
        positions = lookup.index.get_indexer(raw)
        return take_nd(lookup._values, positions)

    if len(raw) == 0:
        return raw.copy()

    # Element-wise application operates on Python objects.
    boxed = raw.astype("object", copy=False)
    if na_action is None:
        mapped = lib.map_infer(boxed, mapper, convert=True)
    else:
        na_flags = isna(boxed).view(np.uint8)
        mapped = lib.map_infer_mask(boxed, mapper, mask=na_flags, convert=True)

    if is_float_dtype(raw):
        target = "float32"
    elif isna(raw).any():
        target = "object"
    else:
        target = "int32"
    return mapped.astype(target, copy=False)
1034
+
1035
def any(self, *, skipna: bool = True, **kwargs):
    """
    Return whether any element is truthy.

    Masked positions are overwritten with ``self._falsey_value`` on a copy
    of the data before the check, so they can never produce a hit.

    Parameters
    ----------
    skipna : bool, default True
        When True the plain result of the check is returned, so an array
        that is entirely masked behaves like an empty one. When False, a
        result that is not truthy is replaced by ``self.dtype.na_value`` if
        the array is non-empty and has at least one masked element
        (Kleene-style logic: the missing value could have been truthy).
    **kwargs
        ``axis`` is discarded; the remaining keywords are checked by
        ``nv.validate_any`` and otherwise have no effect.

    Returns
    -------
    bool or ``self.dtype.na_value``
    """
    # ``axis`` is tolerated for NumPy compatibility only.
    kwargs.pop("axis", None)
    nv.validate_any((), kwargs)

    filled = self._data.copy()
    np.putmask(filled, self._mask, self._falsey_value)  # type: ignore[arg-type]
    found = filled.any()

    # The answer is only uncertain when nothing truthy was found and a
    # masked element exists.
    if skipna or found or len(self) == 0 or not self._mask.any():
        return found
    return self.dtype.na_value
1114
+
1115
def all(self, *, skipna: bool = True, **kwargs):
    """
    Return whether all elements are truthy.

    Masked positions are overwritten with ``self._truthy_value`` on a copy
    of the data before the check, so they can never produce a miss.

    Parameters
    ----------
    skipna : bool, default True
        When True the plain result of the check is returned, so an array
        that is entirely masked behaves like an empty one. When False, a
        truthy result is replaced by ``self.dtype.na_value`` if the array
        is non-empty and has at least one masked element (Kleene-style
        logic: the missing value could have been falsey).
    **kwargs
        ``axis`` is discarded; the remaining keywords are checked by
        ``nv.validate_all`` and otherwise have no effect.

    Returns
    -------
    bool or ``self.dtype.na_value``
    """
    # ``axis`` is tolerated for NumPy compatibility only.
    kwargs.pop("axis", None)
    nv.validate_all((), kwargs)

    filled = self._data.copy()
    np.putmask(filled, self._mask, self._truthy_value)  # type: ignore[arg-type]
    holds = filled.all()

    # The answer is only uncertain when every visible element was truthy
    # and a masked element exists.
    if skipna or not holds or len(self) == 0 or not self._mask.any():
        return holds
    return self.dtype.na_value
1195
+
1196
+
1197
class NumericDtype(BaseMaskedDtype):
    """Masked numeric dtype base providing construction from pyarrow data."""

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BaseMaskedArray:
        """
        Construct MTZIntegerArray/MTZFloatArray from pyarrow Array/ChunkedArray.

        Parameters
        ----------
        array : pyarrow.Array or pyarrow.ChunkedArray
            Source data. If its type differs from the pyarrow equivalent of
            ``self.type`` it is cast, provided it is integer, unsigned or
            floating.

        Returns
        -------
        An instance of ``self.construct_array_type()``.

        Raises
        ------
        TypeError
            If the source type differs from the target and is not of kind
            "i", "u" or "f".
        """
        import pyarrow
        from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

        array_class = self.construct_array_type()
        target_type = pyarrow.from_numpy_dtype(self.type)

        if not array.type.equals(target_type):
            # Item-size and int<->uint<->float conversions are let through
            # (GH#31896); everything else (e.g. strings) is rejected.
            source_kind = pandas_dtype(array.type.to_pandas_dtype()).kind
            if source_kind not in ("i", "u", "f"):
                raise TypeError(
                    f"Expected array of {self} type, got {array.type} instead"
                )
            array = array.cast(target_type)

        # A plain Array is handled as a single chunk.
        chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks

        pieces = []
        for chunk in chunks:
            values, helper_mask = pyarrow_array_to_numpy_and_mask(
                chunk, dtype=self.type
            )
            # The helper's mask is inverted before use (presumably it flags
            # valid entries while the array class expects missing ones).
            pieces.append(array_class(values.copy(), ~helper_mask, copy=False))

        if len(pieces) > 1:
            return array_class._concat_same_type(pieces)
        if pieces:
            # Single chunk: skip the extra copy made by _concat_same_type.
            return pieces[0]
        return array_class(
            np.array([], dtype=self.numpy_dtype), np.array([], dtype=np.bool_)
        )
1244
+
1245
+
1246
class NumericArray(BaseMaskedArray):
    """
    Base class for MTZIntegerArray and MTZFloatArray.
    """

    def _arith_method(self, other, op):
        """
        Apply the binary arithmetic operator ``op`` between this array and
        ``other`` while propagating missing values.

        Parameters
        ----------
        other : NumericArray, list-like, float, int or NA
            Right-hand operand. List-likes must be 1-d, numeric and of the
            same length as this array.
        op : callable
            Operator function; its ``__name__`` selects the special cases
            ("pow", "rpow", "truediv" variants, "divmod"/"rdivmod").

        Returns
        -------
        The value of ``self._maybe_mask_result(result, mask)``, or a pair
        of such values when the operator yields a (quotient, remainder)
        tuple.

        Raises
        ------
        NotImplementedError
            If ``other`` has more than one dimension.
        ValueError
            If a list-like ``other`` has a different length.
        TypeError
            If ``other`` is not numeric.
        """
        op_name = op.__name__
        omask = None

        if getattr(other, "ndim", 0) > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")

        if isinstance(other, NumericArray):
            other, omask = other._data, other._mask
        elif is_list_like(other):
            other = np.asarray(other)
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")
            if len(self) != len(other):
                raise ValueError("Lengths must match")
            if not (is_float_dtype(other) or is_integer_dtype(other)):
                raise TypeError("can only perform ops with numeric values")
        elif not (is_float(other) or is_integer(other) or other is libmissing.NA):
            raise TypeError("can only perform ops with numeric values")

        other_is_na = other is libmissing.NA

        # Combined mask: a result slot is missing if either operand is.
        if omask is None:
            mask = self._mask.copy()
            if other_is_na:
                mask |= True
        else:
            mask = self._mask | omask

        # Powers with a known outcome are unmasked even if the other
        # operand is missing.
        if op_name == "pow":
            # 1 ** x is 1.
            mask = np.where((self._data == 1) & ~self._mask, False, mask)
            # x ** 0 is 1.
            if omask is not None:
                mask = np.where((other == 0) & ~omask, False, mask)
            elif not other_is_na:
                mask = np.where(other == 0, False, mask)
        elif op_name == "rpow":
            # 1 ** x is 1.
            if omask is not None:
                mask = np.where((other == 1) & ~omask, False, mask)
            elif not other_is_na:
                mask = np.where(other == 1, False, mask)
            # x ** 0 is 1.
            mask = np.where((self._data == 0) & ~self._mask, False, mask)

        if other_is_na:
            # The data is irrelevant because the mask is all-True; only its
            # dtype and shape matter.
            result = np.ones_like(self._data)
            if "truediv" in op_name and self.dtype.kind != "f":
                # Division should end up with a floating dtype.
                result = result.astype(np.float64)
            elif op_name in ("divmod", "rdivmod"):
                # divmod yields (quotient, remainder); build a pair so the
                # unpacking below works for arrays of any length.
                result = (result, result.copy())
        else:
            with np.errstate(all="ignore"):
                result = op(self._data, other)

        # divmod-style operators return a tuple; wrap both halves.
        if isinstance(result, tuple):
            div, mod = result
            return (
                self._maybe_mask_result(div, mask),
                self._maybe_mask_result(mod, mask),
            )

        return self._maybe_mask_result(result, mask)

    # Operand types accepted besides instances of this class.
    _HANDLED_TYPES = (np.ndarray, numbers.Number)

    def __neg__(self):
        """Return a new array with negated data and a copy of the mask."""
        return type(self)(-self._data, self._mask.copy())

    def __pos__(self):
        """Return a copy of this array."""
        return self.copy()

    def __abs__(self):
        """Return a new array with absolute data and a copy of the mask."""
        return type(self)(abs(self._data), self._mask.copy())

    def round(self: T, decimals: int = 0, *args, **kwargs) -> T:
        """
        Round each value in the array a to the given number of decimals.

        Parameters
        ----------
        decimals : int, default 0
            Number of decimal places to round to. If decimals is negative,
            it specifies the number of positions to the left of the decimal point.
        *args, **kwargs
            Additional arguments and keywords have no effect but might be
            accepted for compatibility with NumPy.

        Returns
        -------
        NumericArray
            Rounded values of the NumericArray.

        See Also
        --------
        numpy.around : Round values of an np.array.
        DataFrame.round : Round values of a DataFrame.
        Series.round : Round values of a Series.
        """
        nv.validate_round(args, kwargs)
        values = np.round(self._data, decimals=decimals, **kwargs)
        return type(self)(values, self._mask.copy())
1358
+
1359
+
1360
@wraps(libmissing.is_numeric_na)
def is_numeric_na(values):
    # Fast path: float32/int32 ndarrays are checked directly with np.isnan.
    # A numpy dtype compares equal to its string name, hence the tuple test.
    # Everything else is delegated to the wrapped pandas routine, whose
    # metadata (name, docstring) this function takes on via ``wraps``.
    use_isnan = isinstance(values, np.ndarray) and values.dtype in (
        "float32",
        "int32",
    )
    return np.isnan(values) if use_isnan else libmissing.is_numeric_na(values)