reciprocalspaceship 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of reciprocalspaceship might be problematic. Click here for more details.

@@ -34,23 +34,27 @@ from __future__ import annotations
34
34
 
35
35
  import numbers
36
36
  import warnings
37
+ from functools import wraps
37
38
  from typing import Any, Sequence
38
39
 
39
40
  import numpy as np
40
- from pandas._libs import Timedelta, iNaT, lib
41
+ from pandas._libs import lib
41
42
  from pandas._libs import missing as libmissing
42
43
  from pandas._typing import ArrayLike, NpDtype, PositionalIndexer, Scalar, Shape, type_t
44
+ from pandas.compat import IS64, is_platform_windows
43
45
  from pandas.compat.numpy import function as nv
44
- from pandas.core import arraylike, missing, nanops, ops
46
+ from pandas.core import arraylike, missing, nanops
45
47
  from pandas.core.algorithms import factorize_array, isin, take
46
48
  from pandas.core.array_algos import masked_reductions
47
49
  from pandas.core.array_algos.quantile import quantile_with_mask
50
+ from pandas.core.array_algos.take import take_nd
48
51
  from pandas.core.arraylike import OpsMixin
49
52
  from pandas.core.arrays import ExtensionArray
50
53
  from pandas.core.dtypes.base import ExtensionDtype
51
54
  from pandas.core.dtypes.common import (
52
55
  is_bool,
53
56
  is_bool_dtype,
57
+ is_dict_like,
54
58
  is_dtype_equal,
55
59
  is_float,
56
60
  is_float_dtype,
@@ -63,6 +67,7 @@ from pandas.core.dtypes.common import (
63
67
  is_string_dtype,
64
68
  pandas_dtype,
65
69
  )
70
+ from pandas.core.dtypes.generic import ABCSeries
66
71
  from pandas.core.dtypes.inference import is_array_like
67
72
  from pandas.core.dtypes.missing import array_equivalent, isna, notna
68
73
  from pandas.core.indexers import check_array_indexer
@@ -71,6 +76,12 @@ from pandas.errors import AbstractMethodError
71
76
  from pandas.util._decorators import cache_readonly, doc
72
77
  from pandas.util._validators import validate_fillna_kwargs
73
78
 
79
+ # GH221: Handle import due to pandas change
80
+ try:
81
+ from pandas.core.ops import maybe_dispatch_ufunc_to_dunder_op
82
+ except ImportError:
83
+ from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
84
+
74
85
 
75
86
  class BaseMaskedDtype(ExtensionDtype):
76
87
  """
@@ -126,6 +137,13 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
126
137
  _truthy_value = Scalar # bool(_truthy_value) = True
127
138
  _falsey_value = Scalar # bool(_falsey_value) = False
128
139
 
140
@classmethod
def _simple_new(cls, values, mask):
    """
    Build an instance of ``cls`` directly from ``values`` and ``mask``.

    ``__init__`` is not invoked here: the two arguments are stored as-is on
    ``_data`` and ``_mask`` without any checks or copies performed by this
    method.
    """
    obj = BaseMaskedArray.__new__(cls)
    obj._data, obj._mask = values, mask
    return obj
146
+
129
147
  def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
130
148
  # values is supposed to already be validated in the subclass
131
149
  if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
@@ -163,7 +181,7 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
163
181
 
164
182
  @doc(ExtensionArray.fillna)
165
183
  def fillna(
166
- self: BaseMaskedArrayT, value=None, method=None, limit=None
184
+ self: BaseMaskedArrayT, value=None, method=None, limit=None, copy=True
167
185
  ) -> BaseMaskedArrayT:
168
186
  value, method = validate_fillna_kwargs(value, method)
169
187
 
@@ -188,10 +206,51 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
188
206
  return type(self)(new_values.T, new_mask.view(np.bool_).T)
189
207
  else:
190
208
  # fill with value
191
- new_values = self.copy()
209
+ if copy:
210
+ new_values = self.copy()
211
+ else:
212
+ new_values = self[:]
192
213
  new_values[mask] = value
193
214
  else:
194
- new_values = self.copy()
215
+ if copy:
216
+ new_values = self.copy()
217
+ else:
218
+ new_values = self[:]
219
+ return new_values
220
+
221
def _pad_or_backfill(self, *, method, limit=None, limit_area=None, copy=True):
    """
    Fill masked entries using the fill function selected by ``method``.

    Parameters
    ----------
    method : str
        Name of the fill method; resolved with ``missing.get_fill_func``.
    limit : int, optional
        Forwarded unchanged to the fill function.
    limit_area : {"inside", "outside"}, optional
        When given, entries that were masked before the fill are re-masked:
        for "inside", those before the first / after the last unmasked
        entry; for "outside", those strictly between them.
    copy : bool, default True
        If True, work on copies of ``_data``/``_mask`` and return a new
        array built with ``_simple_new``. If False, fill ``self`` in place
        and return ``self``.

    Returns
    -------
    Same type as ``self`` (``self`` itself when ``copy`` is False).
    """
    mask = self._mask

    if not mask.any():
        # Nothing is masked, so there is nothing to fill.
        return self.copy() if copy else self

    func = missing.get_fill_func(method, ndim=self.ndim)

    npvalues = self._data.T
    new_mask = mask.T
    if copy:
        npvalues = npvalues.copy()
        new_mask = new_mask.copy()
    elif limit_area is not None:
        # In-place mode: ``new_mask`` shares memory with ``self._mask`` and
        # the fill function overwrites it. Keep a snapshot of the pre-fill
        # mask so the limit_area logic below still sees the original gaps.
        mask = mask.copy()
    func(npvalues, limit=limit, mask=new_mask)

    if limit_area is not None and not mask.all():
        mask = mask.T
        neg_mask = ~mask
        # Positions of the first and last originally-unmasked entries.
        first = neg_mask.argmax()
        last = len(neg_mask) - neg_mask[::-1].argmax() - 1
        if limit_area == "inside":
            new_mask[:first] |= mask[:first]
            new_mask[last + 1 :] |= mask[last + 1 :]
        elif limit_area == "outside":
            new_mask[first + 1 : last] |= mask[first + 1 : last]

    if copy:
        return self._simple_new(npvalues.T, new_mask.T)
    return self
196
255
 
197
256
  def _coerce_to_array(self, values) -> tuple[np.ndarray, np.ndarray]:
@@ -395,7 +454,7 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
395
454
  return NotImplemented
396
455
 
397
456
  # for binary ops, use our custom dunder methods
398
- result = ops.maybe_dispatch_ufunc_to_dunder_op(
457
+ result = maybe_dispatch_ufunc_to_dunder_op(
399
458
  self, ufunc, method, *inputs, **kwargs
400
459
  )
401
460
  if result is not NotImplemented:
@@ -519,40 +578,45 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
519
578
 
520
579
  return BooleanArray(result, mask, copy=False)
521
580
 
522
- def _maybe_mask_result(self, result, mask, other, op_name: str):
581
+ def _maybe_mask_result(self, result, mask):
523
582
  """
524
583
  Parameters
525
584
  ----------
526
585
  result : array-like
527
586
  mask : array-like bool
528
- other : scalar or array-like
529
- op_name : str
530
587
  """
531
- # if we have a float operand we are by-definition
532
- # a float result
533
- # or our op is a divide
534
- if (
535
- (is_float_dtype(other) or is_float(other))
536
- or (op_name in ["rtruediv", "truediv"])
537
- or (is_float_dtype(self.dtype) and is_numeric_dtype(result.dtype))
538
- ):
588
+ if isinstance(result, tuple):
589
+ # i.e. divmod
590
+ div, mod = result
591
+ return (
592
+ self._maybe_mask_result(div, mask),
593
+ self._maybe_mask_result(mod, mask),
594
+ )
595
+
596
+ if result.dtype.kind == "f":
539
597
  from pandas.core.arrays import FloatingArray
540
598
 
541
599
  return FloatingArray(result, mask, copy=False)
542
600
 
543
- elif is_bool_dtype(result):
601
+ elif result.dtype.kind == "b":
544
602
  from pandas.core.arrays import BooleanArray
545
603
 
546
604
  return BooleanArray(result, mask, copy=False)
547
605
 
548
- elif result.dtype == "timedelta64[ns]":
606
+ elif lib.is_np_dtype(result.dtype, "m") and is_supported_unit(
607
+ get_unit_from_dtype(result.dtype)
608
+ ):
549
609
  # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
550
610
  from pandas.core.arrays import TimedeltaArray
551
611
 
552
- result[mask] = iNaT
553
- return TimedeltaArray._simple_new(result)
612
+ result[mask] = result.dtype.type("NaT")
613
+
614
+ if not isinstance(result, TimedeltaArray):
615
+ return TimedeltaArray._simple_new(result, dtype=result.dtype)
616
+
617
+ return result
554
618
 
555
- elif is_integer_dtype(result):
619
+ elif result.dtype.kind in "iu":
556
620
  from pandas.core.arrays import IntegerArray
557
621
 
558
622
  return IntegerArray(result, mask, copy=False)
@@ -756,31 +820,31 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
756
820
  out = np.asarray(res, dtype=np.float64)
757
821
  return out
758
822
 
759
- def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
760
- if name in {"any", "all", "min", "max", "sum", "prod"}:
761
- return getattr(self, name)(skipna=skipna, **kwargs)
762
-
763
- data = self._data
764
- mask = self._mask
765
-
766
- if name in {"mean"}:
767
- op = getattr(masked_reductions, name)
768
- result = op(data, mask, skipna=skipna, **kwargs)
769
- return result
770
-
771
- # coerce to a nan-aware float if needed
772
- # (we explicitly use NaN within reductions)
773
- if self._hasna:
774
- data = self.to_numpy("float64", na_value=np.nan)
775
-
776
- # median, var, std, skew, kurt, idxmin, idxmax
777
- op = getattr(nanops, "nan" + name)
778
- result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
823
+ def _reduce(
824
+ self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
825
+ ):
826
+ if name in {"any", "all", "min", "max", "sum", "prod", "mean", "var", "std"}:
827
+ result = getattr(self, name)(skipna=skipna, **kwargs)
828
+ else:
829
+ # median, skew, kurt, sem
830
+ data = self._data
831
+ mask = self._mask
832
+ op = getattr(nanops, f"nan{name}")
833
+ axis = kwargs.pop("axis", None)
834
+ result = op(data, axis=axis, skipna=skipna, mask=mask, **kwargs)
835
+
836
+ if keepdims:
837
+ if isna(result):
838
+ return self._wrap_na_result(name=name, axis=0, mask_size=(1,))
839
+ else:
840
+ result = result.reshape(1)
841
+ mask = np.zeros(1, dtype=bool)
842
+ return self._maybe_mask_result(result, mask)
779
843
 
780
- if np.isnan(result):
844
+ if isna(result):
781
845
  return libmissing.NA
782
-
783
- return result
846
+ else:
847
+ return result
784
848
 
785
849
  def _wrap_reduction_result(self, name: str, result, skipna, **kwargs):
786
850
  if isinstance(result, np.ndarray):
@@ -791,9 +855,28 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
791
855
  else:
792
856
  mask = self._mask.any(axis=axis)
793
857
 
794
- return self._maybe_mask_result(result, mask, other=None, op_name=name)
858
+ return self._maybe_mask_result(result, mask)
795
859
  return result
796
860
 
861
def _wrap_na_result(self, *, name, axis, mask_size):
    """
    Build a fully-masked result array for a reduction whose value is NA.

    Parameters
    ----------
    name : str
        Name of the reduction; used to pick the numpy dtype of the result.
    axis : int
        Accepted for signature compatibility; not used in this method.
    mask_size : tuple of int
        Shape of the all-True mask (and hence of the result).

    Returns
    -------
    Whatever ``self._maybe_mask_result`` returns for a placeholder value
    array of the chosen dtype and an all-True mask.
    """
    mask = np.ones(mask_size, dtype=bool)

    float_dtyp = "float32" if self.dtype == "Float32" else "float64"
    # "sem" belongs with the other float-valued statistics; without it an
    # all-NA sem on a 4-byte integer dtype would get an integer placeholder.
    if name in ("mean", "median", "var", "std", "skew", "kurt", "sem"):
        np_dtype = float_dtyp
    elif name in ("min", "max") or self.dtype.itemsize == 8:
        np_dtype = self.dtype.numpy_dtype.name
    else:
        # Remaining reductions: pick a platform-dependent integer width.
        is_windows_or_32bit = is_platform_windows() or not IS64
        int_dtyp = "int32" if is_windows_or_32bit else "int64"
        uint_dtyp = "uint32" if is_windows_or_32bit else "uint64"
        np_dtype = {
            "b": int_dtyp,
            "i": int_dtyp,
            "u": uint_dtyp,
            "f": float_dtyp,
        }[self.dtype.kind]

    # The value itself is a placeholder; every position is masked.
    value = np.array([1], dtype=np_dtype)
    return self._maybe_mask_result(value, mask=mask)
879
+
797
880
  def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
798
881
  nv.validate_sum((), kwargs)
799
882
 
@@ -828,6 +911,42 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
828
911
  "prod", result, skipna=skipna, axis=axis, **kwargs
829
912
  )
830
913
 
914
def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
    """
    Mask-aware mean of the array.

    Extra keyword arguments are only checked by ``nv.validate_mean``; they
    are not forwarded to the reduction. The raw result is handed to
    ``_wrap_reduction_result`` before being returned.
    """
    nv.validate_mean((), kwargs)
    data, mask = self._data, self._mask
    reduced = masked_reductions.mean(data, mask, skipna=skipna, axis=axis)
    return self._wrap_reduction_result("mean", reduced, skipna=skipna, axis=axis)
923
+
924
def var(
    self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
):
    """
    Mask-aware variance of the array.

    ``ddof`` is passed through to ``masked_reductions.var``. Extra keyword
    arguments are only checked by ``nv.validate_stat_ddof_func`` and are
    not forwarded. The raw result goes through ``_wrap_reduction_result``.
    """
    nv.validate_stat_ddof_func((), kwargs, fname="var")
    data, mask = self._data, self._mask
    reduced = masked_reductions.var(data, mask, skipna=skipna, axis=axis, ddof=ddof)
    return self._wrap_reduction_result("var", reduced, skipna=skipna, axis=axis)
936
+
937
def std(
    self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
):
    """
    Mask-aware standard deviation of the array.

    ``ddof`` is passed through to ``masked_reductions.std``. Extra keyword
    arguments are only checked by ``nv.validate_stat_ddof_func`` and are
    not forwarded. The raw result goes through ``_wrap_reduction_result``.
    """
    nv.validate_stat_ddof_func((), kwargs, fname="std")
    data, mask = self._data, self._mask
    reduced = masked_reductions.std(data, mask, skipna=skipna, axis=axis, ddof=ddof)
    return self._wrap_reduction_result("std", reduced, skipna=skipna, axis=axis)
949
+
831
950
  def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
832
951
  nv.validate_min((), kwargs)
833
952
  return masked_reductions.min(
@@ -846,6 +965,73 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
846
965
  axis=axis,
847
966
  )
848
967
 
968
def map(self, mapper, na_action=None):
    """
    Map values using an input mapping or function.

    Parameters
    ----------
    mapper : callable, dict-like or Series
        Dict-likes are turned into a lookup: a ``dict`` defining
        ``__missing__`` becomes a function, anything else becomes a
        ``DataSeries`` that is indexed with the array values.
    na_action : {None, "ignore"}, default None
        With "ignore", NA index labels are dropped from a Series mapper,
        and a callable mapper is not applied to NA values.

    Returns
    -------
    numpy.ndarray (or whatever ``take_nd`` returns for a Series mapper).

    Raises
    ------
    ValueError
        If ``na_action`` is neither None nor "ignore".
    """
    arr = self.to_numpy()
    if na_action not in (None, "ignore"):
        raise ValueError(
            f"na_action must either be 'ignore' or None, {na_action} was passed"
        )

    if is_dict_like(mapper):
        if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
            # A dict subclass with a default-value hook must be called per
            # element so that ``__missing__`` is honoured (GH #15999).
            defaulting_dict = mapper

            def _lookup(key):
                if isinstance(key, float) and np.isnan(key):
                    key = np.nan
                return defaulting_dict[key]

            mapper = _lookup
        else:
            # No default: convert to a Series for an index-based lookup.
            # An empty mapper is given the array's dtype explicitly.
            from reciprocalspaceship import DataSeries

            series_kwargs = {"dtype": arr.dtype} if len(mapper) == 0 else {}
            mapper = DataSeries(mapper, **series_kwargs)

    if isinstance(mapper, ABCSeries):
        if na_action == "ignore":
            mapper = mapper[mapper.index.notna()]

        # Look each value up in the mapper's index, then gather the results.
        positions = mapper.index.get_indexer(arr)
        return take_nd(mapper._values, positions)

    if len(arr) == 0:
        return arr.copy()

    # Callable mapper: apply it element-wise on Python objects.
    as_objects = arr.astype("object", copy=False)
    if na_action is None:
        mapped = lib.map_infer(as_objects, mapper, convert=True)
    else:
        na_mask = isna(as_objects).view(np.uint8)
        mapped = lib.map_infer_mask(as_objects, mapper, mask=na_mask, convert=True)

    # Output dtype is chosen from the *input* array, not from the mapped values.
    if is_float_dtype(arr):
        target = "float32"
    elif isna(arr).any():
        target = "object"
    else:
        target = "int32"
    return mapped.astype(target, copy=False)
1034
+
849
1035
  def any(self, *, skipna: bool = True, **kwargs):
850
1036
  """
851
1037
  Return whether any element is truthy.
@@ -1125,11 +1311,11 @@ class NumericArray(BaseMaskedArray):
1125
1311
  if op_name == "divmod":
1126
1312
  div, mod = result
1127
1313
  return (
1128
- self._maybe_mask_result(div, mask, other, "floordiv"),
1129
- self._maybe_mask_result(mod, mask, other, "mod"),
1314
+ self._maybe_mask_result(div, mask),
1315
+ self._maybe_mask_result(mod, mask),
1130
1316
  )
1131
1317
 
1132
- return self._maybe_mask_result(result, mask, other, op_name)
1318
+ return self._maybe_mask_result(result, mask)
1133
1319
 
1134
1320
  _HANDLED_TYPES = (np.ndarray, numbers.Number)
1135
1321
 
@@ -1169,3 +1355,11 @@ class NumericArray(BaseMaskedArray):
1169
1355
  nv.validate_round(args, kwargs)
1170
1356
  values = np.round(self._data, decimals=decimals, **kwargs)
1171
1357
  return type(self)(values, self._mask.copy())
1358
+
1359
+
1360
@wraps(libmissing.is_numeric_na)
def is_numeric_na(values):
    # float32/int32 ndarrays are answered directly with numpy; every other
    # input is delegated to the wrapped pandas implementation.
    # (No docstring here: ``wraps`` copies the wrapped function's one.)
    takes_numpy_path = isinstance(values, np.ndarray) and values.dtype in (
        "float32",
        "int32",
    )
    if not takes_numpy_path:
        return libmissing.is_numeric_na(values)
    return np.isnan(values)