reciprocalspaceship 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of reciprocalspaceship might be problematic. Click here for more details.

Files changed (41)
  1. reciprocalspaceship/VERSION +1 -1
  2. reciprocalspaceship/__init__.py +1 -0
  3. reciprocalspaceship/algorithms/scale_merged_intensities.py +8 -7
  4. reciprocalspaceship/commandline/mtzdump.py +0 -1
  5. reciprocalspaceship/dataset.py +7 -1
  6. reciprocalspaceship/decorators.py +2 -2
  7. reciprocalspaceship/dtypes/__init__.py +16 -14
  8. reciprocalspaceship/dtypes/base.py +21 -266
  9. reciprocalspaceship/dtypes/floating.py +691 -0
  10. reciprocalspaceship/dtypes/integer.py +537 -0
  11. reciprocalspaceship/dtypes/internals.py +1365 -0
  12. reciprocalspaceship/io/__init__.py +7 -1
  13. reciprocalspaceship/io/crystfel.py +568 -234
  14. reciprocalspaceship/io/mtz.py +25 -0
  15. reciprocalspaceship/stats/completeness.py +0 -1
  16. reciprocalspaceship/utils/__init__.py +6 -1
  17. reciprocalspaceship/utils/asu.py +6 -0
  18. reciprocalspaceship/utils/cell.py +5 -0
  19. reciprocalspaceship/utils/stats.py +5 -7
  20. reciprocalspaceship/utils/structurefactors.py +5 -0
  21. reciprocalspaceship/utils/units.py +14 -4
  22. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/METADATA +26 -28
  23. reciprocalspaceship-1.0.2.dist-info/RECORD +58 -0
  24. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/WHEEL +1 -1
  25. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/entry_points.txt +0 -1
  26. tests/test_dataseries.py +1 -1
  27. tests/test_dataset_preserve_attributes.py +3 -9
  28. reciprocalspaceship/dtypes/anomalousdifference.py +0 -25
  29. reciprocalspaceship/dtypes/batch.py +0 -25
  30. reciprocalspaceship/dtypes/hklindex.py +0 -23
  31. reciprocalspaceship/dtypes/intensity.py +0 -47
  32. reciprocalspaceship/dtypes/m_isym.py +0 -25
  33. reciprocalspaceship/dtypes/mtzint.py +0 -23
  34. reciprocalspaceship/dtypes/mtzreal.py +0 -25
  35. reciprocalspaceship/dtypes/phase.py +0 -50
  36. reciprocalspaceship/dtypes/stddev.py +0 -69
  37. reciprocalspaceship/dtypes/structurefactor.py +0 -72
  38. reciprocalspaceship/dtypes/weight.py +0 -25
  39. reciprocalspaceship-1.0.0.dist-info/RECORD +0 -66
  40. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/LICENSE +0 -0
  41. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- 1.0.0
1
+ 1.0.2
@@ -37,6 +37,7 @@ from reciprocalspaceship.dtypes import StructureFactorAmplitudeDtype # F
37
37
  from reciprocalspaceship.dtypes import WeightDtype # W
38
38
  from reciprocalspaceship.dtypes import summarize_mtz_dtypes
39
39
  from reciprocalspaceship.io import (
40
+ read_cif,
40
41
  read_crystfel,
41
42
  read_csv,
42
43
  read_mtz,
@@ -185,6 +185,7 @@ def scale_merged_intensities(
185
185
  mean_intensity_method="isotropic",
186
186
  bins=100,
187
187
  bw=2.0,
188
+ minimum_sigma=-np.inf,
188
189
  ):
189
190
  """
190
191
  Scales merged intensities using Bayesian statistics in order to
@@ -240,6 +241,9 @@ def scale_merged_intensities(
240
241
  parameter controls the distance that each reflection impacts in
241
242
  reciprocal space. Only affects output if mean_intensity_method is
242
243
  \"anisotropic\".
244
+ minimum_sigma : float
245
+ Minimum value imposed on Sigma (default: -np.inf, that is: no minimum).
246
+
243
247
 
244
248
  Returns
245
249
  -------
@@ -281,14 +285,11 @@ def scale_merged_intensities(
281
285
  I, Sig = ds[intensity_key].to_numpy(), ds[sigma_key].to_numpy()
282
286
  if mean_intensity_method == "isotropic":
283
287
  dHKL = ds["dHKL"].to_numpy(dtype=np.float64)
284
- Sigma = (
285
- mean_intensity_by_resolution(I / multiplicity, dHKL, bins) * multiplicity
286
- )
288
+ Sigma = mean_intensity_by_resolution(I / multiplicity, dHKL, bins)
287
289
  elif mean_intensity_method == "anisotropic":
288
- Sigma = (
289
- mean_intensity_by_miller_index(I / multiplicity, ds.get_hkls(), bw)
290
- * multiplicity
291
- )
290
+ Sigma = mean_intensity_by_miller_index(I / multiplicity, ds.get_hkls(), bw)
291
+ Sigma = np.clip(Sigma, a_min=minimum_sigma, a_max=np.inf)
292
+ Sigma = Sigma * multiplicity
292
293
 
293
294
  # Initialize outputs
294
295
  ds[outputI] = 0.0
@@ -81,7 +81,6 @@ def summarize(mtz, precision):
81
81
 
82
82
 
83
83
  def main():
84
-
85
84
  # Parse commandline arguments
86
85
  parser = parse_arguments()
87
86
  args = parser.parse_args()
@@ -12,7 +12,8 @@ from reciprocalspaceship.decorators import (
12
12
  range_indexed,
13
13
  spacegroupify,
14
14
  )
15
- from reciprocalspaceship.dtypes.base import MTZDtype, MTZInt32Dtype
15
+ from reciprocalspaceship.dtypes.base import MTZDtype
16
+ from reciprocalspaceship.dtypes.integer import MTZInt32Dtype
16
17
  from reciprocalspaceship.utils import (
17
18
  apply_to_hkl,
18
19
  assign_with_binedges,
@@ -1233,6 +1234,11 @@ class DataSet(pd.DataFrame):
1233
1234
  # Compute new HKLs and phase shifts
1234
1235
  hkls = dataset.get_hkls()
1235
1236
  compressed_hkls, inverse = np.unique(hkls, axis=0, return_inverse=True)
1237
+
1238
+ # The behavior of np.unique changed with v2.0. This block maintains v1 compatibility
1239
+ if inverse.shape[-1] == 1:
1240
+ inverse = inverse.squeeze(-1)
1241
+
1236
1242
  asu_hkls, isym, phi_coeff, phi_shift = hkl_to_asu(
1237
1243
  compressed_hkls, dataset.spacegroup, return_phase_shifts=True
1238
1244
  )
@@ -100,7 +100,7 @@ def spacegroupify(func=None, *sg_args):
100
100
  for arg in sg_args:
101
101
  if arg in bargs.arguments:
102
102
  bargs.arguments[arg] = _convert_spacegroup(bargs.arguments[arg])
103
- return f(**bargs.arguments)
103
+ return f(*bargs.args, **bargs.kwargs)
104
104
 
105
105
  return wrapped
106
106
 
@@ -155,7 +155,7 @@ def cellify(func=None, *cell_args):
155
155
  for arg in cell_args:
156
156
  if arg in bargs.arguments:
157
157
  bargs.arguments[arg] = _convert_unitcell(bargs.arguments[arg])
158
- return f(**bargs.arguments)
158
+ return f(*bargs.args, **bargs.kwargs)
159
159
 
160
160
  return wrapped
161
161
 
@@ -1,22 +1,24 @@
1
- from reciprocalspaceship.dtypes.anomalousdifference import AnomalousDifferenceDtype
2
- from reciprocalspaceship.dtypes.batch import BatchDtype
3
- from reciprocalspaceship.dtypes.hklindex import HKLIndexDtype
4
- from reciprocalspaceship.dtypes.intensity import FriedelIntensityDtype, IntensityDtype
5
- from reciprocalspaceship.dtypes.m_isym import M_IsymDtype
6
- from reciprocalspaceship.dtypes.mtzint import MTZIntDtype
7
- from reciprocalspaceship.dtypes.mtzreal import MTZRealDtype
8
- from reciprocalspaceship.dtypes.phase import HendricksonLattmanDtype, PhaseDtype
9
- from reciprocalspaceship.dtypes.stddev import (
1
+ from reciprocalspaceship.dtypes.floating import (
2
+ AnomalousDifferenceDtype,
3
+ FriedelIntensityDtype,
4
+ FriedelStructureFactorAmplitudeDtype,
5
+ HendricksonLattmanDtype,
6
+ IntensityDtype,
7
+ MTZRealDtype,
8
+ NormalizedStructureFactorAmplitudeDtype,
9
+ PhaseDtype,
10
10
  StandardDeviationDtype,
11
11
  StandardDeviationFriedelIDtype,
12
12
  StandardDeviationFriedelSFDtype,
13
- )
14
- from reciprocalspaceship.dtypes.structurefactor import (
15
- FriedelStructureFactorAmplitudeDtype,
16
- NormalizedStructureFactorAmplitudeDtype,
17
13
  StructureFactorAmplitudeDtype,
14
+ WeightDtype,
15
+ )
16
+ from reciprocalspaceship.dtypes.integer import (
17
+ BatchDtype,
18
+ HKLIndexDtype,
19
+ M_IsymDtype,
20
+ MTZIntDtype,
18
21
  )
19
- from reciprocalspaceship.dtypes.weight import WeightDtype
20
22
 
21
23
  # ExtensionDtypes are appended to the end of the Dtype registry.
22
24
  # Since we want to overwrite a few of the one-letter strings, we need
@@ -1,282 +1,37 @@
1
- import numpy as np
2
- import pandas as pd
3
- from pandas.api.extensions import ExtensionDtype
4
- from pandas.core.arrays.floating import FloatingArray
5
- from pandas.core.arrays.floating import coerce_to_array as coerce_to_float_array
6
- from pandas.core.arrays.integer import IntegerArray
7
- from pandas.core.arrays.integer import coerce_to_array as coerce_to_int_array
8
- from pandas.core.dtypes.common import (
9
- is_float,
10
- is_float_dtype,
11
- is_integer_dtype,
12
- is_numeric_dtype,
13
- )
14
- from pandas.util._decorators import cache_readonly
1
+ from reciprocalspaceship.dtypes.internals import NumericArray, NumericDtype
15
2
 
16
3
 
17
- class MTZDtype(ExtensionDtype):
4
+ class MTZDtype(NumericDtype):
18
5
  """Base ExtensionDtype for implementing persistent MTZ data types"""
19
6
 
20
- def is_friedel_dtype(self):
21
- """Returns whether MTZ dtype represents a Friedel dtype"""
22
- raise NotImplementedError
23
-
24
- @classmethod
25
- def construct_from_string(cls, string):
26
- if not isinstance(string, str):
27
- raise TypeError(
28
- f"'construct_from_string' expects a string, got {type(string)}"
29
- )
30
- elif string != cls.name and string != cls.mtztype:
31
- raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
32
- return cls()
33
-
34
-
35
- class MTZInt32Dtype(MTZDtype, pd.Int32Dtype):
36
- """Base ExtensionDtype class for MTZDtype backed by pd.Int32Dtype"""
37
-
38
- def _get_common_dtype(self, dtypes):
39
- if len(set(dtypes)) == 1:
40
- # only itself
41
- return self
42
- else:
43
- return super(pd.Int32Dtype, self)._get_common_dtype(dtypes)
44
-
45
- def __repr__(self):
46
- return self.name
47
-
48
-
49
- class MTZIntegerArray(IntegerArray):
50
- """Base ExtensionArray class for integer arrays backed by pd.IntegerArray"""
51
-
52
- def _maybe_mask_result(self, result, mask, other, op_name: str):
53
- """
54
- Parameters
55
- ----------
56
- result : array-like
57
- mask : array-like bool
58
- other : scalar or array-like
59
- op_name : str
60
- """
61
- if is_integer_dtype(result):
62
- return type(self)(result, mask, copy=False)
63
- return super()._maybe_mask_result(
64
- result=result, mask=mask, other=other, op_name=op_name
65
- )
7
+ def __repr__(self) -> str:
8
+ return f"{self.name}"
66
9
 
67
- @cache_readonly
68
- def dtype(self):
69
- return self._dtype
10
+ @property
11
+ def _is_numeric(self) -> bool:
12
+ return True
70
13
 
71
14
  @classmethod
72
- def _from_sequence(cls, scalars, dtype=None, copy=False):
73
- values, mask = coerce_to_int_array(scalars, dtype=dtype, copy=copy)
74
- return cls(values, mask)
75
-
76
- @classmethod
77
- def _from_factorized(cls, values, original):
78
- values, mask = coerce_to_int_array(values, dtype=original.dtype)
79
- return cls(values, mask)
80
-
81
- def reshape(self, *args, **kwargs):
82
- return self._data.reshape(*args, **kwargs)
83
-
84
- def to_numpy(self, dtype=None, copy=False, **kwargs):
15
+ def construct_array_type(cls):
85
16
  """
86
- Convert to a NumPy Array.
87
-
88
- If `dtype` is None and array does not contain any NaNs, this method
89
- will return a np.int32 array. Otherwise it will return a ndarray of
90
- object dtype.
91
-
92
- Parameters
93
- ----------
94
- dtype : dtype, default np.int32 or np.float32
95
- The numpy dtype to return
96
- copy : bool, default False
97
- Whether to ensure that the returned value is a not a view on
98
- the array. Note that ``copy=False`` does not *ensure* that
99
- ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
100
- a copy is made, even if not strictly necessary. This is typically
101
- only possible when no missing values are present and `dtype`
102
- is the equivalent numpy dtype.
17
+ Return the array type associated with this dtype.
103
18
 
104
19
  Returns
105
20
  -------
106
- numpy.ndarray
21
+ type
107
22
  """
108
- if dtype is None and not self._hasna:
109
- dtype = np.int32
110
-
111
- # na_value is hard-coded to np.nan -- this prevents other functions
112
- # from resetting it.
113
- return super().to_numpy(dtype=dtype, copy=copy, na_value=np.nan)
114
-
115
- def value_counts(self, dropna=True):
116
- """
117
- Returns a DataSeries containing counts of each category.
118
- Every category will have an entry, even those with a count of 0.
119
-
120
- Parameters
121
- ----------
122
- dropna : bool, default True
123
- Don't include counts of NaN.
124
-
125
- Returns
126
- -------
127
- counts : DataSeries
128
- """
129
- from pandas import Index
130
-
131
- import reciprocalspaceship as rs
132
-
133
- # compute counts on the data with no nans
134
- data = self._data[~self._mask]
135
- value_counts = Index(data).value_counts()
136
- array = value_counts.values
137
-
138
- # TODO(extension)
139
- # if we have allow Index to hold an ExtensionArray
140
- # this is easier
141
- index = value_counts.index.astype(object)
142
-
143
- # if we want nans, count the mask
144
- if not dropna:
145
-
146
- # TODO(extension)
147
- # appending to an Index *always* infers
148
- # w/o passing the dtype
149
- array = np.append(array, [self._mask.sum()])
150
- index = Index(
151
- np.concatenate(
152
- [index.values, np.array([self.dtype.na_value], dtype=object)]
153
- ),
154
- dtype=object,
155
- )
156
-
157
- return rs.DataSeries(array, index=index)
158
-
159
-
160
- class MTZFloat32Dtype(MTZDtype, pd.Float32Dtype):
161
- """Base ExtensionDtype class for MTZDtype backed by pd.Float32Dtype"""
162
-
163
- def _get_common_dtype(self, dtypes):
164
- if len(set(dtypes)) == 1:
165
- # only itself
166
- return self
167
- else:
168
- return super(pd.Float32Dtype, self)._get_common_dtype(dtypes)
169
-
170
- def __repr__(self):
171
- return self.name
172
-
173
-
174
- class MTZFloatArray(FloatingArray):
175
- """Base ExtensionArray class for floating point arrays backed by pd.FloatingArray"""
176
-
177
- def _maybe_mask_result(self, result, mask, other, op_name: str):
178
- """
179
- Parameters
180
- ----------
181
- result : array-like
182
- mask : array-like bool
183
- other : scalar or array-like
184
- op_name : str
185
- """
186
- # if we have a float operand we are by-definition
187
- # a float result
188
- # or our op is a divide
189
- if (
190
- (is_float_dtype(other) or is_float(other))
191
- or (op_name in ["rtruediv", "truediv"])
192
- or (is_float_dtype(self.dtype) and is_numeric_dtype(result.dtype))
193
- ):
194
- return type(self)(result, mask, copy=False)
195
- return super()._maybe_mask_result(
196
- result=result, mask=mask, other=other, op_name=op_name
197
- )
198
-
199
- @cache_readonly
200
- def dtype(self):
201
- return self._dtype
23
+ return NotImplementedError
202
24
 
203
25
  @classmethod
204
- def _from_sequence(cls, scalars, dtype=None, copy=False):
205
- values, mask = coerce_to_float_array(scalars, dtype=dtype, copy=copy)
206
- return cls(values, mask)
207
-
208
- def _coerce_to_array(self, value):
209
- return coerce_to_float_array(value, dtype=self.dtype)
210
-
211
- def to_numpy(self, dtype=None, copy=False, **kwargs):
212
- """
213
- Convert to a NumPy Array.
214
-
215
- If `dtype` is None it will default to a float32 ndarray.
216
-
217
- Parameters
218
- ----------
219
- dtype : dtype, default np.float32
220
- The numpy dtype to return
221
- copy : bool, default False
222
- Whether to ensure that the returned value is a not a view on
223
- the array. Note that ``copy=False`` does not *ensure* that
224
- ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
225
- a copy is made, even if not strictly necessary. This is typically
226
- only possible when no missing values are present and `dtype`
227
- is the equivalent numpy dtype.
228
-
229
- Returns
230
- -------
231
- numpy.ndarray
232
- """
233
- if dtype is None:
234
- dtype = np.float32
235
-
236
- # na_value is hard-coded to np.nan -- this prevents other functions
237
- # from resetting it.
238
- return super().to_numpy(dtype=dtype, copy=copy, na_value=np.nan)
239
-
240
- def value_counts(self, dropna=True):
241
- """
242
- Returns a DataSeries containing counts of each category.
243
- Every category will have an entry, even those with a count of 0.
244
-
245
- Parameters
246
- ----------
247
- dropna : bool, default True
248
- Don't include counts of NaN.
249
-
250
- Returns
251
- -------
252
- counts : DataSeries
253
- """
254
- from pandas import Index
255
-
256
- import reciprocalspaceship as rs
257
-
258
- # compute counts on the data with no nans
259
- data = self._data[~self._mask]
260
- value_counts = Index(data).value_counts()
261
- array = value_counts.values
262
-
263
- # TODO(extension)
264
- # if we have allow Index to hold an ExtensionArray
265
- # this is easier
266
- index = value_counts.index.astype(object)
267
-
268
- # if we want nans, count the mask
269
- if not dropna:
270
-
271
- # TODO(extension)
272
- # appending to an Index *always* infers
273
- # w/o passing the dtype
274
- array = np.append(array, [self._mask.sum()])
275
- index = Index(
276
- np.concatenate(
277
- [index.values, np.array([self.dtype.na_value], dtype=object)]
278
- ),
279
- dtype=object,
26
+ def construct_from_string(cls, string):
27
+ if not isinstance(string, str):
28
+ raise TypeError(
29
+ f"'construct_from_string' expects a string, got {type(string)}"
280
30
  )
31
+ elif string != cls.name and string != cls.mtztype:
32
+ raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
33
+ return cls()
281
34
 
282
- return rs.DataSeries(array, index=index)
35
+ def is_friedel_dtype(self):
36
+ """Returns whether MTZ dtype represents a Friedel dtype"""
37
+ raise NotImplementedError