reciprocalspaceship 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of reciprocalspaceship might be problematic. Click here for more details.

Files changed (41) hide show
  1. reciprocalspaceship/VERSION +1 -1
  2. reciprocalspaceship/__init__.py +1 -0
  3. reciprocalspaceship/algorithms/scale_merged_intensities.py +8 -7
  4. reciprocalspaceship/commandline/mtzdump.py +0 -1
  5. reciprocalspaceship/dataset.py +7 -1
  6. reciprocalspaceship/decorators.py +2 -2
  7. reciprocalspaceship/dtypes/__init__.py +16 -14
  8. reciprocalspaceship/dtypes/base.py +21 -266
  9. reciprocalspaceship/dtypes/floating.py +691 -0
  10. reciprocalspaceship/dtypes/integer.py +537 -0
  11. reciprocalspaceship/dtypes/internals.py +1365 -0
  12. reciprocalspaceship/io/__init__.py +7 -1
  13. reciprocalspaceship/io/crystfel.py +568 -234
  14. reciprocalspaceship/io/mtz.py +25 -0
  15. reciprocalspaceship/stats/completeness.py +0 -1
  16. reciprocalspaceship/utils/__init__.py +6 -1
  17. reciprocalspaceship/utils/asu.py +6 -0
  18. reciprocalspaceship/utils/cell.py +5 -0
  19. reciprocalspaceship/utils/stats.py +5 -7
  20. reciprocalspaceship/utils/structurefactors.py +5 -0
  21. reciprocalspaceship/utils/units.py +14 -4
  22. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/METADATA +26 -28
  23. reciprocalspaceship-1.0.2.dist-info/RECORD +58 -0
  24. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/WHEEL +1 -1
  25. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/entry_points.txt +0 -1
  26. tests/test_dataseries.py +1 -1
  27. tests/test_dataset_preserve_attributes.py +3 -9
  28. reciprocalspaceship/dtypes/anomalousdifference.py +0 -25
  29. reciprocalspaceship/dtypes/batch.py +0 -25
  30. reciprocalspaceship/dtypes/hklindex.py +0 -23
  31. reciprocalspaceship/dtypes/intensity.py +0 -47
  32. reciprocalspaceship/dtypes/m_isym.py +0 -25
  33. reciprocalspaceship/dtypes/mtzint.py +0 -23
  34. reciprocalspaceship/dtypes/mtzreal.py +0 -25
  35. reciprocalspaceship/dtypes/phase.py +0 -50
  36. reciprocalspaceship/dtypes/stddev.py +0 -69
  37. reciprocalspaceship/dtypes/structurefactor.py +0 -72
  38. reciprocalspaceship/dtypes/weight.py +0 -25
  39. reciprocalspaceship-1.0.0.dist-info/RECORD +0 -66
  40. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/LICENSE +0 -0
  41. {reciprocalspaceship-1.0.0.dist-info → reciprocalspaceship-1.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,537 @@
1
+ # BSD 3-Clause License
2
+ #
3
+ # Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
4
+ # All rights reserved.
5
+ #
6
+ # Copyright (c) 2011-2023, Open source contributors.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions are met:
10
+ #
11
+ # * Redistributions of source code must retain the above copyright notice, this
12
+ # list of conditions and the following disclaimer.
13
+ #
14
+ # * Redistributions in binary form must reproduce the above copyright notice,
15
+ # this list of conditions and the following disclaimer in the documentation
16
+ # and/or other materials provided with the distribution.
17
+ #
18
+ # * Neither the name of the copyright holder nor the names of its
19
+ # contributors may be used to endorse or promote products derived from
20
+ # this software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
26
+ # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30
+ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+
33
+ from __future__ import annotations
34
+
35
+ import numpy as np
36
+ from pandas import Float32Dtype, Int32Dtype, Int64Dtype
37
+ from pandas._libs import lib
38
+ from pandas._typing import ArrayLike, Dtype, DtypeObj
39
+ from pandas.core.arrays import ExtensionArray
40
+ from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype
41
+ from pandas.core.dtypes.cast import np_find_common_type
42
+ from pandas.core.dtypes.common import (
43
+ is_bool_dtype,
44
+ is_float_dtype,
45
+ is_integer_dtype,
46
+ is_object_dtype,
47
+ is_string_dtype,
48
+ pandas_dtype,
49
+ )
50
+ from pandas.core.tools.numeric import to_numeric
51
+ from pandas.util._decorators import cache_readonly
52
+
53
+ from reciprocalspaceship.dtypes.base import MTZDtype
54
+ from reciprocalspaceship.dtypes.internals import (
55
+ BaseMaskedDtype,
56
+ NumericArray,
57
+ is_numeric_na,
58
+ )
59
+
60
+
61
class MTZInt32Dtype(MTZDtype):
    """
    Base ExtensionDtype for MTZ data stored as 32-bit integers.

    The concrete dtypes in this module subclass it and set the ``name`` and
    ``mtztype`` class attributes; the scalar ``type`` is fixed to
    ``np.int32`` here.
    """

    type = np.int32

    @cache_readonly
    def is_signed_integer(self) -> bool:
        """Whether ``self.kind`` is ``"i"`` (``kind`` is inherited, not defined here)."""
        return self.kind == "i"

    @cache_readonly
    def is_unsigned_integer(self) -> bool:
        """Whether ``self.kind`` is ``"u"`` (``kind`` is inherited, not defined here)."""
        return self.kind == "u"

    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
        """
        Return the dtype to use when combining arrays of the given dtypes.

        Parameters
        ----------
        dtypes : list of dtype
            The dtypes being combined.

        Returns
        -------
        dtype or None
            ``self`` when every entry is the same dtype; ``Int32Dtype()`` when
            the common NumPy dtype is an integer; ``Float32Dtype()`` when it
            is floating; ``None`` when no common dtype is provided by this
            method.
        """
        if len(set(dtypes)) == 1:
            # only itself
            return self

        # we only handle nullable EA dtypes and numeric numpy dtypes
        if not all(
            isinstance(t, BaseMaskedDtype)
            or (
                isinstance(t, np.dtype)
                and (np.issubdtype(t, np.number) or np.issubdtype(t, np.bool_))
            )
            for t in dtypes
        ):
            return None

        # np_find_common_type is variadic (it forwards its arguments to
        # np.result_type), so the dtypes must be unpacked. Passing the list
        # plus an empty "scalar types" list, as np.find_common_type used to
        # accept, is not a valid call and yields an object dtype.
        np_dtype = np_find_common_type(
            *(
                t.numpy_dtype  # type: ignore[misc]
                if isinstance(t, BaseMaskedDtype)
                else t
                for t in dtypes
            )
        )
        if np.issubdtype(np_dtype, np.integer):
            return Int32Dtype()
        elif np.issubdtype(np_dtype, np.floating):
            return Float32Dtype()
        return None
115
+
116
+
117
def safe_cast(values, dtype, copy: bool):
    """
    Cast ``values`` to ``dtype`` only when no information is lost.

    A cast that NumPy considers "safe" is returned directly. Otherwise the
    cast is performed anyway and accepted only if every element compares
    equal to the original (e.g. floats holding whole numbers cast to int).

    Raises
    ------
    TypeError
        If the converted values differ from the originals.
    """
    try:
        return values.astype(dtype, casting="safe", copy=copy)
    except TypeError as err:
        # NumPy refused the safe cast; fall back to a value-equality check.
        converted = values.astype(dtype, copy=copy)
        if not (converted == values).all():
            raise TypeError(
                f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
            ) from err
        return converted
133
+
134
+
135
def coerce_to_array(
    values, dtype, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
    """
    Coerce the input values array to numpy arrays with a mask.

    Parameters
    ----------
    values : 1D list-like
    dtype : integer dtype, str, or None
        Target dtype. Anything that is not an ``MTZInt32Dtype`` instance is
        resolved through ``INT_STR_TO_DTYPE`` using its NumPy dtype name.
    mask : bool 1D array, optional
        True marks a missing value. Computed with ``is_numeric_na`` if omitted.
    copy : bool, default False
        if True, copy the input

    Returns
    -------
    tuple of (values, mask)
        ``values`` falls back to float64 when the data cannot be cast to the
        requested integer type.

    Raises
    ------
    ValueError
        If ``dtype`` has no entry in ``INT_STR_TO_DTYPE``.
    TypeError
        If the values cannot be interpreted as numeric, or if ``values`` or
        ``mask`` is not one-dimensional.
    """
    # if values is integer numpy array, preserve its dtype
    if dtype is None and hasattr(values, "dtype"):
        if is_integer_dtype(values.dtype):
            dtype = values.dtype

    if dtype is not None:
        if isinstance(dtype, str) and dtype.startswith(("Int", "UInt")):
            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
            # https://github.com/numpy/numpy/pull/7476
            dtype = dtype.lower()

        if not issubclass(type(dtype), MTZInt32Dtype):
            try:
                dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))]
            except KeyError as err:
                raise ValueError(f"invalid dtype specified {dtype}") from err

    if isinstance(values, MTZIntegerArray):
        # Already a masked integer array: reuse its buffers.
        values, mask = values._data, values._mask
        if dtype is not None:
            values = values.astype(dtype.numpy_dtype, copy=False)

        if copy:
            values = values.copy()
            mask = mask.copy()
        return values, mask

    if copy:
        values = np.array(values, copy=copy)
    else:
        values = np.asarray(values)

    inferred_type = None
    if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
        inferred_type = lib.infer_dtype(values, skipna=True)
        if inferred_type == "empty":
            pass
        elif inferred_type not in [
            "floating",
            "integer",
            "mixed-integer",
            "integer-na",
            "mixed-integer-float",
            "string",
            "unicode",
        ]:
            raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")

    elif is_bool_dtype(values) and is_integer_dtype(dtype):
        # bool -> int always allocates a new array. np.array(..., copy=False)
        # raises under NumPy >= 2 when a copy is unavoidable, so use astype.
        values = values.astype(int)

    elif not (is_integer_dtype(values) or is_float_dtype(values)):
        raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")

    if values.ndim != 1:
        raise TypeError("values must be a 1D list-like")

    if mask is None:
        mask = is_numeric_na(values)
    else:
        assert len(mask) == len(values)

    if mask.ndim != 1:
        raise TypeError("mask must be a 1D list-like")

    # infer dtype if needed
    if dtype is None:
        dtype = np.dtype("int64")
    else:
        dtype = dtype.type

    # if we are float, let's make sure that we can
    # safely cast

    # we copy as need to coerce here
    if mask.any():
        values = values.copy()
        # Masked slots get a placeholder so the cast below cannot fail on them.
        values[mask] = 1
    if inferred_type in ("string", "unicode"):
        # casts from str are always safe since they raise
        # a ValueError if the str cannot be parsed into an int
        values = values.astype(dtype, copy=copy)
    else:
        try:
            values = safe_cast(values, dtype, copy=False)
        except Exception:
            # certain outputs cannot be coerced to int32; keep them as float64.
            # Catch Exception rather than everything so KeyboardInterrupt and
            # SystemExit still propagate.
            dtype = np.dtype("float64")
            values = safe_cast(values, dtype, copy=False)

    return values, mask
246
+
247
+
248
class MTZIntegerArray(NumericArray):
    """
    Base array of integer values with optional missing entries.

    .. warning::

       MTZIntegerArray is currently experimental, and its API or internal
       implementation may change without warning.

    The array is represented by two numpy arrays:

    - data: a numpy array holding the values
    - mask: a boolean array of the same length, True where a value is missing

    Subclasses provide a ``_dtype`` class attribute, which is what the
    ``dtype`` property returns.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d integer-dtype array.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values.
    copy : bool, default False
        Whether to copy the `values` and `mask`.
    """

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value = 1
    # Fill values used for any/all
    _truthy_value = 1
    _falsey_value = 0

    @cache_readonly
    def dtype(self) -> MTZInt32Dtype:
        """The dtype instance stored on the class as ``_dtype``."""
        return self._dtype

    @classmethod
    def _from_sequence(
        cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
    ) -> MTZIntegerArray:
        """Build an array from a sequence of scalars via ``coerce_to_array``."""
        data, missing = coerce_to_array(scalars, dtype=dtype, copy=copy)
        return cls(data, missing)

    @classmethod
    def _from_sequence_of_strings(
        cls, strings, *, dtype: Dtype | None = None, copy: bool = False
    ) -> MTZIntegerArray:
        """Parse strings with ``to_numeric`` (raising on failure), then build an array."""
        return cls._from_sequence(
            to_numeric(strings, errors="raise"), dtype=dtype, copy=copy
        )

    def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
        """Coerce ``value`` to a (values, mask) pair using this array's dtype."""
        return coerce_to_array(value, dtype=self.dtype)

    def _maybe_mask_result(self, result, mask):
        """
        Wrap an operation result together with its mask.

        Parameters
        ----------
        result : array-like
        mask : array-like bool
        """
        if result.dtype.kind not in "iu":
            # Non-integer results are handled by the parent class.
            return super()._maybe_mask_result(result=result, mask=mask)
        # Integer results keep the concrete array type of ``self``.
        return type(self)(result, mask, copy=False)

    def astype(self, dtype, copy: bool = True) -> ArrayLike:
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Forwarded to the parent implementation for ExtensionDtype
            targets. For NumPy targets the conversion goes through
            ``to_numpy`` with ``copy=False``.

        Returns
        -------
        ndarray or ExtensionArray
        """
        target = pandas_dtype(dtype)

        if isinstance(target, ExtensionDtype):
            return super().astype(target, copy=copy)

        # In astype, we consider dtype=float to also mean na_value=np.nan
        na_value = np.nan if is_float_dtype(target) else lib.no_default

        return self.to_numpy(dtype=target, na_value=na_value, copy=False)

    def _values_for_argsort(self) -> np.ndarray:
        """
        Return values for sorting.

        Returns
        -------
        ndarray
            A copy of the data in which masked entries are replaced by a
            value one below the current minimum.

        See Also
        --------
        ExtensionArray.argsort : Return the indices that would sort this array.
        """
        sortable = self._data.copy()
        if self._mask.any():
            sortable[self._mask] = sortable.min() - 1
        return sortable

    def to_numpy(self, dtype=None, copy=False, **kwargs):
        """
        Convert to a NumPy Array.

        If `dtype` is None and the array has no missing values, ``np.int32``
        is requested. Otherwise `dtype` is passed through unchanged to the
        parent implementation.

        Parameters
        ----------
        dtype : dtype, optional
            The numpy dtype to return.
        copy : bool, default False
            Forwarded to the parent ``to_numpy``.
        **kwargs
            Accepted and ignored; in particular a caller-supplied
            ``na_value`` has no effect.

        Returns
        -------
        numpy.ndarray
        """
        if dtype is None and not self._hasna:
            dtype = np.int32

        # na_value is hard-coded to np.nan -- this prevents other functions
        # from resetting it.
        return super().to_numpy(dtype=dtype, copy=copy, na_value=np.nan)
443
+
444
+
445
+ # create the Dtypes
446
+
447
+
448
@register_extension_dtype
class BatchDtype(MTZInt32Dtype):
    """Integer dtype registered with pandas under the name "Batch"."""

    name = "Batch"
    # presumably the single-letter MTZ column type code -- confirm against MTZDtype
    mtztype = "B"

    def is_friedel_dtype(self) -> bool:
        """Always False for this dtype."""
        return False

    @classmethod
    def construct_array_type(cls):
        """Return the array class paired with this dtype (``BatchArray``)."""
        return BatchArray
459
+
460
+
461
class BatchArray(MTZIntegerArray):
    """ExtensionArray for supporting BatchDtype"""

    # Shared dtype instance returned by the inherited ``dtype`` property.
    _dtype = BatchDtype()
466
+
467
+
468
@register_extension_dtype
class HKLIndexDtype(MTZInt32Dtype):
    """Custom MTZ Dtype for Miller indices"""

    name = "HKL"
    # presumably the single-letter MTZ column type code -- confirm against MTZDtype
    mtztype = "H"

    def is_friedel_dtype(self) -> bool:
        """Always False for this dtype."""
        return False

    @classmethod
    def construct_array_type(cls):
        """Return the array class paired with this dtype (``HKLIndexArray``)."""
        return HKLIndexArray
481
+
482
+
483
class HKLIndexArray(MTZIntegerArray):
    """ExtensionArray for supporting HKLIndexDtype"""

    # Shared dtype instance returned by the inherited ``dtype`` property.
    _dtype = HKLIndexDtype()
486
+
487
+
488
@register_extension_dtype
class M_IsymDtype(MTZInt32Dtype):
    """Dtype for representing M/ISYM values"""

    name = "M/ISYM"
    # presumably the single-letter MTZ column type code -- confirm against MTZDtype
    mtztype = "Y"

    def is_friedel_dtype(self) -> bool:
        """Always False for this dtype."""
        return False

    @classmethod
    def construct_array_type(cls):
        """Return the array class paired with this dtype (``M_IsymArray``)."""
        return M_IsymArray
501
+
502
+
503
class M_IsymArray(MTZIntegerArray):
    """ExtensionArray for supporting M_IsymDtype"""

    # Shared dtype instance returned by the inherited ``dtype`` property.
    _dtype = M_IsymDtype()
508
+
509
+
510
@register_extension_dtype
class MTZIntDtype(MTZInt32Dtype):
    """Dtype for generic integer data"""

    name = "MTZInt"
    # presumably the single-letter MTZ column type code -- confirm against MTZDtype
    mtztype = "I"

    def is_friedel_dtype(self) -> bool:
        """Always False for this dtype."""
        return False

    @classmethod
    def construct_array_type(cls):
        """Return the array class paired with this dtype (``MTZIntArray``)."""
        return MTZIntArray
523
+
524
+
525
class MTZIntArray(MTZIntegerArray):
    """ExtensionArray for supporting MTZIntDtype"""

    # Shared dtype instance returned by the inherited ``dtype`` property.
    _dtype = MTZIntDtype()
528
+
529
+
530
# Lookup table from a dtype name to the dtype instance used for coercion.
# coerce_to_array indexes it with ``str(np.dtype(dtype))``; an unknown name
# surfaces there as "invalid dtype specified".
INT_STR_TO_DTYPE: dict[str, MTZInt32Dtype] = {
    # MTZ dtypes defined in this module, keyed by their ``name`` attribute
    "Batch": BatchDtype(),
    "HKL": HKLIndexDtype(),
    "M/ISYM": M_IsymDtype(),
    "MTZInt": MTZIntDtype(),
    # NumPy integer dtype names map to the pandas nullable integer dtypes
    "int32": Int32Dtype(),
    "int64": Int64Dtype(),
}