reciprocalspaceship 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of reciprocalspaceship might be problematic. Click here for more details.

@@ -1 +1 @@
1
- 1.0.2
1
+ 1.0.3
@@ -1,8 +1,15 @@
1
1
  # Version number for reciprocalspaceship
2
2
def getVersionNumber():
    """
    Return the installed version string of reciprocalspaceship.

    The version is read from the installed distribution metadata using the
    standard library, so the lookup does not depend on setuptools being
    importable at runtime.

    Returns
    -------
    str
        Version of the installed ``reciprocalspaceship`` distribution.

    Raises
    ------
    importlib.metadata.PackageNotFoundError
        If no installed distribution named ``reciprocalspaceship`` is found.
    """
    try:
        from importlib import metadata
    except ImportError:
        # Fallback for interpreters that predate importlib.metadata
        import pkg_resources

        return pkg_resources.require("reciprocalspaceship")[0].version

    return metadata.version("reciprocalspaceship")
7
14
 
8
15
 
@@ -258,7 +258,14 @@ class DataSet(pd.DataFrame):
258
258
  )
259
259
 
260
260
  def reset_index(
261
- self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
261
+ self,
262
+ level=None,
263
+ drop=False,
264
+ inplace=False,
265
+ col_level=0,
266
+ col_fill="",
267
+ allow_duplicates=lib.no_default,
268
+ names=None,
262
269
  ):
263
270
  """
264
271
  Reset the index or a specific level of a MultiIndex.
@@ -281,6 +288,12 @@ class DataSet(pd.DataFrame):
281
288
  col_fill : object
282
289
  If the columns have multiple levels, determines how the other
283
290
  levels are named. If None then the index name is repeated.
291
+ allow_duplicates : bool
292
+ Allow duplicate column labels to be created.
293
+ names : int, str, tuple, list
294
+ Using the given string, rename the DataSet column which contains the
295
+ index data. If the DataSet has a MultiIndex, this has to be a list or
296
+ tuple with length equal to the number of levels.
284
297
 
285
298
  Returns
286
299
  -------
@@ -317,6 +330,8 @@ class DataSet(pd.DataFrame):
317
330
  inplace=inplace,
318
331
  col_level=col_level,
319
332
  col_fill=col_fill,
333
+ allow_duplicates=allow_duplicates,
334
+ names=names,
320
335
  )
321
336
  _handle_cached_dtypes(self, columns, drop)
322
337
  return
@@ -327,6 +342,8 @@ class DataSet(pd.DataFrame):
327
342
  inplace=inplace,
328
343
  col_level=col_level,
329
344
  col_fill=col_fill,
345
+ allow_duplicates=allow_duplicates,
346
+ names=names,
330
347
  )
331
348
  dataset._index_dtypes = dataset._index_dtypes.copy()
332
349
  dataset = _handle_cached_dtypes(dataset, columns, drop)
@@ -406,6 +423,7 @@ class DataSet(pd.DataFrame):
406
423
  """
407
424
  return cls(gemmiMtz)
408
425
 
426
+ @range_indexed
409
427
  def to_gemmi(
410
428
  self,
411
429
  skip_problem_mtztypes=False,
@@ -575,6 +593,7 @@ class DataSet(pd.DataFrame):
575
593
  result = super().join(*args, **kwargs)
576
594
  return result.__finalize__(self)
577
595
 
596
+ @range_indexed
578
597
  def write_mtz(
579
598
  self,
580
599
  mtzfile,
@@ -1158,7 +1177,7 @@ class DataSet(pd.DataFrame):
1158
1177
 
1159
1178
  return result
1160
1179
 
1161
- def is_isomorphous(self, other, cell_threshold=0.05):
1180
+ def is_isomorphous(self, other, cell_threshold=0.5):
1162
1181
  """
1163
1182
  Determine whether DataSet is isomorphous to another DataSet. This
1164
1183
  method confirms isomorphism by ensuring the spacegroups are equivalent,
@@ -1195,7 +1214,8 @@ class DataSet(pd.DataFrame):
1195
1214
  for param in params:
1196
1215
  param1 = self.cell.__getattribute__(param)
1197
1216
  param2 = other.cell.__getattribute__(param)
1198
- if (np.abs((param1 - param2)) / 100.0) > cell_threshold:
1217
+ diff = 200.0 * np.abs(param1 - param2) / (param1 + param2)
1218
+ if diff > cell_threshold:
1199
1219
  return False
1200
1220
 
1201
1221
  return True
@@ -4,6 +4,8 @@ from inspect import signature
4
4
  import gemmi
5
5
  import numpy as np
6
6
 
7
+ import reciprocalspaceship as rs
8
+
7
9
 
8
10
  def inplace(f):
9
11
  """
@@ -46,9 +48,11 @@ def range_indexed(f):
46
48
  names = ds.index.names
47
49
  ds = ds._index_from_names([None], inplace=True)
48
50
  result = f(ds, *args, **kwargs)
49
- result = result._index_from_names(names, inplace=True)
50
51
  ds = ds._index_from_names(names, inplace=True)
51
- return result.__finalize__(ds)
52
+ if isinstance(result, rs.DataSet):
53
+ result = result._index_from_names(names, inplace=True)
54
+ result = result.__finalize__(ds)
55
+ return result
52
56
 
53
57
  return wrapped
54
58
 
@@ -1359,7 +1359,7 @@ class NumericArray(BaseMaskedArray):
1359
1359
 
1360
1360
  @wraps(libmissing.is_numeric_na)
1361
1361
  def is_numeric_na(values):
1362
- allowed_dtypes = ("float32", "int32")
1362
+ allowed_dtypes = ("float64", "float32", "int32")
1363
1363
  if isinstance(values, np.ndarray) and values.dtype in allowed_dtypes:
1364
1364
  return np.isnan(values)
1365
1365
  return libmissing.is_numeric_na(values)
@@ -1,6 +1,7 @@
1
1
  from reciprocalspaceship.io.ccp4map import write_ccp4_map
2
2
  from reciprocalspaceship.io.crystfel import read_crystfel
3
3
  from reciprocalspaceship.io.csv import read_csv
4
+ from reciprocalspaceship.io.dials import print_refl_info, read_dials_stills
4
5
  from reciprocalspaceship.io.mtz import (
5
6
  from_gemmi,
6
7
  read_cif,
@@ -0,0 +1,48 @@
1
+ import logging
2
+ import warnings
3
+ from contextlib import contextmanager
4
+ from importlib.util import find_spec
5
+
6
+
7
def set_ray_loglevel(level):
    """
    Apply `level` to the "ray" logger and to each handler attached to it.

    Parameters
    ----------
    level : int or str
        Logging level passed unchanged to ``Logger.setLevel`` and
        ``Handler.setLevel``.
    """
    ray_logger = logging.getLogger("ray")
    # The logger and its handlers are updated together
    for target in (ray_logger, *ray_logger.handlers):
        target.setLevel(level)
12
+
13
+
14
def check_for_ray():
    """
    Report whether the optional ``ray`` package can be located.

    Returns
    -------
    bool
        True if ``find_spec("ray")`` finds the package. Otherwise an
        ``ImportWarning`` is issued and False is returned so that callers
        can fall back to serial execution.
    """
    if find_spec("ray") is not None:
        return True

    message = (
        "ray (https://www.ray.io/) is not available..." "Falling back to serial."
    )
    # stacklevel=2 attributes the warning to the caller rather than this helper
    warnings.warn(message, ImportWarning, stacklevel=2)
    return False
24
+
25
+
26
def check_for_mpi():
    """
    Report whether ``from mpi4py import MPI`` succeeds.

    Returns
    -------
    bool
        True if the import works. Otherwise an ``ImportWarning`` describing
        the failure is issued and False is returned so that callers can fall
        back to serial mode.
    """
    try:
        # Any exception counts as "unavailable", not only ImportError.
        # NOTE(review): presumably because a broken MPI installation can
        # raise other error types on import -- confirm.
        from mpi4py import MPI  # noqa: F401
    except Exception as err:
        message = (
            f"Failed `from mpi4py import MPI` with {err}. Falling back to serial mode."
        )
        # stacklevel=2 attributes the warning to the caller rather than this helper
        warnings.warn(message, ImportWarning, stacklevel=2)
        return False
    return True
37
+
38
+
39
@contextmanager
def ray_context(log_level="DEBUG", **ray_kwargs):
    """
    Context manager that initializes ray on entry and shuts it down on exit.

    Parameters
    ----------
    log_level : int or str
        Level applied to the "ray" logger before ray is initialized.
    **ray_kwargs
        Keyword arguments forwarded to ``ray.init``.

    Yields
    ------
    module
        The imported ``ray`` module.
    """
    # Imported lazily so that ray remains an optional dependency
    import ray as ray_module

    set_ray_loglevel(log_level)
    ray_module.init(**ray_kwargs)
    try:
        yield ray_module
    finally:
        # Runs even if the body of the with-statement raises
        ray_module.shutdown()
@@ -1,13 +1,12 @@
1
1
  import mmap
2
2
  import re
3
- from contextlib import contextmanager
4
- from importlib.util import find_spec
5
3
  from typing import Union
6
4
 
7
5
  import gemmi
8
6
  import numpy as np
9
7
 
10
8
  from reciprocalspaceship import DataSet, concat
9
+ from reciprocalspaceship.io.common import check_for_ray, ray_context
11
10
  from reciprocalspaceship.utils import angle_between, eV2Angstroms
12
11
 
13
12
  # See Rupp Table 5-2
@@ -60,17 +59,6 @@ _block_markers = {
60
59
  }
61
60
 
62
61
 
63
- @contextmanager
64
- def ray_context(**ray_kwargs):
65
- import ray
66
-
67
- ray.init(**ray_kwargs)
68
- try:
69
- yield ray
70
- finally:
71
- ray.shutdown()
72
-
73
-
74
62
  class StreamLoader(object):
75
63
  """
76
64
  An object that loads stream files into rs.DataSet objects in parallel.
@@ -304,15 +292,7 @@ class StreamLoader(object):
304
292
 
305
293
  # Check whether ray is available
306
294
  if use_ray:
307
- if find_spec("ray") is None:
308
- use_ray = False
309
- import warnings
310
-
311
- message = (
312
- "ray (https://www.ray.io/) is not available..."
313
- "Falling back to serial stream file parser."
314
- )
315
- warnings.warn(message, ImportWarning)
295
+ use_ray = check_for_ray()
316
296
 
317
297
  with open(self.filename, "r") as f:
318
298
  memfile = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
@@ -507,10 +487,9 @@ def read_crystfel(
507
487
  The type of byte-encoding (optional, 'utf-8').
508
488
  columns : list (optional)
509
489
  Optionally specify the columns of the output by a list of strings.
510
- The default list is:
511
- [ "H", "K", "L", "I", "SigI", "BATCH", "s1x", "s1y", "s1z", "ewald_offset",
512
- "angular_ewald_offset", "XDET", "YDET" ]
513
- See `rs.io.crystfel.StreamLoader().available_column_names` for a list of available column names.
490
+ The default list is: [ "H", "K", "L", "I", "SigI", "BATCH", "s1x", "s1y", "s1z", "ewald_offset", "angular_ewald_offset", "XDET", "YDET" ]
491
+ See `rs.io.crystfel.StreamLoader().available_column_names` for a list of available
492
+ column names and *Notes* for a description of the returned columns
514
493
  parallel : bool (optional)
515
494
  Read the stream file in parallel using [ray.io](https://docs.ray.io) if it is available.
516
495
  num_cpus : int (optional)
@@ -524,6 +503,18 @@ def read_crystfel(
524
503
  Returns
525
504
  --------
526
505
  rs.DataSet
506
+
507
+ Notes
508
+ -----
509
+ The following columns are included in the returned DataSet object:
510
+
511
+ - H, K, L: Miller indices of each reflection
512
+ - I, SigI: Intensity and associated uncertainty
513
+ - BATCH: Image number
514
+ - s1x, s1y, s1z: scattered beam wavevector which points from the sample to the Bragg peak
515
+ - ewald_offset: the distance in cartesian space (1/angstroms) between the observed reflection and the Ewald sphere
516
+ - angular_ewald_offset: the distance in polar coordinates (degrees) between the observed reflection and the Ewald sphere
517
+ - XDET, YDET: Internal detector panel coordinates
527
518
  """
528
519
  if not streamfile.endswith(".stream"):
529
520
  raise ValueError("Stream file should end with .stream")
@@ -0,0 +1,330 @@
1
+ import logging
2
+ import sys
3
+
4
+ import msgpack
5
+ import numpy as np
6
+ import pandas
7
+
8
+ LOGGER = logging.getLogger("rs.io.dials")
9
+ if not LOGGER.handlers:
10
+ LOGGER.setLevel(logging.DEBUG)
11
+ console = logging.StreamHandler(stream=sys.stdout)
12
+ console.setLevel(logging.DEBUG)
13
+ LOGGER.addHandler(console)
14
+
15
+ import reciprocalspaceship as rs
16
+ from reciprocalspaceship.decorators import cellify, spacegroupify
17
+ from reciprocalspaceship.io.common import check_for_ray, set_ray_loglevel
18
+
19
+ MSGPACK_DTYPES = {
20
+ "double": np.float64,
21
+ "float": np.float32,
22
+ "int": np.int32,
23
+ "cctbx::miller::index<>": np.int32,
24
+ "vec3<double>": np.float64,
25
+ "std::size_t": np.intp,
26
+ }
27
+
28
+ DEFAULT_COLS = [
29
+ "miller_index",
30
+ "intensity.sum.value",
31
+ "intensity.sum.variance",
32
+ "xyzcal.px",
33
+ "s1",
34
+ "delpsical.rad",
35
+ "id",
36
+ ]
37
+
38
+
39
def _set_logger(verbose):
    """
    Set the level of the "rs.io.dials" and "ray" loggers and their handlers.

    Parameters
    ----------
    verbose : bool
        If truthy, use ``logging.DEBUG``; otherwise use ``logging.CRITICAL``.
    """
    level = logging.DEBUG if verbose else logging.CRITICAL

    for log_name in ("rs.io.dials", "ray"):
        target = logging.getLogger(log_name)
        target.setLevel(level)
        for attached in target.handlers:
            attached.setLevel(level)
49
+
50
+
51
def get_msgpack_data(data, name):
    """
    Decode one column of a msgpack reflection table into numpy arrays.

    Parameters
    ----------
    data : dict
        msgpack data dict. ``data[name]`` is unpacked as
        ``(type name, (number of rows, raw buffer))``.
    name : str
        msgpack data key of the column to decode.

    Returns
    -------
    dict
        Maps column labels to 1D numpy arrays. A column with a single value
        per row is stored under ``name``; otherwise each component ``i`` is
        stored under ``f"{name}.{i}"``.

    Warns
    -----
    RuntimeWarning
        If the type name is not listed in ``MSGPACK_DTYPES``. The buffer is
        then interpreted with numpy's default dtype, which may not match the
        stored data.
    """
    type_name, (num, buff) = data[name]
    dtype = MSGPACK_DTYPES.get(type_name)
    if dtype is None:
        import warnings

        warnings.warn(
            f"Unrecognized msgpack dtype {type_name!r} for column {name!r}; "
            "interpreting the buffer with numpy's default dtype.",
            RuntimeWarning,
            stacklevel=2,
        )

    # One row per reflection; the number of components is inferred
    vals = np.frombuffer(buff, dtype).reshape((num, -1))
    data_dict = {f"{name}.{i}": col_data for i, col_data in enumerate(vals.T)}

    # remove the .0 suffix if data is a scalar type
    if len(data_dict) == 1:
        data_dict[name] = data_dict.pop(f"{name}.0")

    return data_dict
78
+
79
+
80
def _concat(refl_data):
    """
    Combine the output of _get_refl_data into a single DataSet.

    Parameters
    ----------
    refl_data : rs.DataSet or iterable of (rs.DataSet or None)
        A single table, or per-file tables. ``None`` entries are dropped
        before concatenation.

    Returns
    -------
    rs.DataSet
        The combined table. If a BATCH column is present, its values are
        replaced by integer codes numbered in order of first appearance, so
        the numbering is reproducible for a given input order. The
        ``miller_index.0/1/2`` columns are renamed to H, K and L.
    """
    LOGGER.debug("Combining and formatting tables!")
    if isinstance(refl_data, rs.DataSet):
        ds = refl_data
    else:
        tables = [table for table in refl_data if table is not None]
        ds = rs.concat(tables, check_isomorphous=False)

    if "BATCH" in ds:
        # factorize numbers identifiers by first appearance; iterating a set
        # of strings would give an order that can differ between runs
        codes, expt_ids = pandas.factorize(ds["BATCH"].to_numpy())
        LOGGER.debug(f"Found {len(ds)} refls from {len(expt_ids)} expts.")
        LOGGER.debug("Mapping batch column.")
        ds["BATCH"] = codes
    else:
        # Tables read without an "id" column carry no BATCH information
        LOGGER.debug(f"Found {len(ds)} refls; no BATCH column to map.")

    rename_map = {"miller_index.0": "H", "miller_index.1": "K", "miller_index.2": "L"}
    ds.rename(columns=rename_map, inplace=True)
    LOGGER.debug("Finished combining tables!")
    return ds
97
+
98
+
99
def _get_refl_data(fname, unitcell, spacegroup, extra_cols=None):
    """
    Read one integrated reflection file into a DataSet.

    Parameters
    ----------
    fname : str
        Integrated refl file.
    unitcell : gemmi.UnitCell instance
        Passed to the DataSet constructor as ``cell``.
    spacegroup : gemmi.SpaceGroup instance
        Passed to the DataSet constructor as ``spacegroup``.
    extra_cols : iterable of str (optional)
        Additional columns to read on top of ``DEFAULT_COLS``. Any iterable
        is accepted, not only a list.

    Returns
    -------
    rs.DataSet
        Table holding the requested columns that exist in the file. If the
        file has an "id" column it is replaced by a BATCH column holding the
        experiment identifier of each row. A PARTIAL column set to True is
        always added.

    Raises
    ------
    IOError
        If the table has no miller_index column.
    """
    LOGGER.debug(f"Loading {fname}")
    pack = _get_refl_pack(fname)
    refl_data = pack["data"]
    expt_id_map = pack["identifiers"]

    if "miller_index" not in refl_data:
        raise IOError("refl table must have a miller_index column")

    # list() lets callers pass a tuple or other iterable without a TypeError
    col_names = DEFAULT_COLS if extra_cols is None else DEFAULT_COLS + list(extra_cols)

    ds_data = {}
    for col_name in col_names:
        if col_name in refl_data:
            col_data = get_msgpack_data(refl_data, col_name)
            LOGGER.debug(f"... Read in data for {col_name}")
            # Newly read columns are placed first; existing keys win on clashes
            ds_data = {**col_data, **ds_data}

    if "id" in ds_data:
        ds_data["BATCH"] = np.array([expt_id_map[li] for li in ds_data.pop("id")])
    ds = rs.DataSet(
        ds_data,
        cell=unitcell,
        spacegroup=spacegroup,
    )
    ds["PARTIAL"] = True
    return ds
139
+
140
+
141
def _read_dials_stills_serial(fnames, unitcell, spacegroup, extra_cols=None, **kwargs):
    """
    Run read_dials_stills without trying to import ray.

    Each file is read in turn with ``_get_refl_data``. Extra keyword
    arguments are accepted and ignored.

    Returns
    -------
    list
        One entry per file name, in input order.
    """
    tables = []
    for fname in fnames:
        tables.append(_get_refl_data(fname, unitcell, spacegroup, extra_cols))
    return tables
147
+
148
+
149
def _read_dials_stills_ray(fnames, unitcell, spacegroup, numjobs=10, extra_cols=None):
    """
    Read integration files in parallel using ray.

    Parameters
    ----------
    fnames : iterable of str
        Integration files.
    unitcell : gemmi.UnitCell instance
    spacegroup : gemmi.SpaceGroup instance
    numjobs : int
        Number of jobs; passed to ray as ``num_cpus``.
    extra_cols : list
        Additional columns to read from refl tables.

    Returns
    -------
    list
        The result of ``_get_refl_data`` for each file, as returned by
        ``ray.get``.
    """
    from reciprocalspaceship.io.common import ray_context

    current_level = LOGGER.level
    context_options = {
        "log_level": current_level,
        "num_cpus": numjobs,
        # Worker output is forwarded to the driver only when debugging
        "log_to_driver": current_level == logging.DEBUG,
    }
    with ray_context(**context_options) as ray:
        # get the refl data
        remote_reader = ray.remote(_get_refl_data)
        pending = [
            remote_reader.remote(fname, unitcell, spacegroup, extra_cols)
            for fname in fnames
        ]
        refl_data = ray.get(pending)
    return refl_data
180
+
181
+
182
def dials_to_mtz_dtypes(ds, inplace=True):
    """
    Coerce the dtypes in ds into ones that can be written to an mtz file.
    This will downcast doubles to single precision. If "variance" columns
    are present, they will be converted to "sigma" and assigned
    StandardDeviationDtype.

    Parameters
    ----------
    ds : rs.DataSet
    inplace : bool (optional)
        Convert ds dtypes in place without making a copy. Defaults to True.
        If False, a copy is converted and the input is left unmodified.

    Returns
    -------
    ds : rs.DataSet
        The converted dataset: the input object itself when ``inplace`` is
        True, otherwise the converted copy.
    """
    if not inplace:
        ds = ds.copy()

    rename_map = {}
    # Snapshot the labels because columns are reassigned inside the loop
    for name in list(ds.columns):
        # Non-string labels cannot contain "variance"; skip them
        if isinstance(name, str) and "variance" in name:
            new_name = name.replace("variance", "sigma")
            rename_map[name] = new_name
            ds[name] = np.sqrt(ds[name]).astype("Q")
            LOGGER.debug(
                f"Converted column {name} to MTZ-Type Q, took sqrt of the values, and renamed to {new_name}."
            )
    ds.rename(columns=rename_map, inplace=True)
    ds.infer_mtz_dtypes(inplace=True)
    return ds
211
+
212
+
213
@cellify
@spacegroupify
def read_dials_stills(
    fnames,
    unitcell=None,
    spacegroup=None,
    numjobs=10,
    parallel_backend=None,
    extra_cols=None,
    verbose=False,
    comm=None,
    mtz_dtypes=False,
):
    """
    Read reflections from still images processed by DIALS from fnames and return
    them as a DataSet. By default, this function will not convert the data from
    dials into an MTZ compatible format.

    Parameters
    ----------
    fnames : list or tuple or string
        A list or tuple of filenames (strings) or a single filename.
    unitcell : gemmi.UnitCell or similar (optional)
        The unit cell assigned to the returned dataset.
    spacegroup : gemmi.SpaceGroup or similar (optional)
        The spacegroup assigned to the returned dataset.
    numjobs : int
        If backend==ray, specify the number of jobs (ignored if backend==mpi).
    parallel_backend : string (optional)
        "ray", "mpi", or None for serial. If the requested backend is not
        available, the files are read serially.
    extra_cols : list (optional)
        Optional list of additional column names to extract from the refltables. By default, this method will search for
        miller_index, id, s1, xyzcal.px, intensity.sum.value, intensity.sum.variance, delpsical.rad
    verbose : bool
        Whether to print logging info to stdout
    comm : mpi4py.MPI.Comm
        Optionally override the communicator used by backend='mpi'
    mtz_dtypes : bool (optional)
        Optionally convert columns to mtz compatible dtypes. Note this will downcast double precision (64-bit)
        floats to single precision (32-bit).

    Returns
    -------
    ds : rs.DataSet
        The dataset containing reflection info aggregated from fnames. Unless mtz_dtypes is set, this method
        will not convert any of the columns to native rs MTZ dtypes. DIALS data are natively double precision
        (64-bit). Converting to MTZ will downcast them to 32-bit. Use ds.infer_mtz_dtypes() to convert to native
        rs dtypes if required. None is returned if the selected reader returns None.

    Raises
    ------
    NotImplementedError
        If parallel_backend is not "ray", "mpi", or None.
    """
    _set_logger(verbose)
    if isinstance(fnames, str):
        fnames = [fnames]

    if parallel_backend not in ["ray", "mpi", None]:
        raise NotImplementedError("parallel_backend should be ray, mpi, or none")

    # Serial reading is the default and the fallback for unavailable backends
    reader = _read_dials_stills_serial
    reader_kwargs = dict(
        fnames=fnames,
        unitcell=unitcell,
        spacegroup=spacegroup,
        extra_cols=extra_cols,
    )

    if parallel_backend == "ray":
        reader_kwargs["numjobs"] = numjobs
        from reciprocalspaceship.io.common import check_for_ray

        if check_for_ray():
            reader = _read_dials_stills_ray
    elif parallel_backend == "mpi":
        from reciprocalspaceship.io.common import check_for_mpi

        if check_for_mpi():
            from reciprocalspaceship.io.dials_mpi import read_dials_stills_mpi as reader

            reader_kwargs["comm"] = comm

    result = reader(**reader_kwargs)
    if result is None:
        return result

    result = _concat(result)
    if mtz_dtypes:
        dials_to_mtz_dtypes(result, inplace=True)
    return result
294
+
295
+
296
def _get_refl_pack(filename):
    """
    Load the payload of a DIALS reflection table stored as msgpack.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    object
        The third element of the decoded three-element msgpack container.

    Raises
    ------
    IOError
        If the decoded object does not have exactly three elements.
    """
    # The context manager closes the handle even if decoding fails
    with open(filename, "rb") as handle:
        contents = msgpack.load(handle, strict_map_key=False)

    message = "File does not appear to be dials::af::reflection_table"
    try:
        # Explicit check instead of assert, which is stripped under -O
        if len(contents) != 3:
            raise IOError(message)
        _, _, pack = contents
    except TypeError as err:
        # Raised when the decoded object has no length or cannot be unpacked
        raise IOError(message) from err
    return pack
304
+
305
+
306
def print_refl_info(reflfile):
    """
    Print the contents of `reflfile`, a reflection table file saved with DIALS.

    Depending on which keys are present in the file, this prints the
    experiment identifiers, a table of column names with their stored type
    names, and the number of reflections.

    Parameters
    ----------
    reflfile : str
        Path of the reflection table file.
    """
    pack = _get_refl_pack(reflfile)
    if "identifiers" in pack:
        idents = pack["identifiers"]
        print(f"\nFound {len(idents)} experiment identifiers in {reflfile}:")
        for i, ident in idents.items():
            print(f"\t{i}: {ident}")
    if "data" in pack:
        data = pack["data"]
        names = []
        dtypes = []
        for name, entry in data.items():
            # Each entry is (type name, (number of rows, raw buffer))
            dtype, (_, _) = entry
            names.append(name)
            dtypes.append(dtype)
        # default=0 keeps an empty table from raising
        col_space = max((len(label) for label in names + dtypes), default=0)
        df = pandas.DataFrame({"names": names, "dtypes": dtypes})
        print(
            "\nReflection contents:\n"
            + df.to_string(index=False, col_space=col_space + 5, justify="center")
        )

    if "nrows" in pack:
        print(f"\nNumber of reflections: {pack['nrows']} \n")
@@ -0,0 +1,44 @@
1
+ from itertools import chain
2
+
3
+ from reciprocalspaceship.decorators import cellify, spacegroupify
4
+ from reciprocalspaceship.io import dials
5
+
6
+
7
def mpi_starmap(comm, func, iterable):
    """
    Apply `func` to argument tuples, split round-robin across `comm`.

    Each rank calls ``func(*item)`` for the items whose position modulo
    ``comm.size`` equals ``comm.rank``, then the per-rank lists are collected
    with ``comm.gather``.

    Returns
    -------
    iterator or None
        On rank 0, an iterator chaining the gathered per-rank result lists.
        None on every other rank.
    """
    local_results = [
        func(*item)
        for position, item in enumerate(iterable)
        if position % comm.size == comm.rank
    ]
    gathered = comm.gather(local_results)
    if comm.rank != 0:
        return None
    return chain.from_iterable(gathered)
16
+
17
+
18
@cellify
@spacegroupify
def read_dials_stills_mpi(fnames, unitcell, spacegroup, extra_cols=None, comm=None):
    """
    Read integrated reflection tables with the work split across MPI ranks.

    Parameters
    ----------
    fnames : iterable of str
        Integrated reflection tables.
    unitcell : unit cell tuple (6 params Ang,Ang,Ang,deg,deg,deg)
    spacegroup : space group name e.g. P4
    extra_cols : list
        Additional column names to read from the refl table.
    comm : mpi4py.MPI.Comm (optional)
        Optionally override the MPI communicator. The default is MPI.COMM_WORLD.

    Returns
    -------
    iterator or None
        On MPI rank 0, an iterator over the per-file tables produced by
        ``dials._get_refl_data``. None on every other rank.
    """
    if comm is None:
        # Imported lazily so that mpi4py is only needed when no comm is given
        from mpi4py import MPI

        comm = MPI.COMM_WORLD

    argument_tuples = ((f, unitcell, spacegroup, extra_cols) for f in fnames)
    return mpi_starmap(comm, dials._get_refl_data, argument_tuples)
@@ -133,16 +133,15 @@ def to_gemmi(
133
133
  mtz.datasets[0].dataset_name = dataset_name
134
134
 
135
135
  # Construct data for Mtz object
136
- temp = dataset.reset_index()
136
+ # GH#255: DataSet is provided using the range_indexed decorator
137
137
  columns = []
138
- for c in temp.columns:
139
- cseries = temp[c]
138
+ for c in dataset.columns:
139
+ cseries = dataset[c]
140
140
  if isinstance(cseries.dtype, MTZDtype):
141
141
  mtz.add_column(label=c, type=cseries.dtype.mtztype)
142
142
  columns.append(c)
143
143
  # Special case for CENTRIC and PARTIAL flags
144
144
  elif cseries.dtype.name == "bool" and c in ["CENTRIC", "PARTIAL"]:
145
- temp[c] = temp[c].astype("MTZInt")
146
145
  mtz.add_column(label=c, type="I")
147
146
  columns.append(c)
148
147
  elif skip_problem_mtztypes:
@@ -152,7 +151,7 @@ def to_gemmi(
152
151
  f"column {c} of type {cseries.dtype} cannot be written to an MTZ file. "
153
152
  f"To skip columns without explicit MTZ dtypes, set skip_problem_mtztypes=True"
154
153
  )
155
- mtz.set_data(temp[columns].to_numpy(dtype="float32"))
154
+ mtz.set_data(dataset[columns].to_numpy(dtype="float32"))
156
155
 
157
156
  # Handle Unmerged data
158
157
  if not dataset.merged and not all_in_asu:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: reciprocalspaceship
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: Tools for exploring reciprocal space
5
5
  Home-page: https://rs-station.github.io/reciprocalspaceship/
6
6
  Author: Kevin M. Dalton, Jack B. Greisman
@@ -19,11 +19,12 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
19
  Classifier: Programming Language :: Python
20
20
  Requires-Python: >=3.9
21
21
  License-File: LICENSE
22
- Requires-Dist: gemmi<=0.6.6,>=0.5.5
23
- Requires-Dist: pandas<=2.2.2,>=2.2.2
22
+ Requires-Dist: gemmi<=0.6.7,>=0.5.5
23
+ Requires-Dist: pandas<=2.2.3,>=2.2.2
24
24
  Requires-Dist: numpy
25
25
  Requires-Dist: scipy
26
26
  Requires-Dist: ipython
27
+ Requires-Dist: msgpack
27
28
  Provides-Extra: dev
28
29
  Requires-Dist: pytest; extra == "dev"
29
30
  Requires-Dist: pytest-cov; extra == "dev"
@@ -1,9 +1,9 @@
1
- reciprocalspaceship/VERSION,sha256=n9KGQtOsoZHlx_wjg8_W-rsqrIdD8Cnau4mJrFhOMbw,6
2
- reciprocalspaceship/__init__.py,sha256=69LJFzMjF05nmlwROByI53LTwM37sgrgYAp5k1n6wCs,1842
1
+ reciprocalspaceship/VERSION,sha256=9eXJU0UyhA_NRbsALmnthcYduidTRQ4mtEi33xSB4k0,6
2
+ reciprocalspaceship/__init__.py,sha256=m6pXLI-HuXwefCfSE2Rs_2McqzuHw5W6yMBXEbceke8,2034
3
3
  reciprocalspaceship/concat.py,sha256=v2eg8-RBiNLYHkkPDeaozh3HvGCaFbmlC15FaeNJMgY,1695
4
4
  reciprocalspaceship/dataseries.py,sha256=ibU1bHMd8zORFxRtDswtvLh_n-miAyBqO0ghLmY29Js,6188
5
- reciprocalspaceship/dataset.py,sha256=YUcpvaTifmlQeR4qewHkzo-RSz6DOq_xLalFRXa_O94,57008
6
- reciprocalspaceship/decorators.py,sha256=U2gfm29infWHVGzQnfnpRsjxOihDD6Iah7oHd4uD8jk,5612
5
+ reciprocalspaceship/dataset.py,sha256=6GMIMWVdKzOFhFsgODgvFn2-hrrMRMPw3-oDzlbL0YQ,57698
6
+ reciprocalspaceship/decorators.py,sha256=sZAPAV5fk5zUlwzub2VZy-u28XVNXjBpnqwnKjESWgY,5721
7
7
  reciprocalspaceship/algorithms/__init__.py,sha256=r5IYCGswTHXpSs9Q7c6PfEz8_P8d1fEei2SyTkp5aYY,258
8
8
  reciprocalspaceship/algorithms/intensity.py,sha256=iDHaqqrMAe0v-aTVT5jf54JwkNQLSQ7HhezPw6qZndg,2657
9
9
  reciprocalspaceship/algorithms/merge.py,sha256=iwPrDfjtliBwLqEzHbcIfoTkvS_0s2_CszS5IfrEUXI,2154
@@ -15,13 +15,16 @@ reciprocalspaceship/dtypes/base.py,sha256=1X56U4jKt_wjVkW930C9gP2Di0RpCMDZsDKNTx
15
15
  reciprocalspaceship/dtypes/floating.py,sha256=jOQ25GZEE4QromaJA3_oeu0Tkjq1iT4dHCke_7W6TYo,19675
16
16
  reciprocalspaceship/dtypes/inference.py,sha256=jLgF8VfKtITGRzQbfeyZzEoJ1fQlbHXB_gXIJ9-AQxk,3029
17
17
  reciprocalspaceship/dtypes/integer.py,sha256=fPaLTWfMsJ-wuEPkm9oEJez3NDqzB4XKVHFRFEb585A,15816
18
- reciprocalspaceship/dtypes/internals.py,sha256=BkkqUDEvTTlebLXjcu7EiQV295-qR7GdMJXqrOKbbU0,47596
18
+ reciprocalspaceship/dtypes/internals.py,sha256=YNv6Dz4miazjZVFJCOTFudH-0ejUbOcu_snCq1RU2Nw,47607
19
19
  reciprocalspaceship/dtypes/summarize.py,sha256=1w6-N3odFcI3ZEQP5qgrog6ucbGjO71vSgabmjklkbc,1114
20
- reciprocalspaceship/io/__init__.py,sha256=ZMQ_rGfLmfzijbErnjEFphJuZokPvZyyVRk65DC0gLA,400
20
+ reciprocalspaceship/io/__init__.py,sha256=UquHOv850aJGdKnWEG-KTkHPgye7ldYFge62O5N6G_w,476
21
21
  reciprocalspaceship/io/ccp4map.py,sha256=yztiHPTdyR9FiCKRg-eVmL-_MyZTKThPI9uuHuuPF_0,1029
22
- reciprocalspaceship/io/crystfel.py,sha256=lKpGzM2OLNXBjy6njwahtk1IsI3MH0edaGSmaQ6NbGk,21662
22
+ reciprocalspaceship/io/common.py,sha256=_XzdAFeE6B-Q_ORc4bkOR7ANwNT4dNqYtlejzYJfWxs,1055
23
+ reciprocalspaceship/io/crystfel.py,sha256=N6CufOt3yESbOC4niFVPfBx2PSp9UpLlnmsuGXDdeIM,21877
23
24
  reciprocalspaceship/io/csv.py,sha256=A2ZnqAnFwFUQskF7_3EsQAPCcrJ5KEgjhZls6MDViv8,1194
24
- reciprocalspaceship/io/mtz.py,sha256=8XqFVoSJz47vjK-kEzwSu7NxwQnEyyHd0pgt1CaBavM,8074
25
+ reciprocalspaceship/io/dials.py,sha256=FQQa3eT9TQw7h43ohyvNI3huViHE-eP9Y4IbRQL5dIc,10137
26
+ reciprocalspaceship/io/dials_mpi.py,sha256=wvm-sQqFG7N7bgcnxd5jn94eyKveimA3rvP8ns1B5Jg,1212
27
+ reciprocalspaceship/io/mtz.py,sha256=_gdlx7Vi6Z0HyFBZFP6Ptmla7Pd_mON2KaGL4Q3N7Ik,8071
25
28
  reciprocalspaceship/io/pickle.py,sha256=clnSTK8T2O_d7midS_E54WHmXEHrL10d386gWx7ztsM,818
26
29
  reciprocalspaceship/io/precognition.py,sha256=DWRE2erXPVpm9-y5DjIWUHfmv9jZcsqoa47ienp1Sao,3641
27
30
  reciprocalspaceship/stats/__init__.py,sha256=jdAWbpD_CKAn0W0sO_MKSnTu3bZSoLAXgb1_Y6jDMzk,197
@@ -41,18 +44,19 @@ reciprocalspaceship/utils/units.py,sha256=ng-2hzZBERYo9bnQDPr-HLr7xPah-JzOthfrpH
41
44
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
45
  tests/conftest.py,sha256=bQZClqzu3lonsI01OdP5X38asMd7F76fAGzlWWYPXAI,3930
43
46
  tests/test_dataseries.py,sha256=go-q5tT8lLq3tlRVnmrwUytK7PlaoKs3CBPjWryGfGg,3309
44
- tests/test_dataset.py,sha256=dMFW6-pCs1rjIYEqbfstVqFRiYwKfz5rHlncVL9grQg,22231
47
+ tests/test_dataset.py,sha256=Ir9cFhrzAtMAnoLZikrkiLqKUbDvCTr3xqdzeaKLH3M,23759
45
48
  tests/test_dataset_anomalous.py,sha256=LQb1inSS_oDbVYEIyyx_GBFAkXGlEQYZ-ZhpwMeyMmQ,6963
46
49
  tests/test_dataset_binning.py,sha256=NgD_vy-TUh3vQrUVgysVBSZu75xN66LR6hRu2_qAUTs,3564
47
50
  tests/test_dataset_grid.py,sha256=S2EswVAbcg08WT9TjLtQ3YF1_zJmEKcucHrN3Lw5EM8,4086
48
51
  tests/test_dataset_index.py,sha256=-6sMVgAKkkcYRc7UfLuVEH3p7D83o1S7e7c6MbrOrZo,2842
49
52
  tests/test_dataset_preserve_attributes.py,sha256=gwQQJGsiBZld2KKmLrcMkuc9zesR3FD7GVnPDNRScto,5314
53
+ tests/test_dataset_signatures.py,sha256=ZbH9JNzqAWJDfVh9gqZVQXx8glmmBUhsbPmQBHe8Cuo,1554
50
54
  tests/test_dataset_symops.py,sha256=PV86tLu1qDACuk-YqjYQszk8Ctb0-h_NsQRnuCDFnOU,10864
51
55
  tests/test_decorators.py,sha256=ExR7mCU0iIqhHo4ho6ywPrZIEaGcsElaI4jtH9o5afE,5331
52
56
  tests/test_summarize_mtz_dtypes.py,sha256=JE0ctXMWii1AV-cmKogF6hjb8NCHrgvxNZ0ZRCHh-Ho,696
53
- reciprocalspaceship-1.0.2.dist-info/LICENSE,sha256=E22aZlYy5qJsJCJ94EkO_Vt3COio5UcLg59dZLPam7I,1093
54
- reciprocalspaceship-1.0.2.dist-info/METADATA,sha256=36KZFStMfUhplc6K1h7vpF-FVJ-TrExqWI3XXdW5oTE,3056
55
- reciprocalspaceship-1.0.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
56
- reciprocalspaceship-1.0.2.dist-info/entry_points.txt,sha256=Bqjl2J8UrG4UAHHhPbdH5r-xYaOdLCEdyRH6zJ9joDw,76
57
- reciprocalspaceship-1.0.2.dist-info/top_level.txt,sha256=tOo679MsLFS7iwiYZDwnKTuTpJLYVFBk6g9xnnB_s-w,26
58
- reciprocalspaceship-1.0.2.dist-info/RECORD,,
57
+ reciprocalspaceship-1.0.3.dist-info/LICENSE,sha256=E22aZlYy5qJsJCJ94EkO_Vt3COio5UcLg59dZLPam7I,1093
58
+ reciprocalspaceship-1.0.3.dist-info/METADATA,sha256=nvD6MJEEv_RHH-Dx5gj5fIvyHRRoTVUfkZxts37goQ4,3079
59
+ reciprocalspaceship-1.0.3.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
60
+ reciprocalspaceship-1.0.3.dist-info/entry_points.txt,sha256=Bqjl2J8UrG4UAHHhPbdH5r-xYaOdLCEdyRH6zJ9joDw,76
61
+ reciprocalspaceship-1.0.3.dist-info/top_level.txt,sha256=tOo679MsLFS7iwiYZDwnKTuTpJLYVFBk6g9xnnB_s-w,26
62
+ reciprocalspaceship-1.0.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.44.0)
2
+ Generator: bdist_wheel (0.45.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
tests/test_dataset.py CHANGED
@@ -603,6 +603,48 @@ def test_is_isomorphous(data_unmerged, data_fmodel, sg1, sg2, cell1, cell2):
603
603
  assert not result
604
604
 
605
605
 
606
@pytest.mark.parametrize("threshold", [5.0, 1.0, 0.5, 0.1])
def test_is_isomorphous_threshold(threshold):
    """
    Test that the cell_threshold of DataSet.is_isomorphous(other, cell_threshold)
    operates on percent difference.
    """
    epsilon = 1e-12
    spacegroup = 19
    cell = np.array([34.0, 45.0, 98.0, 90.0, 90.0, 90.0])
    ds = rs.DataSet(cell=cell, spacegroup=spacegroup)

    # Scaling by this factor gives exactly `threshold` percent difference
    cell_resize_factor = (200.0 + threshold) / (200.0 - threshold)
    enlarged_cell = cell_resize_factor * cell
    shrunken_cell = cell / cell_resize_factor

    def dataset_with(other_cell):
        return rs.DataSet(cell=other_cell, spacegroup=spacegroup)

    # Construct data sets just outside and just inside the threshold
    too_big = dataset_with(enlarged_cell + epsilon)
    big = dataset_with(enlarged_cell - epsilon)
    too_small = dataset_with(shrunken_cell - epsilon)
    small = dataset_with(shrunken_cell + epsilon)

    # Cell is barely too big to be isomorphous
    assert not ds.is_isomorphous(too_big, threshold)

    # Cell is barely too small to be isomorphous
    assert not ds.is_isomorphous(too_small, threshold)

    # Cell is almost too big to be isomorphous
    assert ds.is_isomorphous(big, threshold)

    # Cell is almost too small to be isomorphous
    assert ds.is_isomorphous(small, threshold)
646
+
647
+
606
648
  def test_to_gemmi_withNans(data_merged):
607
649
  """
608
650
  GH144: Test whether DataSet.to_gemmi() works with NaN-containing data.
@@ -0,0 +1,53 @@
1
+ from inspect import signature
2
+
3
+ import pandas as pd
4
+ import pytest
5
+ from pandas.testing import assert_frame_equal
6
+
7
+ import reciprocalspaceship as rs
8
+
9
+
10
def test_reset_index_dataseries():
    """
    Minimal example from GH#223: resetting the index of a DataSeries should
    match the pandas result wrapped in a DataSet.
    """
    values = range(10)
    expected = rs.DataSet(pd.Series(values).reset_index())
    result = rs.DataSeries(values).reset_index()
    assert_frame_equal(result, expected)
18
+
19
+
20
def test_reset_index_signature(dataset_hkl):
    """
    Test that rs.DataSet.reset_index() accepts the full call signature of
    pd.DataFrame.reset_index() when every parameter is passed at its default
    """
    df = pd.DataFrame(dataset_hkl)

    # Bind the pandas signature and fill in all default values
    bound = signature(pd.DataFrame.reset_index).bind(df)
    bound.apply_defaults()
    positional = bound.args[1:]  # drop the bound DataFrame itself
    keywords = bound.kwargs

    expected = df.reset_index(*positional, **keywords)
    result = pd.DataFrame(dataset_hkl.reset_index(*positional, **keywords))

    assert_frame_equal(result, expected)
35
+
36
+
37
@pytest.mark.parametrize("names", ["H", "K", ["H", "K"]])
def test_set_index_signature(dataset_hkl, names):
    """
    Test that rs.DataSet.set_index() accepts the full call signature of
    pd.DataFrame.set_index() when every optional parameter is passed at its
    default
    """
    ds = dataset_hkl.reset_index()
    df = pd.DataFrame(ds)

    # Bind the pandas signature and fill in all default values
    bound = signature(pd.DataFrame.set_index).bind(df, names)
    bound.apply_defaults()
    positional = bound.args[1:]  # drop the bound DataFrame itself
    keywords = bound.kwargs

    expected = df.set_index(*positional, **keywords)
    result = pd.DataFrame(ds.set_index(*positional, **keywords))

    assert_frame_equal(result, expected)