reciprocalspaceship 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- 1.0.2
1
+ 1.0.4
@@ -1,8 +1,15 @@
1
1
  # Version number for reciprocalspaceship
2
2
def getVersionNumber():
    """
    Return the installed version of reciprocalspaceship.

    The version is read from the installed distribution's metadata with the
    standard-library ``importlib.metadata`` module instead of going through
    names re-exported by ``setuptools.version``, so ``setuptools`` does not
    need to be importable at runtime.

    Returns
    -------
    str
        Version string of the installed ``reciprocalspaceship`` distribution.

    Raises
    ------
    importlib.metadata.PackageNotFoundError
        If no installed distribution named ``reciprocalspaceship`` is found.
    """
    # Imported locally so the module namespace is left unchanged
    from importlib import metadata

    return metadata.version("reciprocalspaceship")
7
14
 
8
15
 
@@ -0,0 +1,115 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ Summarize the contents of a CIF file.
4
+
5
+ Examples
6
+ --------
7
+ In order to summarize contents of file.cif::
8
+
9
+ > rs.cifdump file.cif
10
+
11
+ If you would like to interactively inspect file.cif in an IPython
12
+ shell, use the "--embed" argument::
13
+
14
+ > rs.cifdump file.cif --embed
15
+
16
+ If multiple CIF files are listed, they will be summarized sequentially,
17
+ and can be accessed in an IPython shell as a dictionary called `cifs`::
18
+
19
+ > rs.cifdump file1.cif file2.cif file3.cif --embed
20
+
21
+ Usage Details
22
+ -------------
23
+ """
24
+ import argparse
25
+
26
+ import pandas as pd
27
+
28
+ import reciprocalspaceship as rs
29
+
30
+ # If matplotlib is available, use pylab to setup IPython environment
31
+ try:
32
+ from pylab import *
33
+ except:
34
+ pass
35
+
36
+
37
def parse_arguments():
    """
    Build the command-line parser for this script.

    Returns
    -------
    argparse.ArgumentParser
        Parser accepting one or more CIF paths plus the optional ``--embed``
        and ``-p/--precision`` flags. The parser is returned unparsed; the
        caller is expected to invoke ``parse_args()`` on it.
    """
    embed_help = "CIF file(s) will be summarized, and an IPython shell will be started"
    precision_help = "Number of significant digits to output for floats"

    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Positional: at least one CIF path must be given
    cli.add_argument("cif", nargs="+", help="CIF file(s) to summarize")

    # Optional flags
    cli.add_argument("--embed", action="store_true", help=embed_help)
    cli.add_argument("-p", "--precision", type=int, default=3, help=precision_help)

    return cli
64
+
65
+
66
def summarize(cif, precision):
    """
    Print a summary of a dataset read from a CIF file to stdout.

    Parameters
    ----------
    cif : object
        Object exposing ``spacegroup``, ``cell``, ``head()``, ``describe()``
        and ``dtypes`` (as used below).
    precision : int
        Value applied to the pandas ``display.precision`` option while the
        summary is printed.
    """
    with pd.option_context("display.precision", precision):
        spacegroup = cif.spacegroup
        print(f"Spacegroup: {spacegroup.short_name()}")
        print(f"Extended Hermann-Mauguin name: {spacegroup.xhm()}")

        cell = cif.cell
        lengths = f"{cell.a:.3f} {cell.b:.3f} {cell.c:.3f}"
        angles = f"{cell.alpha:.3f} {cell.beta:.3f} {cell.gamma:.3f}"
        print(f"Unit cell dimensions: {lengths} {angles}")

        # Each section is rendered lazily so output order matches evaluation order
        sections = (
            ("cif.head()", lambda: cif.head()),
            ("cif.describe()", lambda: cif.describe()),
            ("cif.dtypes", lambda: cif.dtypes),
        )
        for title, render in sections:
            print(f"\n{title}:\n\n{render()}")
81
+
82
+
83
def main():
    """
    Command-line entry point: summarize the given CIF file(s) and optionally
    start an IPython shell.

    A single file is bound to the local name ``cif``; several files are bound
    to the local dictionary ``cifs`` keyed by filename. These local names are
    the ones announced in the IPython header.
    """
    # Parse commandline arguments
    parser = parse_arguments()
    args = parser.parse_args()

    if len(args.cif) != 1:
        cifs = dict(zip(args.cif, map(rs.read_cif, args.cif)))
        for key, value in cifs.items():
            print(f"CIF file: {key}\n")
            summarize(value, args.precision)
            print(f"{'-'*50}")
    else:
        cif = rs.read_cif(args.cif[0])
        summarize(cif, args.precision)

    # Nothing more to do unless an interactive shell was requested
    if not args.embed:
        return

    from IPython import embed

    bold = "\033[1m"
    end = "\033[0m"
    header = (
        f"rs.DataSets stored in {bold}cifs{end} dictionary"
        if "cifs" in locals()
        else f"rs.DataSet stored as {bold}cif{end}"
    )
    print()
    embed(colors="neutral", header=header)
112
+
113
+
114
if __name__ == "__main__":
    # main() returns None, so there is nothing worth binding to a name here
    main()
@@ -74,9 +74,13 @@ def summarize(mtz, precision):
74
74
  f"{mtz.cell.alpha:.3f} {mtz.cell.beta:.3f} {mtz.cell.gamma:.3f}"
75
75
  )
76
76
  )
77
- print(f"\nmtz.head():\n\n{mtz.head()}")
78
- print(f"\nmtz.describe():\n\n{mtz.describe()}")
79
- print(f"\nmtz.dtypes:\n\n{mtz.dtypes}")
77
+ if mtz.cell is not None:
78
+ dHKL = mtz.compute_dHKL().dHKL
79
+ print(f"Resolution range: {dHKL.max():.3f} - {dHKL.min():.3f} Å")
80
+ with pd.option_context("display.max_rows", None):
81
+ print(f"\nmtz.head():\n\n{mtz.head()}")
82
+ print(f"\nmtz.describe().T:\n\n{mtz.describe().T}")
83
+ print(f"\nmtz.dtypes:\n\n{mtz.dtypes}")
80
84
  return
81
85
 
82
86
 
@@ -43,6 +43,23 @@ class DataSet(pd.DataFrame):
43
43
  and attributes, please see the `Pandas.DataFrame documentation`_.
44
44
 
45
45
  .. _Pandas.DataFrame documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
46
+
47
+ Attributes
48
+ ----------
49
+ acentrics : rs.DataSet
50
+ Access only the acentric reflections in this dataset
51
+ cell : gemmi.UnitCell
52
+ The unit cell
53
+ centrics : rs.DataSet
54
+ Access only the centric reflections in this dataset
55
+ hkls : ndarray, shape=(n_reflections, 3)
56
+ Miller indices in DataSet.
57
+ merged : bool
58
+ Whether this is a merged dataset or unmerged
59
+ spacegroup : gemmi.SpaceGroup
60
+ The space group
61
+ reindexing_ops : list
62
+ Possible reindexing ops consistent with the cell and spacegroup
46
63
  """
47
64
 
48
65
  _metadata = ["_spacegroup", "_cell", "_index_dtypes", "_merged"]
@@ -131,6 +148,38 @@ class DataSet(pd.DataFrame):
131
148
  def merged(self, val):
132
149
  self._merged = val
133
150
 
151
+ @property
152
+ @range_indexed
153
+ def hkls(self):
154
+ """Miller indices"""
155
+ hkl = self[["H", "K", "L"]].to_numpy(dtype=np.int32)
156
+ return hkl
157
+
158
+ def get_hkls(self):
159
+ """Get the Miller indices of the dataset."""
160
+ return self.hkls
161
+
162
+ @hkls.setter
163
+ @range_indexed
164
+ def hkls(self, hkls):
165
+ if isinstance(hkls, DataSet):
166
+ """Convert to numpy if hkls is a dataset"""
167
+ hkls = hkls.hkls
168
+ if isinstance(hkls, np.ndarray):
169
+ h, k, l = hkls[..., 0], hkls[..., 1], hkls[..., 2]
170
+ else:
171
+ """Try coercing to numpy"""
172
+ try:
173
+ hkls = np.array(hkls)
174
+ h, k, l = hkls[..., 0], hkls[..., 1], hkls[..., 2]
175
+ except:
176
+ raise ValueError(
177
+ "Unable to convert hkls to a suitable type. Please ensure hkls is a numpy array or rs.DataSet"
178
+ )
179
+ self["H"] = DataSeries(h, index=self.index, dtype="H")
180
+ self["K"] = DataSeries(k, index=self.index, dtype="H")
181
+ self["L"] = DataSeries(l, index=self.index, dtype="H")
182
+
134
183
  @property
135
184
  def centrics(self):
136
185
  """Access centric reflections in DataSet"""
@@ -258,7 +307,14 @@ class DataSet(pd.DataFrame):
258
307
  )
259
308
 
260
309
  def reset_index(
261
- self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
310
+ self,
311
+ level=None,
312
+ drop=False,
313
+ inplace=False,
314
+ col_level=0,
315
+ col_fill="",
316
+ allow_duplicates=lib.no_default,
317
+ names=None,
262
318
  ):
263
319
  """
264
320
  Reset the index or a specific level of a MultiIndex.
@@ -281,6 +337,12 @@ class DataSet(pd.DataFrame):
281
337
  col_fill : object
282
338
  If the columns have multiple levels, determines how the other
283
339
  levels are named. If None then the index name is repeated.
340
+ allow_duplicates : bool
341
+ Allow duplicate column labels to be created.
342
+ names : int, str, tuple, list
343
+ Using the given string, rename the DataSet column which contains the
344
+ index data. If the DataSet has a MultiIndex, this has to be a list or
345
+ tuple with length equal to the number of levels.
284
346
 
285
347
  Returns
286
348
  -------
@@ -317,6 +379,8 @@ class DataSet(pd.DataFrame):
317
379
  inplace=inplace,
318
380
  col_level=col_level,
319
381
  col_fill=col_fill,
382
+ allow_duplicates=allow_duplicates,
383
+ names=names,
320
384
  )
321
385
  _handle_cached_dtypes(self, columns, drop)
322
386
  return
@@ -327,6 +391,8 @@ class DataSet(pd.DataFrame):
327
391
  inplace=inplace,
328
392
  col_level=col_level,
329
393
  col_fill=col_fill,
394
+ allow_duplicates=allow_duplicates,
395
+ names=names,
330
396
  )
331
397
  dataset._index_dtypes = dataset._index_dtypes.copy()
332
398
  dataset = _handle_cached_dtypes(dataset, columns, drop)
@@ -406,6 +472,7 @@ class DataSet(pd.DataFrame):
406
472
  """
407
473
  return cls(gemmiMtz)
408
474
 
475
+ @range_indexed
409
476
  def to_gemmi(
410
477
  self,
411
478
  skip_problem_mtztypes=False,
@@ -575,6 +642,7 @@ class DataSet(pd.DataFrame):
575
642
  result = super().join(*args, **kwargs)
576
643
  return result.__finalize__(self)
577
644
 
645
+ @range_indexed
578
646
  def write_mtz(
579
647
  self,
580
648
  mtzfile,
@@ -1158,7 +1226,7 @@ class DataSet(pd.DataFrame):
1158
1226
 
1159
1227
  return result
1160
1228
 
1161
- def is_isomorphous(self, other, cell_threshold=0.05):
1229
+ def is_isomorphous(self, other, cell_threshold=0.5):
1162
1230
  """
1163
1231
  Determine whether DataSet is isomorphous to another DataSet. This
1164
1232
  method confirms isomorphism by ensuring the spacegroups are equivalent,
@@ -1195,7 +1263,8 @@ class DataSet(pd.DataFrame):
1195
1263
  for param in params:
1196
1264
  param1 = self.cell.__getattribute__(param)
1197
1265
  param2 = other.cell.__getattribute__(param)
1198
- if (np.abs((param1 - param2)) / 100.0) > cell_threshold:
1266
+ diff = 200.0 * np.abs(param1 - param2) / (param1 + param2)
1267
+ if diff > cell_threshold:
1199
1268
  return False
1200
1269
 
1201
1270
  return True
@@ -1565,24 +1634,6 @@ class DataSet(pd.DataFrame):
1565
1634
  warnings.simplefilter("always")
1566
1635
  warnings.warn(message, DeprecationWarning)
1567
1636
 
1568
- if dmin is not None:
1569
- ds = self.loc[self.compute_dHKL().dHKL >= dmin, [key]]
1570
- else:
1571
- ds = self.loc[:, [key]]
1572
-
1573
- if gridsize is None:
1574
- gridsize = self.get_reciprocal_grid_size(dmin=dmin, sample_rate=sample_rate)
1575
-
1576
- # Set up P1 unit cell
1577
- p1 = ds.expand_to_p1()
1578
- p1 = p1.expand_anomalous()
1579
-
1580
- # Get data and indices
1581
- data = p1[key].to_numpy()
1582
- H = p1.get_hkls()
1583
-
1584
- # Populate grid
1585
- grid = np.zeros(gridsize, dtype=data.dtype)
1586
- grid[H[:, 0], H[:, 1], H[:, 2]] = data
1587
-
1588
- return grid
1637
+ return self.to_reciprocal_grid(
1638
+ key, sample_rate=sample_rate, dmin=dmin, grid_size=gridsize
1639
+ )
@@ -4,6 +4,8 @@ from inspect import signature
4
4
  import gemmi
5
5
  import numpy as np
6
6
 
7
+ import reciprocalspaceship as rs
8
+
7
9
 
8
10
  def inplace(f):
9
11
  """
@@ -46,9 +48,11 @@ def range_indexed(f):
46
48
  names = ds.index.names
47
49
  ds = ds._index_from_names([None], inplace=True)
48
50
  result = f(ds, *args, **kwargs)
49
- result = result._index_from_names(names, inplace=True)
50
51
  ds = ds._index_from_names(names, inplace=True)
51
- return result.__finalize__(ds)
52
+ if isinstance(result, rs.DataSet):
53
+ result = result._index_from_names(names, inplace=True)
54
+ result = result.__finalize__(ds)
55
+ return result
52
56
 
53
57
  return wrapped
54
58
 
@@ -1359,7 +1359,7 @@ class NumericArray(BaseMaskedArray):
1359
1359
 
1360
1360
  @wraps(libmissing.is_numeric_na)
1361
1361
  def is_numeric_na(values):
1362
- allowed_dtypes = ("float32", "int32")
1362
+ allowed_dtypes = ("float64", "float32", "int32")
1363
1363
  if isinstance(values, np.ndarray) and values.dtype in allowed_dtypes:
1364
1364
  return np.isnan(values)
1365
1365
  return libmissing.is_numeric_na(values)
@@ -1,6 +1,7 @@
1
1
  from reciprocalspaceship.io.ccp4map import write_ccp4_map
2
2
  from reciprocalspaceship.io.crystfel import read_crystfel
3
3
  from reciprocalspaceship.io.csv import read_csv
4
+ from reciprocalspaceship.io.dials import print_refl_info, read_dials_stills
4
5
  from reciprocalspaceship.io.mtz import (
5
6
  from_gemmi,
6
7
  read_cif,
@@ -0,0 +1,48 @@
1
+ import logging
2
+ import warnings
3
+ from contextlib import contextmanager
4
+ from importlib.util import find_spec
5
+
6
+
7
def set_ray_loglevel(level):
    """
    Apply `level` to the "ray" logger and to every handler attached to it.

    Parameters
    ----------
    level : int or str
        Logging level passed unchanged to ``setLevel``.
    """
    ray_logger = logging.getLogger("ray")
    # Logger first, then its handlers
    for target in (ray_logger, *ray_logger.handlers):
        target.setLevel(level)
12
+
13
+
14
def check_for_ray():
    """
    Report whether the ``ray`` package can be located.

    Returns
    -------
    bool
        True if ``find_spec("ray")`` finds the package. Otherwise an
        ImportWarning is issued and False is returned.
    """
    if find_spec("ray") is not None:
        return True

    message = (
        "ray (https://www.ray.io/) is not available..." "Falling back to serial."
    )
    warnings.warn(message, ImportWarning)
    return False
24
+
25
+
26
def check_for_mpi():
    """
    Report whether ``from mpi4py import MPI`` succeeds.

    Returns
    -------
    bool
        True if the import works. If it raises any Exception, an
        ImportWarning describing the failure is issued and False is returned.
    """
    try:
        from mpi4py import MPI  # noqa: F401  (imported only to probe availability)
    except Exception as err:
        message = (
            f"Failed `from mpi4py import MPI` with {err}. Falling back to serial mode."
        )
        warnings.warn(message, ImportWarning)
        return False
    return True
37
+
38
+
39
@contextmanager
def ray_context(log_level="DEBUG", **ray_kwargs):
    """
    Context manager that initializes ray on entry and shuts it down on exit.

    Parameters
    ----------
    log_level : int or str
        Level applied to the "ray" logger via ``set_ray_loglevel`` before
        ray is initialized.
    **ray_kwargs
        Forwarded unchanged to ``ray.init``.

    Yields
    ------
    module
        The imported ``ray`` module.
    """
    # Imported lazily so that ray is only needed when this context is used
    import ray as ray_module

    set_ray_loglevel(log_level)
    ray_module.init(**ray_kwargs)
    try:
        yield ray_module
    finally:
        # Runs even if the body of the `with` block raises
        ray_module.shutdown()
@@ -1,13 +1,12 @@
1
1
  import mmap
2
2
  import re
3
- from contextlib import contextmanager
4
- from importlib.util import find_spec
5
3
  from typing import Union
6
4
 
7
5
  import gemmi
8
6
  import numpy as np
9
7
 
10
8
  from reciprocalspaceship import DataSet, concat
9
+ from reciprocalspaceship.io.common import check_for_ray, ray_context
11
10
  from reciprocalspaceship.utils import angle_between, eV2Angstroms
12
11
 
13
12
  # See Rupp Table 5-2
@@ -60,17 +59,6 @@ _block_markers = {
60
59
  }
61
60
 
62
61
 
63
- @contextmanager
64
- def ray_context(**ray_kwargs):
65
- import ray
66
-
67
- ray.init(**ray_kwargs)
68
- try:
69
- yield ray
70
- finally:
71
- ray.shutdown()
72
-
73
-
74
62
  class StreamLoader(object):
75
63
  """
76
64
  An object that loads stream files into rs.DataSet objects in parallel.
@@ -304,15 +292,7 @@ class StreamLoader(object):
304
292
 
305
293
  # Check whether ray is available
306
294
  if use_ray:
307
- if find_spec("ray") is None:
308
- use_ray = False
309
- import warnings
310
-
311
- message = (
312
- "ray (https://www.ray.io/) is not available..."
313
- "Falling back to serial stream file parser."
314
- )
315
- warnings.warn(message, ImportWarning)
295
+ use_ray = check_for_ray()
316
296
 
317
297
  with open(self.filename, "r") as f:
318
298
  memfile = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
@@ -507,10 +487,9 @@ def read_crystfel(
507
487
  The type of byte-encoding (optional, 'utf-8').
508
488
  columns : list (optional)
509
489
  Optionally specify the columns of the output by a list of strings.
510
- The default list is:
511
- [ "H", "K", "L", "I", "SigI", "BATCH", "s1x", "s1y", "s1z", "ewald_offset",
512
- "angular_ewald_offset", "XDET", "YDET" ]
513
- See `rs.io.crystfel.StreamLoader().available_column_names` for a list of available column names.
490
+ The default list is: [ "H", "K", "L", "I", "SigI", "BATCH", "s1x", "s1y", "s1z", "ewald_offset", "angular_ewald_offset", "XDET", "YDET" ]
491
+ See `rs.io.crystfel.StreamLoader().available_column_names` for a list of available
492
+ column names and *Notes* for a description of the returned columns
514
493
  parallel : bool (optional)
515
494
  Read the stream file in parallel using [ray.io](https://docs.ray.io) if it is available.
516
495
  num_cpus : int (optional)
@@ -524,6 +503,18 @@ def read_crystfel(
524
503
  Returns
525
504
  --------
526
505
  rs.DataSet
506
+
507
+ Notes
508
+ -----
509
+ The following columns are included in the returned DataSet object:
510
+
511
+ - H, K, L: Miller indices of each reflection
512
+ - I, SigI: Intensity and associated uncertainty
513
+ - BATCH: Image number
514
+ - s1x, s1y, s1z: scattered beam wavevector which points from the sample to the bragg peak
515
+ - ewald_offset: the distance in cartesian space (1/angstroms) between the observed reflection and the ewald sphere
516
+ - angular_ewald_offset: the distance in polar coordinates (degrees) between the observed reflection and the ewald sphere
517
+ - XDET, YDET: Internal detector panel coordinates
527
518
  """
528
519
  if not streamfile.endswith(".stream"):
529
520
  raise ValueError("Stream file should end with .stream")
@@ -0,0 +1,330 @@
1
+ import logging
2
+ import sys
3
+
4
+ import msgpack
5
+ import numpy as np
6
+ import pandas
7
+
8
+ LOGGER = logging.getLogger("rs.io.dials")
9
+ if not LOGGER.handlers:
10
+ LOGGER.setLevel(logging.DEBUG)
11
+ console = logging.StreamHandler(stream=sys.stdout)
12
+ console.setLevel(logging.DEBUG)
13
+ LOGGER.addHandler(console)
14
+
15
+ import reciprocalspaceship as rs
16
+ from reciprocalspaceship.decorators import cellify, spacegroupify
17
+ from reciprocalspaceship.io.common import check_for_ray, set_ray_loglevel
18
+
19
+ MSGPACK_DTYPES = {
20
+ "double": np.float64,
21
+ "float": np.float32,
22
+ "int": np.int32,
23
+ "cctbx::miller::index<>": np.int32,
24
+ "vec3<double>": np.float64,
25
+ "std::size_t": np.intp,
26
+ }
27
+
28
+ DEFAULT_COLS = [
29
+ "miller_index",
30
+ "intensity.sum.value",
31
+ "intensity.sum.variance",
32
+ "xyzcal.px",
33
+ "s1",
34
+ "delpsical.rad",
35
+ "id",
36
+ ]
37
+
38
+
39
+ def _set_logger(verbose):
40
+ level = logging.CRITICAL
41
+ if verbose:
42
+ level = logging.DEBUG
43
+
44
+ for log_name in ("rs.io.dials", "ray"):
45
+ logger = logging.getLogger(log_name)
46
+ logger.setLevel(level)
47
+ for handler in logger.handlers:
48
+ handler.setLevel(level)
49
+
50
+
51
def get_msgpack_data(data, name):
    """
    Decode one entry of a msgpack data dict into numpy column arrays.

    Parameters
    ----------
    data : dict
        msgpack data dict; ``data[name]`` is unpacked as
        ``(dtype, (num, buff))``.
    name : str
        msgpack data key

    Returns
    -------
    dict
        Maps column names to 1D numpy arrays. The buffer is reshaped to
        ``(num, -1)``; if that yields a single column it is stored under
        ``name``, otherwise the columns are stored as ``f"{name}.{i}"``.

    Warns
    -----
    RuntimeWarning
        If the entry's dtype string is not a key of ``MSGPACK_DTYPES``. The
        buffer is then decoded with numpy's default dtype, which may not
        match how the data were written.
    """
    import warnings

    type_name, (num, buff) = data[name]
    dtype = MSGPACK_DTYPES.get(type_name)
    if dtype is None:
        # Previously silent: make the fallback visible to the caller
        warnings.warn(
            f"Unrecognized msgpack dtype '{type_name}' for column '{name}'; "
            "decoding the buffer with numpy's default dtype.",
            RuntimeWarning,
        )
    columns = np.frombuffer(buff, dtype).reshape((num, -1)).T

    # Scalar-per-row data keeps the bare name (no ".0" suffix)
    if len(columns) == 1:
        return {name: columns[0]}
    return {f"{name}.{i}": col_data for i, col_data in enumerate(columns)}
78
+
79
+
80
def _concat(refl_data):
    """
    Combine the output of _get_refl_data into a single DataSet.

    Parameters
    ----------
    refl_data : rs.DataSet or iterable of rs.DataSet / None
        A single DataSet is used as-is; otherwise ``None`` entries are
        dropped and the remainder is concatenated with ``rs.concat``.

    Returns
    -------
    rs.DataSet
        DataSet whose BATCH column has been replaced by integer ids and
        whose ``miller_index.{0,1,2}`` columns are renamed to H, K, L.
    """
    LOGGER.debug("Combining and formatting tables!")
    if isinstance(refl_data, rs.DataSet):
        ds = refl_data
    else:
        refl_data = [ds for ds in refl_data if ds is not None]
        ds = rs.concat(refl_data, check_isomorphous=False)
    # dict.fromkeys keeps first-appearance order, so the integer assigned to
    # each experiment is reproducible between runs (set() iteration order of
    # strings is not).
    expt_ids = list(dict.fromkeys(ds.BATCH))
    LOGGER.debug(f"Found {len(ds)} refls from {len(expt_ids)} expts.")
    LOGGER.debug("Mapping batch column.")
    expt_id_map = {name: i for i, name in enumerate(expt_ids)}
    ds.BATCH = [expt_id_map[eid] for eid in ds.BATCH]
    rename_map = {"miller_index.0": "H", "miller_index.1": "K", "miller_index.2": "L"}
    ds.rename(columns=rename_map, inplace=True)
    LOGGER.debug("Finished combining tables!")
    return ds
97
+
98
+
99
def _get_refl_data(fname, unitcell, spacegroup, extra_cols=None):
    """
    Read one integrated reflection file into a DataSet.

    Parameters
    ----------
    fname : str
        integrated refl file
    unitcell : gemmi.UnitCell
        Cell assigned to the returned DataSet
    spacegroup : gemmi.SpaceGroup
        Spacegroup assigned to the returned DataSet
    extra_cols : str or iterable of str (optional)
        Additional column names to read on top of ``DEFAULT_COLS``. Any
        iterable is accepted (not only a list); a lone string is treated as
        a single column name.

    Returns
    -------
    rs.DataSet
        Columns found in the file among the requested names, with ``id``
        mapped through the file's identifiers into ``BATCH`` and a
        ``PARTIAL`` column set to True.

    Raises
    ------
    IOError
        If the reflection table has no ``miller_index`` column.
    """
    LOGGER.debug(f"Loading {fname}")
    pack = _get_refl_pack(fname)
    refl_data = pack["data"]
    expt_id_map = pack["identifiers"]

    if "miller_index" not in refl_data:
        raise IOError("refl table must have a miller_index column")

    col_names = list(DEFAULT_COLS)
    if extra_cols is not None:
        if isinstance(extra_cols, str):
            extra_cols = [extra_cols]
        col_names.extend(extra_cols)

    ds_data = {}
    for col_name in col_names:
        if col_name in refl_data:
            col_data = get_msgpack_data(refl_data, col_name)
            LOGGER.debug(f"... Read in data for {col_name}")
            # Newly read columns go in front; already-read keys win on clashes
            ds_data = {**col_data, **ds_data}

    if "id" in ds_data:
        ds_data["BATCH"] = np.array([expt_id_map[li] for li in ds_data.pop("id")])
    ds = rs.DataSet(
        ds_data,
        cell=unitcell,
        spacegroup=spacegroup,
    )
    ds["PARTIAL"] = True
    return ds
139
+
140
+
141
+ def _read_dials_stills_serial(fnames, unitcell, spacegroup, extra_cols=None, **kwargs):
142
+ """run read_dials_stills without trying to import ray"""
143
+ result = [
144
+ _get_refl_data(fname, unitcell, spacegroup, extra_cols) for fname in fnames
145
+ ]
146
+ return result
147
+
148
+
149
def _read_dials_stills_ray(fnames, unitcell, spacegroup, numjobs=10, extra_cols=None):
    """
    Read the files in `fnames` through ray remote calls.

    Parameters
    ----------
    fnames : iterable of str
        integration files
    unitcell : gemmi.UnitCell
        Passed through to ``_get_refl_data``
    spacegroup : gemmi.SpaceGroup
        Passed through to ``_get_refl_data``
    numjobs : int
        Passed to ray as ``num_cpus``
    extra_cols : list (optional)
        Additional columns to read from the refl tables

    Returns
    -------
    list
        The values returned by ``ray.get`` for one remote ``_get_refl_data``
        call per file.
    """
    from reciprocalspaceship.io.common import ray_context

    # Only forward worker logs to the driver when this module logs at DEBUG
    debugging = LOGGER.level == logging.DEBUG
    with ray_context(
        log_level=LOGGER.level,
        num_cpus=numjobs,
        log_to_driver=debugging,
    ) as ray:
        remote_reader = ray.remote(_get_refl_data)
        pending = [
            remote_reader.remote(fname, unitcell, spacegroup, extra_cols)
            for fname in fnames
        ]
        refl_data = ray.get(pending)
    return refl_data
180
+
181
+
182
def dials_to_mtz_dtypes(ds, inplace=True):
    """
    Coerce the dtypes in ds into ones that can be written to an mtz file.
    This will downcast doubles to single precision. If "variance" columns
    are present, they will be converted to "sigma" and assigned
    StandardDeviationDtype.

    Parameters
    ----------
    ds : rs.DataSet
    inplace : bool (optional)
        Convert ds dtypes in place without making a copy. Defaults to True.
        If False, the conversion is applied to a copy and `ds` is left
        untouched.

    Returns
    -------
    ds : rs.DataSet
        The converted DataSet (`ds` itself when inplace is True).
    """
    if not inplace:
        # Honor the documented contract: do not touch the caller's data
        ds = ds.copy()

    rename_map = {}
    # Snapshot the column labels because columns are reassigned in the loop
    for name in list(ds):
        if isinstance(name, str) and "variance" in name:
            new_name = name.replace("variance", "sigma")
            rename_map[name] = new_name
            ds[name] = np.sqrt(ds[name]).astype("Q")
            LOGGER.debug(
                f"Converted column {name} to MTZ-Type Q, took sqrt of the values, and renamed to {new_name}."
            )
    ds.rename(columns=rename_map, inplace=True)
    ds.infer_mtz_dtypes(inplace=True)
    return ds
211
+
212
+
213
@cellify
@spacegroupify
def read_dials_stills(
    fnames,
    unitcell=None,
    spacegroup=None,
    numjobs=10,
    parallel_backend=None,
    extra_cols=None,
    verbose=False,
    comm=None,
    mtz_dtypes=False,
):
    """
    Read reflections from still images processed by DIALS from fnames and return
    them as a DataSet. By default, this function will not convert the data from
    dials into an MTZ compatible format.

    Parameters
    ----------
    fnames : list or tuple or string or os.PathLike
        A list or tuple of filenames or a single filename.
    unitcell : gemmi.UnitCell or similar (optional)
        The unit cell assigned to the returned dataset.
    spacegroup : gemmi.SpaceGroup or similar (optional)
        The spacegroup assigned to the returned dataset.
    numjobs : int
        If backend==ray, specify the number of jobs (ignored if backend==mpi).
    parallel_backend : string (optional)
        "ray", "mpi", or None for serial.
    extra_cols : list (optional)
        Optional list of additional column names to extract from the refltables. By default, this method will search for
        miller_index, id, s1, xyzcal.px, intensity.sum.value, intensity.sum.variance, delpsical.rad
    verbose : bool
        Whether to print logging info to stdout
    comm : mpi4py.MPI.Comm
        Optionally override the communicator used by backend='mpi'
    mtz_dtypes : bool (optional)
        Optionally convert columns to mtz compatible dtypes. Note this will downcast double precision (64-bit)
        floats to single precision (32-bit).

    Returns
    -------
    ds : rs.DataSet
        The dataset containing reflection info aggregated from fnames, or None if the selected reader
        returned None. Unless mtz_dtypes is True, the columns are not converted to native rs MTZ dtypes.
        DIALS data are natively double precision (64-bit). Converting to MTZ will downcast them to 32-bit.
        Use ds.infer_mtz_dtypes() to convert to native rs dtypes if required.

    Raises
    ------
    NotImplementedError
        If parallel_backend is not "ray", "mpi", or None.
    """
    import os

    _set_logger(verbose)
    # A single path, given as str or os.PathLike, becomes a one-element list
    if isinstance(fnames, (str, os.PathLike)):
        fnames = [fnames]

    if parallel_backend not in ["ray", "mpi", None]:
        raise NotImplementedError("parallel_backend should be ray, mpi, or none")

    kwargs = {
        "fnames": fnames,
        "unitcell": unitcell,
        "spacegroup": spacegroup,
        "extra_cols": extra_cols,
    }
    # Serial reading is the fallback whenever the requested backend is missing
    reader = _read_dials_stills_serial
    if parallel_backend == "ray":
        kwargs["numjobs"] = numjobs
        from reciprocalspaceship.io.common import check_for_ray

        if check_for_ray():
            reader = _read_dials_stills_ray
    elif parallel_backend == "mpi":
        from reciprocalspaceship.io.common import check_for_mpi

        if check_for_mpi():
            from reciprocalspaceship.io.dials_mpi import read_dials_stills_mpi as reader

            kwargs["comm"] = comm
    result = reader(**kwargs)
    if result is not None:
        result = _concat(result)
        if mtz_dtypes:
            dials_to_mtz_dtypes(result, inplace=True)
    return result
294
+
295
+
296
def _get_refl_pack(filename):
    """
    Load a msgpack file and return the third element of its top-level
    three-item structure.

    Parameters
    ----------
    filename : str
        Path of the msgpack file to read.

    Returns
    -------
    object
        The last of the three top-level items stored in the file.

    Raises
    ------
    IOError
        If the top-level object does not consist of exactly three items.
    """
    message = "File does not appear to be dials::af::reflection_table"

    # Context manager so the file handle is closed even if decoding fails
    with open(filename, "rb") as handle:
        pack = msgpack.load(handle, strict_map_key=False)

    try:
        # Explicit check instead of `assert`, which is stripped under -O
        if len(pack) != 3:
            raise IOError(message)
        _, _, pack = pack
    except TypeError as err:
        raise IOError(message) from err
    return pack
304
+
305
+
306
def print_refl_info(reflfile):
    """
    Print contents of `reflfile`, a reflection table file saved with DIALS.

    Prints, when present in the file: the experiment identifiers, a table of
    column names and their dtype strings, and the number of reflections.

    Parameters
    ----------
    reflfile : str
        Path of the reflection table file.
    """
    pack = _get_refl_pack(reflfile)
    if "identifiers" in pack:
        idents = pack["identifiers"]
        print(f"\nFound {len(idents)} experiment identifiers in {reflfile}:")
        for i, ident in idents.items():
            print(f"\t{i}: {ident}")
    if "data" in pack:
        data = pack["data"]
        # Collected as two lists so that an empty table does not break unpacking
        names = []
        dtypes = []
        col_space = 0
        for name in data:
            dtype, (_, _) = data[name]
            names.append(name)
            dtypes.append(dtype)
            col_space = max(len(dtype), len(name), col_space)
        df = pandas.DataFrame({"names": names, "dtypes": dtypes})
        print(
            "\nReflection contents:\n"
            + df.to_string(index=False, col_space=col_space + 5, justify="center")
        )

    if "nrows" in pack:
        print(f"\nNumber of reflections: {pack['nrows']} \n")
@@ -0,0 +1,44 @@
1
+ from itertools import chain
2
+
3
+ from reciprocalspaceship.decorators import cellify, spacegroupify
4
+ from reciprocalspaceship.io import dials
5
+
6
+
7
def mpi_starmap(comm, func, iterable):
    """
    Apply `func` to argument tuples, sharing the work between MPI ranks.

    Item ``i`` of `iterable` is processed by the rank for which
    ``i % comm.size == comm.rank``; the per-rank result lists are then
    collected with ``comm.gather``.

    Returns
    -------
    iterator or None
        On rank 0, an iterator chaining the gathered per-rank result lists.
        On every other rank, None.
    """
    mine = [
        func(*args)
        for index, args in enumerate(iterable)
        if index % comm.size == comm.rank
    ]
    gathered = comm.gather(mine)
    if comm.rank != 0:
        return None
    return chain.from_iterable(gathered)
16
+
17
+
18
@cellify
@spacegroupify
def read_dials_stills_mpi(fnames, unitcell, spacegroup, extra_cols=None, comm=None):
    """
    Read integrated reflection tables with the work split across MPI ranks.

    Parameters
    ----------
    fnames : iterable of str
        integrated reflection tables
    unitcell : unit cell tuple (6 params Ang,Ang,Ang,deg,deg,deg)
    spacegroup : space group name e.g. P4
    extra_cols : list (optional)
        Additional column names to read from the refl table
    comm : MPI communicator (optional)
        Optionally override the MPI communicator. The default is MPI.COMM_WORLD

    Returns
    -------
    The value returned by ``mpi_starmap``: the per-file results of
    ``dials._get_refl_data`` on MPI rank 0, None on every other rank.
    """
    if comm is None:
        # mpi4py is only imported when no communicator was supplied
        from mpi4py import MPI

        comm = MPI.COMM_WORLD

    reader_args = ((f, unitcell, spacegroup, extra_cols) for f in fnames)
    return mpi_starmap(comm, dials._get_refl_data, reader_args)
@@ -133,16 +133,15 @@ def to_gemmi(
133
133
  mtz.datasets[0].dataset_name = dataset_name
134
134
 
135
135
  # Construct data for Mtz object
136
- temp = dataset.reset_index()
136
+ # GH#255: DataSet is provided using the range_indexed decorator
137
137
  columns = []
138
- for c in temp.columns:
139
- cseries = temp[c]
138
+ for c in dataset.columns:
139
+ cseries = dataset[c]
140
140
  if isinstance(cseries.dtype, MTZDtype):
141
141
  mtz.add_column(label=c, type=cseries.dtype.mtztype)
142
142
  columns.append(c)
143
143
  # Special case for CENTRIC and PARTIAL flags
144
144
  elif cseries.dtype.name == "bool" and c in ["CENTRIC", "PARTIAL"]:
145
- temp[c] = temp[c].astype("MTZInt")
146
145
  mtz.add_column(label=c, type="I")
147
146
  columns.append(c)
148
147
  elif skip_problem_mtztypes:
@@ -152,7 +151,7 @@ def to_gemmi(
152
151
  f"column {c} of type {cseries.dtype} cannot be written to an MTZ file. "
153
152
  f"To skip columns without explicit MTZ dtypes, set skip_problem_mtztypes=True"
154
153
  )
155
- mtz.set_data(temp[columns].to_numpy(dtype="float32"))
154
+ mtz.set_data(dataset[columns].to_numpy(dtype="float32"))
156
155
 
157
156
  # Handle Unmerged data
158
157
  if not dataset.merged and not all_in_asu:
@@ -31,7 +31,7 @@ def read_precognition(hklfile, spacegroup=None, cell=None, logfile=None):
31
31
  F = pd.read_csv(
32
32
  hklfile,
33
33
  header=None,
34
- delim_whitespace=True,
34
+ sep="\\s+",
35
35
  names=["H", "K", "L", "F(+)", "SigF(+)", "F(-)", "SigF(-)"],
36
36
  usecols=usecols,
37
37
  )
@@ -49,7 +49,7 @@ def read_precognition(hklfile, spacegroup=None, cell=None, logfile=None):
49
49
  F = pd.read_csv(
50
50
  hklfile,
51
51
  header=None,
52
- delim_whitespace=True,
52
+ sep="\\s+",
53
53
  names=[
54
54
  "H",
55
55
  "K",
@@ -29,7 +29,7 @@ def compute_dHKL(H, cell):
29
29
  if inverse.shape[-1] == 1:
30
30
  inverse = inverse.squeeze(-1)
31
31
 
32
- F = np.array(cell.fractionalization_matrix.tolist()).astype(np.float64)
32
+ F = np.array(cell.frac.mat, dtype=np.float64)
33
33
  dhkls = np.reciprocal(np.linalg.norm((hkls @ F), 2, 1)).astype(np.float32)
34
34
  return dhkls[inverse]
35
35
 
@@ -50,6 +50,7 @@ def get_reciprocal_grid_size(cell, dmin, sample_rate=3.0, spacegroup=None):
50
50
 
51
51
  # Use gemmi.Mtz to find valid grid (FFT-friendly and obeys symmetry)
52
52
  m = gemmi.Mtz()
53
- m.spacegroup = spacegroup
53
+ if spacegroup is not None:
54
+ m.spacegroup = spacegroup
54
55
 
55
56
  return m.get_size_for_hkl(min_size=min_size)
@@ -131,4 +131,4 @@ def is_absent(H, spacegroup):
131
131
  absent : array
132
132
  Boolean array of length n. absent[i] == True if H[i] is systematically absent in sg.
133
133
  """
134
- return spacegroup.operations().systematic_absences(H)
134
+ return spacegroup.operations().systematic_absences(np.array(H, dtype=np.int32))
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: reciprocalspaceship
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: Tools for exploring reciprocal space
5
5
  Home-page: https://rs-station.github.io/reciprocalspaceship/
6
6
  Author: Kevin M. Dalton, Jack B. Greisman
@@ -19,18 +19,19 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
19
  Classifier: Programming Language :: Python
20
20
  Requires-Python: >=3.9
21
21
  License-File: LICENSE
22
- Requires-Dist: gemmi<=0.6.6,>=0.5.5
23
- Requires-Dist: pandas<=2.2.2,>=2.2.2
22
+ Requires-Dist: gemmi<=0.7.1,>=0.7.0
23
+ Requires-Dist: pandas<=2.2.3,>=2.2.2
24
24
  Requires-Dist: numpy
25
25
  Requires-Dist: scipy
26
26
  Requires-Dist: ipython
27
+ Requires-Dist: msgpack
27
28
  Provides-Extra: dev
28
29
  Requires-Dist: pytest; extra == "dev"
29
30
  Requires-Dist: pytest-cov; extra == "dev"
30
31
  Requires-Dist: pytest-xdist; extra == "dev"
31
32
  Requires-Dist: ray; extra == "dev"
32
33
  Requires-Dist: sphinx; extra == "dev"
33
- Requires-Dist: sphinx-rtd-theme; extra == "dev"
34
+ Requires-Dist: sphinx_rtd_theme; extra == "dev"
34
35
  Requires-Dist: nbsphinx; extra == "dev"
35
36
  Requires-Dist: sphinx-design; extra == "dev"
36
37
  Requires-Dist: sphinxcontrib-autoprogram; extra == "dev"
@@ -48,6 +49,18 @@ Requires-Dist: matplotlib; extra == "examples"
48
49
  Requires-Dist: seaborn; extra == "examples"
49
50
  Requires-Dist: celluloid; extra == "examples"
50
51
  Requires-Dist: scikit-image; extra == "examples"
52
+ Dynamic: author
53
+ Dynamic: author-email
54
+ Dynamic: classifier
55
+ Dynamic: description
56
+ Dynamic: home-page
57
+ Dynamic: license
58
+ Dynamic: license-file
59
+ Dynamic: project-url
60
+ Dynamic: provides-extra
61
+ Dynamic: requires-dist
62
+ Dynamic: requires-python
63
+ Dynamic: summary
51
64
 
52
65
 
53
66
  ``reciprocalspaceship`` provides a ``pandas``-style interface for
@@ -1,58 +1,63 @@
1
- reciprocalspaceship/VERSION,sha256=n9KGQtOsoZHlx_wjg8_W-rsqrIdD8Cnau4mJrFhOMbw,6
2
- reciprocalspaceship/__init__.py,sha256=69LJFzMjF05nmlwROByI53LTwM37sgrgYAp5k1n6wCs,1842
1
+ reciprocalspaceship/VERSION,sha256=0bd7iPS59nWpAOQUOsdoohIYYjz-FwPKQxYjiMMnG9Y,6
2
+ reciprocalspaceship/__init__.py,sha256=m6pXLI-HuXwefCfSE2Rs_2McqzuHw5W6yMBXEbceke8,2034
3
3
  reciprocalspaceship/concat.py,sha256=v2eg8-RBiNLYHkkPDeaozh3HvGCaFbmlC15FaeNJMgY,1695
4
4
  reciprocalspaceship/dataseries.py,sha256=ibU1bHMd8zORFxRtDswtvLh_n-miAyBqO0ghLmY29Js,6188
5
- reciprocalspaceship/dataset.py,sha256=YUcpvaTifmlQeR4qewHkzo-RSz6DOq_xLalFRXa_O94,57008
6
- reciprocalspaceship/decorators.py,sha256=U2gfm29infWHVGzQnfnpRsjxOihDD6Iah7oHd4uD8jk,5612
5
+ reciprocalspaceship/dataset.py,sha256=xLgTcmVuypcyTepu1gnLu3YpBz1KowzNQcCQwNpADDM,58889
6
+ reciprocalspaceship/decorators.py,sha256=sZAPAV5fk5zUlwzub2VZy-u28XVNXjBpnqwnKjESWgY,5721
7
7
  reciprocalspaceship/algorithms/__init__.py,sha256=r5IYCGswTHXpSs9Q7c6PfEz8_P8d1fEei2SyTkp5aYY,258
8
8
  reciprocalspaceship/algorithms/intensity.py,sha256=iDHaqqrMAe0v-aTVT5jf54JwkNQLSQ7HhezPw6qZndg,2657
9
9
  reciprocalspaceship/algorithms/merge.py,sha256=iwPrDfjtliBwLqEzHbcIfoTkvS_0s2_CszS5IfrEUXI,2154
10
10
  reciprocalspaceship/algorithms/scale_merged_intensities.py,sha256=hNKKISCCDvchail1PZ_0r6sq1Rbgoraqaz1aDCayTYQ,11269
11
11
  reciprocalspaceship/commandline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- reciprocalspaceship/commandline/mtzdump.py,sha256=JBg_W-CWQ6rbOOVmtK7TsErFXhCBR5pmC5RRSCapEZg,2939
12
+ reciprocalspaceship/commandline/cifdump.py,sha256=X9dU2nTFX-5sPlb6miWo7feMc7BPrTnBJsrbS_fOxmo,2938
13
+ reciprocalspaceship/commandline/mtzdump.py,sha256=ERSk2ORoOTtsdjml-QdxgozS6uzjMvAFWgKv163nVyM,3169
13
14
  reciprocalspaceship/dtypes/__init__.py,sha256=cO0M2F6pO_0jtqx-MlkbzqxLSmK1Ibmon5p_ksWmcbk,1038
14
15
  reciprocalspaceship/dtypes/base.py,sha256=1X56U4jKt_wjVkW930C9gP2Di0RpCMDZsDKNTxYle5I,1052
15
16
  reciprocalspaceship/dtypes/floating.py,sha256=jOQ25GZEE4QromaJA3_oeu0Tkjq1iT4dHCke_7W6TYo,19675
16
17
  reciprocalspaceship/dtypes/inference.py,sha256=jLgF8VfKtITGRzQbfeyZzEoJ1fQlbHXB_gXIJ9-AQxk,3029
17
18
  reciprocalspaceship/dtypes/integer.py,sha256=fPaLTWfMsJ-wuEPkm9oEJez3NDqzB4XKVHFRFEb585A,15816
18
- reciprocalspaceship/dtypes/internals.py,sha256=BkkqUDEvTTlebLXjcu7EiQV295-qR7GdMJXqrOKbbU0,47596
19
+ reciprocalspaceship/dtypes/internals.py,sha256=YNv6Dz4miazjZVFJCOTFudH-0ejUbOcu_snCq1RU2Nw,47607
19
20
  reciprocalspaceship/dtypes/summarize.py,sha256=1w6-N3odFcI3ZEQP5qgrog6ucbGjO71vSgabmjklkbc,1114
20
- reciprocalspaceship/io/__init__.py,sha256=ZMQ_rGfLmfzijbErnjEFphJuZokPvZyyVRk65DC0gLA,400
21
+ reciprocalspaceship/io/__init__.py,sha256=UquHOv850aJGdKnWEG-KTkHPgye7ldYFge62O5N6G_w,476
21
22
  reciprocalspaceship/io/ccp4map.py,sha256=yztiHPTdyR9FiCKRg-eVmL-_MyZTKThPI9uuHuuPF_0,1029
22
- reciprocalspaceship/io/crystfel.py,sha256=lKpGzM2OLNXBjy6njwahtk1IsI3MH0edaGSmaQ6NbGk,21662
23
+ reciprocalspaceship/io/common.py,sha256=_XzdAFeE6B-Q_ORc4bkOR7ANwNT4dNqYtlejzYJfWxs,1055
24
+ reciprocalspaceship/io/crystfel.py,sha256=N6CufOt3yESbOC4niFVPfBx2PSp9UpLlnmsuGXDdeIM,21877
23
25
  reciprocalspaceship/io/csv.py,sha256=A2ZnqAnFwFUQskF7_3EsQAPCcrJ5KEgjhZls6MDViv8,1194
24
- reciprocalspaceship/io/mtz.py,sha256=8XqFVoSJz47vjK-kEzwSu7NxwQnEyyHd0pgt1CaBavM,8074
26
+ reciprocalspaceship/io/dials.py,sha256=FQQa3eT9TQw7h43ohyvNI3huViHE-eP9Y4IbRQL5dIc,10137
27
+ reciprocalspaceship/io/dials_mpi.py,sha256=wvm-sQqFG7N7bgcnxd5jn94eyKveimA3rvP8ns1B5Jg,1212
28
+ reciprocalspaceship/io/mtz.py,sha256=_gdlx7Vi6Z0HyFBZFP6Ptmla7Pd_mON2KaGL4Q3N7Ik,8071
25
29
  reciprocalspaceship/io/pickle.py,sha256=clnSTK8T2O_d7midS_E54WHmXEHrL10d386gWx7ztsM,818
26
- reciprocalspaceship/io/precognition.py,sha256=DWRE2erXPVpm9-y5DjIWUHfmv9jZcsqoa47ienp1Sao,3641
30
+ reciprocalspaceship/io/precognition.py,sha256=xHBeKarVABmtm1DaYUOSs2UYsS3CFTDLCAd47jO03nI,3619
27
31
  reciprocalspaceship/stats/__init__.py,sha256=jdAWbpD_CKAn0W0sO_MKSnTu3bZSoLAXgb1_Y6jDMzk,197
28
32
  reciprocalspaceship/stats/completeness.py,sha256=1QM-Ac_V58nTLJoewbOK5CL69qsb0C0sc8L0c59WorQ,6702
29
33
  reciprocalspaceship/utils/__init__.py,sha256=bKJwbkxXa-TX2etIQgIESKkv9kdag1rHL77JLhI-2B8,1714
30
34
  reciprocalspaceship/utils/asu.py,sha256=WwxvIq-_QEF2UvyELuNudVo53daty9wiN-vaOYAUbKI,8680
31
35
  reciprocalspaceship/utils/binning.py,sha256=CHf5z8EsHSg34ZgC-yM_8Gd3D2BB8cqTtHAf7vwfgLo,2786
32
- reciprocalspaceship/utils/cell.py,sha256=aNIaugA3F8CRs9n8Ck0Rjc8YI7qHZcW3lJPE7yvj0dk,2053
33
- reciprocalspaceship/utils/grid.py,sha256=xB7sw1xrhgzFojrVHbC_uVBT3NMTBsvKsCqaRrVfvTQ,1893
36
+ reciprocalspaceship/utils/cell.py,sha256=MCebTyHrPiiy1H0A6OX3KWTrssw2LJ7ziA-anrfZULU,2027
37
+ reciprocalspaceship/utils/grid.py,sha256=MCpQ9wy0XN0U-Q7H3fwGmWlMzO1RiZtBYkIdDWxG7p4,1928
34
38
  reciprocalspaceship/utils/math.py,sha256=m6Iq9u0fjiieftzjQPAEHTN2htBIOwLhBCJdrcIN5Ao,1019
35
39
  reciprocalspaceship/utils/phases.py,sha256=zyiE99bq-TV_4aI6ZhBi4MLAvKwE3Sx1dFqppJL5rkE,2438
36
40
  reciprocalspaceship/utils/rfree.py,sha256=qFgepLOfgdU-cvZIMu8WfzlFExTc4jILff2ro7iu8FQ,3411
37
41
  reciprocalspaceship/utils/stats.py,sha256=p_1R3bTVVAVlDWh-hzcurlT8GOHkJA8ovFuQjD0w5AY,3681
38
- reciprocalspaceship/utils/structurefactors.py,sha256=ZW6CVPn_04dxay0DDnA0-byUrZnGraQ0kItqN1m5F3k,3686
42
+ reciprocalspaceship/utils/structurefactors.py,sha256=ykcog4yTuVrANrEnQxB7La5QQFn-7D38xsK3on_qVa0,3712
39
43
  reciprocalspaceship/utils/symmetry.py,sha256=xsYmEUo0PTH57-kctJdUq_-k14ci5LUGeG5LwzmjjPU,2963
40
44
  reciprocalspaceship/utils/units.py,sha256=ng-2hzZBERYo9bnQDPr-HLr7xPah-JzOthfrpHH816Y,388
45
+ reciprocalspaceship-1.0.4.dist-info/licenses/LICENSE,sha256=E22aZlYy5qJsJCJ94EkO_Vt3COio5UcLg59dZLPam7I,1093
41
46
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
47
  tests/conftest.py,sha256=bQZClqzu3lonsI01OdP5X38asMd7F76fAGzlWWYPXAI,3930
43
48
  tests/test_dataseries.py,sha256=go-q5tT8lLq3tlRVnmrwUytK7PlaoKs3CBPjWryGfGg,3309
44
- tests/test_dataset.py,sha256=dMFW6-pCs1rjIYEqbfstVqFRiYwKfz5rHlncVL9grQg,22231
49
+ tests/test_dataset.py,sha256=VTyLNJggHtisRsYpegshbtFTYgOGES6QIAo2faVwnic,25011
45
50
  tests/test_dataset_anomalous.py,sha256=LQb1inSS_oDbVYEIyyx_GBFAkXGlEQYZ-ZhpwMeyMmQ,6963
46
51
  tests/test_dataset_binning.py,sha256=NgD_vy-TUh3vQrUVgysVBSZu75xN66LR6hRu2_qAUTs,3564
47
- tests/test_dataset_grid.py,sha256=S2EswVAbcg08WT9TjLtQ3YF1_zJmEKcucHrN3Lw5EM8,4086
52
+ tests/test_dataset_grid.py,sha256=tVFEUl3YA8XhCJa8tMNXQelakIgm5kStp10VhwTPzkY,4070
48
53
  tests/test_dataset_index.py,sha256=-6sMVgAKkkcYRc7UfLuVEH3p7D83o1S7e7c6MbrOrZo,2842
49
54
  tests/test_dataset_preserve_attributes.py,sha256=gwQQJGsiBZld2KKmLrcMkuc9zesR3FD7GVnPDNRScto,5314
55
+ tests/test_dataset_signatures.py,sha256=ZbH9JNzqAWJDfVh9gqZVQXx8glmmBUhsbPmQBHe8Cuo,1554
50
56
  tests/test_dataset_symops.py,sha256=PV86tLu1qDACuk-YqjYQszk8Ctb0-h_NsQRnuCDFnOU,10864
51
57
  tests/test_decorators.py,sha256=ExR7mCU0iIqhHo4ho6ywPrZIEaGcsElaI4jtH9o5afE,5331
52
58
  tests/test_summarize_mtz_dtypes.py,sha256=JE0ctXMWii1AV-cmKogF6hjb8NCHrgvxNZ0ZRCHh-Ho,696
53
- reciprocalspaceship-1.0.2.dist-info/LICENSE,sha256=E22aZlYy5qJsJCJ94EkO_Vt3COio5UcLg59dZLPam7I,1093
54
- reciprocalspaceship-1.0.2.dist-info/METADATA,sha256=36KZFStMfUhplc6K1h7vpF-FVJ-TrExqWI3XXdW5oTE,3056
55
- reciprocalspaceship-1.0.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
56
- reciprocalspaceship-1.0.2.dist-info/entry_points.txt,sha256=Bqjl2J8UrG4UAHHhPbdH5r-xYaOdLCEdyRH6zJ9joDw,76
57
- reciprocalspaceship-1.0.2.dist-info/top_level.txt,sha256=tOo679MsLFS7iwiYZDwnKTuTpJLYVFBk6g9xnnB_s-w,26
58
- reciprocalspaceship-1.0.2.dist-info/RECORD,,
59
+ reciprocalspaceship-1.0.4.dist-info/METADATA,sha256=KhsqyH2rHwyy83eL3383KyLkX0EeoAbuHB11kdjd8VU,3326
60
+ reciprocalspaceship-1.0.4.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
61
+ reciprocalspaceship-1.0.4.dist-info/entry_points.txt,sha256=g-Bn5ZXMuODBSvJWj0PWIv4SVE-ibEplzFeiHH4kMDE,134
62
+ reciprocalspaceship-1.0.4.dist-info/top_level.txt,sha256=tOo679MsLFS7iwiYZDwnKTuTpJLYVFBk6g9xnnB_s-w,26
63
+ reciprocalspaceship-1.0.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.44.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
+ rs.cifdump = reciprocalspaceship.commandline.cifdump:main
2
3
  rs.mtzdump = reciprocalspaceship.commandline.mtzdump:main
tests/test_dataset.py CHANGED
@@ -603,6 +603,48 @@ def test_is_isomorphous(data_unmerged, data_fmodel, sg1, sg2, cell1, cell2):
603
603
  assert not result
604
604
 
605
605
 
606
+ @pytest.mark.parametrize("threshold", [5.0, 1.0, 0.5, 0.1])
607
+ def test_is_isomorphous_threshold(threshold):
608
+ """
609
+ Test that DataSet.is_isomorphous(self, other, cell_threshold) method's
610
+ cell_threshold operates on percent difference.
611
+ """
612
+ epsilon = 1e-12
613
+ cell = np.array([34.0, 45.0, 98.0, 90.0, 90.0, 90.0])
614
+ spacegroup = 19
615
+
616
+ ds = rs.DataSet(cell=cell, spacegroup=spacegroup)
617
+ cell_resize_factor = (200.0 + threshold) / (200.0 - threshold)
618
+
619
+ # Make a cell that should be exactly threshold percent bigger
620
+ other_cell = cell_resize_factor * cell
621
+ too_big_cell = other_cell + epsilon
622
+ big_cell = other_cell - epsilon
623
+
624
+ # Make a cell that should be exactly threshold percent smaller
625
+ other_cell = cell / cell_resize_factor
626
+ too_small_cell = other_cell - epsilon
627
+ small_cell = other_cell + epsilon
628
+
629
+ # Construct data sets
630
+ too_big = rs.DataSet(cell=too_big_cell, spacegroup=spacegroup)
631
+ big = rs.DataSet(cell=big_cell, spacegroup=spacegroup)
632
+ too_small = rs.DataSet(cell=too_small_cell, spacegroup=spacegroup)
633
+ small = rs.DataSet(cell=small_cell, spacegroup=spacegroup)
634
+
635
+ # Cell is barely too big to be isomorphous
636
+ assert not ds.is_isomorphous(too_big, threshold)
637
+
638
+ # Cell is barely too small to be isomorphous
639
+ assert not ds.is_isomorphous(too_small, threshold)
640
+
641
+ # Cell is almost too big to be isomorphous
642
+ assert ds.is_isomorphous(big, threshold)
643
+
644
+ # Cell is almost too small to be isomorphous
645
+ assert ds.is_isomorphous(small, threshold)
646
+
647
+
606
648
  def test_to_gemmi_withNans(data_merged):
607
649
  """
608
650
  GH144: Test whether DataSet.to_gemmi() works with NaN-containing data.
@@ -669,3 +711,50 @@ def test_select_mtzdtype_ValueError(data_merged, dtype):
669
711
  """
670
712
  with pytest.raises(ValueError):
671
713
  data_merged.select_mtzdtype(dtype)
714
+
715
+
716
+ @pytest.mark.parametrize("merged", [True, False])
717
+ @pytest.mark.parametrize("hkl_type", ["ds", "index", "numpy"])
718
+ @pytest.mark.parametrize("range_index", [True, False])
719
+ def test_hkls_property_setter(
720
+ data_merged, data_unmerged, merged, hkl_type, range_index
721
+ ):
722
+ """
723
+ Test the setter for the .hkls property of rs datasets
724
+ """
725
+ if merged:
726
+ input_ds = data_merged
727
+ else:
728
+ input_ds = data_unmerged
729
+
730
+ hkls = input_ds.copy().reset_index()[["H", "K", "L"]]
731
+
732
+ ds = input_ds.copy()
733
+ if range_index:
734
+ ds = ds.reset_index()
735
+
736
+ # Confirm we're starting with equivalent Miller indices
737
+ expected = ds.hkls
738
+ value = hkls
739
+
740
+ # Shuffle the hkls
741
+ hkls = hkls.sample(frac=1.0)
742
+
743
+ # confirm shuffling
744
+ assert not np.array_equal(hkls, ds.hkls)
745
+
746
+ # confirm setter
747
+ if hkl_type == "ds":
748
+ ds.hkls = hkls
749
+ elif hkl_type == "index":
750
+ ds.hkls = hkls.set_index(["H", "K", "L"])
751
+ elif hkl_type == "numpy":
752
+ ds.hkls = hkls.to_numpy()
753
+ expected = ds.hkls
754
+ value = hkls.hkls
755
+ assert np.array_equal(value, expected)
756
+
757
+ # Test that all data remained the same
758
+ for k in input_ds:
759
+ if k not in ["H", "K", "L"]:
760
+ assert np.array_equal(ds[k], input_ds[k])
@@ -22,10 +22,10 @@ def test_to_reciprocal_grid_gemmi(mtz_by_spacegroup, sample_rate, p1, use_sf):
22
22
  grid_size = dataset.get_reciprocal_grid_size(sample_rate=sample_rate)
23
23
 
24
24
  if use_sf:
25
- gemmigrid = gemmimtz.get_f_phi_on_grid("FMODEL", "PHIFMODEL", size=grid_size)
26
- expected = np.array(gemmigrid, copy=False)
27
25
  dataset["sf"] = dataset.to_structurefactor("FMODEL", "PHIFMODEL")
28
26
  result = dataset.to_reciprocal_grid("sf", grid_size=grid_size)
27
+ gemmigrid = gemmimtz.get_f_phi_on_grid("FMODEL", "PHIFMODEL", size=grid_size)
28
+ expected = gemmigrid.array
29
29
 
30
30
  # Requires rtol due to truncations applied in gemmi
31
31
  assert np.allclose(result, expected, rtol=1e-4)
@@ -0,0 +1,53 @@
1
+ from inspect import signature
2
+
3
+ import pandas as pd
4
+ import pytest
5
+ from pandas.testing import assert_frame_equal
6
+
7
+ import reciprocalspaceship as rs
8
+
9
+
10
+ def test_reset_index_dataseries():
11
+ """
12
+ Minimal example from GH#223
13
+ """
14
+ result = rs.DataSeries(range(10)).reset_index()
15
+ expected = pd.Series(range(10)).reset_index()
16
+ expected = rs.DataSet(expected)
17
+ assert_frame_equal(result, expected)
18
+
19
+
20
+ def test_reset_index_signature(dataset_hkl):
21
+ """
22
+ Test call signature of rs.DataSet.reset_index() matches call signature of
23
+ pd.DataFrame.reset_index() using default parameters
24
+ """
25
+ df = pd.DataFrame(dataset_hkl)
26
+ sig = signature(pd.DataFrame.reset_index)
27
+ bsig = sig.bind(df)
28
+ bsig.apply_defaults()
29
+
30
+ expected = df.reset_index(*bsig.args[1:], **bsig.kwargs)
31
+ result = dataset_hkl.reset_index(*bsig.args[1:], **bsig.kwargs)
32
+ result = pd.DataFrame(result)
33
+
34
+ assert_frame_equal(result, expected)
35
+
36
+
37
+ @pytest.mark.parametrize("names", ["H", "K", ["H", "K"]])
38
+ def test_set_index_signature(dataset_hkl, names):
39
+ """
40
+ Test call signature of rs.DataSet.set_index() matches call signature of
41
+ pd.DataFrame.set_index() using default parameters
42
+ """
43
+ ds = dataset_hkl.reset_index()
44
+ df = pd.DataFrame(ds)
45
+ sig = signature(pd.DataFrame.set_index)
46
+ bsig = sig.bind(df, names)
47
+ bsig.apply_defaults()
48
+
49
+ expected = df.set_index(*bsig.args[1:], **bsig.kwargs)
50
+ result = ds.set_index(*bsig.args[1:], **bsig.kwargs)
51
+ result = pd.DataFrame(result)
52
+
53
+ assert_frame_equal(result, expected)