ncrystal-python 3.9.81__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. NCrystal/__init__.py +85 -0
  2. NCrystal/__main__.py +98 -0
  3. NCrystal/_chooks.py +854 -0
  4. NCrystal/_cli_cif2ncmat.py +269 -0
  5. NCrystal/_cli_endf2ncmat.py +503 -0
  6. NCrystal/_cli_hfg2ncmat.py +144 -0
  7. NCrystal/_cli_mcstasunion.py +74 -0
  8. NCrystal/_cli_ncmat2cpp.py +31 -0
  9. NCrystal/_cli_ncmat2hkl.py +180 -0
  10. NCrystal/_cli_nctool.py +1018 -0
  11. NCrystal/_cli_vdos2ncmat.py +463 -0
  12. NCrystal/_cli_verifyatompos.py +257 -0
  13. NCrystal/_cliimpl.py +307 -0
  14. NCrystal/_cliwrap_config.py +36 -0
  15. NCrystal/_common.py +499 -0
  16. NCrystal/_coreimpl.py +114 -0
  17. NCrystal/_hfgdata.py +546 -0
  18. NCrystal/_hklobjects.py +136 -0
  19. NCrystal/_is_std.py +0 -0
  20. NCrystal/_locatelib.py +210 -0
  21. NCrystal/_miscimpl.py +354 -0
  22. NCrystal/_mmc.py +757 -0
  23. NCrystal/_msg.py +60 -0
  24. NCrystal/_ncmat2cpp_impl.py +445 -0
  25. NCrystal/_ncmatimpl.py +2131 -0
  26. NCrystal/_numpy.py +76 -0
  27. NCrystal/_testimpl.py +579 -0
  28. NCrystal/api.py +56 -0
  29. NCrystal/atomdata.py +177 -0
  30. NCrystal/cfgstr.py +77 -0
  31. NCrystal/cifutils.py +1795 -0
  32. NCrystal/cli.py +96 -0
  33. NCrystal/constants.py +134 -0
  34. NCrystal/core.py +1910 -0
  35. NCrystal/datasrc.py +226 -0
  36. NCrystal/exceptions.py +66 -0
  37. NCrystal/hfg2ncmat.py +270 -0
  38. NCrystal/mcstasutils.py +438 -0
  39. NCrystal/misc.py +317 -0
  40. NCrystal/mmc.py +35 -0
  41. NCrystal/ncmat.py +778 -0
  42. NCrystal/ncmat2cpp.py +80 -0
  43. NCrystal/obsolete.py +67 -0
  44. NCrystal/plot.py +484 -0
  45. NCrystal/plugins.py +49 -0
  46. NCrystal/test.py +76 -0
  47. NCrystal/vdos.py +1034 -0
  48. ncrystal_python-3.9.81.dist-info/LICENSE +206 -0
  49. ncrystal_python-3.9.81.dist-info/METADATA +515 -0
  50. ncrystal_python-3.9.81.dist-info/RECORD +53 -0
  51. ncrystal_python-3.9.81.dist-info/WHEEL +5 -0
  52. ncrystal_python-3.9.81.dist-info/entry_points.txt +10 -0
  53. ncrystal_python-3.9.81.dist-info/top_level.txt +1 -0
NCrystal/cifutils.py ADDED
@@ -0,0 +1,1795 @@
1
+
2
+ ################################################################################
3
+ ## ##
4
+ ## This file is part of NCrystal (see https://mctools.github.io/ncrystal/) ##
5
+ ## ##
6
+ ## Copyright 2015-2024 NCrystal developers ##
7
+ ## ##
8
+ ## Licensed under the Apache License, Version 2.0 (the "License"); ##
9
+ ## you may not use this file except in compliance with the License. ##
10
+ ## You may obtain a copy of the License at ##
11
+ ## ##
12
+ ## http://www.apache.org/licenses/LICENSE-2.0 ##
13
+ ## ##
14
+ ## Unless required by applicable law or agreed to in writing, software ##
15
+ ## distributed under the License is distributed on an "AS IS" BASIS, ##
16
+ ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ##
17
+ ## See the License for the specific language governing permissions and ##
18
+ ## limitations under the License. ##
19
+ ## ##
20
+ ################################################################################
21
+
22
+ """Utilities for converting local or online CIF data to NCMAT data.
23
+
24
+ Two utility classes (CIFSource and CIFLoader) are included, and along with the
25
+ NCMATComposer they are used to implement the related cmd-line scripts (in
26
+ particular ncrystal_cif2ncmat), but they can of course also be used directly via
27
+ the Python API below by expert users.
28
+
29
+ """
30
+
31
+ __all__ = ['CIFSource','CIFLoader',
32
+ 'produce_validation_plot',
33
+ 'produce_validation_plots']
34
+
35
+ from . import _common as _nc_common
36
+ from . import ncmat as _nc_ncmat
37
+ from . import _ncmatimpl as _nc_ncmatimpl
38
+ from . import core as _nc_core
39
+
40
class CIFSource:

    """Generalised CIF source.

    A source can be initialised from any of the following:

    * text data (str or bytes holding the CIF data itself, recognised by the
      presence of a newline),
    * a file path (str, bytes or an object with an __fspath__ method),
    * a database ID in the form of a string like "codid::9008460" or
      "mpid::87", for either the Crystallography Open Database
      (https://www.crystallography.net/cod/) or the Materials Project
      (https://www.materialsproject.org/),
    * an existing CIFSource object, or
    * any object like CIFLoader which has a cifsrc property which is a
      CIFSource object.
    """

    def __init__( self, data, *, allow_fail = False, name = None ):
        """Initialise from data in various formats (see class
        description). Unless allow_fail=True, an unrecognised input format will
        result in an NCBadInput exception being thrown. If allow_fail=True the
        .invalid property can be used to check if the loading failed.

        The name argument can be used to assign a name to otherwise anonymous
        text data. It can also be used to override the name, including when
        the data is an existing CIFSource (or an object carrying one).

        """
        self.__codid = None
        self.__mpid = None
        self.__fp = None
        self.__textdata = None
        self.__name_override = name

        def check_status():
            if ( not allow_fail ) and self.invalid:
                raise _nc_core.NCBadInput('Could not detect CIF source type')

        #Check if data is CIFSource object or has one as a .cifsrc property
        #(e.g. LoadedCIF, CIFAnalyser):
        o = getattr( data, 'cifsrc', None ) or data
        if isinstance(o,CIFSource):
            self.__codid = o.__codid
            self.__mpid = o.__mpid
            self.__fp = o.__fp
            self.__textdata = o.__textdata
            #An explicitly requested name takes precedence over the copied one:
            if not self.__name_override:
                self.__name_override = o.__name_override
            return

        def _setfp( pth ):
            pth = pth.decode() if hasattr(pth,'decode') else pth
            self.__fp = _nc_common._lookup_existing_file( pth )
            if self.__fp is not None:
                return
            #Not an on-disk file, so try to look it up via NCrystal's TextData
            #infrastructure:
            try:
                td = _nc_core.createTextData(pth)
                tdname = td.dataSourceName
            except _nc_core.NCFileNotFound:
                td = None
                tdname = None
            if td is not None:
                #Success: the source is now described by its text data rather
                #than by a file path, so this must not be reported as an error.
                self.__textdata = td.rawData
                if tdname and not self.__name_override:
                    self.__name_override = tdname
                return
            if not allow_fail:
                raise _nc_core.NCBadInput('Could not detect CIF source type'
                                          ' (tried to load from path, but'
                                          ' could not locate "%s")'%pth)

        if hasattr(data,'__fspath__'):
            _setfp(data)
            return check_status()
        if hasattr( data, 'startswith' ):
            #Prefixes must be of the same type (str or bytes) as the data:
            str_to_sb = ( ( lambda s : s )
                          if not hasattr(data,'decode')
                          else ( lambda s : s.encode() ) )

            if data.startswith(str_to_sb('codid::')) and data[7:].isdigit():
                self.__codid = int(data[7:])
                return check_status()
            if data.startswith(str_to_sb('mpid::')) and data[6:].isdigit():
                self.__mpid = int(data[6:])
                return check_status()
            if isinstance( data, str ):
                #Multi-line strings are CIF data, single-line ones are paths:
                if '\n' in data.strip():
                    self.__textdata = data
                else:
                    _setfp(data)
                return check_status()
            if isinstance( data, bytes ):
                if b'\n' in data.strip():
                    self.__textdata = data.decode()
                else:
                    _setfp(data)
                return check_status()
        check_status()

    @property
    def invalid( self ):
        """Check if instance is invalid (only possible if constructed with
        allow_fail=True). """
        return all(e is None for e in (self.__codid,self.__mpid,
                                       self.__fp,self.__textdata))

    @property
    def codid( self ):
        """
        None or a database ID (integer) for the Crystallography Open Database
        (https://www.crystallography.net/cod/).
        """
        return self.__codid

    @property
    def mpid( self ):
        """
        None or a database ID (integer) for the Materials Project
        (https://www.materialsproject.org/).
        """
        return self.__mpid

    @property
    def textdata( self ):
        """
        None or a string containing raw CIF data.
        """
        return self.__textdata

    @property
    def name( self ):
        """A short string describing the data. This might be a file name
        (without directory part), or a special string like 'mpid::xyz' or
        'codid::xyz'. Returns None if no name is available.
        """
        if self.__name_override:
            return self.__name_override
        #Test against None (like .invalid and .is_remote do), so that an ID
        #of 0 is not mistaken for a missing ID:
        if self.__mpid is not None:
            return 'mpid::%i'%self.__mpid
        if self.__codid is not None:
            return 'codid::%i'%self.__codid
        if self.__fp:
            return self.__fp.name

    @property
    def filepath( self ):
        """
        None or a pathlib.Path to an on-disk CIF file.
        """
        return self.__fp

    @property
    def is_remote( self):
        """True if either .codid or .mpid is not None"""
        return any( ( e is not None ) for e in (self.__codid,self.__mpid) )

    def load_data( self, quiet = False, mp_apikey = None ):
        """Try to load the data and return it as a string. Depending on the
        source, this might result in reading a file or querying an online
        database. In case the data is defined via a materials project ID (mpid),
        a materials project API key must be provided either via the mp_apikey
        parameter, or in the MATERIALSPROJECT_USER_API_KEY environment variable.

        Raises NCDataLoadError if the source is invalid. Unless quiet=True, a
        message is printed when data is read from a file.
        """
        if self.invalid:
            raise _nc_core.NCDataLoadError( 'Can not load_data'
                                            ' from invalid CIF source' )
        if self.__textdata is not None:
            return self.__textdata
        if self.__fp is not None:
            if not quiet:
                _nc_common.print(f"Loading data from file {self.__fp.name}")
            return self.__fp.read_text()
        if self.__mpid is not None:
            return _mp_get_cifdata( self.__mpid, quiet = quiet, apikey = mp_apikey )
        #Only possibility left, since the source is not invalid:
        assert self.__codid is not None
        return _cod_get_cifdata( self.__codid, quiet = quiet )
206
+
207
class CIFLoader:

    """Loads and analyses CIF data, primarily so that the crystal structure
    can be transferred into an NCMATComposer object, with the intention of
    producing NCMAT data and new materials for usage with NCrystal.

    Behind the scenes the class uses the third-party projects gemmi and spglib
    to parse the CIF data and process the symmetries within.

    Perhaps the biggest issue to understand when basing NCrystal materials on
    CIF data is that CIF data usually contains no, or only incomplete,
    information about the dynamics of the atoms. Thus, for a complete
    description of a high-quality material, one might want to pair up the usage
    of a CIFLoader with a PhononDOSAnalyser from the NCrystal.vdos module.

    Some CIF data does however contain information about atomic displacements
    (either "Uiso" or the anisotropic equivalent), which NCrystal is able to
    convert into a Debye temperature in order to provide a
    temperature-dependent model of dynamics and atomic displacement. Such
    "Uiso" information is only useful if one knows at which temperature it was
    obtained, and that information is almost always absent in most CIF data out
    there. Thus, in the last step - when using the .create_ncmat(..) or
    .create_ncmat_composer(..) methods of the CIFLoader object - one should
    ideally use the uiso_temperature parameter to provide this information to
    NCrystal (usually after a call to print(cifloader.raw_cif_textdata) to
    manually inspect the meta-data, read any linked journal papers, etc.).

    TODO: Add link here to online tutorial once it is ready.

    """

    def __init__( self, cifsrc, quiet = False, mp_apikey = None,
                  refine_with_spglib = True, merge_equiv = True, override_spacegroup = None ):
        """Initialise from cifsrc, which can either be a CIFSource object or
        any sort of data which can be used to initialise a CIFSource object
        (cf. the CIFSource class documentation).

        If refine_with_spglib=False, the structure will not be verified and
        refined with spglib. It is strongly recommended to not disable this
        refinement, as it is needed to ensure correctness and consistency of
        the resulting material.

        The override_spacegroup parameter can in some cases be used to select a
        particular setting of the detected spacegroup. For instance, spacegroup
        number 70 is available in two settings, and loading a CIF file with
        those might result in a warning that both "F d d d:1" and "F d d d:2"
        are available. Trying again with override_spacegroup="F d d d:1" can be
        used to explicitly select one of them (it might of course require some
        investigation to determine which one is right). A list of space group
        settings can be found at:

        https://cci.lbl.gov/sginfo/hall_symbols.html

        Setting merge_equiv=False will prevent usage of the same NCMAT label
        for multiple lists of equivalent atomic positions. However, note that
        this might affect the symmetry and space group of the structure, and is
        in general not recommended.

        If quiet=True, no informative messages will be emitted.

        Finally, the mp_apikey can be used to specify an API key in case the
        cifsrc is an entry for the materialsproject.org. To avoid having to
        specify the key here, one can instead provide the key in the
        environment variable MATERIALSPROJECT_USER_API_KEY.
        """
        #All warnings emitted while loading are recorded (cf. .warnings):
        with _nc_common.WarningSpy( block = quiet ) as warnlist:
            load, gemmi, struct = _actual_init_gemmicif(
                cifsrc,
                quiet = quiet,
                mp_apikey = mp_apikey,
                refine_with_spglib = refine_with_spglib,
                merge_equiv = merge_equiv,
                override_spacegroup = override_spacegroup
            )
        #Every entry of the result is consumed below:
        assert len(load)==9
        self.__cifsrc = load['cifsrc']
        self.__actual_codid = load['actual_codid']
        self.__actual_mpid = load['actual_mpid']
        self.__cif_chemformula = load['cif_chemformula']
        self.__cifdata = load['cifdata']
        self.__cif_raw = load['cif_raw']
        self.__cellsg = load['cellsg']
        self.__atoms = load['atoms']
        self.__cifdescr = load['cifdescr']
        self.__warnlist = tuple( warnlist )

        #Hidden support for additional processing in derived class:
        extra_processing = getattr( self, '_process_raw_gemmi_objects', None )
        if extra_processing:
            extra_processing( gemmi, struct )

    @property
    def cifsrc( self ):
        """The CIFSource object on which everything is based."""
        return self.__cifsrc

    @property
    def actual_codid( self ):
        """None or the database ID of the Crystallography Open Database. This
        might be present even if .cifsrc.codid is None, since the CIF data
        might have been downloaded manually, but provide the ID in its
        metadata."""
        return self.__actual_codid

    @property
    def actual_mpid( self ):
        """None or the database ID of the Materials Project. This might be
        present even if .cifsrc.mpid is None, since the CIF data might have
        been downloaded manually, but provide the ID in its metadata.
        """
        return self.__actual_mpid

    @property
    def raw_cif_textdata( self ):
        """The raw CIF input data, as a string."""
        return self.__cifdata

    @property
    def raw_cif_chemformula( self ):
        """The chemical formula encoded in the CIF data (usually in the
        _chemical_formula_sum field) as a string."""
        return self.__cif_chemformula

    @property
    def raw_cif_dict( self ):
        """The CIF data parsed into a dictionary."""
        return self.__cif_raw

    @property
    def atoms( self ):
        """A tuple of all atoms in the crystal structure, including positions
        in the unit cell. The format of each atom is essentially a dictionary
        similar to:

          {'aniso': None,
           'cif_labels': ['Al'],
           'composition': ((1.0, 'Al'),),
           'equivalent_positions': [(0.0, 0.0, 0.0),
                                    (0.0, 0.5, 0.5),
                                    (0.5, 0.0, 0.5),
                                    (0.5, 0.5, 0.0)],
           'occupancy': 1.0,
           'uiso': None}

        Here 'aniso' might provide information about anisotropic
        displacements, 'uiso' (Uiso) about isotropic displacements, and
        'occupancy' is the site occupancy. The 'cif_labels' are the
        corresponding labels in the CIF data, and the 'composition' and
        'equivalent_positions' entries should be self-explanatory.
        """
        return self.__atoms

    @property
    def cellsg( self ):
        """
        Information about unit cell parameters and space group, as a
        dictionary with a format like:

          {'a': 4.0496, 'b': 4.0496,'c': 4.0496,
           'alpha': 90,'beta': 90,'gamma': 90,
           'spacegroup': {'hm': 'Fm-3m', 'number': 225} }

        """
        return self.__cellsg

    @property
    def extracted_description( self ):
        """
        Description of the material based on the meta-data found in the CIF
        data. Returned as a list of strings.
        """
        return self.__cifdescr

    @property
    def warnings( self ):
        """
        Warnings emitted during processing of the CIF data, as a sequence of
        tuples of two strings: (warning_message, warning_category).
        """
        return self.__warnlist

    def create_ncmat( self, *args, **kwargs ):
        """
        Return NCMAT data corresponding to the currently held crystal
        parameters. All parameters are forwarded (same as calling
        create_ncmat_composer(*args,**kwargs).create_ncmat())
        """
        return self.create_ncmat_composer(*args,**kwargs).create_ncmat()

    def create_ncmat_composer( self, *,
                               uiso_temperature = None,
                               skip_dyninfo = False,
                               top_comments = None,
                               fallback_debye_temp = 300.0,
                               quiet = False,
                               remap = None,
                               no_formula_check = False ):
        """Setup and return an NCMATComposer object, based on the currently
        held crystal parameters.

        The most important thing to be aware of is that, if you wish to take
        advantage of any Uiso/biso values found in the CIF data, you must
        provide the uiso_temperature parameter value (kelvin), to indicate the
        temperature at which these are valid (most likely after inspecting the
        .raw_cif_textdata manually for hints about the correct value).

        If quiet=True, no informative messages will be emitted.

        The fallback_debye_temp parameter can be used to change the default
        Debye temperature assigned to atoms for which Uiso/biso information
        was not available.

        The other parameters are more esoteric:

        If skip_dyninfo=True, no ncmat.set_dyninfo_... calls will be
        performed, presumably because the caller will perform such calls
        subsequently (i.e. with a PhononDOSAnalyser).

        The remap parameter can be used to remap elements and isotopes found
        in the CIF data to any desired composition (cf. the
        NCMATComposer.set_composition for the allowed composition syntax). For
        instance, if one wishes to take a CIF file containing hydrogen atoms
        and deuterate it partly, one could use
        remap = { 'H' : '0.95 D 0.05 H', }.

        Finally, unless no_formula_check=True, an exception is raised if the
        chemical formula of the resulting material is incompatible with any
        chemical formula indicated in the input CIF meta data. This check is
        normally nice to keep, since it can catch many cases where the wrong
        setting of a spacegroup was used.
        """
        #TODO? Also forward a merge_equiv parameter?
        return _impl_create_ncmat_composer(
            self,
            uiso_temperature = uiso_temperature,
            skip_dyninfo = skip_dyninfo,
            top_comments = top_comments,
            fallback_debye_temp = fallback_debye_temp,
            quiet = quiet,
            remap = remap,
            no_formula_check = no_formula_check
        )
446
+
447
+
448
def produce_validation_plots( files, verbose_lbls = True, pdf_target = None,
                              **plot_kwargs ):
    """Produce several of the validation plots resulting from usage of the
    produce_validation_plot(..) function, and possibly even embed them in a
    PDF file.

    One plot is produced for each entry in files. If pdf_target is provided,
    the plots are not shown but instead saved as pages to it. The pdf_target
    is either something from which a PdfPages object can be created, or (when
    detectable) an already existing PdfPages object. In the latter case the
    object is left open for the caller to close.

    Any plot_kwargs will be passed along to the produce_validation_plot(..)
    function. Unless overridden there, line_width_scale=0.5 is used.
    """
    from .plot import ( _import_matplotlib_plt,
                        _import_matplotlib_pdfpages )

    #Default to thinner lines, but let the caller override this via
    #plot_kwargs rather than triggering a duplicate keyword TypeError:
    plot_kwargs.setdefault( 'line_width_scale', 0.5 )

    if pdf_target:
        pdfpages = _import_matplotlib_pdfpages()
        the_real_pdfpages = getattr( pdfpages, 'the_real_inspected_object', None )
        already_pdfpages = ( isinstance(pdf_target,the_real_pdfpages)
                             if the_real_pdfpages else False )
        pdf = pdf_target if already_pdfpages else pdfpages(pdf_target)
        assert pdf
    else:
        pdf, already_pdfpages = None, False

    #Only close PDF objects which were opened here:
    pdf_is_ours = bool( pdf and not already_pdfpages )
    plt = _import_matplotlib_plt() if pdf else None
    try:
        for f in files:
            produce_validation_plot( f, verbose_lbls = verbose_lbls,
                                     do_show = not pdf, **plot_kwargs )
            if pdf:
                pdf.savefig()
                plt.close()
    finally:
        #Make sure the file is closed even if one of the plots failed:
        if pdf_is_ours:
            pdf.close()

    if pdf_is_ours:
        _nc_common.print("created %s"%pdf_target)
481
+
482
def produce_validation_plot( data_or_file, verbose_lbls = True, line_width_scale = 1,
                             quiet = False, xlabel = None, legend_args = None, do_newfig = True,
                             do_show = True, do_legend = True, do_grid=True, do_tight_layout=True ):

    """Function which can produce validation plots for NCMAT data defining
    crystalline materials like the ones on
    https://github.com/mctools/ncrystal/wiki/Data-library. The plot will compare
    the powder Bragg diffraction cross section curve of the NCMAT file, with
    those obtained by replacing the crystal structure with a crystal structure
    loaded directly from online database entries that might be mentioned in the
    comments of the NCMAT data. This makes it easy to verify that the crystal
    structure is still compatible (or not) with those crystal structures
    mentioned in the comments.

    Currently two online databases are supported, with recognised URLs in a
    format like either of the following:

      https://www.crystallography.net/cod/9008460.html
      https://www.materialsproject.org/materials/mp-87

    A remapping note like "[with H->D]" on the same line as such a URL is
    applied to the structure loaded from the database.

    The data_or_file argument can be a TextData object, NCMAT data in a str or
    bytes object (recognised by containing a newline or starting with
    "NCMAT"), or otherwise a file name or path.

    Parameters affecting the plot:

      verbose_lbls     : Add space group, atoms/cell and cell volume to the
                         legend labels. The warnings about differing space
                         groups or atoms/cell are only based on curves
                         labelled in this manner.
      line_width_scale : Factor applied to all line widths.
      quiet            : If True, no informative messages will be emitted.
      xlabel           : Custom label for the x-axis.
      legend_args      : Dictionary of extra arguments for plt.legend(..).
      do_newfig, do_show, do_legend, do_grid, do_tight_layout :
                         Enables the corresponding matplotlib call.

    Nothing is plotted if the material is not crystalline, or if no database
    IDs are found in the data.
    """

    def lookupAndProduce( cifsrc, *,dynamics, remap):
        #Structure from the database entry, dynamics from the NCMAT data:
        cifloader = CIFLoader( cifsrc, quiet = quiet )
        ncmat = cifloader.create_ncmat_composer( uiso_temperature = None,#dynamics provided separately here
                                                 remap = remap,
                                                 skip_dyninfo = True )
        ncmat.transfer_dyninfo_objects( dynamics )
        return ncmat.create_ncmat()

    from .plot import _import_matplotlib_plt
    plt = _import_matplotlib_plt()
    if do_newfig:
        plt.figure()

    def multcreate( data ):
        return _nc_core.directMultiCreate(data,cfg_params='comp=bragg')

    fn = None
    if hasattr( data_or_file, '__fspath__' ):
        pass
    elif isinstance( data_or_file, _nc_core.TextData ):
        mc = multcreate( data_or_file )
        contentiterable = data_or_file
        fn = data_or_file.dataSourceName
    elif isinstance( data_or_file, (bytes,str) ):
        text = ( data_or_file.decode()
                 if isinstance( data_or_file, bytes ) else data_or_file )
        if '\n' in text or text.startswith('NCMAT'):
            contentiterable = text.splitlines()
            mc = multcreate(text)
            fn = 'Anonymous NCMAT data'
        else:
            #A file name. Proceed with the decoded form, so that the string
            #operations on the name below also work for bytes input.
            data_or_file = text
    if not fn:
        fn = data_or_file
        contentiterable = _nc_core.createTextData(data_or_file)
        mc = multcreate(contentiterable)

    #Only use the last path component as the name:
    if hasattr(fn,'__fspath__'):
        import pathlib
        fn = pathlib.Path(fn).name
    elif '/' in fn:
        fn = fn.split('/')[-1]
    if not quiet:
        _nc_common.print(f'Attempting to validate {fn}')

    if not mc.info.isCrystalline():
        if not quiet:
            _nc_common.print(f"Ignoring non-crystalline material {fn}")
        return

    def _extractID(s,pattern):
        #Yield all positive integers found directly after an occurrence of
        #pattern in s.
        if pattern not in s:
            return None
        ll=s.split(pattern)[1:]
        while ll:
            e,ll = ll[0],ll[1:]
            d=''
            while e and e[0].isdigit():
                d+=e[0]
                e=e[1:]
            if d and int(d)>0:
                yield int(d)

    import re as _re
    #NB: The "-" is escaped, since an unescaped "+-." inside the brackets would
    #be a character range which also includes ",".
    _re_atomdbspecs = _re.compile(r"\[ *with ([a-zA-Z ]+)->([ a-zA-Z0-9+\-.]+) *\]")
    def _extractAtomDBSpec(s):
        #Look for remapping specs like "[with H->D]" and return in @ATOMDB format
        #(i.e. "H is D").
        if '->' not in s:
            return
        m = _re_atomdbspecs.search(s)
        return ' '.join(('%s is %s'%m.groups()).split()) if m else None

    ids = []
    for ll in contentiterable:
        atomdb = _extractAtomDBSpec(ll)
        for e in _extractID(ll,'materialsproject.org/materials/mp-'):
            ids.append( dict(dbtype='mp',entryid=e,atomdb=atomdb))
        for e in _extractID(ll,'crystallography.net/cod/'):
            ids.append( dict(dbtype='cod',entryid=e,atomdb=atomdb))

    #order-preserving remove duplicates:
    _seen = set()
    newids=[]
    for d in ids:
        key = tuple(sorted((k,v) for k,v in d.items()))
        if key in _seen:
            continue
        newids.append(d)
        _seen.add(key)
    ids = newids

    def _atomdb_to_remap( atomdb):
        #Convert "@"-separated "X is Y" entries to a list of (X,Y) tuples.
        _atomdb = list( ' '.join(e.strip().split())
                        for e in (atomdb or '').replace(':',' ').split('@') )
        _atomdb = list( e for e in _atomdb if e )
        ll = []
        for c in _atomdb:
            p=c.replace(':',' ').split()
            if not len(p)>=3 or p[1]!='is':
                raise _nc_core.NCBadInput('invalid atomdb remap syntax in "%s"'%c)
            ll.append( (p[0],' '.join(p[2:]) ) )
        return ll

    dynamics = mc.info
    cmps = [ (fn, mc ) ]
    for _ in ids:
        dbname = _['dbtype']
        entryid = _['entryid']
        atomdb = _['atomdb']
        lpargs=dict(remap=_atomdb_to_remap(atomdb),dynamics=dynamics)
        if dbname=='mp':
            lpargs['cifsrc']='mpid::%i'%entryid
        else:
            assert dbname=='cod'
            lpargs['cifsrc']='codid::%i'%entryid
        out = lookupAndProduce( **lpargs )
        lbl=f'{dbname}-{entryid}'
        if atomdb:
            lbl = f'{lbl} with replacement "{atomdb}"'
        cmps.append( ( lbl, multcreate(out) ) )
    from .constants import ekin2wl
    wlmax = max(1.05*ekin2wl(mc.scatter.domain()[0]) for _,mc in cmps)
    from ._numpy import _np_linspace
    wls = _np_linspace(0.001,wlmax,10000)
    if len(cmps)<=1:
        _nc_common.warn(f"No online DB IDs found in {fn}")
        return

    sgno_all = set()
    natoms_all = set()

    col_ordered = [_nc_common._palette_Few.get(k,k) for k in
                   ('black',
                    'blue',
                    'orange',
                    'green',
                    'red',
                    'brown',
                    'purple',
                    'yellow',
                    'pink',
                    'gray')]

    def fix_lbl_for_plt(lbl):
        #Replace underscores with an escaped form inside a math expression:
        return lbl.replace('_',"$"+'\\'+"mathrm{\\_}$")

    for i,(lbl,mc) in enumerate(cmps):
        lbl = str(lbl)
        if '/' in lbl:
            lbl=lbl.split('/')[-1]
        elif lbl.startswith('mp-'):
            lbl = 'Materials Project entry '+lbl[3:]
        elif lbl.startswith('cod-'):
            lbl = 'Crystallography Open Database entry '+lbl[4:]
        #The first curve is the NCMAT data itself, the rest are DB entries:
        lw=4 if i>0 else 2
        lw *= line_width_scale

        si = mc.info.structure_info if mc.info.hasStructureInfo() else None
        if not quiet:
            _nc_common.print('Structure[%s]:'%lbl,si)
        if si and verbose_lbls:
            sgno=si.get('spacegroup',None)
            lbl += ' (SG-%s, %i atoms/cell, Vcell=%g)'%(sgno or 'unspecified',si['n_atoms'],si['volume'])
            sgno_all.add(sgno)
            natoms_all.add(si['n_atoms'])

        plt.plot(wls,mc.scatter.xsect(wl=wls),
                 label=fix_lbl_for_plt(lbl),
                 linewidth=lw,
                 alpha=0.5,
                 color = col_ordered[i] if i<len(col_ordered) else None,
                 dashes = [] if i==0 else [4 if len(cmps)>2 else 2,2]+[2,2]*i)
    if len(natoms_all)>1:
        _nc_common.warn('WARNING validation detected different number of atoms/cell in tested curves')
    if len(sgno_all)>1:
        _nc_common.warn('WARNING validation detected different space group numbers in tested curves')

    plt.xlim(0.0)
    plt.ylim(0.0)
    plt.title('NB: This compares the crystal structure (space group, lattice, atom positions). Phonons/dynamics always taken from .ncmat file',fontsize=6)
    plt.xlabel(xlabel or 'Neutron wavelength (%s)'%(b'\xc3\x85'.decode()))
    plt.ylabel('Coherent elastic cross section (barn)')
    if do_legend:
        plt.legend(loc='best',handlelength=5,**(legend_args or {}))
    if do_grid:
        plt.grid()
    if do_tight_layout:
        plt.tight_layout()
    if do_show:
        plt.show()
692
+
693
+ def _extract_descr_from_cif( raw_cif_dict, cifsrc, ciftextdata ):
694
+
695
+ def normalise_result( _l ):
696
+ #split any newlines and trim whitespace before returning:
697
+ res = []
698
+ for e in _l:
699
+ for ee in e.splitlines():
700
+ res.append( ee.rstrip() )
701
+ return res
702
+
703
+ if not raw_cif_dict:
704
+ _nc_common.warn('Could not properly decode CIF metadata due to inability to access raw CIF data as dictionary.')
705
+ return [], None, None, None
706
+
707
+ #cif is cif section with publication info:
708
+ ll=[]
709
+ def extract(key,*altkeys,expectlist = False):
710
+ if not any( key in s for _,s in sorted(raw_cif_dict.items())):
711
+ if altkeys:
712
+ return extract( altkeys[0], *altkeys[1:], expectlist = expectlist )
713
+ return [] if expectlist else ''
714
+ s = list( d[key] for _,d in sorted(raw_cif_dict.items()) if key in d)[0]
715
+ if not expectlist:
716
+ if s is None:
717
+ return ''
718
+ if not hasattr(s,'strip') and hasattr(s,'__len__') and len(s)==1:
719
+ s=s[0]#convert single element list to first element
720
+ s=str(s).strip()
721
+ return '' if s in ('?','.') else s
722
+ else:
723
+ _l=list( ('' if (e is None or (hasattr(e,'strip') and e.strip()=='?')) else str(e).strip()) for e in s )
724
+ return list( e for e in _l if e )
725
+
726
+ title = extract('_publ_section_title','_citation_title')
727
+ authors = extract('_publ_author_name','_citation_author_name',expectlist=True)
728
+ journalname = extract('_journal_name_full','_citation_journal_full')
729
+ year = extract('_journal_year','_citation_journal_year','_citation_year')
730
+ doi = extract('_journal_paper_doi')
731
+ chemformsum = extract('_chemical_formula_sum','_cod_original_formula_sum')
732
+
733
+ extracted_codid = extract('_cod_database_code')
734
+ if extracted_codid:
735
+ extracted_codid = int(extracted_codid) if extracted_codid.isdigit() else None
736
+
737
+ extracted_mpid = None
738
+ _sp = 'Note from NCrystal.cifutils: Data from materialsproject.org / mp-'# NB this exact format is produced elsewhere in this file.
739
+ for e in ciftextdata.splitlines():
740
+ if _sp in e:
741
+ _ = e.split(_sp,1)
742
+ if len(_) >= 2:
743
+ _ = _[1].split()[0].split('#',1)[0]
744
+ if _.isdigit():
745
+ extracted_mpid = int(_)
746
+
747
+ if extracted_codid is not None and cifsrc.codid is not None and cifsrc.codid != extracted_codid:
748
+ _nc_common.warn(f'Embedded CODID {extracted_codid} is different than requested {cifsrc.codid}!')
749
+
750
+ if extracted_mpid is not None and cifsrc.mpid is not None and cifsrc.mpid != extracted_mpid:
751
+ _nc_common.warn(f'Embedded MPID {extracted_mpid} is different than requested {cifsrc.mpid}!')
752
+
753
+ _thecodid = cifsrc.codid or extracted_codid
754
+ _thempid = cifsrc.mpid or extracted_mpid
755
+
756
+ audit_creation_method = extract('_audit_creation_method')
757
+ audit_creation_date = extract('_audit_creation_date')
758
+ audit_author_name = extract('_audit_author_name')
759
+
760
+ nauthors = len(authors or [])
761
+ if authors:
762
+ if len(authors)==1:
763
+ authors = authors[0]
764
+ elif len(authors)==2:
765
+ authors = f'{authors[0]} and {authors[1]}'
766
+ else:
767
+ authors = f'{authors[0]}, et al.'
768
+ if title:
769
+ ll.append(f'"{title}"')
770
+ if (journalname and year) and not authors:
771
+ authors = '<unknown authors>'
772
+ if journalname and year:
773
+ _jy = f'{journalname}, {year}'
774
+ elif journalname or year:
775
+ _jy = journalname or year
776
+ else:
777
+ _jy = ''
778
+ if authors:
779
+ ll.append(f'{authors} [{_jy}]' if _jy else f'Author{"" if nauthors==1 else "s"}: {authors}')
780
+
781
+ if doi:
782
+ ll.append(f'DOI: https://dx.doi.org/{doi}')
783
+
784
+ if audit_creation_method:
785
+ ll.append(f'CIF creation method: {audit_creation_method}')
786
+ if audit_creation_date:
787
+ ll.append(f'CIF creation date: {audit_creation_date}')
788
+ if audit_author_name:
789
+ ll.append(f'CIF created by: {audit_author_name}')
790
+
791
+ if _thecodid:
792
+ ll += [ f'Crystallography Open Database entry {_thecodid}', _codid2url(_thecodid) ]
793
+ if _thempid:
794
+ ll += [ 'The Materials Project', _mpid2url(_thempid) ]
795
+
796
+ return normalise_result( ll ), _thecodid, _thempid, chemformsum
797
+
798
+ def _codid2url( codid ):
799
+ return f'https://www.crystallography.net/cod/{codid}.html'
800
+
801
+ def _mpid2url( mpid ):
802
+ return f'https://www.materialsproject.org/materials/mp-{mpid}'
803
+
804
def _impl_create_ncmat_composer( cifloader, *,
                                 uiso_temperature,
                                 skip_dyninfo,
                                 top_comments,
                                 fallback_debye_temp,
                                 #merge_equiv,
                                 quiet,
                                 remap,
                                 no_formula_check ):
    """Create an NCMAT composer from a loaded CIF and document any warnings in it.

    The actual conversion is delegated to _impl_create_ncmat_composer_internal,
    which is run under a WarningSpy so that warnings emitted during the
    conversion are collected. Those warnings, together with the ones already
    recorded on `cifloader.warnings`, are formatted and added as comments to
    the returned composer.

    If dyninfo is not skipped and `fallback_debye_temp` is a positive number,
    it is passed to the composer's allow_fallback_dyninfo method.
    """
    with _nc_common.WarningSpy( block = quiet ) as extra_ana_warnings:
        composer = _impl_create_ncmat_composer_internal( cifloader = cifloader,
                                                         #merge_equiv = merge_equiv,
                                                         uiso_temperature = uiso_temperature,
                                                         skip_dyninfo = skip_dyninfo,
                                                         top_comments = top_comments,
                                                         remap = remap,
                                                         no_formula_check = no_formula_check )
    # NOTE(review): indentation was not recoverable from the input; this call
    # is assumed to be placed after (not inside) the WarningSpy block.
    if not skip_dyninfo and fallback_debye_temp and fallback_debye_temp > 0.0:
        composer.allow_fallback_dyninfo( fallback_debye_temp )

    def fmtwarnings( warnings, descrtxt, nwmax ):
        """Format (message,category) pairs as comment lines.

        Identical warnings are collapsed into a single entry prefixed with a
        repeat count, and at most `nwmax` lines are emitted per warning.
        """
        ll=[]
        if not warnings:
            return ll
        ll.append(f'Notice: The following WARNINGS were emitted {descrtxt}:')
        ll.append('')

        #Reversed copy, so pop() yields warnings in their original order:
        wleft=warnings[::-1]
        while wleft:
            w = wleft.pop()
            ncount = 1 + wleft.count(w)
            if ncount > 1:
                #Drop remaining duplicates, they are covered by the count:
                wleft = [e for e in wleft if e!=w]
            wmsg,wcat = w
            wmsg = list(e.strip() for e in wmsg.splitlines() if e.strip())
            s0 = ' %s :'%wcat if ncount==1 else ' (%ix) %s : '%(ncount,wcat)
            for i,m in enumerate(wmsg):
                if i==nwmax:
                    break
                pref=s0 if i==0 else ' '*(len(s0))
                if i+1==nwmax and len(wmsg)>nwmax:
                    #The last allowed line is replaced by the notice, so only
                    #nwmax-1 message lines are shown and the number of hidden
                    #lines is len(wmsg)-(nwmax-1):
                    m='<%i lines of output hidden>'%(len(wmsg)-nwmax+1)
                ll.append('%s %s'%(pref,m))
        return ll

    composer.add_comments( fmtwarnings( list(cifloader.warnings) + extra_ana_warnings,
                                        'when loading the CIF data',nwmax=10),
                           add_empty_line_divider = True )
    return composer
853
+
854
+ def _aniso_nontrivial( aniso_dict ):
855
+ return aniso_dict and ( any( abs(aniso_dict[e])>1e-13 for e in ('u12','u13','u23') )
856
+ or ( aniso_dict['u11'] != aniso_dict['u22'] )
857
+ or ( aniso_dict['u11'] != aniso_dict['u33'] ) )
858
+
859
+
860
def _impl_create_ncmat_composer_internal( cifloader, *, uiso_temperature, skip_dyninfo, top_comments, remap, no_formula_check ):
    """Build an NCMATComposer from the structure held by `cifloader`.

    Steps performed:
      * Optionally remap elements/isotopes of the loaded atoms according to
        `remap` (an iterable of (element, composition) pairs, or None).
      * Register compositions, atom positions, cell and spacegroup with the
        composer, and (unless `skip_dyninfo`) mean-squared-displacements from
        uiso values when `uiso_temperature` is provided.
      * Add descriptive header comments (`top_comments`, origin of the CIF
        data, remapping notes and a notice about the automatic conversion).
      * Unless `no_formula_check`, compare the chemical formula embedded in
        the CIF data with the formula of the loaded structure and raise
        NCBadInput if the two are incompatible.

    Returns the NCMATComposer object.
    """

    src = cifloader

    src_atoms = src.atoms

    #Apply remap:
    def _stdatomname( name ):
        return {'D':'H2','T':'H3'}.get(name,name)

    remap_decoded = {}
    for elemiso, compos in (remap or []):
        elemiso, compos = _nc_ncmatimpl._decode_composition(elemiso,compos)
        elemiso = _stdatomname( elemiso )
        remap_decoded[ elemiso ] = compos
    remap = remap_decoded
    remap_str = ''

    if remap:
        new_src_atoms = []
        for atom in src_atoms:
            if not any( e in remap for fr,e in atom['composition'] ):
                #Atom not affected by the remapping, keep as it is:
                new_src_atoms.append( atom )
                continue
            newcompos = []
            for fr,e in atom['composition']:
                remap_compos = remap.get(e,None)
                if remap_compos is None:
                    newcompos.append( (fr,e) )
                else:
                    for remap_fr, remap_elem in remap_compos:
                        newcompos.append( ( fr*remap_fr, remap_elem ) )
            #Work on a copy, so the atom data of the loader is left untouched:
            d = dict( (k,v) for k,v in atom.items() if k!='composition' )
            d['composition'] = newcompos
            new_src_atoms.append( d )
        src_atoms = tuple( new_src_atoms )
        #For the description:
        remap_strs = []
        for elemiso, compos in sorted(remap.items()):
            compos_str = compos[0][1] if len(compos)==1 else ' '.join( f'{_f:g} {_n}' for _f,_n in compos)
            remap_strs.append(f'{elemiso} -> {compos_str}')
        remap_str = ', '.join(remap_strs)

    #DYNINFO analysis can only be done now, where we know the skip_dyninfo and uiso_temperature values:

    ncmat = _nc_ncmat.NCMATComposer()
    all_atompos = []
    for idx, a in enumerate( src_atoms ):
        lbl = f'cif_species_{idx}'
        compos = [ (fr,e) for fr,e in a['composition'] ]
        if len(compos)==1 and compos[0][0]==1.0:
            composstr = compos[0][1]
        else:
            composstr = ' '.join(f'{f:g} {n}' for f,n in compos)
        ncmat.set_composition( lbl, compos )
        if not skip_dyninfo:
            uiso = a.get('uiso',None)
            if uiso is not None:
                assert uiso>0.0
                if uiso_temperature is None:
                    _nc_common.warn(f'ignoring uiso info present in CIF input for "{composstr}" since uiso_temperature parameter value is not provided\n')
                else:
                    ncmat.set_dyninfo_msd( lbl, msd=uiso, temperature=uiso_temperature )
            # NOTE(review): indentation was not recoverable from the input; the
            # anisotropy check is assumed to belong to the "not skip_dyninfo"
            # branch - confirm against the original file.
            _aniso = a.get('aniso',None)

            if _aniso_nontrivial(_aniso):
                _nc_common.warn('Anisotropic displacement for %s ignored (%s)'%(composstr,', '.join (f'{k}={v:g}' for k,v in sorted(_aniso.items()))))

        occu = a.get('occupancy',1.0)
        for c in a['equivalent_positions']:
            all_atompos.append( ( lbl, c[0], c[1], c[2], occu ) )

    ncmat.set_atompos( all_atompos )

    st = src.cellsg
    ncmat.set_cellsg( a=st['a'], b=st['b'], c=st['c'],
                      alpha=st['alpha'], beta=st['beta'], gamma=st['gamma'],
                      spacegroup=st['spacegroup']['number'] )

    if top_comments is not None:
        ncmat.add_comments(top_comments)

    _extracted_description = src.extracted_description
    if not _extracted_description:
        if src.cifsrc.name:
            _ds = 'CIF data' if src.cifsrc.is_remote else 'CIF file'
            _extracted_description = [f'{_ds}: {src.cifsrc.name}']
        else:
            _extracted_description = ['Anonymous CIF data']
    if _extracted_description:
        ncmat.add_comments(['Structure converted (with NCrystal'
                            '.cifutils module) from:',''])

        _thedescr = _extracted_description
        if remap_str:
            _codidurl = _codid2url(src.actual_codid) if src.actual_codid else None
            _mpidurl = _mpid2url(src.actual_mpid) if src.actual_mpid else None
            if _codidurl and _codidurl in _thedescr:
                _=f'{_codidurl} [with {remap_str}]'
                _thedescr = [e.replace(_codidurl,_) for e in _thedescr]
            elif _mpidurl and _mpidurl in _thedescr:
                _=f'{_mpidurl} [with {remap_str}]'
                _thedescr = [e.replace(_mpidurl,_) for e in _thedescr]
            else:
                #Build a new list rather than extending in place, since
                #_thedescr might be the very object held by the loader:
                _thedescr = list(_thedescr) + ['',f'Note: With custom remappings: {remap_str}']

        ncmat.add_comments([' '+e for e in _thedescr])
        ncmat.add_comments([''])

    ncmat.add_comments('IMPORTANT NOTICE: This is a mostly automatic conversion which has not been',add_empty_line_divider=True)
    ncmat.add_comments(' verified! In particular the @DYNINFO sections might need')
    ncmat.add_comments(' post-editing. Before distributing this file to other people,')
    ncmat.add_comments(' please review this, amend the comments here to document,')
    ncmat.add_comments(' anything done, and remove this notice.')

    #formula
    def _extract_atomcount( atom ):
        occu, npos = atom['occupancy'], len(atom['equivalent_positions'])
        return npos * ( int(occu) if occu == int(occu) else float(occu) )
    total_composition = []
    for a in src_atoms:
        atomcount = _extract_atomcount( a )
        for elemfrac, eleminfo in a['composition']:
            total_composition.append( ( eleminfo, elemfrac * atomcount ) )
    formula = _nc_common.format_chemform( total_composition )


    expected_formula = cifloader.raw_cif_chemformula

    if expected_formula:
        def formula_to_dict( f ):
            #Accepts a formula string, a dict, or an iterable of (label,count)
            #pairs. Returns a {label:count} dict, or None if nothing usable.
            if not f:
                return None
            if isinstance(f,str):
                return formula_to_dict( _decode_formula(f) )
            if isinstance(f,dict):
                return f
            res = {}
            for lbl,count in f:
                assert count>=0.0
                if count==0:
                    continue
                if lbl in res:
                    res[lbl] += count
                else:
                    res[lbl] = count
            return res or None
        from .atomdata import allElementNames
        _all_elements = allElementNames()
        def _decode_formula( s ):
            #Decode strings like "Al2 O3" (integer counts only). Returns None
            #if the string can not be decoded.
            l1 = [e for e in _all_elements if len(e)==1]+['D','T']
            l2 = [e for e in _all_elements if len(e)==2]
            res = []
            while s.strip():
                s=s.strip()
                #Try two-letter element names before one-letter ones:
                c = [ e for e in l2 if s.startswith(e) ]
                if not c:
                    c = [ e for e in l1 if s.startswith(e) ]
                if len(c)!=1:
                    return None
                c = c[0]
                s=s[len(c):].strip()
                leaddigits = ''
                while s and s[0].isdigit():
                    leaddigits += s[0]
                    s=s[1:]#nb: no strip here, we want to stop on a space!
                n = int(leaddigits) if leaddigits else 1
                res.append( (c,n) )
            return formula_to_dict(res)
        def formulas_incompatible( formula1, formula2 ):
            f1 = formula_to_dict(formula1)
            f2 = formula_to_dict(formula2)
            if not f1 or not f2:
                return False
            #map D,T,H2,H3 -> H to ensure fewer false positives:
            def _tmp(d):
                #Counts are summed, so that e.g. both H and D being present
                #does not result in one of them being silently discarded:
                merged = {}
                for k,v in d.items():
                    k = 'H' if k in ('D','T','H2','H3') else k
                    merged[k] = merged.get(k,0) + v
                return merged
            f1,f2 = _tmp(f1),_tmp(f2)
            if f1 == f2:
                return False
            if not set(f1.keys())==set(f2.keys()):
                return True
            #Same elements, so compare after scaling to a common reference:
            kref = list(f1.keys())[0]
            assert f1[kref] > 0.0 and f2[kref]>0.0
            f2scale = f1[kref]/f2[kref]
            tol = 1e-2#not tighter to avoid false positives!
            for k,v1 in f1.items():
                v2 = f2[k]*f2scale
                if abs(v1-v2)>tol and abs(v1-v2)/(1e-300+abs(v1)+abs(v2)) > tol:
                    return True
            return False

    if not no_formula_check and expected_formula:
        actual_expected_formula_dict = formula_to_dict( expected_formula )
        #The formula dict is None if the CIF formula could not be decoded, in
        #which case there is nothing to remap (and no check is possible):
        if remap and actual_expected_formula_dict:
            newf = {}
            def _collect( _name, _frac ):
                _name = _stdatomname(_name)
                if _name not in newf:
                    newf[_name] = _frac
                else:
                    newf[_name] += _frac
            for name, frac in actual_expected_formula_dict.items():
                _remapped = remap.get(_stdatomname(name),None)
                if _remapped is not None:
                    for remap_frac, remap_name in _remapped:
                        _collect( remap_name, remap_frac * frac )
                else:
                    _collect( name, frac )
            actual_expected_formula_dict = newf
        if formulas_incompatible(total_composition,actual_expected_formula_dict):
            s = f'"{expected_formula}"'
            if remap:
                _ = _nc_common.format_chemform( list( sorted(actual_expected_formula_dict.items() ) ) )
                s += f' remapped to "{_}"'
            raise _nc_core.NCBadInput(f'Formula encoded in CIF data ({s}) is not compatible with formula of loaded structure ("{formula}")')

    return ncmat
1080
+
1081
+
1082
+ def _impl_merge_atoms( atoms ):
1083
+ ll = []
1084
+ for a in atoms:
1085
+ pos = list(a['equivalent_positions'])
1086
+ cif_labels = list( a['cif_labels'] )
1087
+ other_metadata = list( sorted( (k,v) for k,v in a.items()
1088
+ if k not in ('equivalent_positions',
1089
+ 'cif_labels') ) )
1090
+ found = False
1091
+ for k,v in ll:
1092
+ if k == other_metadata:
1093
+ v[0] += list( pos )
1094
+ v[1] += list( cif_labels )
1095
+ found = True
1096
+ break
1097
+ if not found:
1098
+ ll.append( (other_metadata,[list(pos),list(cif_labels)]) )
1099
+ res = []
1100
+ for other_metadata, ( pos, cif_labels ) in ll:
1101
+ d = dict( (k,v) for k,v in sorted(other_metadata) )
1102
+ d['equivalent_positions'] = list( sorted( pos ) )
1103
+ d['cif_labels'] = list( sorted( cif_labels ) )
1104
+ res.append( d )
1105
+ return res
1106
+
1107
+ def _suggest_filename( ncmat_metadata, cifloader ):
1108
+ ll = ['autogen']
1109
+ ll.append( ncmat_metadata['chemform'] )
1110
+ sgnum = ncmat_metadata.get('cellsg',{}).get('spacegroup',None)
1111
+ if sgnum:
1112
+ ll.append( 'sg%i'%sgnum )
1113
+ if cifloader.actual_codid:
1114
+ ll.append( 'cod%i'%cifloader.actual_codid )
1115
+ if cifloader.actual_mpid:
1116
+ ll.append( 'mp%i'%cifloader.actual_mpid )
1117
+ return '_'.join(ll) + '.ncmat'
1118
+
1119
def _format_spglib_cell( cellsg, atoms ):
    """Return a (lattice, atomic_points, atomic_types) tuple for the structure.

    The lattice is produced by _cellparams_to_spglib_lattice from `cellsg`.
    All equivalent positions of all atoms are listed in atomic_points, with
    the matching entry in atomic_types being the index of the atom in `atoms`.
    """
    lattice = _nc_ncmatimpl._cellparams_to_spglib_lattice(cellsg)
    atomic_points, atomic_types = [], []
    for type_index, atom in enumerate(atoms):
        for point in atom['equivalent_positions']:
            atomic_points += [ point ]
            atomic_types += [ type_index ]
    return ( lattice, atomic_points, atomic_types )
1128
+
1129
def _impl_refine_cell( cellsg, atoms ):
    """Refine the cell with _spglib_refine_cell and return the updated structure.

    Returns a tuple (new_cellsg, new_atoms, warnings, msgs). The atoms in
    new_atoms are copies of those in `atoms` with 'equivalent_positions'
    replaced by the refined positions, and with any 'aniso' entry set to None
    in case the refinement reports that anisotropic properties can not be
    kept.
    """
    orig_cell = _format_spglib_cell( cellsg, atoms )

    refined = _nc_ncmatimpl._spglib_refine_cell( orig_cell )#, symprec = 0.01, allow_axis_swap = True )
    assert len(refined)==7

    refined_cell = refined['refined_cell']
    #Collect refined positions by atom index:
    highest_index = max(refined_cell[2])
    positions_by_index = {}
    for atomidx in range(highest_index+1):
        positions_by_index[atomidx] = []
    for pos, atomidx in zip(refined_cell[1],refined_cell[2]):
        positions_by_index[ atomidx ].append( (pos[0],pos[1],pos[2]) )

    new_atoms = []
    for idx, atom in enumerate(atoms):
        updated = dict( atom.items() )
        if 'equivalent_positions' in updated:
            updated['equivalent_positions'] = positions_by_index[idx]
        new_atoms.append( updated )

    if not refined['can_keep_anisotropic_properties']:
        stripped = []
        for atom in new_atoms:
            updated = dict( atom.items() )
            if 'aniso' in updated:
                updated['aniso'] = None
            stripped.append( updated )
        new_atoms = stripped

    new_cellsg = dict( refined['cellparams_snapped'].items() )
    new_cellsg['spacegroup'] = dict( number = refined['sgno'],
                                     hm = refined['sgsymb_hm'] )

    return new_cellsg, new_atoms, refined['warnings'], refined['msgs']
1154
+
1155
+ def _gemmi_wrap_to_unit( gemmi_fractional ):
1156
+ #gemmi's wrap_to_unit() can return 1.0, but we want those to be 0.0 (so
1157
+ #(a,b,1.0) and (a,b,0.0) does not appear to be two different
1158
+ #points). Update: I am not 100% sure about this, but keeping this function
1159
+ #for added robustness.
1160
+ c = gemmi_fractional.wrap_to_unit()
1161
+ for i in range(3):
1162
+ if c[i]==1.0:
1163
+ c[i]=0.0
1164
+ return c
1165
+
1166
+ def _getMaterialsProjectAPIKEY():
1167
+ import os
1168
+ apikey = os.environ.get('MATERIALSPROJECT_USER_API_KEY',None)
1169
+ if len(apikey.strip()) < 25:
1170
+ raise _nc_core.NCException('ERROR: Legacy API key found in MATERIALSPROJECT_USER_API_KEY. Please instead provide a new API key from https://materialsproject.org/api (Keys for the new API are ~32 characters, whereas keys for the legacy API are ~16 characters).')
1171
+ if not apikey:
1172
+ raise _nc_core.NCException('ERROR: Missing API key for materialsproject.org access. To fix, '
1173
+ +'please make sure the environment variable MATERIALSPROJECT_USER_API_KEY'
1174
+ +' contains your personal access key that you see on'
1175
+ +' https://www.materialsproject.org/dashboard (after logging in).')
1176
+ return apikey
1177
+
1178
def _use_local_cif_cache( fn, text_data = None, quiet = False ):
    """Store or retrieve CIF text data in the local file cache.

    The cache directory is given by the ONLINEDB_CACHEDIR setting (read with
    ncgetenv). If `text_data` is provided it is written to the file `fn` in
    that directory, otherwise the content of a non-expired file `fn` is
    returned. In all other cases the result is None, unless the
    ONLINEDB_FORBID_NETWORK setting is enabled in which case a RuntimeError
    is raised instead.
    """
    #NB: This simple implementation use no locking to guard against race
    #conditions!!! But we do perform write+move instead of simply write, which
    #is a bit more "atomic".
    network_forbidden = _nc_common.ncgetenv_bool('ONLINEDB_FORBID_NETWORK')
    if network_forbidden:
        def notfound():
            n = _nc_common.expand_envname('ONLINEDB_FORBID_NETWORK')
            raise RuntimeError('Error: Trying to access remote DB but'
                               f' {n} is set')
        time_limit_hours = 24*7*365*1000#revisit this in 3023
    else:
        def notfound():
            pass
        time_limit_hours = 24*7

    cachedir_setting = _nc_common.ncgetenv('ONLINEDB_CACHEDIR')
    if not cachedir_setting:
        return notfound()

    import os.path
    import pathlib
    cachedir = pathlib.Path(os.path.expanduser(cachedir_setting)).resolve().absolute()
    cached_file = cachedir / fn

    if text_data:
        #store data into local cache:
        if not quiet:
            _nc_common.print(f"Adding {fn} to local file cache in $NCRYSTAL_ONLINEDB_CACHEDIR")
        cachedir.mkdir(parents=True, exist_ok=True)
        #Write to a temporary process-specific file name, then move in place:
        tmp_file = cachedir / f'{fn}_tmp_{os.getpid()}'
        _nc_common.write_text(tmp_file,text_data)
        tmp_file.replace(cached_file)
        return notfound()

    #retrieve data from local cache:
    if not cached_file.exists():
        return notfound()

    #Since online data can change, entries expire eventually:
    import datetime
    utc = datetime.timezone.utc
    age = ( datetime.datetime.now(tz=utc)
            - datetime.datetime.fromtimestamp(cached_file.stat().st_mtime, tz=utc) )
    if age > datetime.timedelta( hours = time_limit_hours ):
        #entry expired:
        if not quiet:
            _nc_common.print(f"Ignoring (and removing) expired {fn} from local file cache in $NCRYSTAL_ONLINEDB_CACHEDIR")
        cached_file.unlink()
        return notfound()

    if not quiet:
        _nc_common.print(f"Getting {fn} from local file cache in $NCRYSTAL_ONLINEDB_CACHEDIR")
    return cached_file.read_text()
1227
+
1228
#In-memory cache of the most recent downloads, as (codid, cifdata) pairs:
_cod_cache = []
def _cod_get_cifdata( codid, quiet = False ):
    """Return CIF data for an entry in the Crystallography Open Database.

    Results are looked up in the in-memory cache and then in the local file
    cache, before the data is downloaded. Downloaded data is added to both
    caches, with the in-memory cache keeping at most 10 entries.
    """
    for cached_id, cached_data in _cod_cache:
        if codid != cached_id:
            continue
        if not quiet:
            _nc_common.print(f"Using cached Crystallography Open Database result for entry {codid}")
        return cached_data

    cache_fn = 'cod_%i.cif'%codid
    #check file cache:
    from_file_cache = _use_local_cif_cache( cache_fn, quiet = quiet )
    if from_file_cache:
        return from_file_cache

    if not quiet:
        _nc_common.print(f"Querying the Crystallography Open Database for entry {codid}")
    result = _nc_common.download_url( "https://www.crystallography.net/cod/%i.cif"%(codid) )

    #Discard the oldest entry before adding to a full cache:
    if len(_cod_cache)==10:
        del _cod_cache[0]
    _cod_cache.append( (codid, result ) )

    #update file cache:
    _use_local_cif_cache( cache_fn, quiet = quiet, text_data = result )
    return result
1249
+
1250
#In-memory cache of the most recent queries, as (mpid, cifdata) pairs:
_mp_cache = []
def _mp_get_cifdata( mpid, quiet = False, apikey = None ):
    """Return CIF data for the materialsproject.org entry `mpid`.

    The `mpid` can be an integer, or a string optionally prefixed with "mp-"
    or "mpid::". Results are looked up in the in-memory cache and then in the
    local file cache, before materialsproject.org is queried through the
    mp_api.client module (using `apikey`, or the key provided by
    _getMaterialsProjectAPIKEY if `apikey` is None).

    NCBadInput is raised if the spacegroup number in the generated CIF data
    is absent, malformed, or different from the one reported by
    get_space_group_info() on the retrieved structure.
    """
    for prefix in ('mp-','mpid::'):
        if hasattr(mpid,'startswith') and mpid.startswith(prefix):
            mpid = mpid[len(prefix):]
    mpid = int(mpid)

    for cached_id, cached_data in _mp_cache:
        if mpid != cached_id:
            continue
        if not quiet:
            _nc_common.print(f"Using cached materialsproject.org result for entry mp-{mpid}")
        return cached_data

    #check file cache:
    cache_fn = 'mp_%i.cif'%mpid
    from_file_cache = _use_local_cif_cache( cache_fn, quiet = quiet )
    if from_file_cache:
        return from_file_cache

    if not quiet:
        _nc_common.print(f"Querying materialsproject.org for entry mp-{mpid}")

    if apikey is None:
        apikey = _getMaterialsProjectAPIKEY()

    def _is_deprecation( msg, cat ):
        return cat in ('PendingDeprecationWarning','DeprecationWarning')

    with _nc_common.WarningSpy(blockfct = _is_deprecation ):
        try:
            import mp_api.client#NB: This might trigger a spurious FPE
        except ImportError:
            raise ImportError('Could not import mp_api.client. Installing the mp-api package will most likely'
                              ' fix this (perhaps with a command like "conda install -c conda-forge mp-api"'
                              ' or "python3 -mpip install mp-api").')

    def _is_mpcontribs_notice( msg, cat ):
        return msg.lower().startswith('mpcontribs-client not installed')

    # NOTE(review): indentation was not recoverable from the input; the
    # extent of the two "with" blocks below is an assumption.
    with _nc_common.WarningSpy(blockfct = _is_mpcontribs_notice ):
        with mp_api.client.MPRester(apikey) as mpr:
            s = mpr.get_structure_by_material_id( f'mp-{mpid}', conventional_unit_cell=True )
            #If we do no use the symprec argument in the next line, we will get a an unrefined P1 structure:
            result = s.to(fmt='cif',symprec=1e-4, significant_figures=15, angle_tolerance=5.0, refine_struct=True)
            #But we want to sanity check that this refinement gives the same spacegroup result as listed in the MP database:
            mp_expected_sg_number = s.get_space_group_info()[1]

    sg_checked = False
    errmsg = f'Unable to reliably determine spacegroup when trying to retrieve structure for mp-{mpid} from materialsproject.org'
    for ll in result.splitlines():
        #Ignore comments, and look only at the spacegroup number line(s):
        p = ll.split('#',1)[0].split()
        if not p or p[0]!='_symmetry_Int_Tables_number':
            continue
        if len(p)<2 or not p[1].isdigit():
            raise _nc_core.NCBadInput(errmsg+f' (unexpected format of line: "{ll}")')
        _sgnum = int(p[1])
        if _sgnum != mp_expected_sg_number:
            raise _nc_core.NCBadInput(errmsg+f' (expected SG-{mp_expected_sg_number} but got SG-{_sgnum})')
        sg_checked = True

    if not sg_checked:
        raise _nc_core.NCBadInput(errmsg+' (no line with "_symmetry_Int_Tables_number" produced)')

    #Finally embed origin as a note (so we can extract it later for the ncmat
    #header comments). Keep the format below synchronised with the reader
    #elsewhere in this file!!
    result += f'\n# Note from NCrystal.cifutils: Data from materialsproject.org / mp-{mpid}\n'

    #Update cache and return:
    if len( _mp_cache ) == 10:
        del _mp_cache[0]
    _mp_cache.append( (mpid, result ) )

    #update file cache:
    _use_local_cif_cache( cache_fn, quiet = quiet, text_data = result )

    return result
1319
+
1320
+ def _codid2url( codid ):
1321
+ return f'https://www.crystallography.net/cod/{codid}.html'
1322
+ def _mpid2url( mpid ):
1323
+ return f'https://www.materialsproject.org/materials/mp-{mpid}'
1324
+
1325
+ ####################### NEW GEMMI STUFF ############################################
1326
+ _import_gemmi_cache = [None,None]
1327
+ def _import_gemmi( *, sysexit = False ):
1328
+ if _import_gemmi_cache[0] is not None:
1329
+ return _import_gemmi_cache[0], _import_gemmi_cache[1]
1330
+ try:
1331
+ import gemmi#both available on pypi and conda-forge
1332
+ import gemmi.cif
1333
+ except ImportError:
1334
+ m = ( 'Could not import gemmi modules needed to process CIF files.'
1335
+ +' The gemmi package is available on both PyPI ("python3 -mpip install'
1336
+ +' gemmi") and conda ("conda install -c conda-forge gemmi")' )
1337
+ if sysexit:
1338
+ raise SystemExit(m)
1339
+ else:
1340
+ raise ImportError(m)
1341
+ _import_gemmi_cache[0] = gemmi
1342
+ _import_gemmi_cache[1] = gemmi.cif
1343
+ return gemmi, gemmi.cif
1344
+
1345
+ _guessmap = [None]
1346
+ def _guess_spacegroup_name( gemmi, s ):
1347
+ def _guess_keys( s ):
1348
+ s=''.join(s.split())
1349
+ return [ s, s.replace('/',''),s.replace('-',''),s.replace('-','').replace('/','' ) ]
1350
+ def _init_guess_map():
1351
+ guess = {}
1352
+ def _ag( i, s ):
1353
+ if s not in guess:
1354
+ guess[s] = set([i])
1355
+ else:
1356
+ guess[s].add(i)
1357
+ def add_guess( i, s ):
1358
+ for k in _guess_keys(s):
1359
+ _ag( i, k )
1360
+ for i in range(1,230+1):
1361
+ sg = gemmi.find_spacegroup_by_number(i)
1362
+ add_guess( i, sg.xhm() )
1363
+ add_guess( i, sg.short_name() )
1364
+ add_guess( i, sg.hall )
1365
+ add_guess( i, sg.hm )
1366
+ return guess
1367
+ if not _guessmap[0]:
1368
+ _guessmap[0] = _init_guess_map()
1369
+ gm = _guessmap[0]
1370
+ possible_sgnos = set()
1371
+ for k in _guess_keys(s):
1372
+ for i in gm.get(k,[]):
1373
+ possible_sgnos.add( i )
1374
+ return list(sorted(possible_sgnos))
1375
+
1376
def _load_with_gemmi( cifblock, allow_fixup = True ):
    """Load `cifblock` with gemmi and return a (struct, spacegroup) tuple.

    If the spacegroup is not immediately recognised and `allow_fixup` is set,
    the spacegroup is searched for based on the spacegroup number and name
    fields. If found, `cifblock` is updated in-place with the spacegroup info
    and the structure is loaded once more (this time without fixup). The
    returned spacegroup is None (or otherwise falsy) if it could not be
    determined.
    """
    #Load cifblock into gemmi struct. We might perform in-place editing of the
    #cifblock, if gemmi does not immediately recognise the space group.
    gemmi, gemmi_cif = _import_gemmi()

    struct = gemmi.make_small_structure_from_block( cifblock )

    assert struct
    if hasattr(struct,'find_spacegroup'):
        sg = struct.find_spacegroup()
    else:
        sg = struct.spacegroup

    if not sg:
        #Doing what the old struct.find_spacegroup() was doing:
        sg = gemmi.find_spacegroup_by_name( hm=struct.spacegroup_hm,
                                            alpha=struct.cell.alpha,
                                            gamma=struct.cell.gamma)

    if sg or not allow_fixup:
        return struct, sg

    #Let us see if we can find the spacegroup with a bit of manual
    #intervention. If we can, we update the cifblock and do a full reload
    #(otherwise the gemmi struct object won't have the spacegroup and images
    #properly initialised):

    _ = cifblock.find(['_space_group_IT_number'])
    if not _:
        _ = cifblock.find(['_symmetry_Int_Tables_number'])

    if _ and len(_)==1 and len(_[0])==1 and str(_[0][0]).isdigit() and (1<=int(str(_[0][0]))<=230):
        _sgnum = int(_[0][0])
        #NB: We can do this for some groups only (cubic?) Or perhaps we could use it as a double-check only?
        sg = gemmi.find_spacegroup_by_number(_sgnum)
        assert sg and sg.number == _sgnum

    sg_hm = str(struct.spacegroup_hm).strip()
    if sg_hm and not sg:
        def sg_searchname(x):
            return gemmi.find_spacegroup_by_name(x,
                                                 struct.cell.alpha,
                                                 struct.cell.gamma )
        attempts = [ sg_hm ]
        #TODO: We used to do this, but currently it does not make a difference:
        #for e in ('H','R'):#NB H+R is not enough, see table 6 at http://cci.lbl.gov/sginfo/hall_symbols.html
        #    if sg_hm.endswith(e):
        #        attempts.append( sg_hm[:-1] + ' : ' + sg_hm[-1] )
        for a in attempts:
            #Retry with normalised whitespace:
            a = ' '.join(a.split())
            sg = sg_searchname( a )
            if sg:
                _nc_common.warn(f'Had to interpret spacegroup "{sg_hm}" as "{a}" before Gemmi could recognise it.')
                break

    if sg_hm and not sg:
        possible = _guess_spacegroup_name( gemmi, sg_hm )
        if len(possible) == 1:
            sgnum = possible[0]
            sg = gemmi.find_spacegroup_by_number(sgnum)
            _nc_common.warn(f'Had to interpret spacegroup "{sg_hm}" as "{sg.hm}" before Gemmi could recognise it.')
        elif len(possible) > 1:
            #Ambiguous, so we merely list the candidates in a warning:
            poshm = list( (sgnum,gemmi.find_spacegroup_by_number(sgnum).hm) for sgnum in possible )
            poshm = ', '.join( '"%s"(number %i)'%(sgstr,sgnum) for sgnum,sgstr in poshm )
            _nc_common.warn(f'Failed to interpret spacegroup "{sg_hm}". Could be any of: {poshm}.')
    if not sg:
        #manual intervention did not help:
        return struct, None

    #patch up the cifblock with info about manually found spacegroup, and try again:
    _update_spacegroup_in_cifblock( cifblock, sg )

    return _load_with_gemmi( cifblock, allow_fixup = False )
1449
+
1450
def _update_spacegroup_in_cifblock_from_name( gemmi, cifblock, name ):
    """Update the spacegroup fields of `cifblock` to those of spacegroup `name`.

    The name is looked up with gemmi.find_spacegroup_by_name, and NCBadInput
    is raised if that does not result in a spacegroup.
    """
    sg = gemmi.find_spacegroup_by_name( name )
    if sg:
        _update_spacegroup_in_cifblock( cifblock, sg )
        return
    raise _nc_core.NCBadInput(f'Unknown spacegroup: "{name}"')
1455
+
1456
+ def _update_spacegroup_in_cifblock( cifblock, sg, use_xhm = True ):
1457
+ cifblock.set_pair('_space_group_name_H-M_alt', sg.xhm() if use_xhm else sg.hm )
1458
+ cifblock.set_pair('_space_group_IT_number', str(sg.number) )
1459
+ cifblock.set_pair('_space_group_name_Hall', sg.hall )
1460
+ cifblock.set_pair('_space_group_crystal_system', sg.crystal_system_str() )
1461
+
1462
+
1463
+ def _actual_init_gemmicif( cifsrc, *, quiet, mp_apikey, refine_with_spglib, merge_equiv, override_spacegroup = None ):
1464
+
1465
+ from math import fsum as _math_fsum
1466
+
1467
+ result = {}
1468
+ pos_tolerance = 0.0001#NB: Best if value matches the one in NCInfoBuilder.cc
1469
+
1470
+ from types import MappingProxyType
1471
+ dict_ro, list_ro = MappingProxyType, tuple
1472
+
1473
+ gemmi, gemmi_cif = _import_gemmi()
1474
+
1475
+ if not isinstance( cifsrc, CIFSource ):
1476
+ cifsrc = CIFSource( cifsrc )
1477
+
1478
+ result['cifsrc'] = cifsrc
1479
+
1480
+ _cifdata = cifsrc.load_data( quiet = quiet, mp_apikey = mp_apikey )
1481
+ if not quiet:
1482
+ _nc_common.print("Attempting to load CIF data with gemmi")
1483
+
1484
+ result['cifdata'] = _cifdata
1485
+ try:
1486
+ cif_doc = gemmi_cif.read_string( _cifdata )
1487
+ except ValueError as e:
1488
+ errmsg = 'CIF parsing error (from Gemmi): "%s"'%e
1489
+ cif_doc = None
1490
+ if cif_doc is None:
1491
+ raise _nc_core.NCBadInput(errmsg or 'Unknown CIF parsing error from Gemmi')
1492
+
1493
+ cif_doc_as_json = cif_doc.as_json()
1494
+ import json
1495
+ try:
1496
+ cif_doc_as_dict = json.loads( cif_doc_as_json )
1497
+ except json.decoder.JSONDecodeError as e:
1498
+ cif_doc_as_dict = None
1499
+ _nc_common.warn('Could not decode raw CIF data to dictionary (Gemmi bug?): JSONDecodeError("%s")'%e)
1500
+
1501
+ result['cif_raw'] = cif_doc_as_dict
1502
+ if len(cif_doc) == 0:
1503
+ raise _nc_core.NCBadInput('CIF data had no blocks!')
1504
+ if len(cif_doc) == 1:
1505
+ cif_block_with_structure = cif_doc.sole_block()
1506
+ else:
1507
+ cif_block_with_structure = None
1508
+ for e in cif_doc:
1509
+ if not len(e.find(['_atom_site_fract_x'])):
1510
+ continue
1511
+ if cif_block_with_structure:
1512
+ cif_block_with_structure = None
1513
+ break
1514
+ cif_block_with_structure = e
1515
+ if not cif_block_with_structure:
1516
+ raise _nc_core.NCBadInput('Could not automatically determine block in CIF data with structure info')
1517
+
1518
+ assert cif_block_with_structure
1519
+
1520
+ if override_spacegroup:
1521
+ assert isinstance(override_spacegroup,str)
1522
+ _nc_common.warn(f'Overriding spacegroup to "{override_spacegroup}" due to explicit request')
1523
+ _update_spacegroup_in_cifblock_from_name( gemmi, cif_block_with_structure, override_spacegroup )
1524
+
1525
+ #record original h-m-alt entry:
1526
+ orig_hm_alt = None
1527
+ _ = cif_block_with_structure.find(['_space_group_name_H-M_alt'])
1528
+ if _ and len(_)==1 and len(_[0])==1:
1529
+ orig_hm_alt = str(_[0][0]).strip()
1530
+
1531
+ struct, sg = _load_with_gemmi( cif_block_with_structure )
1532
+
1533
+ if not struct:
1534
+ raise _nc_core.NCBadInput('Could not load structure with Gemmi')
1535
+
1536
+ if not sg:
1537
+ raise _nc_core.NCBadInput('Could not determine space group from CIF data')
1538
+
1539
+ #warn if spacegroup setting might be ambiguous:
1540
+ if sg.number != 1:
1541
+ _ = list( sorted( (e.number,e.xhm(),e.is_reference_setting()) for e in gemmi.spacegroup_table() if ( e.number==sg.number and ':' in e.xhm())))
1542
+ #_ = [ (no,xhm,isref) for no,xhm,isref in _ if isref ]
1543
+ if len(_)>1:
1544
+ _str = '", "'.join( xhm for no,xhm,isref in _)
1545
+ if not orig_hm_alt or not any( (orig_hm_alt==e[1] or orig_hm_alt.replace(' ','')==e[1].replace(' ','')) for e in _ ):
1546
+ _nc_common.warn(f'SG-{sg.number} available in multiple'
1547
+ f' choices ("{_str}") and which one was not'
1548
+ ' encoded explicitly in the '
1549
+ '_space_group_name_H-M_alt CIF field. Consider'
1550
+ ' overriding the space group explicitly when'
1551
+ ' loading this file.')
1552
+
1553
+ _ = cif_block_with_structure.find(['_atom_type_number_in_cell'])
1554
+ _ = sum((sum(([e] for e in ll),[]) for ll in _),[]) if _ else []
1555
+ if _ and not any( e is None for e in _ ):
1556
+ expected_tot_atom_in_orig_cell = sum(float(e) for e in _)
1557
+ else:
1558
+ expected_tot_atom_in_orig_cell = None
1559
+ if not ( isinstance(sg.number,int) or sg.number.isdigit() ) or not ( 1<=int(sg.number)<=230 ):
1560
+ raise _nc_core.NCBadInput(f'Could not determine space group from CIF data (it loaded with invalid SG number: {sg.number}')
1561
+ if not struct.cell.is_crystal():
1562
+ raise _nc_core.NCBadInput('Loaded structure is non-crystalline (according to Gemmi).')
1563
+
1564
+ sgnumber = int(sg.number)
1565
+
1566
+ if not struct.cell.is_compatible_with_spacegroup( sg ):
1567
+ raise _nc_core.NCBadInput('Loaded unit cell is not compatible with deduced space group (according to Gemmi).')
1568
+
1569
+ cellsg = dict ( a = struct.cell.a, b = struct.cell.b, c = struct.cell.c,
1570
+ alpha = struct.cell.alpha,
1571
+ beta = struct.cell.beta,
1572
+ gamma = struct.cell.gamma )
1573
+
1574
+ fractcoord_approx_1angstrom = 1.0 / ( (struct.cell.a+struct.cell.b+struct.cell.c)/3.0 )
1575
+
1576
+
1577
+ #Although the sg object also provides sg.ccp4 and sg.hall, we record here
1578
+ #just sgnumber and xhm, since that is what spglib refinement also provides:
1579
+ cellsg['spacegroup'] = dict_ro( dict( number = sgnumber,
1580
+ hm = sg.xhm(),
1581
+ ) )
1582
+
1583
+ collected_atoms = []
1584
+
1585
def _expand_coord_to_all_other_images( coord, allow_finetune = 3 ):
    """Expand a fractional coordinate to all of its distinct symmetry images.

    The coordinate is wrapped into the unit cell and each operation in
    struct.cell.images is applied to it. An image is discarded when it is
    closer than 1e-10 (as measured by _unit_cell_point_dist) to a point
    that was already accepted.

    If more than one of the resulting points lies within
    0.01*fractcoord_approx_1angstrom of the starting point, the input is
    treated as an inexactly specified special position: the starting point
    is replaced by the average of those nearby points, a warning is
    emitted, and the expansion is rerun with allow_finetune reduced by
    one. When allow_finetune is 0 the brute-force list is returned as is.

    Returns the list of accepted points; its first entry is the wrapped
    (and possibly fine-tuned) starting point.
    """
    point_dist = _nc_ncmatimpl._unit_cell_point_dist
    origin = _gemmi_wrap_to_unit(coord)

    #Brute-force expansion, skipping any image which coincides (within
    #machine precision) with a point that was already accepted:
    images = [ origin ]
    for op in struct.cell.images:
        image = _gemmi_wrap_to_unit( op.apply( origin ) )
        if not any( point_dist(image,accepted) < 1e-10 for accepted in images ):
            images.append( image )

    if not allow_finetune:
        return images

    #Collect the expanded points which are very close to the starting point
    #(the starting point itself always qualifies):
    max_dist = 0.01*fractcoord_approx_1angstrom
    nearby = []
    for pt in images:
        if point_dist(origin,pt) < max_dist:
            nearby.append( _nc_ncmatimpl._remap_fract_pos_pt(pt) )
    assert len(nearby) > 0
    if len(nearby) == 1:
        #Nothing but the starting point itself, so no fine-tuning is needed.
        return images

    #More than one nearby point: the input probably had inexact coordinates
    #for an atom at a special position. Replace the starting point with the
    #average over the nearby points and rerun.
    def _wrapped_delta( x1, x0 ):
        #Fractional coordinates are periodic: the separation of (0,0,eps)
        #and (0,0,1-eps) is 2eps, not 1-2eps.
        delta = x1 - x0
        if delta > 0.5:
            delta -= 1.0
        if delta < -0.5:
            delta += 1.0
        return delta

    ref = nearby[0]
    count = len(nearby)
    refined = []
    for axis in range(3):
        terms = [ ref[axis] ]
        terms.extend( _wrapped_delta( pt[axis], ref[axis] )/count for pt in nearby )
        value = _math_fsum( terms )
        #snap to (0,0,0):
        if abs(value) < 1e-16:
            value = 0.0
        refined.append( value )

    def _fmt_pt( pt ):
        return f'({pt[0]:.15g},{pt[1]:.15g},{pt[2]:.15g})'

    _nc_common.warn( f'Fractional coordinate {_fmt_pt(coord)} interpreted as special position {_fmt_pt(refined)} to avoid numerical precision issues' )
    return _expand_coord_to_all_other_images( gemmi.Fractional( refined[0], refined[1], refined[2] ),
                                              allow_finetune = allow_finetune - 1 )
1630
+
1631
+
1632
+ for site in struct.sites:
1633
+ if not float( site.occ ) > 0.0:
1634
+ continue
1635
+ pos0 = _gemmi_wrap_to_unit( site.fract )
1636
+ expanded_coords = _expand_coord_to_all_other_images( pos0 )
1637
+
1638
+ aniso = None
1639
+ if site.aniso.nonzero:
1640
+ aniso = dict( (k, float(getattr(site.aniso,k))) for k in ('u11','u22','u33','u12','u13','u23') )
1641
+ if all( v==0.0 for v in aniso.values() ):
1642
+ aniso = None
1643
+ u_iso = float( site.u_iso )
1644
+ if not u_iso > 0.0:
1645
+ u_iso = None
1646
+
1647
+ if aniso and not u_iso:
1648
+ #this logic might already exist on the gemmi-side, but to be safe we add it here as well:
1649
+ u_iso = ( aniso['u11'] + aniso['u22'] + aniso['u33'] ) / 3
1650
+
1651
+ site_en = str(site.element.name).strip()
1652
+ if site_en == 'X' or not site_en:
1653
+ #Issue seen in codid::9005777, which had no atomic symbols but
1654
+ #labels like "CaM1", "CaM2", "OA1".."OA3", "OB1", "OB2", "OC1",
1655
+ #"OC3",... Correct if beginning of string starts with exactly one
1656
+ #element name, followed by either a digit or an upper case string
1657
+ #followed by digits.
1658
+ from .atomdata import allElementNames
1659
def guess_elem_name( site_label ):
    """Guess an atomic symbol from a CIF site label such as "CaM1" or "OA3".

    Returns the guessed symbol, or None when no unambiguous guess can be
    made. A guess is only made when _split_trailing_digit reports a
    trailing digit part, when exactly one candidate symbol is a prefix of
    the remaining text, and when whatever follows that symbol in the label
    consists solely of upper case letters followed by digits.
    """
    stem, trailing = _nc_common._split_trailing_digit( site_label )
    if trailing is None:
        return None

    def _prefixes_of_length( names, nchars ):
        #Names of the requested length with which the label stem begins.
        return [ e for e in names if ( stem.startswith(e) and len(e)==nchars ) ]

    #Two-character element names take precedence:
    hits = _prefixes_of_length( allElementNames(), 2 )
    if len(hits) > 1:
        return None
    #Fall back to one-character names (including 'D' and 'T') only when
    #there was no two-character hit:
    if not hits:
        hits = _prefixes_of_length( allElementNames(), 1 )
        hits += _prefixes_of_length( ('D','T'), 1 )
    if len(hits) != 1:
        return None

    symbol = hits[0]
    assert site_label.startswith(symbol)

    #Strip any upper case letters which follow the symbol. Only digits (or
    #nothing at all) may remain for the guess to be accepted.
    remainder = site_label[len(symbol):].strip()
    while remainder:
        head = remainder[0]
        if not ( head.isupper() and head.isalpha() ):
            break
        remainder = remainder[1:].strip()
    if remainder and not remainder.isdigit():
        return None
    return symbol
1680
+ elem_name = guess_elem_name( site.label )
1681
+ if elem_name:
1682
+ _nc_common.warn(f'Assuming atomic symbol "{elem_name}" based on CIF label "{site.label}"')
1683
+ else:
1684
+ raise _nc_core.NCBadInput(f'Neither Gemmi nor NCrystal\'s custom'
1685
+ ' code could deduce an atomic symbol'
1686
+ f' for the CIF label "{site.label}"')
1687
+ elem_Z = None
1688
+ else:
1689
+ elem_name, elem_Z = site_en, int(site.element.atomic_number)
1690
+
1691
+ elem_A = None
1692
+ if elem_name == 'D':
1693
+ elem_name, elem_Z, elem_A = 'H',1,2
1694
+ if elem_name == 'T':
1695
+ elem_name, elem_Z, elem_A = 'T',1,3
1696
+ elem_marker = elem_name + ( str(elem_A) if elem_A else '' )
1697
+
1698
+ from .atomdata import elementNameToZValue
1699
+ z2name = elementNameToZValue( elem_name, allow_isotopes = False )
1700
+ if z2name is None:
1701
+ raise _nc_core.NCBadInput(f'Not an element name: "{elem_name}"')
1702
+
1703
+ if elem_Z is None:
1704
+ elem_Z = z2name
1705
+ if elem_Z != z2name:
1706
+ raise _nc_core.NCBadInput('Wrong atomic number (%i) provided for element "%s"'%(elem_Z,elem_name))
1707
+
1708
+ collected_atoms.append( dict( expanded_coords = expanded_coords,
1709
+ element_marker = elem_marker,
1710
+ occupancy = float(site.occ),
1711
+ cif_label = str(site.label),
1712
+ uiso = u_iso,
1713
+ aniso = aniso ) )
1714
+
1715
+ #Now merge collected atoms which occupy the same sites:
1716
def has_same_sites( atom1, atom2 ):
    """Tell whether two collected atoms occupy the same set of sites.

    This is the case when both atoms have the same number of expanded
    coordinates and at least one expanded coordinate of atom2 lies within
    pos_tolerance of the first expanded coordinate of atom1.
    """
    coords1 = atom1['expanded_coords']
    coords2 = atom2['expanded_coords']
    assert len(coords1)>0 and len(coords2)>0
    if len(coords1) != len(coords2):
        return False
    anchor = coords1[0]
    for candidate in coords2:
        if _nc_ncmatimpl._unit_cell_point_dist(candidate,anchor) < pos_tolerance:
            return True
    return False
1720
+
1721
+ atoms_to_process, final_atoms = collected_atoms, []
1722
+ while atoms_to_process:
1723
+ atom1 = atoms_to_process[0]
1724
+ atoms_with_same_pos = [ atom1 ]
1725
+ atoms_with_different_pos = []
1726
+ for atom2 in atoms_to_process[1:]:
1727
+ if has_same_sites(atom1,atom2):
1728
+ if atom1['uiso'] != atom2['uiso'] or atom1['aniso'] != atom2['aniso']:
1729
+ raise _nc_core.NCBadInput('atoms labelled "%s" and "%s" have same cell '%(atom1['cif_label'],atom2['cif_label'])
1730
+ +'positions but different uiso information. This is not supported.')
1731
+ atoms_with_same_pos.append( atom2 )
1732
+ else:
1733
+ atoms_with_different_pos.append( atom2 )
1734
+
1735
+ cif_labels = list( a['cif_label'] for a in atoms_with_same_pos )
1736
+ cif_labels_str = '"%s"'%('", "'.join(cl for cl in cif_labels))
1737
+
1738
+ composition = list( ( a['occupancy'], a['element_marker'] ) for a in atoms_with_same_pos )
1739
+ occupancy = _math_fsum( fr for fr, elem in composition )
1740
+ assert occupancy > 0.0
1741
+ if occupancy != 1.0:
1742
+ composition = list( (fr/occupancy, elem) for fr, elem in composition )
1743
+ if occupancy > (1.0 + 1e-6) :
1744
+ m=f'Error: Too high total occupancy ({occupancy} which is >1) for atoms with CIF labels: {cif_labels_str}'
1745
+ if abs(int(occupancy+0.5)-occupancy)<1e-10:
1746
+ m+='. A possible cause could be multiple entries in the CIF file using atomic positions that are actually symmetrically equivalent!'
1747
+ raise _nc_core.NCBadInput(m)
1748
+ occupancy = min( 1.0, occupancy )
1749
+
1750
+ final_atom_info = dict( cif_labels = list_ro( cif_labels ),
1751
+ equivalent_positions = list_ro( (c.x,c.y,c.z) for c in atoms_with_same_pos[0]['expanded_coords'] ),
1752
+ composition = list_ro( sorted( composition ) ),
1753
+ occupancy = occupancy,
1754
+ uiso = atoms_with_same_pos[0]['uiso'],
1755
+ aniso = atoms_with_same_pos[0]['aniso'] )
1756
+ final_atoms.append( dict_ro( final_atom_info ) )
1757
+ atoms_to_process = atoms_with_different_pos
1758
+
1759
+ if merge_equiv:
1760
+ final_atoms = list_ro( dict_ro(a) for a in _impl_merge_atoms( final_atoms ) )
1761
+
1762
+ if expected_tot_atom_in_orig_cell:
1763
+ n_actual = sum( len(a['equivalent_positions']) for a in final_atoms )
1764
def rd( x,y ):
    """Return |x-y|/(|x|+|y|), with the denominator kept at 1e-300 or more."""
    difference = abs( x - y )
    #The lower bound avoids a division by zero when both values are 0.
    scale = max( 1e-300, abs(x) + abs(y) )
    return difference / scale
1766
+ if rd( n_actual, expected_tot_atom_in_orig_cell ) > 0.01:
1767
+ raise _nc_core.NCBadInput(f'Expanded number of atoms per cell ({n_actual}) is different '
1768
+ 'from what is stated explicitly in the CIF data'
1769
+ f' ({expected_tot_atom_in_orig_cell}). Wrong spacegroup (or'
1770
+ ' choice/setting improperly specified)?')
1771
+
1772
+ if refine_with_spglib:
1773
+ cellsg, final_atoms, warnings, msgs = _impl_refine_cell( cellsg, final_atoms )
1774
+ for w in warnings:
1775
+ _nc_common.warn(w)
1776
+ if not quiet:
1777
+ for m in msgs:
1778
+ _nc_common.print(m)
1779
+ else:
1780
+ _nc_common.warn('Structure refinement and verification with spglib was disabled due to explicit request.')
1781
+
1782
+ if int(cellsg['spacegroup']['number']) == 1:
1783
+ #The following warning might be a bit obsolete now that we usually
1784
+ #refine cells, but we keep it here for now as an extra safeguard:
1785
+ _nc_common.warn('Space-group number 1 ("P1") detected in CIF input. A lot (but not all) of CIF data'
1786
+ ' with this space group listed is in a non-conventional format not suitable for NCrystal. Be wary!')
1787
+
1788
+ result['cellsg'] = dict_ro( cellsg )
1789
+ def _atom_sort( a ):
1790
+ #well-defined sort order for reproducible output
1791
+ return ( tuple(sorted(a.get('composition',[]))), len(a.get('equivalent_positions')),a.get('uiso',None),a.get('cif_labels',None), tuple(a.items()) )
1792
+ result['atoms'] = list_ro( dict_ro(a) for a in sorted(final_atoms,key = _atom_sort) )
1793
+
1794
+ result['cifdescr'], result['actual_codid'],result['actual_mpid'],result['cif_chemformula'] =_extract_descr_from_cif( result['cif_raw'], result['cifsrc'], result['cifdata'] )
1795
+ return result, gemmi, struct