rdworks 0.48.1__tar.gz → 0.50.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {rdworks-0.48.1 → rdworks-0.50.1}/PKG-INFO +1 -1
  2. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/__init__.py +9 -8
  3. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/ionized.py +4 -1
  4. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/mol.py +31 -20
  5. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/xtb/wrapper.py +65 -24
  6. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks.egg-info/PKG-INFO +1 -1
  7. {rdworks-0.48.1 → rdworks-0.50.1}/tests/test_ionized.py +33 -1
  8. rdworks-0.50.1/tests/test_xtb.py +160 -0
  9. rdworks-0.48.1/tests/test_xtb.py +0 -76
  10. {rdworks-0.48.1 → rdworks-0.50.1}/LICENSE +0 -0
  11. {rdworks-0.48.1 → rdworks-0.50.1}/README.md +0 -0
  12. {rdworks-0.48.1 → rdworks-0.50.1}/pyproject.toml +0 -0
  13. {rdworks-0.48.1 → rdworks-0.50.1}/setup.cfg +0 -0
  14. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/autograph/__init__.py +0 -0
  15. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/autograph/autograph.py +0 -0
  16. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/autograph/centroid.py +0 -0
  17. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/autograph/dynamictreecut.py +0 -0
  18. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/autograph/nmrclust.py +0 -0
  19. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/autograph/rckmeans.py +0 -0
  20. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/bitqt/__init__.py +0 -0
  21. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/bitqt/bitqt.py +0 -0
  22. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/conf.py +0 -0
  23. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/descriptor.py +0 -0
  24. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/display.py +0 -0
  25. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/matchedseries.py +0 -0
  26. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/mollibr.py +0 -0
  27. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/pka.py +0 -0
  28. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Asinex_fragment.xml +0 -0
  29. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Astex_RO3.xml +0 -0
  30. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +0 -0
  31. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +0 -0
  32. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +0 -0
  33. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +0 -0
  34. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +0 -0
  35. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +0 -0
  36. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Baell2010_PAINS/makexml.py +0 -0
  37. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Brenk2008_Dundee/makexml.py +0 -0
  38. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/CNS.xml +0 -0
  39. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ChEMBL_Walters/BMS.xml +0 -0
  40. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ChEMBL_Walters/Dundee.xml +0 -0
  41. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ChEMBL_Walters/Glaxo.xml +0 -0
  42. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +0 -0
  43. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ChEMBL_Walters/LINT.xml +0 -0
  44. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ChEMBL_Walters/MLSMR.xml +0 -0
  45. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ChEMBL_Walters/PAINS.xml +0 -0
  46. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +0 -0
  47. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ChEMBL_Walters/makexml.py +0 -0
  48. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +0 -0
  49. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +0 -0
  50. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +0 -0
  51. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +0 -0
  52. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +0 -0
  53. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Hann1999_Glaxo/makexml.py +0 -0
  54. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Kazius2005/Kazius2005.xml +0 -0
  55. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/Kazius2005/makexml.py +0 -0
  56. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ZINC_druglike.xml +0 -0
  57. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ZINC_fragment.xml +0 -0
  58. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ZINC_leadlike.xml +0 -0
  59. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/fragment.xml +0 -0
  60. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ionized/simple_smarts_pattern.csv +0 -0
  61. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/ionized/smarts_pattern.csv +0 -0
  62. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/misc/makexml.py +0 -0
  63. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/misc/reactive-part-2.xml +0 -0
  64. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/misc/reactive-part-3.xml +0 -0
  65. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/predefined/misc/reactive.xml +0 -0
  66. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/readin.py +0 -0
  67. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/rgroup.py +0 -0
  68. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/scaffold.py +0 -0
  69. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/std.py +0 -0
  70. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/stereoisomers.py +0 -0
  71. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/tautomers.py +0 -0
  72. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/testdata.py +0 -0
  73. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/torsion.py +0 -0
  74. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/units.py +0 -0
  75. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/utils.py +0 -0
  76. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/xml.py +0 -0
  77. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks/xtb/__init__.py +0 -0
  78. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks.egg-info/SOURCES.txt +0 -0
  79. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks.egg-info/dependency_links.txt +0 -0
  80. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks.egg-info/requires.txt +0 -0
  81. {rdworks-0.48.1 → rdworks-0.50.1}/src/rdworks.egg-info/top_level.txt +0 -0
  82. {rdworks-0.48.1 → rdworks-0.50.1}/tests/test_basics.py +0 -0
  83. {rdworks-0.48.1 → rdworks-0.50.1}/tests/test_round.py +0 -0
  84. {rdworks-0.48.1 → rdworks-0.50.1}/tests/test_torsion.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdworks
3
- Version: 0.48.1
3
+ Version: 0.50.1
4
4
  Summary: Routine tasks built on RDKit and other tools
5
5
  Author-email: Sung-Hun Bae <sunghun.bae@gmail.com>
6
6
  Maintainer-email: Sung-Hun Bae <sunghun.bae@gmail.com>
@@ -1,4 +1,4 @@
1
- __version__ = '0.48.1'
1
+ __version__ = '0.50.1'
2
2
 
3
3
  from rdworks.conf import Conf
4
4
  from rdworks.mol import Mol
@@ -25,10 +25,11 @@ __rdkit_version__ = rdkit.rdBase.rdkitVersion
25
25
 
26
26
  rdkit_logger = rdkit.RDLogger.logger().setLevel(rdkit.RDLogger.CRITICAL)
27
27
 
28
- main_logger = logging.getLogger()
29
- main_logger.setLevel(logging.INFO) # level: DEBUG < INFO < WARNING < ERROR < CRITICAL
30
- logger_formatter = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
31
- datefmt='%Y-%m-%d %H:%M:%S')
32
- logger_ch = logging.StreamHandler()
33
- logger_ch.setFormatter(logger_formatter)
34
- main_logger.addHandler(logger_ch)
28
+ logger = logging.getLogger(__name__)
29
+ logger.setLevel(logging.INFO) # level: DEBUG < INFO < WARNING < ERROR < CRITICAL
30
+
31
+ logger_stream = logging.StreamHandler() # sys.stdout or sys.stderr
32
+ logger_format = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
33
+ datefmt='%Y-%m-%d %H:%M:%S')
34
+ logger_stream.setFormatter(logger_format)
35
+ logger.addHandler(logger_stream)
@@ -130,6 +130,9 @@ class IonizedStates:
130
130
  edmol.RemoveAtom(bonded_H_indices[0])
131
131
 
132
132
  elif acid_or_base == 'B': # protonate
133
+ # note that protonation at tertiary nitrogen may results in stereoisomers
134
+ # current implementation ignores the stereochemistry
135
+ # use rdworks.complete_stereoisomers() function to complete the stereoisomers
133
136
  B = edmol.GetAtomWithIdx(i)
134
137
  assert B.GetAtomicNum() > 1, f"Cannot protonate an atom (idx={i}; {B.GetAtomicNum()})"
135
138
  charge = B.GetFormalCharge() + 1
@@ -138,7 +141,7 @@ class IonizedStates:
138
141
  B.SetNumExplicitHs(nH+1)
139
142
  edmol = Chem.AddHs(edmol)
140
143
 
141
- # Clean up and save SMILES
144
+ # clean up and save SMILES
142
145
  ionized_smiles = IonizedStates.clean_smiles(edmol)
143
146
  ionized_mol = Chem.MolFromSmiles(ionized_smiles)
144
147
  ionized_mol = Chem.AddHs(ionized_mol)
@@ -46,7 +46,8 @@ from rdworks.display import render_svg, render_png
46
46
 
47
47
  from scour.scour import scourString
48
48
 
49
- main_logger = logging.getLogger()
49
+
50
+ logger = logging.getLogger(__name__)
50
51
 
51
52
 
52
53
  class Mol:
@@ -328,11 +329,7 @@ class Mol:
328
329
  return self
329
330
 
330
331
 
331
- def make_confs(self,
332
- n:int = 50,
333
- method:str = 'ETKDG',
334
- calculator:str | Callable = 'MMFF94',
335
- ) -> Self:
332
+ def make_confs(self, n: int = 50, method: str = 'ETKDG', **kwargs) -> Self:
336
333
  """Generates 3D conformers.
337
334
 
338
335
  Args:
@@ -348,12 +345,8 @@ class Mol:
348
345
  High-Quality Conformer Generation with CONFORGE: Algorithm and Performance Assessment.
349
346
  J. Chem. Inf. Model. 63, 5549-5570 (2023).
350
347
  """
351
-
352
- # if n is None:
353
- # rot_bonds = rd_descriptor_f['RotBonds'](self.rdmol)
354
- # n = min(max(1, int(8.481 * (rot_bonds **1.642))), 1000)
355
- # n = max(1, math.ceil(n * n_rel)) # ensures that n is at least 1
356
-
348
+ verbose = kwargs.get('verbose', False)
349
+
357
350
  self.confs = []
358
351
 
359
352
  if method.upper() == 'ETKDG':
@@ -426,6 +419,7 @@ class Mol:
426
419
  os.remove(tmp_filename)
427
420
 
428
421
  # energy evaluations for ranking
422
+ calculator = kwargs.get('calculator', 'MMFF94')
429
423
  for conf in self.confs:
430
424
  conf.potential_energy(calculator) # default: MMFF94
431
425
 
@@ -436,8 +430,15 @@ class Mol:
436
430
  for conf in self.confs:
437
431
  conf.props.update({"E_rel(kcal/mol)": conf.props[sort_by] - lowest_energy})
438
432
 
433
+ # rename conformers
439
434
  self = self.rename()
440
435
 
436
+ if verbose:
437
+ rot_bonds = rd_descriptor_f['RotBonds'](self.rdmol)
438
+ nrb_suggested = int(8.481 * (rot_bonds **1.642))
439
+ logger.info(f"make_confs() rotatable bonds {rot_bonds} (suggested conformers {nrb_suggested}) generated {self.count()}")
440
+ logger.info(f"make_confs() updated potential energies E_tot(kcal/mol) and E_rel(kcal/mol) by {calculator}")
441
+
441
442
  return self
442
443
 
443
444
 
@@ -499,10 +500,15 @@ class Mol:
499
500
  Returns:
500
501
  Self: modified self.
501
502
  """
503
+ verbose = kwargs.get('verbose', False)
504
+
502
505
  if calculator is not None:
503
506
  # re-calculate potential energies
507
+ if verbose :
508
+ logger.info(f"sort_cons() calculate potential energy by {calculator}")
509
+
504
510
  for conf in self.confs:
505
- PE = conf.potential_energy(calculator=calculator, **kwargs) # sets `E_tot(kcal/mol)`
511
+ PE = conf.potential_energy(calculator, **kwargs) # sets `E_tot(kcal/mol)`
506
512
 
507
513
  if all(['E_tot(kcal/mol)' in conf.props for conf in self.confs]):
508
514
  sort_by = 'E_tot(kcal/mol)'
@@ -767,7 +773,7 @@ class Mol:
767
773
  cluster: bool | None =None,
768
774
  k: int | None = None,
769
775
  window: float | None = None,
770
- verbose: bool = False) -> Self:
776
+ **kwargs) -> Self:
771
777
  """Drop conformers that meet some condition(s).
772
778
 
773
779
  Args:
@@ -792,6 +798,8 @@ class Mol:
792
798
  Self: modified self.
793
799
  """
794
800
 
801
+ verbose = kwargs.get('verbose', False)
802
+
795
803
  reasons = [f'stereo flipped',
796
804
  f'unconverged',
797
805
  f'similar({similar_rmsd})',
@@ -805,13 +813,13 @@ class Mol:
805
813
  mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == self.smiles for _ in self.confs]
806
814
  self.confs = list(itertools.compress(self.confs, mask))
807
815
  if verbose:
808
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[0]:<{w}} -> {self.count()}')
816
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[0]:<{w}} -> {self.count()}')
809
817
 
810
818
  if unconverged and self.count() > 0:
811
819
  mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in self.confs]
812
820
  self.confs = list(itertools.compress(self.confs, mask))
813
821
  if verbose:
814
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[1]:<{w}} -> {self.count()}')
822
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[1]:<{w}} -> {self.count()}')
815
823
 
816
824
  if similar and self.count() > 1:
817
825
  # it is observed that there are essentially identical conformers
@@ -831,7 +839,7 @@ class Mol:
831
839
  mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(self.confs)]
832
840
  self.confs = list(itertools.compress(self.confs, mask))
833
841
  if verbose:
834
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[2]:<{w}} -> {self.count()}')
842
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[2]:<{w}} -> {self.count()}')
835
843
 
836
844
  # note: it will retain the conformers with lower index
837
845
  # so, it should be sorted before dropping
@@ -856,9 +864,12 @@ class Mol:
856
864
  mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in self.confs]
857
865
  self.confs = list(itertools.compress(self.confs, mask))
858
866
  if verbose:
859
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[3]:<{w}} -> {self.count()}')
867
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[3]:<{w}} -> {self.count()}')
860
868
 
861
869
  if (k or window) and self.count() > 0:
870
+ # confs must be sorted by energies
871
+ if not all(['E_rel(kcal/mol)' in _.props for _ in self.confs]):
872
+ self = self.sort_confs(**kwargs)
862
873
  if k:
863
874
  mask_k = [i < k for i,_ in enumerate(self.confs)]
864
875
  else:
@@ -871,7 +882,7 @@ class Mol:
871
882
  mask = [(x and y) for (x,y) in zip(mask_k, mask_window)]
872
883
  self.confs = list(itertools.compress(self.confs, mask))
873
884
  if verbose:
874
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[4]:<{w}} -> {self.count()}')
885
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[4]:<{w}} -> {self.count()}')
875
886
 
876
887
  return self
877
888
 
@@ -974,7 +985,7 @@ class Mol:
974
985
 
975
986
 
976
987
  def torsion_energies(self,
977
- calculator: str | Callable,
988
+ calculator: str | Callable = 'MMFF94',
978
989
  torsion_key: int | None = None,
979
990
  simplify: bool = True,
980
991
  fmax: float = 0.05,
@@ -14,7 +14,8 @@ from rdkit import Chem
14
14
  from rdkit.Geometry import Point3D
15
15
 
16
16
 
17
- logger = logging.getLogger()
17
+ logger = logging.getLogger(__name__)
18
+
18
19
 
19
20
  # In ASE, the default energy unit is eV (electron volt).
20
21
  # It will be converted to kcal/mol
@@ -25,7 +26,7 @@ ev2kcalpermol = 23.060547830619026
25
26
 
26
27
 
27
28
  class GFN2xTB:
28
- def __init__(self, molecule: Chem.Mol, ncores: int = 8):
29
+ def __init__(self, molecule: Chem.Mol, ncores: int | None = None):
29
30
  assert isinstance(molecule, Chem.Mol), "molecule is not rdkit.Chem.Mol type"
30
31
  assert molecule.GetConformer().Is3D(), "molecule is not a 3D conformer"
31
32
  assert self.is_xtb_ready(), "xtb is not accessible"
@@ -35,6 +36,9 @@ class GFN2xTB:
35
36
  self.symbols = [ atom.GetSymbol() for atom in molecule.GetAtoms() ]
36
37
  self.positions = molecule.GetConformer().GetPositions().tolist()
37
38
 
39
+ if ncores is None:
40
+ ncores = os.cpu_count()
41
+
38
42
  # Parallelisation
39
43
  os.environ['OMP_STACKSIZE'] = '4G'
40
44
  os.environ['OMP_NUM_THREADS'] = f'{ncores},1'
@@ -55,6 +59,43 @@ class GFN2xTB:
55
59
  return shutil.which('xtb') is not None
56
60
 
57
61
 
62
+ @staticmethod
63
+ def is_optimize_ready() -> bool:
64
+ try:
65
+ h2o = [
66
+ '$coord',
67
+ ' 0.00000000000000 0.00000000000000 -0.73578586109551 o',
68
+ ' 1.44183152868459 0.00000000000000 0.36789293054775 h',
69
+ '-1.44183152868459 0.00000000000000 0.36789293054775 h',
70
+ '$end',
71
+ ]
72
+
73
+ with tempfile.TemporaryDirectory() as temp_dir:
74
+ test_geometry = os.path.join(temp_dir, 'coord')
75
+ with open(test_geometry, 'w') as f:
76
+ f.write('\n'.join(h2o))
77
+ proc = subprocess.run(['xtb', test_geometry, '--opt'],
78
+ capture_output=True,
79
+ text=True)
80
+ assert proc.returncode == 0
81
+
82
+ return True
83
+
84
+ except:
85
+ print("""
86
+ Conda installed xTB has the Fortran runtime error in geometry optimization.
87
+ Please install xtb using the compiled binary:
88
+
89
+ $ wget https://github.com/grimme-lab/xtb/releases/download/v6.7.1/xtb-6.7.1-linux-x86_64.tar.xz
90
+ $ tar -xf xtb-6.7.1-linux-x86_64.tar.xz
91
+ $ cp -r xtb-dist/bin/* /usr/local/bin/
92
+ $ cp -r xtb-dist/lib/* /usr/local/lib/
93
+ $ cp -r xtb-dist/include/* /usr/local/include/
94
+ $ cp -r xtb-dist/share /usr/local/ """)
95
+
96
+ return False
97
+
98
+
58
99
  @staticmethod
59
100
  def is_cpx_ready() -> bool:
60
101
  """Checks if the CPCM-X command-line tool, `cpx`, is accessible in the system.
@@ -66,7 +107,7 @@ class GFN2xTB:
66
107
 
67
108
 
68
109
  @staticmethod
69
- def is_cpcmx_option_ready() -> bool:
110
+ def is_cpcmx_ready() -> bool:
70
111
  """Checks if xtb works with the `--cpcmx` option.
71
112
 
72
113
  xtb distributed by the conda does not include CPCM-X function (as of June 17, 2025).
@@ -97,7 +138,8 @@ class GFN2xTB:
97
138
  """
98
139
  return all([GFN2xTB.is_xtb_ready(),
99
140
  GFN2xTB.is_cpx_ready(),
100
- GFN2xTB.is_cpcmx_option_ready()])
141
+ GFN2xTB.is_cpcmx_ready(),
142
+ GFN2xTB.is_optimize_ready()])
101
143
 
102
144
 
103
145
  @staticmethod
@@ -107,14 +149,13 @@ class GFN2xTB:
107
149
  Returns:
108
150
  str | None: version statement.
109
151
  """
110
- if GFN2xTB.is_ready():
152
+ if GFN2xTB.is_xtb_ready():
111
153
  cmd = ['xtb', '--version']
112
154
  proc = subprocess.run(cmd, capture_output=True, text=True)
113
155
  assert proc.returncode == 0, "GFN2xTB() Error: xtb not available"
114
- for line in proc.stdout.split('\n'):
115
- line = line.strip()
116
- if 'version' in line:
117
- return line
156
+ match = re.search('xtb\s+version\s+(?P<version>[\d.]+)', proc.stdout)
157
+ if match:
158
+ return match.group('version')
118
159
 
119
160
  return None
120
161
 
@@ -338,12 +379,9 @@ class GFN2xTB:
338
379
 
339
380
  with tempfile.TemporaryDirectory() as temp_dir: # tmpdir is a string
340
381
  workdir = Path(temp_dir)
341
- if verbose:
342
- logger.info(f'xtb.singlepoint workdir= {temp_dir}')
343
382
 
344
383
  geometry_input_path = workdir / 'geometry.xyz'
345
384
  xtbout_path = workdir / 'xtbout.json'
346
- stdout_path = workdir / 'fort.6'
347
385
  wbo_path = workdir / 'wbo'
348
386
  geometry_output_path = workdir / 'xtbtopo.mol'
349
387
 
@@ -354,8 +392,6 @@ class GFN2xTB:
354
392
 
355
393
  options = ['--gfn', '2', '--json']
356
394
 
357
- Gsolv = None
358
-
359
395
  if water is not None and isinstance(water, str):
360
396
  if water == 'gbsa':
361
397
  options += ['--gbsa', 'H2O']
@@ -363,9 +399,12 @@ class GFN2xTB:
363
399
  elif water == 'alpb':
364
400
  options += ['--alpb', 'water']
365
401
  # it does not provide Gsolv contribution to the total energy
366
- elif water == 'cpcmx' and self.is_cpcmx_option_ready():
402
+ elif water == 'cpcmx' and self.is_cpcmx_ready():
367
403
  options += ['--cpcmx', 'water']
368
404
 
405
+ if verbose:
406
+ logger.info(f"singlepoint() {' '.join(cmd+options)}")
407
+
369
408
  # 'xtbout.json', 'xtbrestart', 'xtbtopo.mol', 'charges', and 'wbo' files will be
370
409
  # created in the current working directory.
371
410
  proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)
@@ -381,19 +420,20 @@ class GFN2xTB:
381
420
  if xtbout_path.is_file():
382
421
  with open(xtbout_path, 'r') as f:
383
422
  datadict = json.load(f) # takes the file object as input
423
+
424
+ Gsolv = None
384
425
 
385
- if (water is not None) and (Gsolv is None) and stdout_path.is_file():
426
+ if water is not None:
386
427
  # Free Energy contributions: [Eh] [kcal/mol]
387
428
  # -------------------------------------------------------------------------
388
429
  # solvation free energy (dG_solv): -0.92587E-03 -0.58099
389
430
  # gas phase energy (E) -0.52068E+01
390
431
  # -------------------------------------------------------------------------
391
432
  # total free energy (dG) -0.52077E+01
392
- with open(stdout_path, 'r') as f:
393
- for line in f:
394
- if 'solvation free energy' in line:
395
- m = re.search(r"solvation free energy \(dG_solv\)\:\s+[-+]?\d*\.?\d+E[-+]?\d*\s+(?P<kcalpermol>[-+]?\d*\.?\d+)", line)
396
- Gsolv = float(m.group('kcalpermol'))
433
+ for line in proc.stdout.splitlines():
434
+ if 'solvation free energy' in line:
435
+ m = re.search(r"solvation free energy \(dG_solv\)\:\s+[-+]?\d*\.?\d+E[-+]?\d*\s+(?P<kcalpermol>[-+]?\d*\.?\d+)", line)
436
+ Gsolv = float(m.group('kcalpermol'))
397
437
 
398
438
  Wiberg_bond_orders = self.load_wbo(wbo_path)
399
439
 
@@ -429,8 +469,6 @@ class GFN2xTB:
429
469
  """
430
470
  with tempfile.TemporaryDirectory() as temp_dir: # tmpdir is a string
431
471
  workdir = Path(temp_dir)
432
- if verbose:
433
- logger.info(f'xtb.optimize workdir= {temp_dir}')
434
472
 
435
473
  geometry_input_path = workdir / 'geometry.xyz'
436
474
  xtbout_path = workdir / 'xtbout.json'
@@ -452,6 +490,9 @@ class GFN2xTB:
452
490
  elif water == 'cpcmx':
453
491
  logger.warning('optimize with --cpcmx option is not implemented in xtb yet')
454
492
 
493
+ if verbose:
494
+ logger.info(f"optimize() {' '.join(cmd+options)}")
495
+
455
496
  proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)
456
497
 
457
498
  if proc.returncode == 0 and xtbout_path.is_file():
@@ -523,4 +564,4 @@ class GFN2xTB:
523
564
  with open(xtb_esp_dat, 'r') as f:
524
565
  pass
525
566
 
526
- return None
567
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdworks
3
- Version: 0.48.1
3
+ Version: 0.50.1
4
4
  Summary: Routine tasks built on RDKit and other tools
5
5
  Author-email: Sung-Hun Bae <sunghun.bae@gmail.com>
6
6
  Maintainer-email: Sung-Hun Bae <sunghun.bae@gmail.com>
@@ -13,6 +13,12 @@ def test_ionizedstate():
13
13
  print(k, v)
14
14
  print()
15
15
 
16
+ p = x.get_pairs()
17
+ print('pairs:')
18
+ for k in p:
19
+ print(k)
20
+ print()
21
+
16
22
  indices = d['CCCCNC(=O)[C@@H]1CCCN1[C@@H](CC)c1nnc(Cc2ccc(C)cc2)o1'][0]
17
23
 
18
24
  assert (11, 'B') in indices
@@ -30,5 +36,31 @@ def test_ionizedstate():
30
36
  assert set(expected).intersection(set(results)) == set(expected)
31
37
 
32
38
 
39
+ def test_gypsum_dl():
40
+ import gypsum_dl
41
+ smiles = 'O=C(NCCCC)[C@H](CCC1)N1[C@@H](CC)C2=NN=C(CC3=CC=C(C)C=C3)O2'
42
+ state_smiles = list(
43
+ gypsum_dl.GypsumDL(smiles,
44
+ min_ph=6.4,
45
+ max_ph=8.4,
46
+ pka_precision=1.0,
47
+ thoroughness=3,
48
+ max_variants_per_compound=5,
49
+ second_embed=False,
50
+ skip_optimize_geometry=False,
51
+ skip_alternate_ring_conformations=False,
52
+ skip_adding_hydrogen=False,
53
+ skip_making_tautomers=False,
54
+ skip_enumerate_chiral_mol=False,
55
+ skip_enumerate_double_bonds=False,
56
+ let_tautomers_change_chirality=False,
57
+ use_durrant_lab_filters=True,
58
+ job_manager='serial',
59
+ num_processors=1,
60
+ ))
61
+ for smi in state_smiles:
62
+ print(smi)
63
+
33
64
  if __name__ == '__main__':
34
- test_ionizedstate()
65
+ test_ionizedstate()
66
+ test_gypsum_dl()
@@ -0,0 +1,160 @@
1
+ from rdworks import Mol
2
+ from rdworks.xtb.wrapper import GFN2xTB
3
+ from rdworks.testdata import drugs
4
+
5
+ from pathlib import Path
6
+
7
+
8
+ # In ASE, the default energy unit is eV (electron volt).
9
+ # It will be converted to kcal/mol
10
+ # CODATA 2018 energy conversion factor
11
+ hartree2ev = 27.211386245988
12
+ hartree2kcalpermol = 627.50947337481
13
+ ev2kcalpermol = 23.060547830619026
14
+
15
+
16
+ datadir = Path(__file__).parent.resolve() / "data"
17
+ workdir = Path(__file__).parent.resolve() / "outfiles"
18
+
19
+ workdir.mkdir(exist_ok=True)
20
+
21
+ name = 'Atorvastatin'
22
+ testmol = Mol(drugs[name], name).make_confs(n=50).optimize_confs()
23
+ testmol = testmol.drop_confs(similar=True, verbose=True).sort_confs()
24
+
25
+
26
+ def test_xtb_wrapper():
27
+ from rdworks.xtb.wrapper import GFN2xTB
28
+ assert GFN2xTB.is_xtb_ready() == True
29
+ assert GFN2xTB.is_cpx_ready() == True
30
+ assert GFN2xTB.is_cpcmx_ready() == True
31
+ assert GFN2xTB.is_ready() == True
32
+ assert GFN2xTB.version() is not None
33
+
34
+
35
+ def test_singlepoint():
36
+ mol = testmol.copy()
37
+
38
+ print("number of conformers=", mol.count())
39
+ print("number of atoms=", mol.confs[0].natoms)
40
+
41
+ gfn2xtb = GFN2xTB(mol.confs[0].rdmol)
42
+
43
+ print("GFN2xTB.singlepoint()")
44
+ outdict = gfn2xtb.singlepoint()
45
+ print(outdict)
46
+ print()
47
+
48
+ print("GFN2xTB.singlepoint(water='gbsa')")
49
+ outdict = gfn2xtb.singlepoint(water='gbsa')
50
+ print(outdict)
51
+ print()
52
+
53
+ print("GFN2xTB.singlepoint(water='alpb')")
54
+ outdict = gfn2xtb.singlepoint(water='alpb')
55
+ print(outdict)
56
+ print()
57
+
58
+ print("GFN2xTB.singlepoint(water='cpcmx')")
59
+ outdict = gfn2xtb.singlepoint(water='cpcmx')
60
+ print(outdict)
61
+ print()
62
+
63
+
64
+ def test_optimize():
65
+ mol = testmol.copy()
66
+ print("number of conformers=", mol.count())
67
+ print("GFN2xTB.optimize()")
68
+ outdict = GFN2xTB(mol.confs[0].rdmol).optimize(verbose=True)
69
+ print(outdict)
70
+ print()
71
+
72
+
73
+ def test_state_generate():
74
+ import rdworks
75
+ import numpy as np
76
+ import os
77
+
78
+ task_queue = 'xtb'
79
+
80
+ kT = 0.001987 * 300.0 # (kcal/mol K)
81
+
82
+ smiles = 'CCCCNC(=O)[C@@H]1CCCN1[C@@H](CC)c1nnc(Cc2ccc(C)cc2)o1'
83
+ n = 50
84
+ method = 'ETKDG'
85
+
86
+ standardized = rdworks.Mol(smiles)
87
+ libr = rdworks.complete_tautomers(standardized)
88
+
89
+ PE = []
90
+ for mol in libr:
91
+ mol = mol.make_confs(n=n, method=method, verbose=True)
92
+ mol = mol.optimize_confs(calculator='MMFF94', verbose=True)
93
+ mol = mol.drop_confs(similar=True, similar_rmsd=0.3, verbose=True)
94
+ mol = mol.sort_confs(calculator='xTB', verbose=True)
95
+ mol = mol.drop_confs(k=10, window=15.0, verbose=True) # enforcing both conditions
96
+ _PE = []
97
+ for conf in mol.confs:
98
+ conf = conf.optimize(calculator='xTB', verbose=True)
99
+ # GFN2xTB requires 3D coordinates
100
+ xtb = GFN2xTB(conf.rdmol).singlepoint(water='cpcmx', verbose=True)
101
+ _PE.append(xtb.PE)
102
+ # SimpleNamespace(
103
+ # PE = datadict['total energy'] * hartree2kcalpermol,
104
+ # Gsolv = Gsolv,
105
+ # charges = datadict['partial charges'],
106
+ # wbo = Wiberg_bond_orders,
107
+ # )
108
+ PE.append(_PE)
109
+ print(_PE)
110
+
111
+ # calculate population
112
+ PE = np.array(PE)
113
+ PE = PE - np.min(PE)
114
+ Boltzmann_factors = np.exp(-PE/kT)
115
+ # partition function
116
+ Z = np.sum(Boltzmann_factors)
117
+ # population
118
+ p = np.sum(Boltzmann_factors/Z, axis=1)
119
+
120
+ sorted_indices = sorted(list(enumerate(p)), key=lambda x: x[1], reverse=True) # [(0,p0), (1,p1), ...]
121
+
122
+ molecular_states = []
123
+ for idx, population in sorted_indices:
124
+ if population < 0.05:
125
+ continue
126
+
127
+ # state.keys() = ['rdmol','smiles','charge','population','pKa', 'qikprop']
128
+
129
+ state_mol = libr[idx].rename(f'state.{idx+1}').qed(
130
+ properties=['QED', 'MolWt', 'LogP', 'TPSA', 'HBD', 'HBA'])
131
+
132
+ basic_properties = {
133
+ 'QED' : round(state_mol.props['QED'], 2),
134
+ 'MolWt' : round(state_mol.props['MolWt'], 2),
135
+ 'LogP' : round(state_mol.props['LogP'], 2),
136
+ 'TPSA' : round(state_mol.props['TPSA'], 2),
137
+ 'HBD' : state_mol.props['HBD'],
138
+ 'HBA' : state_mol.props['HBA'],
139
+ }
140
+
141
+ state_props = {
142
+ 'method': task_queue,
143
+ 'PE(kcal/mol)': state_mol.confs[0].props['E_tot(kcal/mol)'],
144
+ 'population' : round(float(population), 3),
145
+ 'basic_properties': basic_properties,
146
+ 'rdkit_version': rdworks.__rdkit_version__,
147
+ 'rdworks_version': rdworks.__version__,
148
+ }
149
+
150
+ molecular_states.append((state_mol.serialize(compressed=True), state_props))
151
+
152
+ print(molecular_states)
153
+
154
+
155
+
156
+ if __name__ == '__main__':
157
+ test_xtb_wrapper()
158
+ test_singlepoint()
159
+ test_optimize()
160
+ test_state_generate()
@@ -1,76 +0,0 @@
1
- from rdworks import Mol
2
- from rdworks.xtb.wrapper import GFN2xTB
3
- from rdworks.testdata import drugs
4
-
5
- from pathlib import Path
6
-
7
-
8
- # In ASE, the default energy unit is eV (electron volt).
9
- # It will be converted to kcal/mol
10
- # CODATA 2018 energy conversion factor
11
- hartree2ev = 27.211386245988
12
- hartree2kcalpermol = 627.50947337481
13
- ev2kcalpermol = 23.060547830619026
14
-
15
-
16
- datadir = Path(__file__).parent.resolve() / "data"
17
- workdir = Path(__file__).parent.resolve() / "outfiles"
18
-
19
- workdir.mkdir(exist_ok=True)
20
-
21
- name = 'Atorvastatin'
22
- testmol = Mol(drugs[name], name).make_confs(n=50).optimize_confs()
23
- testmol = testmol.drop_confs(similar=True, verbose=True).sort_confs()
24
-
25
-
26
- def test_xtb_wrapper():
27
- from rdworks.xtb.wrapper import GFN2xTB
28
- assert GFN2xTB.is_xtb_ready() == True
29
- assert GFN2xTB.is_cpx_ready() == True
30
- assert GFN2xTB.is_cpcmx_option_ready() == True
31
- assert GFN2xTB.is_ready() == True
32
- assert GFN2xTB.version() is not None
33
-
34
-
35
- def test_singlepoint():
36
- mol = testmol.copy()
37
-
38
- print("number of conformers=", mol.count())
39
- print("number of atoms=", mol.confs[0].natoms)
40
-
41
- gfn2xtb = GFN2xTB(mol.confs[0].rdmol, ncores=8)
42
-
43
- print("GFN2xTB.singlepoint()")
44
- outdict = gfn2xtb.singlepoint()
45
- print(outdict)
46
- print()
47
-
48
- print("GFN2xTB.singlepoint(water='gbsa')")
49
- outdict = gfn2xtb.singlepoint(water='gbsa')
50
- print(outdict)
51
- print()
52
-
53
- print("GFN2xTB.singlepoint(water='alpb')")
54
- outdict = gfn2xtb.singlepoint(water='alpb')
55
- print(outdict)
56
- print()
57
-
58
- print("GFN2xTB.singlepoint(water='cpcmx')")
59
- outdict = gfn2xtb.singlepoint(water='cpcmx')
60
- print(outdict)
61
- print()
62
-
63
-
64
- def test_optimize():
65
- mol = testmol.copy()
66
- print("number of conformers=", mol.count())
67
- print("GFN2xTB.optimize()")
68
- outdict = GFN2xTB(mol.confs[0].rdmol, ncores=8).optimize(verbose=True)
69
- print(outdict)
70
- print()
71
-
72
-
73
- if __name__ == '__main__':
74
- test_xtb_wrapper()
75
- test_singlepoint()
76
- test_optimize()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes