rdworks 0.48.1__py3-none-any.whl → 0.49.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rdworks/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.48.1'
1
+ __version__ = '0.49.1'
2
2
 
3
3
  from rdworks.conf import Conf
4
4
  from rdworks.mol import Mol
@@ -25,10 +25,11 @@ __rdkit_version__ = rdkit.rdBase.rdkitVersion
25
25
 
26
26
  rdkit_logger = rdkit.RDLogger.logger().setLevel(rdkit.RDLogger.CRITICAL)
27
27
 
28
- main_logger = logging.getLogger()
29
- main_logger.setLevel(logging.INFO) # level: DEBUG < INFO < WARNING < ERROR < CRITICAL
30
- logger_formatter = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
31
- datefmt='%Y-%m-%d %H:%M:%S')
32
- logger_ch = logging.StreamHandler()
33
- logger_ch.setFormatter(logger_formatter)
34
- main_logger.addHandler(logger_ch)
28
+ logger = logging.getLogger(__name__)
29
+ logger.setLevel(logging.INFO) # level: DEBUG < INFO < WARNING < ERROR < CRITICAL
30
+
31
+ logger_stream = logging.StreamHandler() # sys.stdout or sys.stderr
32
+ logger_format = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
33
+ datefmt='%Y-%m-%d %H:%M:%S')
34
+ logger_stream.setFormatter(logger_format)
35
+ logger.addHandler(logger_stream)
rdworks/ionized.py CHANGED
@@ -130,6 +130,9 @@ class IonizedStates:
130
130
  edmol.RemoveAtom(bonded_H_indices[0])
131
131
 
132
132
  elif acid_or_base == 'B': # protonate
133
+ # note that protonation at tertiary nitrogen may result in stereoisomers
134
+ # current implementation ignores the stereochemistry
135
+ # use rdworks.complete_stereoisomers() function to complete the stereoisomers
133
136
  B = edmol.GetAtomWithIdx(i)
134
137
  assert B.GetAtomicNum() > 1, f"Cannot protonate an atom (idx={i}; {B.GetAtomicNum()})"
135
138
  charge = B.GetFormalCharge() + 1
@@ -138,7 +141,7 @@ class IonizedStates:
138
141
  B.SetNumExplicitHs(nH+1)
139
142
  edmol = Chem.AddHs(edmol)
140
143
 
141
- # Clean up and save SMILES
144
+ # clean up and save SMILES
142
145
  ionized_smiles = IonizedStates.clean_smiles(edmol)
143
146
  ionized_mol = Chem.MolFromSmiles(ionized_smiles)
144
147
  ionized_mol = Chem.AddHs(ionized_mol)
rdworks/mol.py CHANGED
@@ -46,7 +46,8 @@ from rdworks.display import render_svg, render_png
46
46
 
47
47
  from scour.scour import scourString
48
48
 
49
- main_logger = logging.getLogger()
49
+
50
+ logger = logging.getLogger(__name__)
50
51
 
51
52
 
52
53
  class Mol:
@@ -328,11 +329,7 @@ class Mol:
328
329
  return self
329
330
 
330
331
 
331
- def make_confs(self,
332
- n:int = 50,
333
- method:str = 'ETKDG',
334
- calculator:str | Callable = 'MMFF94',
335
- ) -> Self:
332
+ def make_confs(self, n: int = 50, method: str = 'ETKDG', **kwargs) -> Self:
336
333
  """Generates 3D conformers.
337
334
 
338
335
  Args:
@@ -348,12 +345,8 @@ class Mol:
348
345
  High-Quality Conformer Generation with CONFORGE: Algorithm and Performance Assessment.
349
346
  J. Chem. Inf. Model. 63, 5549-5570 (2023).
350
347
  """
351
-
352
- # if n is None:
353
- # rot_bonds = rd_descriptor_f['RotBonds'](self.rdmol)
354
- # n = min(max(1, int(8.481 * (rot_bonds **1.642))), 1000)
355
- # n = max(1, math.ceil(n * n_rel)) # ensures that n is at least 1
356
-
348
+ verbose = kwargs.get('verbose', False)
349
+
357
350
  self.confs = []
358
351
 
359
352
  if method.upper() == 'ETKDG':
@@ -426,6 +419,7 @@ class Mol:
426
419
  os.remove(tmp_filename)
427
420
 
428
421
  # energy evaluations for ranking
422
+ calculator = kwargs.get('calculator', 'MMFF94')
429
423
  for conf in self.confs:
430
424
  conf.potential_energy(calculator) # default: MMFF94
431
425
 
@@ -436,8 +430,15 @@ class Mol:
436
430
  for conf in self.confs:
437
431
  conf.props.update({"E_rel(kcal/mol)": conf.props[sort_by] - lowest_energy})
438
432
 
433
+ # rename conformers
439
434
  self = self.rename()
440
435
 
436
+ if verbose:
437
+ rot_bonds = rd_descriptor_f['RotBonds'](self.rdmol)
438
+ nrb_suggested = int(8.481 * (rot_bonds **1.642))
439
+ logger.info(f"make_confs() rotatable bonds {rot_bonds} (suggested conformers {nrb_suggested}) generated {self.count()}")
440
+ logger.info(f"make_confs() updated potential energies E_tot(kcal/mol) and E_rel(kcal/mol) by {calculator}")
441
+
441
442
  return self
442
443
 
443
444
 
@@ -499,10 +500,15 @@ class Mol:
499
500
  Returns:
500
501
  Self: modified self.
501
502
  """
503
+ verbose = kwargs.get('verbose', False)
504
+
502
505
  if calculator is not None:
503
506
  # re-calculate potential energies
507
+ if verbose :
508
+ logger.info(f"sort_cons() calculate potential energy by {calculator}")
509
+
504
510
  for conf in self.confs:
505
- PE = conf.potential_energy(calculator=calculator, **kwargs) # sets `E_tot(kcal/mol)`
511
+ PE = conf.potential_energy(calculator, **kwargs) # sets `E_tot(kcal/mol)`
506
512
 
507
513
  if all(['E_tot(kcal/mol)' in conf.props for conf in self.confs]):
508
514
  sort_by = 'E_tot(kcal/mol)'
@@ -767,7 +773,7 @@ class Mol:
767
773
  cluster: bool | None =None,
768
774
  k: int | None = None,
769
775
  window: float | None = None,
770
- verbose: bool = False) -> Self:
776
+ **kwargs) -> Self:
771
777
  """Drop conformers that meet some condition(s).
772
778
 
773
779
  Args:
@@ -792,6 +798,8 @@ class Mol:
792
798
  Self: modified self.
793
799
  """
794
800
 
801
+ verbose = kwargs.get('verbose', False)
802
+
795
803
  reasons = [f'stereo flipped',
796
804
  f'unconverged',
797
805
  f'similar({similar_rmsd})',
@@ -805,13 +813,13 @@ class Mol:
805
813
  mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == self.smiles for _ in self.confs]
806
814
  self.confs = list(itertools.compress(self.confs, mask))
807
815
  if verbose:
808
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[0]:<{w}} -> {self.count()}')
816
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[0]:<{w}} -> {self.count()}')
809
817
 
810
818
  if unconverged and self.count() > 0:
811
819
  mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in self.confs]
812
820
  self.confs = list(itertools.compress(self.confs, mask))
813
821
  if verbose:
814
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[1]:<{w}} -> {self.count()}')
822
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[1]:<{w}} -> {self.count()}')
815
823
 
816
824
  if similar and self.count() > 1:
817
825
  # it is observed that there are essentially identical conformers
@@ -831,7 +839,7 @@ class Mol:
831
839
  mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(self.confs)]
832
840
  self.confs = list(itertools.compress(self.confs, mask))
833
841
  if verbose:
834
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[2]:<{w}} -> {self.count()}')
842
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[2]:<{w}} -> {self.count()}')
835
843
 
836
844
  # note: it will retain the conformers with lower index
837
845
  # so, it should be sorted before dropping
@@ -856,9 +864,12 @@ class Mol:
856
864
  mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in self.confs]
857
865
  self.confs = list(itertools.compress(self.confs, mask))
858
866
  if verbose:
859
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[3]:<{w}} -> {self.count()}')
867
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[3]:<{w}} -> {self.count()}')
860
868
 
861
869
  if (k or window) and self.count() > 0:
870
+ # confs must be sorted by energies
871
+ if not all(['E_rel(kcal/mol)' in _.props for _ in self.confs]):
872
+ self = self.sort_confs(**kwargs)
862
873
  if k:
863
874
  mask_k = [i < k for i,_ in enumerate(self.confs)]
864
875
  else:
@@ -871,7 +882,7 @@ class Mol:
871
882
  mask = [(x and y) for (x,y) in zip(mask_k, mask_window)]
872
883
  self.confs = list(itertools.compress(self.confs, mask))
873
884
  if verbose:
874
- main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[4]:<{w}} -> {self.count()}')
885
+ logger.info(f'drop_confs() {mask.count(False):3d} {reasons[4]:<{w}} -> {self.count()}')
875
886
 
876
887
  return self
877
888
 
@@ -974,7 +985,7 @@ class Mol:
974
985
 
975
986
 
976
987
  def torsion_energies(self,
977
- calculator: str | Callable,
988
+ calculator: str | Callable = 'MMFF94',
978
989
  torsion_key: int | None = None,
979
990
  simplify: bool = True,
980
991
  fmax: float = 0.05,
rdworks/xtb/wrapper.py CHANGED
@@ -14,7 +14,8 @@ from rdkit import Chem
14
14
  from rdkit.Geometry import Point3D
15
15
 
16
16
 
17
- logger = logging.getLogger()
17
+ logger = logging.getLogger(__name__)
18
+
18
19
 
19
20
  # In ASE, the default energy unit is eV (electron volt).
20
21
  # It will be converted to kcal/mol
@@ -25,7 +26,7 @@ ev2kcalpermol = 23.060547830619026
25
26
 
26
27
 
27
28
  class GFN2xTB:
28
- def __init__(self, molecule: Chem.Mol, ncores: int = 8):
29
+ def __init__(self, molecule: Chem.Mol, ncores: int | None = None):
29
30
  assert isinstance(molecule, Chem.Mol), "molecule is not rdkit.Chem.Mol type"
30
31
  assert molecule.GetConformer().Is3D(), "molecule is not a 3D conformer"
31
32
  assert self.is_xtb_ready(), "xtb is not accessible"
@@ -35,6 +36,9 @@ class GFN2xTB:
35
36
  self.symbols = [ atom.GetSymbol() for atom in molecule.GetAtoms() ]
36
37
  self.positions = molecule.GetConformer().GetPositions().tolist()
37
38
 
39
+ if ncores is None:
40
+ ncores = os.cpu_count()
41
+
38
42
  # Parallelisation
39
43
  os.environ['OMP_STACKSIZE'] = '4G'
40
44
  os.environ['OMP_NUM_THREADS'] = f'{ncores},1'
@@ -107,7 +111,7 @@ class GFN2xTB:
107
111
  Returns:
108
112
  str | None: version statement.
109
113
  """
110
- if GFN2xTB.is_ready():
114
+ if GFN2xTB.is_xtb_ready():
111
115
  cmd = ['xtb', '--version']
112
116
  proc = subprocess.run(cmd, capture_output=True, text=True)
113
117
  assert proc.returncode == 0, "GFN2xTB() Error: xtb not available"
@@ -338,12 +342,9 @@ class GFN2xTB:
338
342
 
339
343
  with tempfile.TemporaryDirectory() as temp_dir: # tmpdir is a string
340
344
  workdir = Path(temp_dir)
341
- if verbose:
342
- logger.info(f'xtb.singlepoint workdir= {temp_dir}')
343
345
 
344
346
  geometry_input_path = workdir / 'geometry.xyz'
345
347
  xtbout_path = workdir / 'xtbout.json'
346
- stdout_path = workdir / 'fort.6'
347
348
  wbo_path = workdir / 'wbo'
348
349
  geometry_output_path = workdir / 'xtbtopo.mol'
349
350
 
@@ -354,8 +355,6 @@ class GFN2xTB:
354
355
 
355
356
  options = ['--gfn', '2', '--json']
356
357
 
357
- Gsolv = None
358
-
359
358
  if water is not None and isinstance(water, str):
360
359
  if water == 'gbsa':
361
360
  options += ['--gbsa', 'H2O']
@@ -366,6 +365,9 @@ class GFN2xTB:
366
365
  elif water == 'cpcmx' and self.is_cpcmx_option_ready():
367
366
  options += ['--cpcmx', 'water']
368
367
 
368
+ if verbose:
369
+ logger.info(f"singlepoint() {' '.join(cmd+options)}")
370
+
369
371
  # 'xtbout.json', 'xtbrestart', 'xtbtopo.mol', 'charges', and 'wbo' files will be
370
372
  # created in the current working directory.
371
373
  proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)
@@ -381,19 +383,20 @@ class GFN2xTB:
381
383
  if xtbout_path.is_file():
382
384
  with open(xtbout_path, 'r') as f:
383
385
  datadict = json.load(f) # takes the file object as input
386
+
387
+ Gsolv = None
384
388
 
385
- if (water is not None) and (Gsolv is None) and stdout_path.is_file():
389
+ if water is not None:
386
390
  # Free Energy contributions: [Eh] [kcal/mol]
387
391
  # -------------------------------------------------------------------------
388
392
  # solvation free energy (dG_solv): -0.92587E-03 -0.58099
389
393
  # gas phase energy (E) -0.52068E+01
390
394
  # -------------------------------------------------------------------------
391
395
  # total free energy (dG) -0.52077E+01
392
- with open(stdout_path, 'r') as f:
393
- for line in f:
394
- if 'solvation free energy' in line:
395
- m = re.search(r"solvation free energy \(dG_solv\)\:\s+[-+]?\d*\.?\d+E[-+]?\d*\s+(?P<kcalpermol>[-+]?\d*\.?\d+)", line)
396
- Gsolv = float(m.group('kcalpermol'))
396
+ for line in proc.stdout.splitlines():
397
+ if 'solvation free energy' in line:
398
+ m = re.search(r"solvation free energy \(dG_solv\)\:\s+[-+]?\d*\.?\d+E[-+]?\d*\s+(?P<kcalpermol>[-+]?\d*\.?\d+)", line)
399
+ Gsolv = float(m.group('kcalpermol'))
397
400
 
398
401
  Wiberg_bond_orders = self.load_wbo(wbo_path)
399
402
 
@@ -429,8 +432,6 @@ class GFN2xTB:
429
432
  """
430
433
  with tempfile.TemporaryDirectory() as temp_dir: # tmpdir is a string
431
434
  workdir = Path(temp_dir)
432
- if verbose:
433
- logger.info(f'xtb.optimize workdir= {temp_dir}')
434
435
 
435
436
  geometry_input_path = workdir / 'geometry.xyz'
436
437
  xtbout_path = workdir / 'xtbout.json'
@@ -452,6 +453,9 @@ class GFN2xTB:
452
453
  elif water == 'cpcmx':
453
454
  logger.warning('optimize with --cpcmx option is not implemented in xtb yet')
454
455
 
456
+ if verbose:
457
+ logger.info(f"optimize() {' '.join(cmd+options)}")
458
+
455
459
  proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)
456
460
 
457
461
  if proc.returncode == 0 and xtbout_path.is_file():
@@ -523,4 +527,4 @@ class GFN2xTB:
523
527
  with open(xtb_esp_dat, 'r') as f:
524
528
  pass
525
529
 
526
- return None
530
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdworks
3
- Version: 0.48.1
3
+ Version: 0.49.1
4
4
  Summary: Routine tasks built on RDKit and other tools
5
5
  Author-email: Sung-Hun Bae <sunghun.bae@gmail.com>
6
6
  Maintainer-email: Sung-Hun Bae <sunghun.bae@gmail.com>
@@ -1,10 +1,10 @@
1
- rdworks/__init__.py,sha256=TjR6uygQrAgURJ2d1zoPHmai8uJLNF0oFmG5s8Q6TJI,1368
1
+ rdworks/__init__.py,sha256=1teH6iycO-shzgeW5KwSDr-CH6CYybv2sDcrSNUBDW0,1391
2
2
  rdworks/conf.py,sha256=iQLb3Qg3pjGiiMVMJ5-d57BC1id3zxEhEGlhhrLrA_c,34162
3
3
  rdworks/descriptor.py,sha256=34T_dQ6g8v3u-ym8TLKbQtxIIV5TEo-d3pdedq3o-cg,2106
4
4
  rdworks/display.py,sha256=JR0gR26UpH-JCxVOaqXZCUj2MiGZSrx9Me87FncspVI,13469
5
- rdworks/ionized.py,sha256=LQLApkskhPPMZCmFMbStJXS-ugVTDSzZJD7pQbJTe8E,7425
5
+ rdworks/ionized.py,sha256=_t-Ajssv1rytV4Y_KsSbxfnsBKqy-EusbhNUtaWcV6o,7681
6
6
  rdworks/matchedseries.py,sha256=A3ON4CUpQV159mu9VqgNiJ8uoQ9ePOry9d3ra4NCAgc,10377
7
- rdworks/mol.py,sha256=e62DYV8XeqLL1mBydbS881pSUfJjaeHxalG64CIant8,68133
7
+ rdworks/mol.py,sha256=UPLLJbfn1cPhcedrGW7tL_bk1QpG3BfpjCOhop0tmBY,68663
8
8
  rdworks/mollibr.py,sha256=X4UBO6Ga-QmNS7RwUiaDYAx0Q5hnWs71yTkEpH02Qb4,37696
9
9
  rdworks/pka.py,sha256=NVJVfpcNEMlX5QRyLBgUM7GIT7VMjO-llAR4LWc8J2c,1656
10
10
  rdworks/readin.py,sha256=0bnVcZcAmSLqc6zu1mYcv0LdBv2agQfOpKGwpSRL9VE,11742
@@ -65,9 +65,9 @@ rdworks/predefined/misc/reactive-part-2.xml,sha256=0vNTMwWrrQmxBpbgbyRHx8sVs83cq
65
65
  rdworks/predefined/misc/reactive-part-3.xml,sha256=LgWHSEbRTVmgBoIO45xbTo1xQJs0Xu51j3JnIapRYo4,3094
66
66
  rdworks/predefined/misc/reactive.xml,sha256=syedoQ6VYUfRLnxy99ObuDniJ_a_WhrWAJbTKFfJ6VY,11248
67
67
  rdworks/xtb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- rdworks/xtb/wrapper.py,sha256=NQgkdRKN5YEtv_UPYKWDijzMEs5v2kFrhUWHqiro7bE,22174
69
- rdworks-0.48.1.dist-info/licenses/LICENSE,sha256=UOkJSBqYyQUvtCp7a-vdCANeEcLE2dnTie_eB1By5SY,1074
70
- rdworks-0.48.1.dist-info/METADATA,sha256=op0QbvR5pScMsbe112ZrEUmSoT77msXNuZCd-zhTOF8,1967
71
- rdworks-0.48.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
- rdworks-0.48.1.dist-info/top_level.txt,sha256=05C98HbvBK2axUBogC_hAT_CdpOeQYGnQ6vRAgawr8s,8
73
- rdworks-0.48.1.dist-info/RECORD,,
68
+ rdworks/xtb/wrapper.py,sha256=Uv5XrC1gbWyVZiUJsoVwn6i76SPrtBCVSja0kOgcSWQ,22125
69
+ rdworks-0.49.1.dist-info/licenses/LICENSE,sha256=UOkJSBqYyQUvtCp7a-vdCANeEcLE2dnTie_eB1By5SY,1074
70
+ rdworks-0.49.1.dist-info/METADATA,sha256=3Z8TiRFULi-sddBb-_bhWh13MYWcjjopn5wnpUORX00,1967
71
+ rdworks-0.49.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
+ rdworks-0.49.1.dist-info/top_level.txt,sha256=05C98HbvBK2axUBogC_hAT_CdpOeQYGnQ6vRAgawr8s,8
73
+ rdworks-0.49.1.dist-info/RECORD,,