AutoREACTER 0.2b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoREACTER/__init__.py +46 -0
- AutoREACTER/cache.py +300 -0
- AutoREACTER/detectors/__init__.py +0 -0
- AutoREACTER/detectors/detector.py +259 -0
- AutoREACTER/detectors/functional_groups_detector.py +498 -0
- AutoREACTER/detectors/functional_groups_library.py +130 -0
- AutoREACTER/detectors/non_monomer_detector.py +352 -0
- AutoREACTER/detectors/reaction_detector.py +687 -0
- AutoREACTER/detectors/reactions_library.py +308 -0
- AutoREACTER/initialization.py +48 -0
- AutoREACTER/input_parser.py +1064 -0
- AutoREACTER/reaction_preparation/__init__.py +0 -0
- AutoREACTER/reaction_preparation/build_reaction_system.py +1 -0
- AutoREACTER/reaction_preparation/lunar_client/REACTER_files_builder.py +758 -0
- AutoREACTER/reaction_preparation/lunar_client/__init__.py +0 -0
- AutoREACTER/reaction_preparation/lunar_client/config.py +1 -0
- AutoREACTER/reaction_preparation/lunar_client/ff_validator.py +185 -0
- AutoREACTER/reaction_preparation/lunar_client/locate_lunar.py +228 -0
- AutoREACTER/reaction_preparation/lunar_client/lunar_api_wrapper.py +694 -0
- AutoREACTER/reaction_preparation/lunar_client/modifiers_molecule_files.py +757 -0
- AutoREACTER/reaction_preparation/lunar_client/molecule_3d_preparation.py +280 -0
- AutoREACTER/reaction_preparation/lunar_client/template_builder.py +37 -0
- AutoREACTER/reaction_preparation/reaction_processor/__init__.py +0 -0
- AutoREACTER/reaction_preparation/reaction_processor/fragment_comparison.py +238 -0
- AutoREACTER/reaction_preparation/reaction_processor/prepare_reactions.py +716 -0
- AutoREACTER/reaction_preparation/reaction_processor/utils.py +161 -0
- AutoREACTER/reaction_preparation/reaction_processor/walker.py +206 -0
- AutoREACTER/reaction_template_builder/__init__.py +0 -0
- AutoREACTER/reaction_template_builder/lunar_client/__init__.py +0 -0
- AutoREACTER/reaction_template_builder/reaction_template_pipeline/__init__.py +0 -0
- AutoREACTER/sim_setup/__init__.py +0 -0
- AutoREACTER/sim_setup/simulation_setup.py +92 -0
- AutoREACTER/sim_setup/system_property_calculations.py +188 -0
- AutoREACTER/sim_setup/writers/__init__.py +0 -0
- AutoREACTER/sim_setup/writers/densification_writer.py +330 -0
- AutoREACTER/sim_setup/writers/lammps_settings.py +66 -0
- AutoREACTER/sim_setup/writers/post_eq_writer.py +161 -0
- AutoREACTER/sim_setup/writers/pre_eq_writer.py +156 -0
- AutoREACTER/sim_setup/writers/rxn_first_stage_writer.py +221 -0
- AutoREACTER/sim_setup/writers/rxn_second_stage_writer.py +219 -0
- AutoREACTER/sim_setup/writers/writer.py +67 -0
- autoreacter-0.2b1.dist-info/METADATA +139 -0
- autoreacter-0.2b1.dist-info/RECORD +47 -0
- autoreacter-0.2b1.dist-info/WHEEL +5 -0
- autoreacter-0.2b1.dist-info/licenses/LICENSE.md +21 -0
- autoreacter-0.2b1.dist-info/top_level.txt +2 -0
- docs/source/conf.py +46 -0
AutoREACTER/__init__.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""
AutoREACTER

Automated generation of LAMMPS/REACTER-ready reaction templates
for polymerization workflows.
"""

# NOTE(review): this string says 0.3.0b1, while the wheel and dist-info
# directory this file ships in are named 0.2b1 -- confirm which version is
# the intended one and bring the two into agreement.
__version__ = "0.3.0b1"

# Input parsing: the parser, its data containers, and its exception types are
# re-exported so callers can import them from the package root.
from .input_parser import (
    InputParser,
    SimulationSetup,
    MonomerEntry,
    Replica,
    InputError,
    InputSchemaError,
    InputConflictError,
    NumericFieldError,
    SmilesValidationError,
    DuplicateMonomerError,
)

# Functional-group detection names re-exported from the detectors subpackage.
from .detectors.functional_groups_detector import (
    FunctionalGroupsDetector,
    FunctionalGroupInfo,
    MonomerRole,
    FunctionalGroupVisualization,
)

# Names exposed by ``from AutoREACTER import *``; mirrors the imports above.
__all__ = [
    "__version__",
    "InputParser",
    "SimulationSetup",
    "MonomerEntry",
    "Replica",
    "InputError",
    "InputSchemaError",
    "InputConflictError",
    "NumericFieldError",
    "SmilesValidationError",
    "DuplicateMonomerError",
    "FunctionalGroupsDetector",
    "FunctionalGroupInfo",
    "MonomerRole",
    "FunctionalGroupVisualization",
]
|
AutoREACTER/cache.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
import datetime as dt
|
|
7
|
+
from AutoREACTER.reaction_preparation.lunar_client.REACTER_files_builder import REACTERFiles
|
|
8
|
+
|
|
9
|
+
class GetCacheDir:
    """
    Manages the base cache directory for the AutoREACTER workflow.

    This class determines the root of the git repository (or falls back to a
    path relative to the script) and creates a standardized cache directory
    structure with a staging area.

    Attributes set by ``__init__``:
        git_root: Directory returned by :meth:`get_git_root`.
        cache_base_dir: ``<git_root>/cache``.
        staging_dir: ``<git_root>/cache/00_cache``.
    """

    def __init__(self, clear_staging: bool = False):
        """
        Initialize cache directory structure.

        Both ``cache`` and ``cache/00_cache`` are created if they are missing.

        Args:
            clear_staging:
                If True, clear all contents inside cache/00_cache.
                The 00_cache directory itself is preserved.
        """
        self.git_root = self.get_git_root()
        self.cache_base_dir = self.git_root / "cache"
        self.cache_base_dir.mkdir(parents=True, exist_ok=True)

        # Staging directory for temporary files before moving to dated run folders
        self.staging_dir = self.cache_base_dir / "00_cache"
        self.staging_dir.mkdir(parents=True, exist_ok=True)

        if clear_staging:
            self.clear_staging_dir()

    def clear_staging_dir(self) -> None:
        """
        Clear all files and folders inside the staging cache directory.

        This only clears cache/00_cache contents.
        It does not delete dated run folders.

        Removal is best-effort: an item that cannot be removed is reported
        with a ``[WARN]`` line and the remaining items are still processed.
        """
        self.staging_dir.mkdir(parents=True, exist_ok=True)

        failed = []
        for item in self.staging_dir.iterdir():
            try:
                # Test for a symlink first so a link to a directory is
                # unlinked rather than having its target tree removed.
                if item.is_symlink() or item.is_file():
                    item.unlink()
                elif item.is_dir():
                    shutil.rmtree(item)
            except Exception as e:
                # Deliberately broad: clearing must never abort the workflow,
                # and every failure is reported to the user.
                print(f"[WARN] Failed to remove staging cache item {item}: {e}")
                failed.append(item)

        if failed:
            print(f"[WARN] Staging cache partially cleared ({len(failed)} item(s) could not be removed): {self.staging_dir}")
        else:
            print(f"[OK] Cleared staging cache: {self.staging_dir}")

    def get_git_root(self) -> Path:
        """
        Return the root directory of the current git repository.

        ``git rev-parse --show-toplevel`` is run in the current working
        directory. If that fails, the nearest ancestor of this file (starting
        with its own directory) that contains ``.git`` or ``pyproject.toml``
        is returned, and if none exists, the parent of this file's directory.

        Returns:
            Path: The directory to use as the project root.
        """
        try:
            out = subprocess.check_output(
                ["git", "rev-parse", "--show-toplevel"],
                text=True,
                # The failure is handled by the fallback below, so keep git's
                # "fatal: not a git repository" message off the terminal.
                stderr=subprocess.DEVNULL,
            ).strip()
            return Path(out)
        except (OSError, subprocess.SubprocessError, UnicodeError):
            # OSError: git is missing or not executable.
            # SubprocessError: git exited non-zero (not inside a repository).
            # UnicodeError: git's output could not be decoded as text.
            pass

        script_dir = Path(__file__).resolve().parent

        for parent in [script_dir] + list(script_dir.parents):
            if (parent / ".git").exists() or (parent / "pyproject.toml").exists():
                return parent

        return script_dir.parent
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class RunDirectoryManager:
    """
    Handles creation and management of dated run directories for simulations.

    Organizes output into a structure like: cache/YYYY-MM-DD/1/, cache/YYYY-MM-DD/2/, etc.
    Provides utilities for moving files and updating REACTER file references.
    """

    def __init__(self, base_dir: Path):
        """
        Initialize with a base directory for storing run folders.

        The directory is created (including parents) if it does not exist.

        Args:
            base_dir: Base path where dated run directories will be created
        """
        self.base_dir = Path(base_dir)
        self.base_dir.mkdir(parents=True, exist_ok=True)

    def _is_empty(self, path: Path) -> bool:
        """Check if a directory contains no files or subdirectories."""
        return not any(path.iterdir())

    def make_dated_run_dir(self) -> Path:
        """
        Create and return a dated run directory following the pattern: {base}/{today}/{run_number}.

        Reuses the latest run directory if it is empty. Otherwise, increments the run number.
        Only sub-directories whose name consists of digits count as runs.

        Returns:
            Path: Path to the final run directory
        """
        today = dt.date.today().isoformat()
        date_dir = self.base_dir / today
        date_dir.mkdir(parents=True, exist_ok=True)

        # Find existing run numbers (directories named with integers)
        existing_runs = [
            int(p.name) for p in date_dir.iterdir()
            if p.is_dir() and p.name.isdigit()
        ]

        if existing_runs:
            latest_run = date_dir / str(max(existing_runs))
            if self._is_empty(latest_run):
                print(f"[INFO] Final directory: {latest_run}")
                return latest_run

        # Create new run directory with incremented number
        run_number = max(existing_runs, default=0) + 1
        run_dir = date_dir / str(run_number)
        run_dir.mkdir()

        print(f"[INFO] Final directory: {run_dir}")
        return run_dir

    def remove_path(self, path: Path) -> None:
        """
        Remove a file, symlink, or directory recursively.

        A path that is none of these (for example, one that does not exist)
        is left alone.

        Args:
            path: Path to remove
        """
        # Symlinks are checked first so a link to a directory is unlinked
        # instead of having its target tree deleted.
        if path.is_symlink() or path.is_file():
            path.unlink()
        elif path.is_dir():
            shutil.rmtree(path)

    def move_into_run(self, source_dir: Path, dest_dir: Path) -> Path:
        """
        Move all contents from source_dir into dest_dir, overwriting if necessary.

        ``dest_dir`` is created if it does not exist. If both arguments refer
        to the same directory nothing is moved.

        Args:
            source_dir: Directory containing files to move
            dest_dir: Destination run directory

        Returns:
            Path: The destination directory
        """
        # Guard against data loss: with identical directories every "target"
        # is the item itself, and removing it first would destroy it.
        if source_dir.resolve() == dest_dir.resolve():
            print(f"[INFO] Source and destination are the same directory; nothing to move: {dest_dir}")
            return dest_dir

        dest_dir.mkdir(parents=True, exist_ok=True)

        for item in source_dir.iterdir():
            target = dest_dir / item.name

            # exists() follows symlinks, so a dangling link needs is_symlink().
            if target.exists() or target.is_symlink():
                self.remove_path(target)

            shutil.move(str(item), str(target))

        print(f"[OK] Moved files → {dest_dir}")
        return dest_dir

    def move_reacter_files(
        self,
        reacter_files: "REACTERFiles",
        staging_dir: Path,
        final_dir: Path
    ) -> "REACTERFiles":
        """
        Move REACTER-related files from staging area into the final run directory
        and update all internal Path references in the REACTERFiles object.

        The contents of ``staging_dir/lunar/REACTER_files`` are moved. The
        object is updated in place and also returned. Paths that do not lie
        under that directory are left unchanged.

        Args:
            reacter_files: REACTERFiles object containing various file paths
            staging_dir: Current staging directory (usually .../00_cache)
            final_dir: Target run directory

        Returns:
            REACTERFiles: Updated object with remapped paths

        Raises:
            FileNotFoundError: If the expected REACTER files directory doesn't exist
        """
        old_base = staging_dir / "lunar" / "REACTER_files"

        if not old_base.exists():
            raise FileNotFoundError(f"REACTER files not found: {old_base}")

        new_base = self.move_into_run(old_base, final_dir)

        def remap(p: Path) -> Path:
            """Remap a path from old_base to new_base while preserving relative structure."""
            try:
                return new_base / p.relative_to(old_base)
            except ValueError:
                # relative_to() raises when p is not under old_base.
                return p

        # Update all file references to point to the new location
        reacter_files.force_field_data = remap(reacter_files.force_field_data)
        reacter_files.in_file = remap(reacter_files.in_file)

        for mol in reacter_files.molecule_files:
            mol.molecule_files.lmp_molecule_file = remap(
                mol.molecule_files.lmp_molecule_file
            )

        for tpl in reacter_files.template_files:
            tpl.map_file = remap(tpl.map_file)
            tpl.pre_reaction_file.lmp_molecule_file = remap(
                tpl.pre_reaction_file.lmp_molecule_file
            )
            tpl.post_reaction_file.lmp_molecule_file = remap(
                tpl.post_reaction_file.lmp_molecule_file
            )

        print(f"[OK] REACTER files moved → {new_base}")
        return reacter_files
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class RetentionCleanup:
    """
    Provides cleanup functionality for old cache directories.

    Deletes dated run folders (named ``YYYY-MM-DD``) directly under the cache
    base directory, either all of them or only those older than a given
    number of days. The ``00_cache`` staging folder is never deleted.
    """

    # A dated folder name must be exactly YYYY-MM-DD; names that merely start
    # with a date (e.g. "2024-01-01_keep") are not run folders.
    _DATE_DIR_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}")
    _PROTECTED = frozenset({"00_cache"})

    def __init__(self, base_dir: Path):
        """
        Initialize cleanup manager with target base directory.

        Args:
            base_dir: Cache base directory to clean up
        """
        self.base_dir = Path(base_dir)

    def _dated_run_dirs(self) -> list:
        """Return the unprotected sub-directories whose name is exactly YYYY-MM-DD."""
        return [
            folder
            for folder in self.base_dir.iterdir()
            if folder.is_dir()
            and folder.name not in self._PROTECTED
            and self._DATE_DIR_PATTERN.fullmatch(folder.name)
        ]

    def run(self, mode="skip"):
        """
        Run the cleanup process based on the specified mode.
        Modes are:
            - "skip": Do not perform any cleanup
            - "all": Delete all dated run directories
            - int (number of days): Delete directories older than this many days
        Args:
            mode: Cleanup mode - "skip", int(number of days), or "all".
                Anything else, including a negative number of days, is
                reported as invalid and nothing is deleted.
        Returns:
            None
        """
        print(f"\n[INFO] Cache directory: {self.base_dir}")

        if mode == "skip":
            print("[INFO] Cache cleanup skipped.")
            return

        if mode == "all":
            cutoff = None
        else:
            try:
                days = int(mode)
                # A negative age would put the cutoff in the future and
                # delete every run, including today's.
                if days < 0:
                    raise ValueError("negative retention period")
                cutoff = dt.date.today() - dt.timedelta(days=days)
            except (TypeError, ValueError, OverflowError):
                print(f"[WARN] Invalid cleanup mode: {mode}")
                return

        if not self.base_dir.is_dir():
            print(f"[WARN] Cache directory does not exist: {self.base_dir}")
            return

        for folder in self._dated_run_dirs():
            if cutoff is not None:
                try:
                    folder_date = dt.datetime.strptime(folder.name, "%Y-%m-%d").date()
                except ValueError:
                    # Right shape but not a real calendar date; leave it.
                    continue
                if not folder_date < cutoff:
                    continue

            shutil.rmtree(folder)
            print(f"[OK] Deleted: {folder}")
|
|
File without changes
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for detecting reactions and handling non-reactant monomers.
|
|
3
|
+
|
|
4
|
+
This module provides functionality to analyze a set of input monomers, detect
|
|
5
|
+
potential chemical reactions based on functional groups, and identify monomers
|
|
6
|
+
that do not participate in any detected reactions. It also includes an interactive
|
|
7
|
+
workflow to allow users to decide whether to retain non-reactant molecules in
|
|
8
|
+
the simulation.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
import os, json
|
|
13
|
+
|
|
14
|
+
# Opt-in deprecation notice: emitted at import time only when the
# AUTOREACTER_SHOW_DEPRECATION environment variable is set to a truthy value.
if os.environ.get("AUTOREACTER_SHOW_DEPRECATION", "").lower() in {"1", "true", "yes", "on"}:
    warnings.warn(
        # Adjacent literals keep the message on one line; the previous
        # triple-quoted string embedded a raw newline in the warning text.
        "This script is deprecated and will be modified in future versions. "
        "Within v0.2, the whole package will primarily support Jupyter "
        "notebooks and the CLI is removed. Please use the notebook version "
        "for now and refer to the README for how to use the package.",
        DeprecationWarning,
        stacklevel=2
    )
|
|
21
|
+
|
|
22
|
+
# Attempt to import detector modules. Handles different import paths depending on
|
|
23
|
+
# whether the script is run as a module, part of a package, or standalone.
|
|
24
|
+
try:
|
|
25
|
+
from functional_groups_detector import FunctionalGroupsDetector
|
|
26
|
+
from reaction_detector import ReactionDetector
|
|
27
|
+
except (ImportError, ModuleNotFoundError):
|
|
28
|
+
from .functional_groups_detector import FunctionalGroupsDetector
|
|
29
|
+
from .reaction_detector import ReactionDetector
|
|
30
|
+
|
|
31
|
+
from AutoREACTER.input_parser import MonomerEntry
|
|
32
|
+
|
|
33
|
+
class Detector:
    """
    A class to encapsulate the reaction detection workflow.

    This class provides methods to detect reactions based on input monomers and
    to identify non-reactant monomers. Constructing an instance runs the whole
    workflow and stores the results on ``reactions_dict``,
    ``non_reactants_list`` and ``input_dict``.
    """
    def __init__(self, input_dict: dict, interactive: bool = True):
        """
        Initializes the Detector with the given input dictionary and runs detection.

        Args:
            input_dict (dict): A dictionary containing the 'monomers' key, which maps
                               monomer IDs (int/str) to SMILES strings (str).
            interactive (bool): If False, automatically retain all non-reactants without prompting.
        """
        self.input_dict = input_dict
        self.interactive = interactive
        # NOTE(review): never populated here; results are stored on
        # ``reactions_dict``. Left in place in case external code reads it.
        self.reactions = {}
        self.non_reactants_list = []
        self.functional_groups_detector = FunctionalGroupsDetector()
        self.reactions_detector = ReactionDetector()
        self.reactions_dict, self.non_reactants_list, self.input_dict = self.detect_reactions(
            self.input_dict, interactive=interactive
        )

    @staticmethod
    def _collect_reactant_smiles(reactions_dict) -> set:
        """Return the set of 'monomer_1'/'monomer_2' SMILES found in the reaction entries."""
        reactant_smiles = set()
        for reaction_data in reactions_dict.values():
            for key, entry in reaction_data.items():
                # Only entries stored under digit-like keys that are dicts are
                # inspected; everything else in a reaction record is skipped.
                if not str(key).isdigit() or not isinstance(entry, dict):
                    continue
                for role in ("monomer_1", "monomer_2"):
                    monomer = entry.get(role, {})
                    # Tolerate a non-dict value instead of raising AttributeError.
                    smiles = monomer.get("smiles") if isinstance(monomer, dict) else None
                    if isinstance(smiles, str):
                        reactant_smiles.add(smiles)
        return reactant_smiles

    def find_non_reactant_monomers(self, reactions_dict, input_dict, interactive: bool = True) -> list:
        """
        Identifies monomers from the input that are not participating in any detected reactions
        and decides which of them are retained in the simulation.

        Side effects: sets ``self.non_reactants`` (re-indexed from 1) and, when
        non-reactants exist, ``self.non_reactants_list``; in interactive mode a
        typed selection is also stored on ``self.selected_non_reactants``.

        Args:
            reactions_dict (dict): A dictionary containing detected reaction data. Within each
                                   reaction, dict entries under digit-like keys are read for
                                   'monomer_1' / 'monomer_2' records carrying a 'smiles' string.
            input_dict (dict): The original input dictionary containing a 'monomers' key
                               mapping monomer IDs to SMILES strings.
            interactive (bool): If False, automatically retain all non-reactants without prompting.

        Returns:
            list: A list of SMILES strings representing the non-reactant monomers to be
                  retained. Returns an empty list if there are none or if the user chooses
                  to proceed with reactants only.
        """
        # 1) Collect all unique SMILES strings that participate in detected reactions
        reactant_smiles = self._collect_reactant_smiles(reactions_dict)

        # 2) Build a dictionary of monomers that are not found in the reactant set,
        #    re-indexed starting from ID 1
        monomers = input_dict.get("monomers", {})
        self.non_reactants = {}
        new_id = 1
        for smi in monomers.values():
            if smi not in reactant_smiles:
                self.non_reactants[new_id] = smi
                new_id += 1

        # Return empty list if no non-reactants were found
        if not self.non_reactants:
            return []

        # 3) Non-interactive mode: keep everything, never touch stdin
        if not interactive:
            self.non_reactants_list = list(self.non_reactants.values())
            print(
                f"Non-interactive mode: retaining all {len(self.non_reactants_list)} "
                "non-reactant molecule(s)."
            )
            return self.non_reactants_list

        # 4) Interactive mode: show the candidates and ask the user
        print(
            "\nThere are non-reactant monomers/molecules in the input.\n"
            "There may be reactions possible with the given monomers in the user inputs,\n"
            "but some of them are not detected for a reaction.\n"
            "You can choose to retain some or all of these non-reactant molecules in the simulation.\n"
            "Non-reactant molecules:"
        )
        for mid, molecule in self.non_reactants.items():
            print(f"{mid}. {molecule}")

        # Ask user if they want to exclude all non-reactants
        select = input(
            "Do you want to proceed with monomers only (no non-monomer molecules)? (y/n): "
        ).strip().lower()

        if select == "y":
            print("Proceeding with monomers only. No non-monomer molecules will be retained.")
            self.non_reactants_list = []
        else:
            # Ask user to specify which non-reactants to keep by ID
            self.selected_non_reactants = input(
                "Please specify the monomer IDs (comma-separated) you wish to retain as non-monomer molecules: "
            ).strip()

            # Parse the comma-separated input into a set of IDs
            selected_ids = {s.strip() for s in self.selected_non_reactants.split(",") if s.strip()}

            # Keep the SMILES whose displayed ID was selected
            self.non_reactants_list = [
                smi
                for mid, smi in self.non_reactants.items()
                if str(mid) in selected_ids
            ]

        return self.non_reactants_list

    def filter_simulation_molecules(self, input_dict, non_reactants_list, reactions_dict):
        """
        Filters the input monomers to include only those that are part of detected reactions
        and any non-reactant monomers that the user has chosen to retain.

        The 'monomers' mapping is pruned in place and ``input_dict`` itself is returned.

        Args:
            input_dict (dict): The original input dictionary containing a 'monomers' key mapping monomer IDs to SMILES strings.
            non_reactants_list (list): A list of SMILES strings for non-reactant monomers that the user has chosen to retain.
            reactions_dict (dict): A dictionary containing detected reaction data.
        Returns:
            dict: The same dictionary, whose 'monomers' entry now holds only reactants and the selected non-reactants.
        """
        # Combine reactant SMILES with the selected non-reactant SMILES
        combined = self._collect_reactant_smiles(reactions_dict) | set(non_reactants_list)

        # Drop every monomer that is neither a reactant nor a selected non-reactant.
        # Keys are collected first so the dict is not mutated while iterating it.
        monomers_dict_from_input = input_dict.get("monomers", {})
        dropped = [key for key, smi in monomers_dict_from_input.items() if smi not in combined]
        for key in dropped:
            del monomers_dict_from_input[key]

        # Update the input dictionary with the filtered monomers
        input_dict["monomers"] = monomers_dict_from_input

        return input_dict

    def detect_reactions(self, monomer_entry: "MonomerEntry", interactive=True) -> tuple:
        """
        Detects chemical reactions and identifies non-reactant monomers based on the provided input.

        This function serves as the main workflow controller. It detects functional
        groups, selects appropriate reactions, prints them, and identifies any monomers
        that did not participate in the detected reactions. Non-reactant lookup and
        filtering operate on ``self.input_dict``, which is updated as a side effect.

        Args:
            monomer_entry (MonomerEntry): The monomers data handed to the functional-group detector.
            interactive (bool): If False, all non-reactants are retained without prompting.

        Returns:
            tuple: A tuple containing:
                - dict: The detected reactions.
                - list: A list of SMILES strings for non-reactant monomers to be retained.
                - dict: The filtered input dictionary.
        """
        # Step 1: Detect functional groups within the monomers
        fg_results = self.functional_groups_detector.functional_groups_detector(monomer_entry)

        # Step 2: Select reactions based on the detected functional groups
        reactions = self.reactions_detector.reaction_detector(fg_results)

        # Print detected reactions
        print("\nDetected Reactions:")
        for reaction_name, reaction_data in reactions.items():
            print(f"\n{reaction_name}:")
            # NOTE(review): this reads 'reactant_1'/'reactant_2', while the
            # non-reactant search reads 'monomer_1'/'monomer_2' -- confirm the
            # schema produced by the reaction detector.
            monomer_1 = reaction_data.get("reactant_1", {})
            monomer_2 = reaction_data.get("reactant_2", {})
            if monomer_2:
                print(f"Reaction between {monomer_1} and {monomer_2}")
            else:
                print(f"Reaction involving {monomer_1}")

        # Step 3: Identify and handle monomers that are not part of any detected reaction
        non_reactants_list = self.find_non_reactant_monomers(
            reactions, self.input_dict, interactive=interactive
        )

        # Update the input dictionary to include only reactants and selected non-reactants
        self.input_dict = self.filter_simulation_molecules(self.input_dict, non_reactants_list, reactions)

        return reactions, non_reactants_list, self.input_dict
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
if __name__ == "__main__":
    # Example usage of the module with sample monomer data
    sample_inputs = {
        "monomers": {
            1: "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O",  # Example monomer 1 - Trimesoyl chloride (TMC)
            2: "C1=CC(=CC(=C1)N)N",  # Example monomer 2 - m-Phenylenediamine (MPD)
            3: "CCO",  # Example Non - monomer - Ethanol
        }
    }

    # Run the detection workflow (constructing the Detector runs it end to end)
    detector = Detector(sample_inputs)

    # Output results (the reactions dump was previously printed twice in a row)
    print("Detected Reactions:", json.dumps(detector.reactions_dict, indent=2))
    if detector.non_reactants_list:
        print("Non-monomer molecules to retain:", detector.non_reactants_list)
    print("Filtered Input Dictionary for Simulation:", json.dumps(detector.input_dict, indent=2))
|