biosim-extractor 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biosim_extractor-0.0.1/LICENSE +21 -0
- biosim_extractor-0.0.1/PKG-INFO +58 -0
- biosim_extractor-0.0.1/README.md +12 -0
- biosim_extractor-0.0.1/biosim_extractor/__init__.py +8 -0
- biosim_extractor-0.0.1/biosim_extractor/amber/__init__.py +0 -0
- biosim_extractor-0.0.1/biosim_extractor/amber/amberlog.py +310 -0
- biosim_extractor-0.0.1/biosim_extractor/gromacs/__init__.py +0 -0
- biosim_extractor-0.0.1/biosim_extractor/gromacs/gromacslog.py +311 -0
- biosim_extractor-0.0.1/biosim_extractor/helpers/__init__.py +0 -0
- biosim_extractor-0.0.1/biosim_extractor/helpers/log_utils.py +82 -0
- biosim_extractor-0.0.1/biosim_extractor/helpers/metadata_utils.py +24 -0
- biosim_extractor-0.0.1/biosim_extractor/mdanalysis/__init__.py +0 -0
- biosim_extractor-0.0.1/biosim_extractor/mdanalysis/toptraj.py +380 -0
- biosim_extractor-0.0.1/biosim_extractor/metadata/__init__.py +0 -0
- biosim_extractor-0.0.1/biosim_extractor/metadata/convertpopulated.py +52 -0
- biosim_extractor-0.0.1/biosim_extractor/metadata/fetchschema.py +168 -0
- biosim_extractor-0.0.1/biosim_extractor/metadata/populatemetadata.py +592 -0
- biosim_extractor-0.0.1/biosim_extractor/metadata/validatemetadata.py +170 -0
- biosim_extractor-0.0.1/biosim_extractor/units/__init__.py +0 -0
- biosim_extractor-0.0.1/biosim_extractor/units/unitconversion.py +294 -0
- biosim_extractor-0.0.1/pyproject.toml +107 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025-2026 CCPBioSim
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: biosim_extractor
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Extract simulation metadata from MD output files
|
|
5
|
+
Keywords: molecular dynamics,parser,schema
|
|
6
|
+
Author-email: Jas Kalayan <jas.kalayan@stfc.ac.uk>
|
|
7
|
+
Maintainer-email: Jas Kalayan <jas.kalayan@stfc.ac.uk>
|
|
8
|
+
Requires-Python: >=3.12
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Natural Language :: English
|
|
17
|
+
Classifier: Intended Audience :: Science/Research
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: MDAnalysis==2.10.0
|
|
22
|
+
Requires-Dist: biopython==1.87
|
|
23
|
+
Requires-Dist: rdkit>=2025.9.5
|
|
24
|
+
Requires-Dist: linkml==1.11.1
|
|
25
|
+
Requires-Dist: sphinx>=9,<10 ; extra == "docs"
|
|
26
|
+
Requires-Dist: nbsphinx>=0.9,<1.0 ; extra == "docs"
|
|
27
|
+
Requires-Dist: sphinx_rtd_theme>=3,<3.2 ; extra == "docs"
|
|
28
|
+
Requires-Dist: sphinx_copybutton>=0.5,<1.0 ; extra == "docs"
|
|
29
|
+
Requires-Dist: sphinxcontrib-contentui>=0.2,<1.0 ; extra == "docs"
|
|
30
|
+
Requires-Dist: sphinxcontrib-details-directive>=0.1,<1.0 ; extra == "docs"
|
|
31
|
+
Requires-Dist: furo>=2025.0,<2026.0 ; extra == "docs"
|
|
32
|
+
Requires-Dist: markupsafe>=3.0,<4.0 ; extra == "docs"
|
|
33
|
+
Requires-Dist: ruff>=0.14.8 ; extra == "pre-commit"
|
|
34
|
+
Requires-Dist: pre-commit>=4.5,<5.0 ; extra == "pre-commit"
|
|
35
|
+
Requires-Dist: pylint>=4.0,<5.0 ; extra == "pre-commit"
|
|
36
|
+
Requires-Dist: pytest>=9.0,<10.0 ; extra == "testing"
|
|
37
|
+
Requires-Dist: pytest-cov>=7.0,<8.0 ; extra == "testing"
|
|
38
|
+
Requires-Dist: pytest-sugar>=1.1,<2.0 ; extra == "testing"
|
|
39
|
+
Project-URL: Documentation, https://ccpbiosim-biosim-extractor.github.io/
|
|
40
|
+
Project-URL: Homepage, https://github.com/CCPBioSim/biosim-extractor/
|
|
41
|
+
Project-URL: Repository, https://github.com/CCPBioSim/biosim-extractor.git
|
|
42
|
+
Provides-Extra: docs
|
|
43
|
+
Provides-Extra: pre-commit
|
|
44
|
+
Provides-Extra: testing
|
|
45
|
+
|
|
46
|
+
biosim-extractor
|
|
47
|
+
================
|
|
48
|
+
A repository for extracting simulation data from output files produced by molecular dynamics (MD) simulations of biomolecules and validating it against biosim-schema.
|
|
49
|
+
|
|
50
|
+
## Project Status
|
|
51
|
+
|
|
52
|
+
| Category | Badges |
|
|
53
|
+
|----------------|--------|
|
|
54
|
+
| **Build** | [](https://github.com/CCPBioSim/biosim-extractor/actions/workflows/biosim_extractor_project-ci.yaml) |
|
|
55
|
+
| **Documentation** | [](https://biosim-extractor.readthedocs.io/en/latest/) |
|
|
56
|
+
| **PyPI** | [](https://pypi.org/project/biosim-extractor/) [](https://pypi.org/project/biosim-extractor/) [](https://pypi.org/project/biosim-extractor/) [](https://pypi.org/project/biosim-extractor/) |
|
|
57
|
+
| **Quality** | [](https://coveralls.io/github/CCPBioSim/biosim-extractor?branch=main) |
|
|
58
|
+
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
biosim-extractor
|
|
2
|
+
================
|
|
3
|
+
A repository for extracting simulation data from output files produced by molecular dynamics (MD) simulations of biomolecules and validating it against biosim-schema.
|
|
4
|
+
|
|
5
|
+
## Project Status
|
|
6
|
+
|
|
7
|
+
| Category | Badges |
|
|
8
|
+
|----------------|--------|
|
|
9
|
+
| **Build** | [](https://github.com/CCPBioSim/biosim-extractor/actions/workflows/biosim_extractor_project-ci.yaml) |
|
|
10
|
+
| **Documentation** | [](https://biosim-extractor.readthedocs.io/en/latest/) |
|
|
11
|
+
| **PyPI** | [](https://pypi.org/project/biosim-extractor/) [](https://pypi.org/project/biosim-extractor/) [](https://pypi.org/project/biosim-extractor/) [](https://pypi.org/project/biosim-extractor/) |
|
|
12
|
+
| **Quality** | [](https://coveralls.io/github/CCPBioSim/biosim-extractor?branch=main) |
|
|
File without changes
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Extract AMBER log file metadata into a structured dictionary.
|
|
4
|
+
|
|
5
|
+
This script parses AMBER log files and outputs structured metadata as JSON.
|
|
6
|
+
It can be used as a standalone CLI tool or imported as a module.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
|
|
13
|
+
from biosim_extractor.helpers.log_utils import add_value, normalize_name, parse_value
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# -------------------------
|
|
17
|
+
# PARSER
|
|
18
|
+
# -------------------------
|
|
19
|
+
class AmberLogParser:
    """
    Parser for AMBER log files.

    After :meth:`parse` has run, ``self.data`` holds a nested dictionary with
    two top-level keys:

    * ``"SimulationSettings"`` -- the ``&cntrl`` namelist, colon-terminated
      sections, loose ``key = value`` pairs and the ``File Assignments`` block.
    * ``"Results"`` -- ``"TimeSeries"``, ``"Averages"``, ``"RMSFluctuations"``
      and ``"Timings"`` containers (only ``"Averages"`` and ``"Timings"`` are
      filled by the current :meth:`_parse_results`).

    Values are converted and stored through the ``parse_value``, ``add_value``
    and ``normalize_name`` helpers imported from
    ``biosim_extractor.helpers.log_utils``; their exact semantics are defined
    there.
    """

    # Patterns are compiled once here rather than being re-specified inside
    # every per-line loop.
    # "key = number" pairs in the settings part of the log.
    _SETTING_RE = re.compile(r"([A-Za-z0-9_\-\s]+?)\s*=\s*([-\d\.E+]+)")
    # "NAME = number" pairs in energy blocks (time series / averages).
    _ENERGY_RE = re.compile(r"([A-Za-z\(\)\-]+)\s*=\s*([-\d\.E+]+)")
    # "| NAME: value" rows of the File Assignments block.
    _FILE_RE = re.compile(r"\|\s*([A-Z0-9_]+):\s*(.+)")
    # "| <label>: <number> seconds" timing rows.
    _TIMING_RE = re.compile(r"\|\s*(.*?)\s*:\s*([-\d\.E+]+)\s*seconds")

    def __init__(self, filepath):
        """
        Args:
            filepath (str): Path to the AMBER log file.
        """
        self.filepath = filepath
        # Raw lines of the log; populated by parse().
        self.lines = []
        self.data = {
            # "Header": {},
            "SimulationSettings": {},
            "Results": {
                "TimeSeries": [],
                "Averages": {},
                "RMSFluctuations": {},
                "Timings": {},
            },
        }

    # -------------------------
    # PUBLIC API
    # -------------------------
    def parse(self):
        """
        Parse the AMBER log file.

        Reads the whole file into ``self.lines`` and then fills
        ``self.data`` from the settings and results sections.

        Returns:
            dict: Parsed metadata (the same object as ``self.data``).
        """
        with open(self.filepath) as f:
            self.lines = f.readlines()

        # self._parse_header()
        self._parse_simulation_settings()
        self._parse_results()
        # print(json.dumps(self.data, indent=2))
        return self.data

    # # -------------------------
    # # HEADER
    # # -------------------------
    # def _parse_header(self):
    #     for line in self.lines[:200]:
    #         if "=" in line:
    #             parts = line.split(",")[0].split("=")
    #             if len(parts) == 2:
    #                 key, val = parts
    #                 add_value(self.data["Header"], key.strip(), parse_value(val))

    # -------------------------
    # SIMULATION SETTINGS
    # -------------------------
    def _parse_simulation_settings(self):
        """
        Parse simulation settings from the log file.

        Scans ``self.lines`` from the top and stops at the first line that
        contains both ``NSTEP`` and ``TIME`` (the start of the energy
        output). Three kinds of content are recognised:

        * the ``&cntrl`` namelist, stored under ``settings["cntrl"]``;
        * lines ending in ``:`` (and containing no ``=``), which open a new
          named section that stays active until the next blank line;
        * ``key = number`` pairs, stored in the active section or, when no
          section is active, directly in the settings dictionary.

        Finally the ``File Assignments`` block is parsed separately.
        """
        settings = self.data["SimulationSettings"]
        current_section = None
        capture_cntrl = False

        for line in self.lines:
            stripped = line.strip()

            # Stop at time series
            if "NSTEP" in line and "TIME" in line:
                break

            # -------------------------
            # &cntrl block
            # -------------------------
            if "&cntrl" in stripped:
                capture_cntrl = True
                current_section = "cntrl"
                settings[current_section] = {}
                continue

            if capture_cntrl:
                # NOTE(review): any line containing "/" closes the namelist,
                # including one where "/" is part of a value; assignments on
                # that same line are not recorded.
                if "/" in stripped:
                    capture_cntrl = False
                    current_section = None
                    continue

                for part in stripped.split(","):
                    # Split on the FIRST "=" only. Unpacking the result of
                    # part.split("=") raised ValueError whenever the value
                    # itself contained "=" (e.g. an atom mask like '@H=').
                    name, sep, raw_value = part.partition("=")
                    if sep:
                        add_value(
                            settings["cntrl"], name.strip(), parse_value(raw_value)
                        )
                continue

            # -------------------------
            # Colon sections
            # -------------------------
            if stripped.endswith(":") and "=" not in stripped:
                section_name = normalize_name(stripped[:-1])
                current_section = section_name
                settings[current_section] = {}
                continue

            # -------------------------
            # Key-value pairs
            # -------------------------
            if "=" in line:
                for k, v in self._SETTING_RE.findall(line):
                    key = normalize_name(k)
                    val = parse_value(v)

                    if current_section:
                        add_value(settings[current_section], key, val)
                    else:
                        add_value(settings, key, val)

            # Reset section on blank line
            if not stripped:
                current_section = None

        self._parse_file_assignments(settings)

    # -------------------------
    # SETTINGS: FILE ASSIGNMENTS
    # -------------------------
    def _parse_file_assignments(self, settings):
        """
        Parse file assignments from the log file.

        Looks for the first line containing ``File Assignments:`` and reads
        the ``| NAME: value`` rows that follow it. Reading stops at the first
        blank line or the first line that does not start with ``|``. If at
        least one row was found, the mapping is stored under
        ``settings["File_Assignments"]``; otherwise ``settings`` is left
        untouched.

        Args:
            settings (dict): Simulation settings dictionary to update.
        """
        capture = False
        files = {}

        for line in self.lines:
            stripped = line.strip()

            # Start block
            if "File Assignments:" in line:
                capture = True
                continue

            if capture:
                # Stop if block ends
                if not stripped or not stripped.startswith("|"):
                    break

                match = self._FILE_RE.search(line)
                if match:
                    key = match.group(1).strip()
                    val = match.group(2).strip()

                    files[key] = val

        if files:
            settings["File_Assignments"] = files

    # -------------------------
    # RESULTS (ALL OUTPUT DATA)
    # -------------------------
    def _parse_results(self):
        """
        Parse results blocks from the log file.

        Currently fills ``Results["Averages"]`` and ``Results["Timings"]``;
        the time-series and RMS-fluctuation steps are disabled below.
        """
        # self._parse_time_series()
        self._parse_block(
            "A V E R A G E S", "R M S F L U C T U A T I O N S", "Averages"
        )
        # self._parse_block("R M S F L U C T U A T I O N S", "TIMINGS", "RMSFluctuations")
        self._parse_timings()

    # -------------------------
    # TIME SERIES
    # -------------------------
    def _parse_time_series(self):
        """
        Parse time series data from the log file.

        Every line containing both ``NSTEP`` and ``TIME`` starts a new step
        record; subsequent ``NAME = number`` pairs are added to that record
        until the next such line. Scanning stops at the first line containing
        ``A V E R A G E S``. The list of records replaces
        ``Results["TimeSeries"]``.

        Not invoked by :meth:`_parse_results` at present (the call there is
        commented out).
        """
        steps = []
        current = {}
        in_series = False

        for line in self.lines:
            if "NSTEP" in line and "TIME" in line:
                in_series = True
                # Flush the previous step before starting a new record.
                if current:
                    steps.append(current)
                current = {}

                for k, v in self._ENERGY_RE.findall(line):
                    current[k] = parse_value(v)
                continue

            if in_series and "=" in line:
                for k, v in self._ENERGY_RE.findall(line):
                    current[k] = parse_value(v)

            if "A V E R A G E S" in line:
                break

        # Flush the final step, which has no following NSTEP line.
        if current:
            steps.append(current)

        self.data["Results"]["TimeSeries"] = steps

    # -------------------------
    # GENERIC BLOCK PARSER
    # -------------------------
    def _parse_block(self, start_marker, end_marker, target_key):
        """
        Parse a generic results block.

        Capturing starts on the line after the first one containing
        ``start_marker``; ``NAME = number`` pairs on captured lines are added
        to ``Results[target_key]``. The scan ends at the first line containing
        ``end_marker`` (whether or not capturing has started) or at end of
        file.

        Args:
            start_marker (str): Line indicating the start of the block.
            end_marker (str): Line indicating the end of the block.
            target_key (str): Key in the results dictionary to populate.
        """
        capture = False
        target = self.data["Results"][target_key]

        for line in self.lines:
            if start_marker in line:
                capture = True
                continue

            if capture and "=" in line:
                for k, v in self._ENERGY_RE.findall(line):
                    add_value(target, k, parse_value(v))

            if end_marker in line:
                break

    # -------------------------
    # TIMINGS
    # -------------------------
    def _parse_timings(self):
        """
        Parse timing information from the log file.

        Considers only lines containing ``CPU time`` or ``wall time`` and
        records those of the form ``| <label>: <number> seconds`` in
        ``Results["Timings"]``, keyed by the normalized label.
        """
        timings = self.data["Results"]["Timings"]

        for line in self.lines:
            if "CPU time" in line or "wall time" in line:
                match = self._TIMING_RE.search(line)
                if match:
                    key = normalize_name(match.group(1))
                    val = parse_value(match.group(2))
                    add_value(timings, key, val)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# =========================
|
|
277
|
+
# ENTRY POINT
|
|
278
|
+
# =========================
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def parse_args(argv=None):
    """Parse command-line arguments.

    Args:
        argv (list[str] | None): Argument strings to parse. When ``None``
            (the default) ``argparse`` falls back to ``sys.argv[1:]``, so
            existing zero-argument calls behave as before. Passing an
            explicit list lets the function be used programmatically.

    Returns:
        Parsed ``argparse.Namespace`` object with the attributes ``logfile``
        (required positional) and ``output`` (``None`` when ``--output`` /
        ``-o`` is not given).
    """
    parser = argparse.ArgumentParser(
        description="Extract Amber log file metadata to JSON"
    )
    parser.add_argument("logfile", help="Path to Amber log file")
    parser.add_argument("--output", "-o", help="Output file path (default: stdout)")
    return parser.parse_args(argv)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def main():
    """Run the CLI workflow: read arguments, parse the log, emit JSON.

    The parsed metadata is written with two-space indentation to the path
    given by ``--output`` when present, otherwise it is printed to stdout.
    """
    cli = parse_args()
    metadata = AmberLogParser(cli.logfile).parse()

    # No output path requested: print to stdout and finish.
    if not cli.output:
        print(json.dumps(metadata, indent=2))
        return

    with open(cli.output, "w") as handle:
        json.dump(metadata, handle, indent=2)


# Only run the CLI when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
File without changes
|