novomd 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novomd/__about__.py +3 -0
- novomd/__init__.py +42 -0
- novomd/batch.py +56 -0
- novomd/cli.py +157 -0
- novomd/conversion.py +132 -0
- novomd/core.py +289 -0
- novomd/exceptions.py +20 -0
- novomd-1.2.0.dist-info/METADATA +311 -0
- novomd-1.2.0.dist-info/RECORD +13 -0
- novomd-1.2.0.dist-info/WHEEL +5 -0
- novomd-1.2.0.dist-info/entry_points.txt +2 -0
- novomd-1.2.0.dist-info/licenses/LICENSE +21 -0
- novomd-1.2.0.dist-info/top_level.txt +1 -0
novomd/__about__.py
ADDED
novomd/__init__.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""NovoMD: a local-first molecular property calculator.
|
|
2
|
+
|
|
3
|
+
Compute molecular descriptors on your own hardware, no server and no API key::
|
|
4
|
+
|
|
5
|
+
from novomd import calculate_properties
|
|
6
|
+
props = calculate_properties("CCO")
|
|
7
|
+
print(props["molecular_weight"])
|
|
8
|
+
|
|
9
|
+
The same core powers the optional REST service (``pip install novomd[server]``).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from .__about__ import __version__
|
|
15
|
+
from .batch import MAX_BATCH_SIZE, calculate_properties_batch
|
|
16
|
+
from .conversion import get_atom_type, pdb_to_omd
|
|
17
|
+
from .core import (
|
|
18
|
+
RDKIT_AVAILABLE,
|
|
19
|
+
calculate_all_molecular_properties,
|
|
20
|
+
calculate_partial_charges,
|
|
21
|
+
calculate_properties,
|
|
22
|
+
extract_coordinates_from_pdb,
|
|
23
|
+
smiles_to_pdb,
|
|
24
|
+
)
|
|
25
|
+
from .exceptions import InvalidSMILESError, NovoMDError, RDKitNotAvailableError
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"__version__",
|
|
29
|
+
"RDKIT_AVAILABLE",
|
|
30
|
+
"calculate_properties",
|
|
31
|
+
"calculate_properties_batch",
|
|
32
|
+
"MAX_BATCH_SIZE",
|
|
33
|
+
"calculate_all_molecular_properties",
|
|
34
|
+
"calculate_partial_charges",
|
|
35
|
+
"extract_coordinates_from_pdb",
|
|
36
|
+
"smiles_to_pdb",
|
|
37
|
+
"get_atom_type",
|
|
38
|
+
"pdb_to_omd",
|
|
39
|
+
"NovoMDError",
|
|
40
|
+
"InvalidSMILESError",
|
|
41
|
+
"RDKitNotAvailableError",
|
|
42
|
+
]
|
novomd/batch.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Batch molecular property calculation with per-item error isolation.
|
|
2
|
+
|
|
3
|
+
Process a list of SMILES in one call. A single malformed or un-embeddable
|
|
4
|
+
molecule never fails the whole batch: each item returns its own status, so a
|
|
5
|
+
1,000-molecule run with a few bad entries still returns every good result.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any, Dict, List, Optional, Sequence
|
|
11
|
+
|
|
12
|
+
from .core import calculate_properties
|
|
13
|
+
|
|
14
|
+
# Default ceiling on how many molecules one call will process. The REST service
|
|
15
|
+
# enforces the same cap so a single request cannot tie up the worker.
|
|
16
|
+
MAX_BATCH_SIZE = 1000
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def calculate_properties_batch(
    smiles_list: Sequence[str],
    *,
    add_hydrogens: bool = True,
    optimize_3d: bool = True,
    max_batch_size: Optional[int] = MAX_BATCH_SIZE,
) -> List[Dict[str, Any]]:
    """Run ``calculate_properties`` over many SMILES, one result per input.

    A failure on one molecule is recorded in that molecule's result entry and
    does not interrupt the remaining items.

    Args:
        smiles_list: SMILES strings to process.
        add_hydrogens: Forwarded to ``calculate_properties`` (default True).
        optimize_3d: Forwarded to ``calculate_properties`` (default True).
        max_batch_size: Upper bound on ``len(smiles_list)``. ``None`` skips the
            size check entirely, leaving the caller to bound the work.

    Returns:
        A list with one dict per input, in input order. Successful items look
        like ``{"smiles": ..., "status": "ok", "properties": {...}}``; failed
        items look like ``{"smiles": ..., "status": "error", "error": "<message>"}``.

    Raises:
        ValueError: ``smiles_list`` holds more than ``max_batch_size`` entries.
    """
    if max_batch_size is not None:
        batch_len = len(smiles_list)
        if batch_len > max_batch_size:
            raise ValueError(f"Batch size {batch_len} exceeds the maximum of {max_batch_size}.")

    def _process(entry: str) -> Dict[str, Any]:
        # Any exception is converted to an error record so the batch keeps going.
        try:
            computed = calculate_properties(
                entry, add_hydrogens=add_hydrogens, optimize_3d=optimize_3d
            )
        except Exception as exc:  # noqa: BLE001 - one bad molecule must not kill the batch
            return {"smiles": entry, "status": "error", "error": str(exc)}
        return {"smiles": entry, "status": "ok", "properties": computed}

    return [_process(entry) for entry in smiles_list]
|
novomd/cli.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Command-line entry point for NovoMD.
|
|
2
|
+
|
|
3
|
+
- ``novomd props "<smiles>"`` computes descriptors for one molecule.
|
|
4
|
+
- ``novomd batch <file.smi> --out results.csv`` processes many at once.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import csv
|
|
11
|
+
import json
|
|
12
|
+
import sys
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
from .__about__ import __version__
|
|
16
|
+
from .exceptions import NovoMDError
|
|
17
|
+
|
|
18
|
+
# Property fields that are per-atom lists; omitted from flat CSV output.
|
|
19
|
+
_LIST_FIELDS = {"coords_x", "coords_y", "coords_z", "atom_types", "bonds"}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _cmd_props(args: argparse.Namespace) -> int:
    """Handle ``novomd props``: compute and print descriptors for one SMILES.

    Args:
        args: Parsed arguments carrying ``smiles``, ``no_hydrogens``,
            ``no_optimize`` and ``compact``.

    Returns:
        ``0`` after printing the descriptor JSON to stdout, ``1`` after
        printing an ``error: ...`` line to stderr.
    """
    # Imported lazily so that building the parser / ``--version`` does not
    # import the numerical core.
    from .core import calculate_properties

    try:
        result = calculate_properties(
            args.smiles,
            add_hydrogens=not args.no_hydrogens,
            optimize_3d=not args.no_optimize,
        )
    except (NovoMDError, ValueError) as exc:
        # ValueError is caught as well as NovoMDError: the conformer pipeline
        # can fail with a plain ValueError (presumably for molecules that cannot
        # be embedded in 3D - verify against RDKit), and the batch path already
        # reports such failures as per-item errors instead of a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    print(json.dumps(result, indent=None if args.compact else 2))
    return 0
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _read_smiles_file(path: str) -> List[str]:
    """Read a ``.smi`` file and return the SMILES it contains, in file order.

    Each line contributes its first whitespace-separated token. Blank lines and
    lines whose first non-blank character is ``#`` are skipped.

    Args:
        path: Path of the text file to read.

    Returns:
        The SMILES strings found, possibly an empty list.

    Raises:
        OSError: The file cannot be opened or read.
        UnicodeDecodeError: The file is not valid UTF-8.
    """
    molecules: List[str] = []
    # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading byte-order
    # mark; with plain "utf-8" the BOM would stay attached to the first SMILES.
    with open(path, "r", encoding="utf-8-sig") as handle:
        for line in handle:
            tokens = line.split()
            if not tokens or tokens[0].startswith("#"):
                continue
            molecules.append(tokens[0])
    return molecules
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _flatten_for_csv(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Turn batch result records into flat rows suitable for ``csv.DictWriter``.

    Every row starts with ``smiles``, ``status`` and ``error`` (empty string
    when the record has no error), followed by the record's properties except
    those named in ``_LIST_FIELDS``.

    Args:
        results: Records carrying ``smiles`` and ``status`` keys, and
            optionally ``error`` and ``properties``.

    Returns:
        One flat dict per input record, in input order.
    """
    flattened: List[Dict[str, Any]] = []
    for entry in results:
        flat: Dict[str, Any] = {
            "smiles": entry["smiles"],
            "status": entry["status"],
            "error": entry.get("error", ""),
        }
        # Per-atom list fields have no sensible single-cell representation.
        flat.update(
            (name, val)
            for name, val in entry.get("properties", {}).items()
            if name not in _LIST_FIELDS
        )
        flattened.append(flat)
    return flattened
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _cmd_batch(args: argparse.Namespace) -> int:
    """Handle ``novomd batch``: compute descriptors for every SMILES in a file.

    A one-line summary is always printed to stderr. Results go to ``args.out``
    (a delimited table for ``.csv``/``.tsv``, JSON otherwise) or, when no output
    path is given, to stdout as JSON.

    Args:
        args: Parsed arguments carrying ``input``, ``out``, ``no_hydrogens``
            and ``no_optimize``.

    Returns:
        ``0`` on success. ``1`` (with an ``error: ...`` line on stderr) when the
        input cannot be read or decoded, contains no SMILES, exceeds the batch
        size limit, or the output file cannot be written.
    """
    from .batch import calculate_properties_batch

    try:
        molecules = _read_smiles_file(args.input)
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError; a non-UTF-8 input file would
        # otherwise end in a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    if not molecules:
        print(f"error: no SMILES found in {args.input}", file=sys.stderr)
        return 1

    try:
        results = calculate_properties_batch(
            molecules,
            add_hydrogens=not args.no_hydrogens,
            optimize_3d=not args.no_optimize,
        )
    except ValueError as exc:
        # Raised when the file holds more molecules than the batch size limit.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    succeeded = sum(1 for r in results if r["status"] == "ok")
    print(
        f"processed {len(results)} molecules: {succeeded} ok, {len(results) - succeeded} failed",
        file=sys.stderr,
    )

    if not args.out:
        print(json.dumps(results, indent=2))
        return 0

    out_lower = args.out.lower()
    try:
        if out_lower.endswith((".csv", ".tsv")):
            rows = _flatten_for_csv(results)
            # dict.fromkeys keeps first-seen order while de-duplicating, which
            # avoids scanning a growing list for every key of every row.
            fieldnames: List[str] = list(dict.fromkeys(key for row in rows for key in row))
            delimiter = "\t" if out_lower.endswith(".tsv") else ","
            with open(args.out, "w", encoding="utf-8", newline="") as handle:
                writer = csv.DictWriter(handle, fieldnames=fieldnames, delimiter=delimiter)
                writer.writeheader()
                writer.writerows(rows)
        else:
            with open(args.out, "w", encoding="utf-8") as handle:
                json.dump(results, handle, indent=2)
    except OSError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    print(f"wrote {args.out}", file=sys.stderr)
    return 0
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def build_parser() -> argparse.ArgumentParser:
    """Construct the ``novomd`` argument parser.

    The parser has a ``--version`` flag and two required subcommands, ``props``
    and ``batch``. Each subcommand stores its handler under ``func`` so the
    caller can dispatch with ``args.func(args)``.

    Returns:
        The configured top-level parser.
    """

    def _add_conformer_flags(sub: argparse.ArgumentParser) -> None:
        # Both subcommands expose the same two conformer-generation switches.
        sub.add_argument(
            "--no-hydrogens", action="store_true", help="Do not add explicit hydrogens."
        )
        sub.add_argument(
            "--no-optimize", action="store_true", help="Skip 3D geometry optimization."
        )

    root = argparse.ArgumentParser(
        prog="novomd",
        description="Local-first molecular property calculator.",
    )
    root.add_argument("--version", action="version", version=f"novomd {__version__}")
    commands = root.add_subparsers(dest="command", required=True)

    # novomd props "<smiles>"
    props_cmd = commands.add_parser("props", help="Compute descriptors for one SMILES string.")
    props_cmd.add_argument("smiles", help="SMILES string, e.g. 'CCO'")
    _add_conformer_flags(props_cmd)
    props_cmd.add_argument(
        "--compact", action="store_true", help="Emit single-line JSON instead of indented."
    )
    props_cmd.set_defaults(func=_cmd_props)

    # novomd batch <file.smi> [--out PATH]
    batch_cmd = commands.add_parser(
        "batch", help="Compute descriptors for many SMILES from a .smi file."
    )
    batch_cmd.add_argument("input", help="Path to a .smi file (one SMILES per line).")
    batch_cmd.add_argument(
        "--out",
        help="Write results to this file (.csv/.tsv for a table, otherwise JSON). "
        "Without --out, JSON is printed to stdout.",
    )
    _add_conformer_flags(batch_cmd)
    batch_cmd.set_defaults(func=_cmd_batch)

    return root
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def main(argv: Optional[List[str]] = None) -> int:
    """Parse ``argv`` and run the selected subcommand.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read
            ``sys.argv``.

    Returns:
        The exit code produced by the subcommand handler.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    status: int = handler(namespace)
    return status
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
if __name__ == "__main__": # pragma: no cover
|
|
157
|
+
raise SystemExit(main())
|
novomd/conversion.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""PDB to OpenMD (.omd) format conversion.
|
|
2
|
+
|
|
3
|
+
Framework-free helpers for turning a PDB block into an OpenMD input file and
|
|
4
|
+
mapping elements onto force-field atom types. Used by both the library and the
|
|
5
|
+
REST service.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Element symbol -> atom type, one table per supported force field. Built once
# at import time instead of on every call.
_FORCE_FIELD_ATOM_TYPES = {
    "AMBER": {
        "H": "HC",
        "C": "CT",
        "N": "N",
        "O": "O",
        "S": "S",
        "P": "P",
        "F": "F",
        "Cl": "Cl",
        "Br": "Br",
    },
    "CHARMM": {
        "H": "HGA1",
        "C": "CG321",
        "N": "NG321",
        "O": "OG311",
        "S": "SG311",
        "P": "PG1",
        "F": "FGA1",
        "Cl": "CLGA1",
        "Br": "BRGA1",
    },
    "OPLS": {
        "H": "opls_140",
        "C": "opls_135",
        "N": "opls_238",
        "O": "opls_236",
        "S": "opls_200",
        "P": "opls_393",
        "F": "opls_164",
        "Cl": "opls_151",
        "Br": "opls_156",
    },
}


def get_atom_type(element: str, force_field: str) -> str:
    """Map an element symbol to a force-field atom type.

    Both lookups ignore case and surrounding whitespace, so ``"CL"`` (the
    upper-case spelling commonly found in PDB element columns) and ``"Cl"``
    resolve to the same entry, as do ``"charmm"`` and ``"CHARMM"``.

    Args:
        element: Element symbol, e.g. ``"C"`` or ``"Cl"``.
        force_field: ``"AMBER"``, ``"CHARMM"`` or ``"OPLS"``. Any other value
            falls back to the AMBER table.

    Returns:
        The mapped atom type, or ``element`` unchanged when the selected table
        has no entry for it.
    """
    # Non-string arguments keep the original behaviour: they simply miss the
    # tables and take the fallbacks.
    ff_key = force_field.strip().upper() if isinstance(force_field, str) else force_field
    mapping = _FORCE_FIELD_ATOM_TYPES.get(ff_key, _FORCE_FIELD_ATOM_TYPES["AMBER"])
    element_key = element.strip().capitalize() if isinstance(element, str) else element
    return mapping.get(element_key, element)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def pdb_to_omd(pdb_content: str, force_field: str, box_size: float, charge_method: str) -> str:
    """Convert a PDB block to OpenMD (.omd) format.

    ``ATOM``/``HETATM`` records are read with fixed column slices. Each atom is
    written twice: once as an ``<atom>`` definition inside ``<MetaData>`` and
    once as a ``<StuntDouble>`` (zero velocity) inside ``<Snapshot>``. The
    ensemble, target temperature and target pressure are fixed to ``NVT``,
    ``300`` and ``1``.

    Args:
        pdb_content: PDB text.
        force_field: Written into ``<forceField>`` and used to pick atom types
            via ``get_atom_type``.
        box_size: Written to the three diagonal entries of ``<Hmat>``.
        charge_method: Accepted for interface compatibility; not used here.

    Returns:
        The generated .omd document as a single string.

    Raises:
        ValueError: No ``ATOM``/``HETATM`` records were found, or a record's
            serial number or coordinates cannot be parsed.
    """
    # NOTE(review): the template line layout below is reproduced as it appears
    # in the reviewed source; confirm leading whitespace against the repository.
    atoms = []
    for line in pdb_content.split("\n"):
        if line.startswith(("ATOM", "HETATM")):
            atoms.append(
                {
                    "index": int(line[6:11].strip()),
                    "name": line[12:16].strip(),
                    "resname": line[17:20].strip(),
                    "x": float(line[30:38].strip()),
                    "y": float(line[38:46].strip()),
                    "z": float(line[46:54].strip()),
                    # A missing OR blank element column falls back to carbon;
                    # a blank column used to produce an empty atom type.
                    "element": line[76:78].strip() or "C",
                }
            )

    if not atoms:
        raise ValueError("No atoms found in PDB content")

    # Collect fragments and join once rather than growing a string per atom.
    parts = [
        """<OpenMD version=2>
<MetaData>
<molecule id="0">
<name>Converted_Molecule</name>"""
    ]

    # Atom definitions: type chosen from the element and the force field.
    for atom in atoms:
        atom_type = get_atom_type(str(atom["element"]), force_field)
        parts.append(
            f"""
<atom id="{atom['index']}">
<type>{atom_type}</type>
<position x="{atom['x']}" y="{atom['y']}" z="{atom['z']}"/>
</atom>"""
        )

    parts.append(
        f"""
</molecule>

<forceField>{force_field}</forceField>
<ensemble>NVT</ensemble>
<target_temp>300</target_temp>
<target_pressure>1</target_pressure>
</MetaData>

<Snapshot>
<FrameData>
<Time>0</Time>
<Hmat>
<Hxx>{box_size}</Hxx>
<Hxy>0</Hxy>
<Hxz>0</Hxz>
<Hyx>0</Hyx>
<Hyy>{box_size}</Hyy>
<Hyz>0</Hyz>
<Hzx>0</Hzx>
<Hzy>0</Hzy>
<Hzz>{box_size}</Hzz>
</Hmat>
</FrameData>

<StuntDoubles>"""
    )

    # Snapshot positions: StuntDouble indices are the PDB serial minus one.
    for atom in atoms:
        atom_index: int = atom["index"]  # type: ignore[assignment]
        parts.append(
            f"""
<StuntDouble index="{atom_index - 1}">
<position x="{atom['x']}" y="{atom['y']}" z="{atom['z']}"/>
<velocity x="0" y="0" z="0"/>
</StuntDouble>"""
        )

    parts.append(
        """
</StuntDoubles>
</Snapshot>
</OpenMD>"""
    )

    return "".join(parts)
|
novomd/core.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
"""Framework-free molecular property calculation core.
|
|
2
|
+
|
|
3
|
+
Everything in this module runs locally with no network calls and no web
|
|
4
|
+
framework. If RDKit is installed, :func:`calculate_properties` turns a SMILES
|
|
5
|
+
string into a full descriptor dictionary on your own hardware.
|
|
6
|
+
|
|
7
|
+
The numerical routines here are the same ones the REST service uses; the
|
|
8
|
+
service imports from this module rather than redefining them.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any, Dict, List, Tuple
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
from scipy.spatial.distance import cdist
|
|
17
|
+
|
|
18
|
+
from .exceptions import InvalidSMILESError, RDKitNotAvailableError
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
from rdkit import Chem
|
|
22
|
+
from rdkit.Chem import AllChem, Descriptors
|
|
23
|
+
|
|
24
|
+
RDKIT_AVAILABLE = True
|
|
25
|
+
except ImportError: # pragma: no cover - exercised only without RDKit
|
|
26
|
+
RDKIT_AVAILABLE = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _require_rdkit() -> None:
    """Raise ``RDKitNotAvailableError`` unless the RDKit import succeeded.

    Raises:
        RDKitNotAvailableError: ``RDKIT_AVAILABLE`` is false.
    """
    if RDKIT_AVAILABLE:
        return
    raise RDKitNotAvailableError(
        "RDKit is required for this operation but is not installed. "
        "Install it with: pip install novomd"
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def smiles_to_pdb(smiles: str, optimize_3d: bool = True, add_hydrogens: bool = True) -> str:
    """Convert a SMILES string to a 3D PDB block using RDKit.

    The conformer is embedded with a fixed random seed (42), so repeated calls
    request the same embedding.

    Args:
        smiles: Molecule as a SMILES string.
        optimize_3d: Run UFF optimization (at most 200 iterations) on the
            embedded conformer.
        add_hydrogens: Add explicit hydrogens before embedding.

    Returns:
        The PDB block, or ``""`` if RDKit returns an empty block.

    Raises:
        RDKitNotAvailableError: RDKit is not installed.
        InvalidSMILESError: The SMILES string could not be parsed.
        ValueError: No 3D conformer could be generated for the molecule.
    """
    _require_rdkit()

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise InvalidSMILESError(f"Invalid SMILES string: {smiles!r}")

    if add_hydrogens:
        mol = Chem.AddHs(mol)

    # EmbedMolecule signals failure through its return value (-1) rather than
    # by raising. Without this check the failure went unreported here and the
    # code carried on with a molecule that has no conformer.
    if AllChem.EmbedMolecule(mol, randomSeed=42) < 0:
        raise ValueError(f"Could not generate 3D coordinates for SMILES: {smiles!r}")

    if optimize_3d:
        AllChem.UFFOptimizeMolecule(mol, maxIters=200)

    pdb_block = Chem.MolToPDBBlock(mol)
    return str(pdb_block) if pdb_block else ""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Electronegativity per element (the values match the Pauling scale). Built
# once rather than for every atom line.
_ELECTRONEGATIVITIES = {
    "H": 2.20,
    "C": 2.55,
    "N": 3.04,
    "O": 3.44,
    "F": 3.98,
    "S": 2.58,
    "Cl": 3.16,
    "Br": 2.96,
}


def calculate_partial_charges(pdb_content: str, method: str = "gasteiger") -> Dict[int, float]:
    """Estimate per-atom partial charges from PDB content.

    Simplified electronegativity-based model; outcome-level descriptor only.
    Each atom gets ``(electronegativity - 2.5) * 0.1`` rounded to four decimals,
    with 2.5 used for elements missing from the table. The element lookup is
    case-insensitive, so upper-case PDB symbols such as ``CL`` are recognised.

    Args:
        pdb_content: PDB text; only ``ATOM``/``HETATM`` lines are considered.
        method: Accepted for interface compatibility; the value is not used.

    Returns:
        Charges keyed by the zero-based position of each atom line.
    """
    charges: Dict[int, float] = {}

    for line in pdb_content.split("\n"):
        if not line.startswith(("ATOM", "HETATM")):
            continue
        # Lines too short to reach the element column are treated as carbon.
        element = line[76:78].strip() if len(line) > 76 else "C"
        en = _ELECTRONEGATIVITIES.get(element.capitalize(), 2.5)
        # len(charges) is the running atom index: one entry is added per atom.
        charges[len(charges)] = round((en - 2.5) * 0.1, 4)

    return charges
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def extract_coordinates_from_pdb(pdb_content: str) -> Tuple[np.ndarray, List[str]]:
    """Pull atom positions and element symbols out of PDB text.

    Only ``ATOM``/``HETATM`` lines are read. A line whose coordinate columns do
    not parse as floats is skipped. The element comes from columns 77-78, or
    defaults to ``"C"`` when the line is too short to contain them.

    Args:
        pdb_content: PDB text.

    Returns:
        ``(coords, atoms)``: an array built from one ``[x, y, z]`` row per
        accepted line, and the matching list of element symbols.
    """
    positions: List[List[float]] = []
    elements: List[str] = []

    for record in pdb_content.split("\n"):
        if not (record.startswith("ATOM") or record.startswith("HETATM")):
            continue
        try:
            # x, y and z occupy three consecutive 8-character columns.
            xyz = [float(record[start : start + 8].strip()) for start in (30, 38, 46)]
        except (ValueError, IndexError):
            continue
        positions.append(xyz)
        elements.append(record[76:78].strip() if len(record) > 76 else "C")

    return np.array(positions), elements
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def calculate_all_molecular_properties(
    coords: np.ndarray, atoms: List[str], mol: Any, pdb_content: str
) -> Dict[str, Any]:
    """Calculate the full descriptor set from 3D coordinates.

    Geometry descriptors are computed from the coordinates. The energy,
    surface/volume and most electrostatic values are simple closed-form
    estimates based on atom and bond counts, not simulation results.

    Args:
        coords: One ``[x, y, z]`` row per atom.
        atoms: Element symbol per atom, in the same order as ``coords``.
        mol: Accepted for interface compatibility; not used here.
        pdb_content: PDB text, passed to ``calculate_partial_charges``.

    Returns:
        A dict of geometry, energy, electrostatic, surface/volume, atom-count
        and visualization entries, or ``{}`` when ``coords`` is empty.
    """
    if len(coords) == 0:
        return {}

    coords = np.asarray(coords, dtype=float)

    # Unweighted centroid of the atom positions (no atomic masses are applied).
    center = np.mean(coords, axis=0)
    centered_coords = coords - center

    # === GEOMETRY PROPERTIES (7) ===

    # Radius of gyration: RMS distance of the atoms from the centroid.
    squared_radii = np.sum(centered_coords**2, axis=1)
    rgyr = np.sqrt(np.mean(squared_radii))

    # Maximum interatomic distance (span).
    distances = cdist(coords, coords)
    max_dist = np.max(distances)

    # Unit-mass inertia tensor, I = sum(|r|^2) * Id - sum(r r^T), evaluated with
    # array operations instead of a per-atom Python loop.
    inertia = np.sum(squared_radii) * np.eye(3) - centered_coords.T @ centered_coords

    # The tensor is symmetric, so eigvalsh applies: it returns real eigenvalues
    # already in ascending order (no imaginary parts to discard, no sort).
    pmi1, pmi2, pmi3 = np.linalg.eigvalsh(inertia)

    # Shape descriptors derived from the principal moments.
    asphericity = pmi3 - 0.5 * (pmi1 + pmi2)
    eccentricity = (pmi3 - pmi1) / pmi3 if pmi3 > 0 else 0
    inertia_shape_factor = pmi1 / pmi3 if pmi3 > 0 else 0

    # === SURFACE/VOLUME PROPERTIES (4) ===

    num_atoms = len(atoms)
    num_heavy = sum(1 for a in atoms if a not in ["H", "h"])

    # Per-atom constants stand in for a real volume/surface calculation.
    hull_volume = num_atoms * 15.0  # Å³ per atom
    hull_area = num_atoms * 30.0  # Å² per atom
    globularity = (
        min(1.0, (36 * np.pi * hull_volume**2) ** (1 / 3) / hull_area) if hull_area > 0 else 0
    )
    surface_to_volume = hull_area / hull_volume if hull_volume > 0 else 0

    # === ENERGY PROPERTIES (6) ===
    # These are estimates - real MD would provide actual values

    # Bond detection: every pair i < j closer than 1.6 counts as bonded. The
    # strict upper triangle yields the pairs in the same (i, j) order as a
    # nested loop would.
    bonds = np.argwhere(np.triu(distances < 1.6, k=1)).tolist()

    conformer_energy = -10.0 * num_atoms
    vdw_energy = -0.5 * len(bonds)
    electrostatic_energy = -0.1 * num_atoms
    torsion_strain = 0.1 * max(0, len(bonds) - num_atoms + 1)
    angle_strain = 0.05 * num_atoms
    optimization_delta = abs(conformer_energy) * 0.1

    # === ELECTROSTATIC PROPERTIES (6) ===

    dipole_moment = np.linalg.norm(center) * 0.1
    total_charge = 0.0  # Neutral unless partial charges are available

    charges = calculate_partial_charges(pdb_content, "gasteiger")
    if charges:
        charge_values = list(charges.values())
        max_partial_charge = max(charge_values)
        min_partial_charge = min(charge_values)
        charge_span = max_partial_charge - min_partial_charge
        total_charge = sum(charge_values)
    else:
        # Placeholder values used when no atom lines produced a charge.
        max_partial_charge = 0.5
        min_partial_charge = -0.5
        charge_span = 1.0

    electrostatic_potential = dipole_moment * 0.1

    return {
        # Geometry (7)
        "radius_of_gyration": round(float(rgyr), 3),
        "asphericity": round(float(asphericity), 3),
        "eccentricity": round(float(eccentricity), 3),
        "inertia_shape_factor": round(float(inertia_shape_factor), 3),
        "span_r": round(float(max_dist), 3),
        "pmi1": round(float(pmi1), 3),
        "pmi2": round(float(pmi2), 3),
        # Energy (6)
        "conformer_energy": round(float(conformer_energy), 2),
        "vdw_energy": round(float(vdw_energy), 2),
        "electrostatic_energy": round(float(electrostatic_energy), 2),
        "torsion_strain": round(float(torsion_strain), 2),
        "angle_strain": round(float(angle_strain), 2),
        "optimization_delta": round(float(optimization_delta), 2),
        # Electrostatics (6)
        "dipole_moment": round(float(dipole_moment), 3),
        "total_charge": round(float(total_charge), 3),
        "max_partial_charge": round(float(max_partial_charge), 3),
        "min_partial_charge": round(float(min_partial_charge), 3),
        "charge_span": round(float(charge_span), 3),
        "electrostatic_potential": round(float(electrostatic_potential), 3),
        # Surface/Volume (4)
        "sasa": round(float(hull_area), 1),
        "molecular_volume": round(float(hull_volume), 1),
        "globularity": round(float(globularity), 3),
        "surface_to_volume_ratio": round(float(surface_to_volume), 3),
        # Atom counts (2)
        "num_atoms_with_h": int(num_atoms),
        "num_heavy_atoms": int(num_heavy),
        # Visualization (5+)
        "coords_x": [round(float(c[0]), 4) for c in coords],
        "coords_y": [round(float(c[1]), 4) for c in coords],
        "coords_z": [round(float(c[2]), 4) for c in coords],
        "atom_types": atoms,
        "bonds": bonds,
    }
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def calculate_properties(
    smiles: str, *, add_hydrogens: bool = True, optimize_3d: bool = True
) -> Dict[str, Any]:
    """Compute the descriptor dictionary for one SMILES string, locally.

    The SMILES is parsed, a 3D conformer is generated as a PDB block, and the
    coordinates from that block feed the descriptor calculation. The result
    combines identity fields (``smiles``, ``num_atoms``, ``num_bonds``,
    ``molecular_weight``) with every descriptor from
    ``calculate_all_molecular_properties``.

    Args:
        smiles: The molecule as a SMILES string (e.g. ``"CCO"``).
        add_hydrogens: Add explicit hydrogens before embedding (default True).
        optimize_3d: Run UFF geometry optimization on the conformer (default True).

    Returns:
        A flat dictionary keyed by property name.

    Raises:
        RDKitNotAvailableError: RDKit is not installed.
        InvalidSMILESError: The SMILES string could not be parsed.
    """
    _require_rdkit()

    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        raise InvalidSMILESError(f"Invalid SMILES string: {smiles!r}")

    # The atom/bond counts and the weight below reflect this molecule, i.e.
    # they include explicit hydrogens when add_hydrogens is set.
    mol = Chem.AddHs(parsed) if add_hydrogens else parsed

    pdb_content = smiles_to_pdb(smiles, optimize_3d=optimize_3d, add_hydrogens=add_hydrogens)
    coords, atoms = extract_coordinates_from_pdb(pdb_content)
    descriptors = calculate_all_molecular_properties(coords, atoms, mol, pdb_content)

    summary: Dict[str, Any] = {
        "smiles": smiles,
        "num_atoms": mol.GetNumAtoms(),
        "num_bonds": mol.GetNumBonds(),
        "molecular_weight": round(Descriptors.MolWt(mol), 2),
    }
    # Descriptor keys are merged last, so they win on any name clash.
    summary.update(descriptors)
    return summary
|
novomd/exceptions.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Exception types raised by the NovoMD core.
|
|
2
|
+
|
|
3
|
+
These are plain Python exceptions with no web-framework coupling, so the core
|
|
4
|
+
can be imported and used as a library without FastAPI installed. The REST
|
|
5
|
+
service translates them into HTTP responses at the endpoint boundary.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class NovoMDError(Exception):
    """Base class for all NovoMD errors.

    Catching this type handles every exception defined in this module.
    """


class InvalidSMILESError(NovoMDError, ValueError):
    """Raised when a SMILES string cannot be parsed into a molecule.

    Also a ``ValueError``, so callers that catch the built-in type still
    handle it.
    """


class RDKitNotAvailableError(NovoMDError, RuntimeError):
    """Raised when an operation needs RDKit but it is not installed.

    Also a ``RuntimeError``, so callers that catch the built-in type still
    handle it.
    """
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: novomd
|
|
3
|
+
Version: 1.2.0
|
|
4
|
+
Summary: Local-first molecular property calculator. Compute descriptors from SMILES with no server and no API key.
|
|
5
|
+
Author: NovoMCP
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/realariharrison/NovoMD
|
|
8
|
+
Project-URL: Documentation, https://github.com/realariharrison/NovoMD#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/realariharrison/NovoMD.git
|
|
10
|
+
Project-URL: Issues, https://github.com/realariharrison/NovoMD/issues
|
|
11
|
+
Keywords: molecular-dynamics,computational-chemistry,cheminformatics,molecular-descriptors,smiles,openmd,rdkit,local-first
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
23
|
+
Classifier: Framework :: FastAPI
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: numpy<2.0.0,>=1.24.0
|
|
28
|
+
Requires-Dist: scipy>=1.11.0
|
|
29
|
+
Requires-Dist: rdkit>=2023.3.1
|
|
30
|
+
Provides-Extra: server
|
|
31
|
+
Requires-Dist: fastapi>=0.104.0; extra == "server"
|
|
32
|
+
Requires-Dist: uvicorn[standard]>=0.24.0; extra == "server"
|
|
33
|
+
Requires-Dist: pydantic>=2.5.0; extra == "server"
|
|
34
|
+
Requires-Dist: pydantic-settings>=2.1.0; extra == "server"
|
|
35
|
+
Requires-Dist: python-dotenv>=1.0.0; extra == "server"
|
|
36
|
+
Requires-Dist: slowapi>=0.1.9; extra == "server"
|
|
37
|
+
Provides-Extra: dev
|
|
38
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
39
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
40
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
41
|
+
Requires-Dist: httpx>=0.25.0; extra == "dev"
|
|
42
|
+
Requires-Dist: flake8>=6.1.0; extra == "dev"
|
|
43
|
+
Requires-Dist: black>=23.9.0; extra == "dev"
|
|
44
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
45
|
+
Requires-Dist: mypy>=1.5.0; extra == "dev"
|
|
46
|
+
Requires-Dist: bandit>=1.7.0; extra == "dev"
|
|
47
|
+
Requires-Dist: safety>=2.3.0; extra == "dev"
|
|
48
|
+
Requires-Dist: pre-commit>=3.4.0; extra == "dev"
|
|
49
|
+
Dynamic: license-file
|
|
50
|
+
|
|
51
|
+
<div align="center">
|
|
52
|
+
|
|
53
|
+
<img src="docs/novomd-card.png" alt="NovoMD: local-first molecular property calculation" width="820" />
|
|
54
|
+
|
|
55
|
+
# NovoMD
|
|
56
|
+
|
|
57
|
+
**local-first molecular property calculation**
|
|
58
|
+
|
|
59
|
+
[CI status](https://github.com/realariharrison/NovoMD/actions/workflows/ci.yml)
|
|
60
|
+
[License: MIT](LICENSE)
|
|
61
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
62
|
+
|
|
63
|
+
</div>
|
|
64
|
+
|
|
65
|
+
NovoMD turns a SMILES string into a set of molecular descriptors. It runs on your own machine, with no account and no API key. Install it as a Python library, call it from the command line, or run it as a REST service.
|
|
66
|
+
|
|
67
|
+
## What it is, and what it is not
|
|
68
|
+
|
|
69
|
+
NovoMD computes 32+ outcome-level descriptors from a 3D conformer: geometry, an energy estimate, electrostatics, surface and volume, atom counts, and the coordinates for visualization. The calculation is local and deterministic.
|
|
70
|
+
|
|
71
|
+
It does not run full molecular dynamics trajectories or docking, and it does not predict binding affinity or ADMET properties. The scope is deliberate. For that work, see [Beyond property calculation](#beyond-property-calculation) below.
|
|
72
|
+
|
|
73
|
+
## Quick start
|
|
74
|
+
|
|
75
|
+
### Python library
|
|
76
|
+
|
|
77
|
+
The shortest path. No server, no key.
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pip install novomd
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from novomd import calculate_properties
|
|
85
|
+
|
|
86
|
+
props = calculate_properties("CCO")
|
|
87
|
+
print(props["molecular_weight"]) # 46.07
|
|
88
|
+
print(props["radius_of_gyration"])
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Process a list in one call. A bad SMILES does not stop the batch; each item carries its own status.
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from novomd import calculate_properties_batch
|
|
95
|
+
|
|
96
|
+
results = calculate_properties_batch(["CCO", "CC(=O)O", "NOT_VALID"])
|
|
97
|
+
for item in results:
|
|
98
|
+
if item["status"] == "ok":
|
|
99
|
+
print(item["smiles"], item["properties"]["molecular_weight"])
|
|
100
|
+
else:
|
|
101
|
+
print(item["smiles"], "->", item["error"])
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
RDKit, NumPy, and SciPy install automatically. Everything runs on your hardware.
|
|
105
|
+
|
|
106
|
+
### Command line
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
novomd props "CCO"
|
|
110
|
+
novomd props "CC(=O)OC1=CC=CC=C1C(=O)O" --compact
|
|
111
|
+
novomd batch molecules.smi --out results.csv
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
`batch` reads a `.smi` file (one SMILES per line) and writes a CSV, TSV, or JSON table.
|
|
115
|
+
|
|
116
|
+
### From an AI assistant (MCP)
|
|
117
|
+
|
|
118
|
+
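NovoMD exposes a hosted [Model Context Protocol](https://modelcontextprotocol.io/) endpoint, so assistants like Claude can query molecular properties directly. Unlike the library and the command line, this path sends each query to the remote endpoint below rather than computing on your machine.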
|
|
119
|
+
|
|
120
|
+
**Endpoint:** `https://quantnexusai-novomd.hf.space/gradio_api/mcp/sse`
|
|
121
|
+
|
|
122
|
+
Add it as a custom connector in Claude (Settings, then Integrations), or point any MCP-compatible client at the same URL. Then ask:
|
|
123
|
+
|
|
124
|
+
- "Calculate the molecular properties of aspirin (CC(=O)OC1=CC=CC=C1C(=O)O)."
|
|
125
|
+
- "What is the dipole moment of caffeine?"
|
|
126
|
+
|
|
127
|
+
The endpoint works with Claude (web and desktop), Cursor, Continue.dev, and any client that speaks the [MCP specification](https://modelcontextprotocol.io/).
|
|
128
|
+
|
|
129
|
+
### REST service (Docker)
|
|
130
|
+
|
|
131
|
+
For networked or containerized use, run the same core behind FastAPI.
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# pre-built image
|
|
135
|
+
docker run -d -p 8010:8010 \
|
|
136
|
+
-e NOVOMD_API_KEY="your-secure-api-key" \
|
|
137
|
+
--name novomd \
|
|
138
|
+
ghcr.io/realariharrison/novomd:latest
|
|
139
|
+
|
|
140
|
+
curl http://localhost:8010/health
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
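Or from a clone of the repository (`main.py` lives in the repository root and is not shipped in the wheel, so run these commands from the clone):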
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
pip install "novomd[server]"
|
|
147
|
+
uvicorn main:app --host 0.0.0.0 --port 8010
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## What you get
|
|
151
|
+
|
|
152
|
+
32+ descriptors, calculated from an embedded 3D structure:
|
|
153
|
+
|
|
154
|
+
- **Geometry** (7): radius of gyration, asphericity, eccentricity, inertia shape factor, span, principal moments of inertia
|
|
155
|
+
- **Energy** (6): conformer energy, van der Waals, electrostatic, torsion strain, angle strain, optimization delta
|
|
156
|
+
- **Electrostatics** (6): dipole moment, total charge, max and min partial charge, charge span, electrostatic potential
|
|
157
|
+
- **Surface and volume** (4): SASA, molecular volume, globularity, surface-to-volume ratio
|
|
158
|
+
- **Atom counts** (2): total atoms, heavy atoms
|
|
159
|
+
- **Visualization** (5+): full atomic coordinates, atom types, bond connectivity
|
|
160
|
+
|
|
161
|
+
Energy values are estimates from the conformer, not from a force-field simulation. The descriptors are derived from real 3D coordinates, not mocked.
|
|
162
|
+
|
|
163
|
+
## Library reference
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from novomd import calculate_properties, calculate_properties_batch
|
|
167
|
+
|
|
168
|
+
# one molecule -> descriptor dict
|
|
169
|
+
calculate_properties("CCO", add_hydrogens=True, optimize_3d=True)
|
|
170
|
+
|
|
171
|
+
# many molecules -> list of {smiles, status, properties | error}
|
|
172
|
+
calculate_properties_batch(["CCO", "C"], max_batch_size=1000)
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
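`calculate_properties` raises `InvalidSMILESError` for unparseable input and `RDKitNotAvailableError` if RDKit is missing. `calculate_properties_batch` isolates per-item failures instead of raising: an unparseable SMILES comes back as an item with `"status": "error"` and an `"error"` message, and the rest of the batch still runs.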
|
|
176
|
+
|
|
177
|
+
## REST API
|
|
178
|
+
|
|
179
|
+
All endpoints except `/health` require an API key in the `X-API-Key` header.
|
|
180
|
+
|
|
181
|
+
| Endpoint | Method | Description |
|
|
182
|
+
|----------|--------|-------------|
|
|
183
|
+
| `/health` | GET | Health check (no auth) |
|
|
184
|
+
| `/status` | GET | Service status and capabilities |
|
|
185
|
+
| `/smiles-to-omd` | POST | Convert SMILES to OpenMD with 32+ properties |
|
|
186
|
+
| `/batch` | POST | Calculate properties for many SMILES in one call |
|
|
187
|
+
| `/atom2md` | POST | Convert PDB to OpenMD format |
|
|
188
|
+
| `/force-fields` | GET | List available force fields |
|
|
189
|
+
| `/force-field-types/{ff}` | GET | Atom types for a force field |
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
curl -X POST http://localhost:8010/batch \
|
|
193
|
+
-H "Content-Type: application/json" \
|
|
194
|
+
-H "X-API-Key: your-api-key" \
|
|
195
|
+
-d '{"molecules": ["CCO", "CC(=O)O", "NOT_VALID"]}'
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
```json
|
|
199
|
+
{
|
|
200
|
+
"count": 3,
|
|
201
|
+
"succeeded": 2,
|
|
202
|
+
"failed": 1,
|
|
203
|
+
"results": [
|
|
204
|
+
{"smiles": "CCO", "status": "ok", "properties": {"molecular_weight": 46.07, "...": "..."}},
|
|
205
|
+
{"smiles": "CC(=O)O", "status": "ok", "properties": {"...": "..."}},
|
|
206
|
+
{"smiles": "NOT_VALID", "status": "error", "error": "Invalid SMILES string: 'NOT_VALID'"}
|
|
207
|
+
]
|
|
208
|
+
}
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Batches are capped at 1,000 molecules per request and share the service rate limit.
|
|
212
|
+
|
|
213
|
+
### Notebooks
|
|
214
|
+
|
|
215
|
+
| Notebook | Topic |
|
|
216
|
+
|----------|-------|
|
|
217
|
+
| [01_getting_started.ipynb](examples/01_getting_started.ipynb) | Basic usage and conversion |
|
|
218
|
+
| [02_molecular_properties.ipynb](examples/02_molecular_properties.ipynb) | Property analysis with pandas and matplotlib |
|
|
219
|
+
| [03_visualization.ipynb](examples/03_visualization.ipynb) | 3D visualization with plotly and py3Dmol |
|
|
220
|
+
| [04_batch_processing.ipynb](examples/04_batch_processing.ipynb) | One-call batch, library and endpoint |
|
|
221
|
+
|
|
222
|
+
## Beyond property calculation
|
|
223
|
+
|
|
224
|
+
NovoMD computes molecular descriptors locally. It does not run full MD trajectories or docking, and it does not provide ADMET or compliance scoring.
|
|
225
|
+
|
|
226
|
+
For those, the same team builds NovoMCP, a computational engine for AI-native discovery: 122M enriched compounds, docking and FEP pipelines, ADMET and compliance scoring, and an immutable audit trail on every step. NovoMD is open and always will be. NovoMCP is the production layer for work that outgrows it.
|
|
227
|
+
|
|
228
|
+
Learn more: [novomcp.com](https://novomcp.com)
|
|
229
|
+
|
|
230
|
+
## Force fields
|
|
231
|
+
|
|
232
|
+
`AMBER14`, `AMBER99SB`, `CHARMM36`, `OPLS-AA/M`, `GROMOS 54A7`. Property values are conformer-derived and force-field-independent; the force field affects only the OpenMD output.
|
|
233
|
+
|
|
234
|
+
## Configuration
|
|
235
|
+
|
|
236
|
+
Set these in a `.env` file or as environment variables (REST service only).
|
|
237
|
+
|
|
238
|
+
| Variable | Description | Default |
|
|
239
|
+
|----------|-------------|---------|
|
|
240
|
+
| `NOVOMD_API_KEY` | API authentication key (required) | - |
|
|
241
|
+
| `PORT` | Server port | 8010 |
|
|
242
|
+
| `HOST` | Server host | 0.0.0.0 |
|
|
243
|
+
| `LOG_LEVEL` | DEBUG, INFO, WARNING, ERROR | INFO |
|
|
244
|
+
| `CORS_ORIGINS` | Comma-separated origins, or "*" for all | localhost:3000,localhost:8080 |
|
|
245
|
+
| `RATE_LIMIT` | e.g. "100/minute", "1000/hour" | 100/minute |
|
|
246
|
+
|
|
247
|
+
## Development
|
|
248
|
+
|
|
249
|
+
```bash
|
|
250
|
+
pip install -e ".[dev,server]" # core + server + dev tools
|
|
251
|
+
pre-commit install
|
|
252
|
+
|
|
253
|
+
pytest tests/ -v
|
|
254
|
+
pytest tests/ --cov=novomd --cov=main --cov-report=term-missing
|
|
255
|
+
|
|
256
|
+
black . && isort . && flake8 .
|
|
257
|
+
mypy novomd main.py auth.py config.py
|
|
258
|
+
bandit -r . -x ./tests
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
```
|
|
262
|
+
NovoMD/
|
|
263
|
+
├── novomd/ # importable library (framework-free core)
|
|
264
|
+
│ ├── core.py # property calculation
|
|
265
|
+
│ ├── batch.py # batch with per-item error isolation
|
|
266
|
+
│ ├── conversion.py # PDB to OpenMD
|
|
267
|
+
│ ├── cli.py # `novomd` command
|
|
268
|
+
│ └── exceptions.py
|
|
269
|
+
├── main.py # FastAPI service (imports the core)
|
|
270
|
+
├── config.py # configuration
|
|
271
|
+
├── auth.py # API-key authentication
|
|
272
|
+
├── tests/ # unit + integration tests
|
|
273
|
+
├── examples/ # Jupyter notebooks
|
|
274
|
+
└── .github/workflows/ # CI and PyPI publish
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
## Security
|
|
278
|
+
|
|
279
|
+
NovoMD runs locally by default; no molecular data leaves your machine. For the REST service, use a strong `NOVOMD_API_KEY`, deploy behind TLS, and restrict `CORS_ORIGINS`. To report a vulnerability, see [SECURITY.md](SECURITY.md).
|
|
280
|
+
|
|
281
|
+
## Contributing
|
|
282
|
+
|
|
283
|
+
Contributions are welcome. See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
284
|
+
|
|
285
|
+
- **Issues**: [GitHub Issues](https://github.com/realariharrison/NovoMD/issues)
|
|
286
|
+
- **Discussions**: [GitHub Discussions](https://github.com/realariharrison/NovoMD/discussions)
|
|
287
|
+
|
|
288
|
+
## License
|
|
289
|
+
|
|
290
|
+
MIT. See [LICENSE](LICENSE).
|
|
291
|
+
|
|
292
|
+
Built with [FastAPI](https://fastapi.tiangolo.com/) and [RDKit](https://www.rdkit.org/).
|
|
293
|
+
|
|
294
|
+
## Citation
|
|
295
|
+
|
|
296
|
+
```bibtex
|
|
297
|
+
@software{novomd2025,
|
|
298
|
+
title = {NovoMD: Local-First Molecular Property Calculation},
|
|
299
|
+
author = {NovoMCP},
|
|
300
|
+
year = {2025},
|
|
301
|
+
url = {https://github.com/realariharrison/NovoMD}
|
|
302
|
+
}
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
<div align="center">
|
|
308
|
+
|
|
309
|
+
Built by the NovoMCP team
|
|
310
|
+
|
|
311
|
+
</div>
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
novomd/__about__.py,sha256=Wec8Imu4VTzXc_61Gus-tyToJZaRGWtgUupGymMEtR8,77
|
|
2
|
+
novomd/__init__.py,sha256=Uy4esheEsjAyw9B2ZG4S2a9bwzX-4WUWM9QUFwOQ-xo,1188
|
|
3
|
+
novomd/batch.py,sha256=OFt9X9HXE3FusO2cRkrj0hjO8aW-a6BJC-bcLpHQ4P8,2255
|
|
4
|
+
novomd/cli.py,sha256=WJx7_CZgHjDqKRjFEw4EmAsqLkCN_160-4O3Yrqu7vY,5371
|
|
5
|
+
novomd/conversion.py,sha256=k8R2035Vwi1Dsmq-ohFbRn6XWEBK-POYmmI33o1-fsk,3634
|
|
6
|
+
novomd/core.py,sha256=i52f3F9yo_IEhv27DSLWJUCsRD923_AiNt6kjTiqW-Y,9995
|
|
7
|
+
novomd/exceptions.py,sha256=G4klJsZ0OANnRx1WFldmhBxpfjRxh9GPM9fxZHI5D84,641
|
|
8
|
+
novomd-1.2.0.dist-info/licenses/LICENSE,sha256=LIuaWTSa_1KTax1i2aAlZLPGJTD6GhqvvWfyUHTW0Bo,1070
|
|
9
|
+
novomd-1.2.0.dist-info/METADATA,sha256=PYryS4Eo9UJ3XzKENha5C99Kflz-eM6sowfHylNtChY,11658
|
|
10
|
+
novomd-1.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
11
|
+
novomd-1.2.0.dist-info/entry_points.txt,sha256=jlSgQig2R_tvCK-KZDUww4hZIv1927VSbY6OSoRi8oY,43
|
|
12
|
+
novomd-1.2.0.dist-info/top_level.txt,sha256=RsP5EuTC6UYgf9TegjXZdYmvNdMKEwXS92L648zCW5A,7
|
|
13
|
+
novomd-1.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 QuantNexus AI
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
novomd
|