novomd 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
novomd/__about__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Single source of truth for the package version."""
2
+
3
+ __version__ = "1.2.0"
novomd/__init__.py ADDED
@@ -0,0 +1,42 @@
1
+ """NovoMD: a local-first molecular property calculator.
2
+
3
+ Compute molecular descriptors on your own hardware, no server and no API key::
4
+
5
+ from novomd import calculate_properties
6
+ props = calculate_properties("CCO")
7
+ print(props["molecular_weight"])
8
+
9
+ The same core powers the optional REST service (``pip install novomd[server]``).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from .__about__ import __version__
15
+ from .batch import MAX_BATCH_SIZE, calculate_properties_batch
16
+ from .conversion import get_atom_type, pdb_to_omd
17
+ from .core import (
18
+ RDKIT_AVAILABLE,
19
+ calculate_all_molecular_properties,
20
+ calculate_partial_charges,
21
+ calculate_properties,
22
+ extract_coordinates_from_pdb,
23
+ smiles_to_pdb,
24
+ )
25
+ from .exceptions import InvalidSMILESError, NovoMDError, RDKitNotAvailableError
26
+
27
+ __all__ = [
28
+ "__version__",
29
+ "RDKIT_AVAILABLE",
30
+ "calculate_properties",
31
+ "calculate_properties_batch",
32
+ "MAX_BATCH_SIZE",
33
+ "calculate_all_molecular_properties",
34
+ "calculate_partial_charges",
35
+ "extract_coordinates_from_pdb",
36
+ "smiles_to_pdb",
37
+ "get_atom_type",
38
+ "pdb_to_omd",
39
+ "NovoMDError",
40
+ "InvalidSMILESError",
41
+ "RDKitNotAvailableError",
42
+ ]
novomd/batch.py ADDED
@@ -0,0 +1,56 @@
1
+ """Batch molecular property calculation with per-item error isolation.
2
+
3
+ Process a list of SMILES in one call. A single malformed or un-embeddable
4
+ molecule never fails the whole batch: each item returns its own status, so a
5
+ 1,000-molecule run with a few bad entries still returns every good result.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, Dict, List, Optional, Sequence
11
+
12
+ from .core import calculate_properties
13
+
14
# Default ceiling on how many molecules one call will process. The REST service
# enforces the same cap so a single request cannot tie up the worker.
MAX_BATCH_SIZE = 1000


def calculate_properties_batch(
    smiles_list: Sequence[str],
    *,
    add_hydrogens: bool = True,
    optimize_3d: bool = True,
    max_batch_size: Optional[int] = MAX_BATCH_SIZE,
) -> List[Dict[str, Any]]:
    """Compute descriptors for many SMILES, isolating per-item failures.

    Args:
        smiles_list: SMILES strings to process. Must be a sized collection of
            strings (list, tuple, ...), not a single string.
        add_hydrogens: Add explicit hydrogens before embedding (default True).
        optimize_3d: Run UFF geometry optimization on each conformer (default True).
        max_batch_size: Reject inputs larger than this. Pass ``None`` to disable
            the check (the caller is then responsible for bounding the work).

    Returns:
        One result dict per input, in order. Each is either
        ``{"smiles": ..., "status": "ok", "properties": {...}}`` or
        ``{"smiles": ..., "status": "error", "error": "<message>"}``.

    Raises:
        TypeError: ``smiles_list`` is a bare ``str``/``bytes`` instead of a
            collection of SMILES strings.
        ValueError: The input is larger than ``max_batch_size``.
    """
    # A lone string is itself a Sequence[str]; iterating it would silently
    # treat every character ("C", "C", "O") as its own molecule.
    if isinstance(smiles_list, (str, bytes)):
        raise TypeError(
            "smiles_list must be a sequence of SMILES strings, not a single string; "
            "wrap it in a list, e.g. [smiles]."
        )

    if max_batch_size is not None and len(smiles_list) > max_batch_size:
        raise ValueError(f"Batch size {len(smiles_list)} exceeds the maximum of {max_batch_size}.")

    results: List[Dict[str, Any]] = []
    for smiles in smiles_list:
        try:
            properties = calculate_properties(
                smiles, add_hydrogens=add_hydrogens, optimize_3d=optimize_3d
            )
        except Exception as exc:  # noqa: BLE001 - one bad molecule must not kill the batch
            results.append({"smiles": smiles, "status": "error", "error": str(exc)})
        else:
            results.append({"smiles": smiles, "status": "ok", "properties": properties})

    return results
novomd/cli.py ADDED
@@ -0,0 +1,157 @@
1
+ """Command-line entry point for NovoMD.
2
+
3
+ - ``novomd props "<smiles>"`` computes descriptors for one molecule.
4
+ - ``novomd batch <file.smi> --out results.csv`` processes many at once.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+ import csv
11
+ import json
12
+ import sys
13
+ from typing import Any, Dict, List, Optional
14
+
15
+ from .__about__ import __version__
16
+ from .exceptions import NovoMDError
17
+
18
+ # Property fields that are per-atom lists; omitted from flat CSV output.
19
+ _LIST_FIELDS = {"coords_x", "coords_y", "coords_z", "atom_types", "bonds"}
20
+
21
+
22
def _cmd_props(args: argparse.Namespace) -> int:
    """Handle ``novomd props``: print the descriptors of one SMILES as JSON.

    Reads ``args.smiles``, ``args.no_hydrogens``, ``args.no_optimize`` and
    ``args.compact``.

    Returns:
        ``0`` after printing the JSON result to stdout, ``1`` after printing an
        ``error: ...`` line to stderr when the calculation fails.
    """
    # Imported lazily so that argument parsing does not load the numeric core.
    from .core import calculate_properties

    try:
        result = calculate_properties(
            args.smiles,
            add_hydrogens=not args.no_hydrogens,
            optimize_3d=not args.no_optimize,
        )
    except (NovoMDError, ValueError) as exc:
        # ValueError is caught too: the conformer pipeline can fail with a
        # plain ValueError (e.g. no usable 3D conformer), which should be
        # reported as a clean CLI error instead of a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    indent = None if args.compact else 2
    print(json.dumps(result, indent=indent))
    return 0
38
+
39
+
40
def _read_smiles_file(path: str) -> List[str]:
    """Read a .smi file and return its SMILES strings in file order.

    One SMILES per line; the first whitespace-separated token of each line is
    taken as the SMILES. Blank lines and ``#`` comments are skipped.

    Args:
        path: Path of the text file to read.

    Returns:
        The SMILES tokens, one per non-blank, non-comment line.

    Raises:
        OSError: The file cannot be opened or read.
    """
    molecules: List[str] = []
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading byte-order
    # mark, which would otherwise be glued onto the first SMILES token.
    with open(path, "r", encoding="utf-8-sig") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue
            molecules.append(stripped.split()[0])
    return molecules
51
+
52
+
53
def _flatten_for_csv(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Turn batch results into flat rows suitable for ``csv.DictWriter``.

    Every row starts with ``smiles``, ``status`` and ``error`` (empty string
    when the item has no error), followed by each entry of the item's
    ``properties`` whose key is not listed in ``_LIST_FIELDS``.
    """
    flattened: List[Dict[str, Any]] = []
    for entry in results:
        scalar_props = {
            name: val
            for name, val in entry.get("properties", {}).items()
            if name not in _LIST_FIELDS
        }
        flattened.append(
            {
                "smiles": entry["smiles"],
                "status": entry["status"],
                "error": entry.get("error", ""),
                # Property values win on a key clash, as with plain assignment.
                **scalar_props,
            }
        )
    return flattened
66
+
67
+
68
def _cmd_batch(args: argparse.Namespace) -> int:
    """Handle ``novomd batch``: compute descriptors for every SMILES in a file.

    Reads ``args.input``, ``args.out``, ``args.no_hydrogens`` and
    ``args.no_optimize``. A one-line summary is printed to stderr. Results go
    to ``args.out`` (a delimited table for ``.csv``/``.tsv``, JSON otherwise)
    or, without ``--out``, to stdout as JSON.

    Returns:
        ``0`` on success; ``1`` after printing an ``error: ...`` line to stderr
        when the input cannot be read, contains no SMILES, is rejected by the
        batch function, or the output file cannot be written.
    """
    from .batch import calculate_properties_batch

    try:
        molecules = _read_smiles_file(args.input)
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError; a non-UTF-8 input file should
        # still produce a clean error rather than a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    if not molecules:
        print(f"error: no SMILES found in {args.input}", file=sys.stderr)
        return 1

    try:
        results = calculate_properties_batch(
            molecules,
            add_hydrogens=not args.no_hydrogens,
            optimize_3d=not args.no_optimize,
        )
    except ValueError as exc:
        # Raised when the file holds more molecules than the batch size cap.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    succeeded = sum(1 for r in results if r["status"] == "ok")
    print(
        f"processed {len(results)} molecules: {succeeded} ok, {len(results) - succeeded} failed",
        file=sys.stderr,
    )

    if not args.out:
        print(json.dumps(results, indent=2))
        return 0

    out_lower = args.out.lower()
    try:
        if out_lower.endswith((".csv", ".tsv")):
            rows = _flatten_for_csv(results)
            # dict.fromkeys keeps first-seen order while de-duplicating in
            # linear time (rows for failed items carry fewer columns).
            fieldnames: List[str] = list(dict.fromkeys(key for row in rows for key in row))
            delimiter = "\t" if out_lower.endswith(".tsv") else ","
            with open(args.out, "w", encoding="utf-8", newline="") as handle:
                writer = csv.DictWriter(handle, fieldnames=fieldnames, delimiter=delimiter)
                writer.writeheader()
                writer.writerows(rows)
        else:
            with open(args.out, "w", encoding="utf-8") as handle:
                json.dump(results, handle, indent=2)
    except OSError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    print(f"wrote {args.out}", file=sys.stderr)
    return 0
113
+
114
+
115
def build_parser() -> argparse.ArgumentParser:
    """Assemble the ``novomd`` parser with its ``props`` and ``batch`` subcommands.

    Each subcommand stores its handler under ``func`` so the caller can
    dispatch with ``args.func(args)``.
    """

    def _add_conformer_flags(sub: argparse.ArgumentParser) -> None:
        # Both subcommands accept the same two conformer switches.
        sub.add_argument(
            "--no-hydrogens", action="store_true", help="Do not add explicit hydrogens."
        )
        sub.add_argument(
            "--no-optimize", action="store_true", help="Skip 3D geometry optimization."
        )

    root = argparse.ArgumentParser(
        prog="novomd",
        description="Local-first molecular property calculator.",
    )
    root.add_argument("--version", action="version", version=f"novomd {__version__}")
    commands = root.add_subparsers(dest="command", required=True)

    # novomd props "<smiles>"
    props_cmd = commands.add_parser("props", help="Compute descriptors for one SMILES string.")
    props_cmd.add_argument("smiles", help="SMILES string, e.g. 'CCO'")
    _add_conformer_flags(props_cmd)
    props_cmd.add_argument(
        "--compact", action="store_true", help="Emit single-line JSON instead of indented."
    )
    props_cmd.set_defaults(func=_cmd_props)

    # novomd batch <file.smi> [--out FILE]
    batch_cmd = commands.add_parser(
        "batch", help="Compute descriptors for many SMILES from a .smi file."
    )
    batch_cmd.add_argument("input", help="Path to a .smi file (one SMILES per line).")
    batch_cmd.add_argument(
        "--out",
        help="Write results to this file (.csv/.tsv for a table, otherwise JSON). "
        "Without --out, JSON is printed to stdout.",
    )
    _add_conformer_flags(batch_cmd)
    batch_cmd.set_defaults(func=_cmd_batch)

    return root
147
+
148
+
149
def main(argv: Optional[List[str]] = None) -> int:
    """Parse ``argv`` and run the selected subcommand.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` is passed
            through unchanged.

    Returns:
        The integer exit code returned by the subcommand handler.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    status: int = handler(namespace)
    return status
154
+
155
+
156
+ if __name__ == "__main__": # pragma: no cover
157
+ raise SystemExit(main())
novomd/conversion.py ADDED
@@ -0,0 +1,132 @@
1
+ """PDB to OpenMD (.omd) format conversion.
2
+
3
+ Framework-free helpers for turning a PDB block into an OpenMD input file and
4
+ mapping elements onto force-field atom types. Used by both the library and the
5
+ REST service.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+
11
# Element symbol -> atom type, per supported force field. Built once at import
# time instead of on every call (get_atom_type runs once per atom).
_FORCE_FIELD_ATOM_TYPES = {
    "AMBER": {
        "H": "HC",
        "C": "CT",
        "N": "N",
        "O": "O",
        "S": "S",
        "P": "P",
        "F": "F",
        "Cl": "Cl",
        "Br": "Br",
    },
    "CHARMM": {
        "H": "HGA1",
        "C": "CG321",
        "N": "NG321",
        "O": "OG311",
        "S": "SG311",
        "P": "PG1",
        "F": "FGA1",
        "Cl": "CLGA1",
        "Br": "BRGA1",
    },
    "OPLS": {
        "H": "opls_140",
        "C": "opls_135",
        "N": "opls_238",
        "O": "opls_236",
        "S": "opls_200",
        "P": "opls_393",
        "F": "opls_164",
        "Cl": "opls_151",
        "Br": "opls_156",
    },
}


def get_atom_type(element: str, force_field: str) -> str:
    """Map an element symbol to a force-field atom type.

    Matching ignores letter case and surrounding whitespace for both
    arguments, so PDB-style upper-case symbols ("CL", "BR") and lower-case
    force-field names ("charmm") resolve the same as "Cl" and "CHARMM".

    Args:
        element: Element symbol, e.g. ``"C"`` or ``"Cl"``.
        force_field: ``"AMBER"``, ``"CHARMM"`` or ``"OPLS"``; any other value
            falls back to the AMBER table.

    Returns:
        The mapped atom type, or ``element`` unchanged when the symbol is not
        in the selected table.
    """
    table = _FORCE_FIELD_ATOM_TYPES.get(
        force_field.strip().upper(), _FORCE_FIELD_ATOM_TYPES["AMBER"]
    )
    # Table keys use conventional capitalisation ("Cl"), so normalise first.
    return table.get(element.strip().capitalize(), element)
52
+
53
+
54
def pdb_to_omd(pdb_content: str, force_field: str, box_size: float, charge_method: str) -> str:
    """Convert a PDB block to OpenMD (.omd) format.

    Args:
        pdb_content: PDB text; only ``ATOM`` and ``HETATM`` records are read,
            using fixed column slices for serial, name, residue name,
            coordinates and element.
        force_field: Name used for atom typing and written to ``<forceField>``.
        box_size: Value written to the three diagonal ``Hmat`` entries.
        charge_method: Accepted for interface compatibility; not used here.

    Returns:
        The .omd document as a single string without a trailing newline.

    Raises:
        ValueError: No atom records were found, or a record's serial or
            coordinate columns are not numeric.
    """
    # Local import: only this function needs markup escaping.
    from xml.sax.saxutils import escape

    # Parse PDB content to extract atoms.
    atoms = []
    for line in pdb_content.split("\n"):
        if not line.startswith(("ATOM", "HETATM")):
            continue
        element = line[76:78].strip() if len(line) > 76 else ""
        atoms.append(
            {
                "index": int(line[6:11].strip()),
                "name": line[12:16].strip(),
                "resname": line[17:20].strip(),
                "x": float(line[30:38].strip()),
                "y": float(line[38:46].strip()),
                "z": float(line[46:54].strip()),
                # A blank element column gets the same "C" default as a line
                # too short to have one, instead of yielding an empty <type>.
                "element": element or "C",
            }
        )

    if not atoms:
        raise ValueError("No atoms found in PDB content")

    # Collect output lines and join once rather than growing a string.
    out = [
        "<OpenMD version=2>",
        "<MetaData>",
        '<molecule id="0">',
        "<name>Converted_Molecule</name>",
    ]

    # Atom definitions: type from element + force field, then position.
    for atom in atoms:
        atom_type = get_atom_type(str(atom["element"]), force_field)
        x, y, z = atom["x"], atom["y"], atom["z"]
        out.extend(
            [
                f'<atom id="{atom["index"]}">' if False else '<atom id="' + str(atom["index"]) + '">',
                # Escaped because the type can echo text taken from the input.
                f"<type>{escape(str(atom_type))}</type>",
                f'<position x="{x}" y="{y}" z="{z}"/>',
                "</atom>",
            ]
        )

    out.extend(
        [
            "</molecule>",
            "",
            # Escaped so a caller-supplied name cannot inject markup.
            f"<forceField>{escape(str(force_field))}</forceField>",
            "<ensemble>NVT</ensemble>",
            "<target_temp>300</target_temp>",
            "<target_pressure>1</target_pressure>",
            "</MetaData>",
            "",
            "<Snapshot>",
            "<FrameData>",
            "<Time>0</Time>",
            "<Hmat>",
            f"<Hxx>{box_size}</Hxx>",
            "<Hxy>0</Hxy>",
            "<Hxz>0</Hxz>",
            "<Hyx>0</Hyx>",
            f"<Hyy>{box_size}</Hyy>",
            "<Hyz>0</Hyz>",
            "<Hzx>0</Hzx>",
            "<Hzy>0</Hzy>",
            f"<Hzz>{box_size}</Hzz>",
            "</Hmat>",
            "</FrameData>",
            "",
            "<StuntDoubles>",
        ]
    )

    # One StuntDouble per atom; its index is the PDB serial minus one.
    for atom in atoms:
        x, y, z = atom["x"], atom["y"], atom["z"]
        out.extend(
            [
                f'<StuntDouble index="{atom["index"] - 1}">' if False else '<StuntDouble index="' + str(atom["index"] - 1) + '">',
                f'<position x="{x}" y="{y}" z="{z}"/>',
                '<velocity x="0" y="0" z="0"/>',
                "</StuntDouble>",
            ]
        )

    out.extend(["</StuntDoubles>", "</Snapshot>", "</OpenMD>"])
    return "\n".join(out)
novomd/core.py ADDED
@@ -0,0 +1,289 @@
1
+ """Framework-free molecular property calculation core.
2
+
3
+ Everything in this module runs locally with no network calls and no web
4
+ framework. If RDKit is installed, :func:`calculate_properties` turns a SMILES
5
+ string into a full descriptor dictionary on your own hardware.
6
+
7
+ The numerical routines here are the same ones the REST service uses; the
8
+ service imports from this module rather than redefining them.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import Any, Dict, List, Tuple
14
+
15
+ import numpy as np
16
+ from scipy.spatial.distance import cdist
17
+
18
+ from .exceptions import InvalidSMILESError, RDKitNotAvailableError
19
+
20
+ try:
21
+ from rdkit import Chem
22
+ from rdkit.Chem import AllChem, Descriptors
23
+
24
+ RDKIT_AVAILABLE = True
25
+ except ImportError: # pragma: no cover - exercised only without RDKit
26
+ RDKIT_AVAILABLE = False
27
+
28
+
29
def _require_rdkit() -> None:
    """Fail fast when RDKit could not be imported.

    Raises:
        RDKitNotAvailableError: ``RDKIT_AVAILABLE`` is false.
    """
    if RDKIT_AVAILABLE:
        return
    raise RDKitNotAvailableError(
        "RDKit is required for this operation but is not installed. "
        "Install it with: pip install novomd"
    )
35
+
36
+
37
def smiles_to_pdb(smiles: str, optimize_3d: bool = True, add_hydrogens: bool = True) -> str:
    """Convert a SMILES string to a 3D PDB block using RDKit.

    Args:
        smiles: The molecule as a SMILES string.
        optimize_3d: Run UFF optimization (at most 200 iterations) on the
            embedded conformer.
        add_hydrogens: Add explicit hydrogens before embedding.

    Returns:
        The PDB block, or ``""`` if RDKit produced no block.

    Raises:
        RDKitNotAvailableError: RDKit is not installed.
        InvalidSMILESError: The SMILES string could not be parsed.
        ValueError: No 3D conformer could be generated for the molecule.
    """
    _require_rdkit()

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise InvalidSMILESError(f"Invalid SMILES string: {smiles!r}")

    if add_hydrogens:
        mol = Chem.AddHs(mol)

    # EmbedMolecule signals failure by returning -1 and leaves the molecule
    # without a conformer. Ignoring that would surface later as an opaque UFF
    # error or as a PDB block with no real coordinates. Retry once from random
    # starting coordinates (same fixed seed, so still deterministic).
    if AllChem.EmbedMolecule(mol, randomSeed=42) < 0:
        if AllChem.EmbedMolecule(mol, randomSeed=42, useRandomCoords=True) < 0:
            raise ValueError(f"Could not generate 3D coordinates for SMILES: {smiles!r}")

    if optimize_3d:
        AllChem.UFFOptimizeMolecule(mol, maxIters=200)

    pdb_block = Chem.MolToPDBBlock(mol)
    return str(pdb_block) if pdb_block else ""
55
+
56
+
57
# Electronegativity per element symbol. Hoisted to module level so the table
# is built once rather than for every atom record.
_ELECTRONEGATIVITY = {
    "H": 2.20,
    "C": 2.55,
    "N": 3.04,
    "O": 3.44,
    "F": 3.98,
    "S": 2.58,
    "Cl": 3.16,
    "Br": 2.96,
}


def calculate_partial_charges(pdb_content: str, method: str = "gasteiger") -> Dict[int, float]:
    """Estimate per-atom partial charges from PDB content.

    Simplified electronegativity-based model; outcome-level descriptor only.
    Each ``ATOM``/``HETATM`` record gets ``(electronegativity - 2.5) * 0.1``,
    rounded to four decimals. Elements missing from the table use 2.5 (charge
    0.0); records too short to hold an element column are treated as carbon.

    Args:
        pdb_content: PDB text; the element is read from columns 77-78.
        method: Accepted for interface compatibility; not used here.

    Returns:
        Charges keyed by the zero-based position of the atom record.
    """
    charges: Dict[int, float] = {}

    for line in pdb_content.split("\n"):
        if not line.startswith(("ATOM", "HETATM")):
            continue
        # PDB element columns are commonly upper-case ("CL", "BR"); normalise
        # to the table's capitalisation so they are not given the default.
        element = line[76:78].strip().capitalize() if len(line) > 76 else "C"
        en = _ELECTRONEGATIVITY.get(element, 2.5)
        # len(charges) is the running zero-based atom index.
        charges[len(charges)] = round((en - 2.5) * 0.1, 4)

    return charges
87
+
88
+
89
def extract_coordinates_from_pdb(pdb_content: str) -> Tuple[np.ndarray, List[str]]:
    """Extract 3D coordinates and element symbols from PDB content.

    Only ``ATOM`` and ``HETATM`` records are read. Records whose coordinate
    columns cannot be parsed as floats are skipped. The element comes from
    columns 77-78, or is ``"C"`` when the record is too short to hold them.

    Returns:
        ``(coords, atoms)``: an array built from the ``[x, y, z]`` rows and the
        matching list of element symbols.
    """
    column_spans = ((30, 38), (38, 46), (46, 54))  # x, y, z
    xyz_rows: List[List[float]] = []
    symbols: List[str] = []

    for record in pdb_content.split("\n"):
        if not record.startswith(("ATOM", "HETATM")):
            continue
        try:
            position = [float(record[lo:hi].strip()) for lo, hi in column_spans]
        except (ValueError, IndexError):
            continue
        symbol = record[76:78].strip() if len(record) > 76 else "C"
        xyz_rows.append(position)
        symbols.append(symbol)

    return np.array(xyz_rows), symbols
108
+
109
+
110
def calculate_all_molecular_properties(
    coords: np.ndarray, atoms: List[str], mol: Any, pdb_content: str
) -> Dict[str, Any]:
    """Calculate the full descriptor set from 3D coordinates.

    Args:
        coords: One ``[x, y, z]`` row per atom.
        atoms: Element symbol per atom.
        mol: Not referenced by this function.
        pdb_content: PDB text handed to ``calculate_partial_charges``.

    Returns:
        A dict of geometry, energy-estimate, electrostatic, surface/volume,
        atom-count and visualization entries, or ``{}`` when ``coords`` is
        empty.
    """
    if len(coords) == 0:
        return {}

    def _rounded(value: Any, digits: int) -> float:
        return round(float(value), digits)

    # --- Geometry -----------------------------------------------------------
    # Unweighted mean position; every atom counts equally.
    centroid = np.mean(coords, axis=0)
    offsets = coords - centroid

    rgyr = np.sqrt(np.mean(np.sum(offsets**2, axis=1)))

    pair_dist = cdist(coords, coords)
    max_dist = np.max(pair_dist)

    # Unit-mass inertia tensor, accumulated atom by atom.
    tensor = np.zeros((3, 3))
    for x, y, z in offsets:
        tensor[0, 0] += y**2 + z**2
        tensor[1, 1] += x**2 + z**2
        tensor[2, 2] += x**2 + y**2
        tensor[0, 1] -= x * y
        tensor[0, 2] -= x * z
        tensor[1, 2] -= y * z
    # Mirror the upper triangle into the lower one.
    tensor[1, 0] = tensor[0, 1]
    tensor[2, 0] = tensor[0, 2]
    tensor[2, 1] = tensor[1, 2]

    # Principal moments, ascending.
    pmi1, pmi2, pmi3 = np.sort(np.linalg.eigvals(tensor).real)

    asphericity = pmi3 - 0.5 * (pmi1 + pmi2)
    if pmi3 > 0:
        eccentricity = (pmi3 - pmi1) / pmi3
        inertia_shape_factor = pmi1 / pmi3
    else:
        eccentricity = 0
        inertia_shape_factor = 0

    # --- Surface / volume ---------------------------------------------------
    num_atoms = len(atoms)
    num_heavy = len([symbol for symbol in atoms if symbol not in ("H", "h")])

    # Fixed per-atom contributions: 15 cubic and 30 square angstroms.
    hull_volume = num_atoms * 15.0
    hull_area = num_atoms * 30.0
    if hull_area > 0:
        globularity = min(1.0, (36 * np.pi * hull_volume**2) ** (1 / 3) / hull_area)
    else:
        globularity = 0
    if hull_volume > 0:
        surface_to_volume = hull_area / hull_volume
    else:
        surface_to_volume = 0

    # --- Energy estimates ---------------------------------------------------
    # Atom pairs closer than 1.6 are counted as bonded.
    atom_count = len(coords)
    bonds = [
        [i, j]
        for i in range(atom_count)
        for j in range(i + 1, atom_count)
        if pair_dist[i, j] < 1.6
    ]

    conformer_energy = -10.0 * num_atoms
    vdw_energy = -0.5 * len(bonds)
    electrostatic_energy = -0.1 * num_atoms
    torsion_strain = 0.1 * max(0, len(bonds) - num_atoms + 1)
    angle_strain = 0.05 * num_atoms
    optimization_delta = abs(conformer_energy) * 0.1

    # --- Electrostatics -----------------------------------------------------
    dipole_moment = np.linalg.norm(centroid) * 0.1
    electrostatic_potential = dipole_moment * 0.1

    partials = calculate_partial_charges(pdb_content, "gasteiger")
    if partials:
        per_atom = list(partials.values())
        max_partial_charge = max(per_atom)
        min_partial_charge = min(per_atom)
        charge_span = max_partial_charge - min_partial_charge
        total_charge = sum(per_atom)
    else:
        # No atom records in pdb_content: fall back to placeholder values.
        max_partial_charge = 0.5
        min_partial_charge = -0.5
        charge_span = 1.0
        total_charge = 0.0

    descriptors: Dict[str, Any] = {
        # Geometry (7)
        "radius_of_gyration": _rounded(rgyr, 3),
        "asphericity": _rounded(asphericity, 3),
        "eccentricity": _rounded(eccentricity, 3),
        "inertia_shape_factor": _rounded(inertia_shape_factor, 3),
        "span_r": _rounded(max_dist, 3),
        "pmi1": _rounded(pmi1, 3),
        "pmi2": _rounded(pmi2, 3),
        # Energy (6)
        "conformer_energy": _rounded(conformer_energy, 2),
        "vdw_energy": _rounded(vdw_energy, 2),
        "electrostatic_energy": _rounded(electrostatic_energy, 2),
        "torsion_strain": _rounded(torsion_strain, 2),
        "angle_strain": _rounded(angle_strain, 2),
        "optimization_delta": _rounded(optimization_delta, 2),
        # Electrostatics (6)
        "dipole_moment": _rounded(dipole_moment, 3),
        "total_charge": _rounded(total_charge, 3),
        "max_partial_charge": _rounded(max_partial_charge, 3),
        "min_partial_charge": _rounded(min_partial_charge, 3),
        "charge_span": _rounded(charge_span, 3),
        "electrostatic_potential": _rounded(electrostatic_potential, 3),
        # Surface/Volume (4)
        "sasa": _rounded(hull_area, 1),
        "molecular_volume": _rounded(hull_volume, 1),
        "globularity": _rounded(globularity, 3),
        "surface_to_volume_ratio": _rounded(surface_to_volume, 3),
        # Atom counts (2)
        "num_atoms_with_h": int(num_atoms),
        "num_heavy_atoms": int(num_heavy),
    }

    # Visualization (5+): per-axis coordinates, then atom symbols and bonds.
    for axis, key in enumerate(("coords_x", "coords_y", "coords_z")):
        descriptors[key] = [_rounded(row[axis], 4) for row in coords]
    descriptors["atom_types"] = atoms
    descriptors["bonds"] = bonds

    return descriptors
246
+
247
+
248
def calculate_properties(
    smiles: str, *, add_hydrogens: bool = True, optimize_3d: bool = True
) -> Dict[str, Any]:
    """Compute the full molecular descriptor set for a SMILES string, locally.

    Parses the SMILES, embeds a 3D conformer, and returns a flat dictionary of
    identity metadata (molecular weight, atom/bond counts) plus the geometry,
    energy, electrostatic, surface/volume and visualization descriptors. No
    network access, no API key, no server.

    Args:
        smiles: The molecule as a SMILES string (e.g. ``"CCO"``).
        add_hydrogens: Add explicit hydrogens before embedding (default True).
        optimize_3d: Run UFF geometry optimization on the conformer (default True).

    Returns:
        A descriptor dictionary keyed by property name.

    Raises:
        RDKitNotAvailableError: RDKit is not installed.
        InvalidSMILESError: The SMILES is not a string, is empty or blank, or
            could not be parsed.
    """
    _require_rdkit()

    # Reject empty, blank and non-string input up front. An empty string would
    # otherwise parse to a zero-atom molecule and fail later with an error
    # unrelated to the actual problem.
    if not isinstance(smiles, str) or not smiles.strip():
        raise InvalidSMILESError(f"Invalid SMILES string: {smiles!r}")

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise InvalidSMILESError(f"Invalid SMILES string: {smiles!r}")

    # This molecule supplies the identity metadata below; the 3D conformer is
    # built separately inside smiles_to_pdb.
    if add_hydrogens:
        mol = Chem.AddHs(mol)

    pdb_content = smiles_to_pdb(smiles, optimize_3d=optimize_3d, add_hydrogens=add_hydrogens)
    coords, atoms = extract_coordinates_from_pdb(pdb_content)
    properties = calculate_all_molecular_properties(coords, atoms, mol, pdb_content)

    return {
        "smiles": smiles,
        "num_atoms": mol.GetNumAtoms(),
        "num_bonds": mol.GetNumBonds(),
        "molecular_weight": round(Descriptors.MolWt(mol), 2),
        **properties,
    }
novomd/exceptions.py ADDED
@@ -0,0 +1,20 @@
1
+ """Exception types raised by the NovoMD core.
2
+
3
+ These are plain Python exceptions with no web-framework coupling, so the core
4
+ can be imported and used as a library without FastAPI installed. The REST
5
+ service translates them into HTTP responses at the endpoint boundary.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+
11
class NovoMDError(Exception):
    """Common ancestor of every NovoMD exception; catch it to handle them all."""


class InvalidSMILESError(NovoMDError, ValueError):
    """A SMILES string could not be parsed into a molecule.

    Also a ``ValueError``, so generic ``except ValueError`` handlers catch it.
    """


class RDKitNotAvailableError(NovoMDError, RuntimeError):
    """An operation needs RDKit but RDKit is not installed.

    Also a ``RuntimeError``, so generic ``except RuntimeError`` handlers catch it.
    """
@@ -0,0 +1,311 @@
1
+ Metadata-Version: 2.4
2
+ Name: novomd
3
+ Version: 1.2.0
4
+ Summary: Local-first molecular property calculator. Compute descriptors from SMILES with no server and no API key.
5
+ Author: NovoMCP
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/realariharrison/NovoMD
8
+ Project-URL: Documentation, https://github.com/realariharrison/NovoMD#readme
9
+ Project-URL: Repository, https://github.com/realariharrison/NovoMD.git
10
+ Project-URL: Issues, https://github.com/realariharrison/NovoMD/issues
11
+ Keywords: molecular-dynamics,computational-chemistry,cheminformatics,molecular-descriptors,smiles,openmd,rdkit,local-first
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
22
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
23
+ Classifier: Framework :: FastAPI
24
+ Requires-Python: >=3.10
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: numpy<2.0.0,>=1.24.0
28
+ Requires-Dist: scipy>=1.11.0
29
+ Requires-Dist: rdkit>=2023.3.1
30
+ Provides-Extra: server
31
+ Requires-Dist: fastapi>=0.104.0; extra == "server"
32
+ Requires-Dist: uvicorn[standard]>=0.24.0; extra == "server"
33
+ Requires-Dist: pydantic>=2.5.0; extra == "server"
34
+ Requires-Dist: pydantic-settings>=2.1.0; extra == "server"
35
+ Requires-Dist: python-dotenv>=1.0.0; extra == "server"
36
+ Requires-Dist: slowapi>=0.1.9; extra == "server"
37
+ Provides-Extra: dev
38
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
39
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
40
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
41
+ Requires-Dist: httpx>=0.25.0; extra == "dev"
42
+ Requires-Dist: flake8>=6.1.0; extra == "dev"
43
+ Requires-Dist: black>=23.9.0; extra == "dev"
44
+ Requires-Dist: isort>=5.12.0; extra == "dev"
45
+ Requires-Dist: mypy>=1.5.0; extra == "dev"
46
+ Requires-Dist: bandit>=1.7.0; extra == "dev"
47
+ Requires-Dist: safety>=2.3.0; extra == "dev"
48
+ Requires-Dist: pre-commit>=3.4.0; extra == "dev"
49
+ Dynamic: license-file
50
+
51
+ <div align="center">
52
+
53
+ <img src="docs/novomd-card.png" alt="NovoMD: local-first molecular property calculation" width="820" />
54
+
55
+ # NovoMD
56
+
57
+ **local-first molecular property calculation**
58
+
59
+ [![CI](https://github.com/realariharrison/NovoMD/actions/workflows/ci.yml/badge.svg)](https://github.com/realariharrison/NovoMD/actions/workflows/ci.yml)
60
+ [![License: MIT](https://img.shields.io/badge/License-MIT-B8704B.svg)](LICENSE)
61
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-2D2A26.svg)](https://www.python.org/downloads/)
62
+
63
+ </div>
64
+
65
+ NovoMD turns a SMILES string into a set of molecular descriptors. It runs on your own machine, with no account and no API key. Install it as a Python library, call it from the command line, or run it as a REST service.
66
+
67
+ ## What it is, and what it is not
68
+
69
+ NovoMD computes 32+ outcome-level descriptors from a 3D conformer: geometry, an energy estimate, electrostatics, surface and volume, atom counts, and the coordinates for visualization. The calculation is local and deterministic.
70
+
71
+ It does not run full molecular dynamics trajectories or docking, and it does not predict binding affinity or ADMET properties. The scope is deliberate. For that work, see [Beyond property calculation](#beyond-property-calculation) below.
72
+
73
+ ## Quick start
74
+
75
+ ### Python library
76
+
77
+ The shortest path. No server, no key.
78
+
79
+ ```bash
80
+ pip install novomd
81
+ ```
82
+
83
+ ```python
84
+ from novomd import calculate_properties
85
+
86
+ props = calculate_properties("CCO")
87
+ print(props["molecular_weight"]) # 46.07
88
+ print(props["radius_of_gyration"])
89
+ ```
90
+
91
+ Process a list in one call. A bad SMILES does not stop the batch; each item carries its own status.
92
+
93
+ ```python
94
+ from novomd import calculate_properties_batch
95
+
96
+ results = calculate_properties_batch(["CCO", "CC(=O)O", "NOT_VALID"])
97
+ for item in results:
98
+ if item["status"] == "ok":
99
+ print(item["smiles"], item["properties"]["molecular_weight"])
100
+ else:
101
+ print(item["smiles"], "->", item["error"])
102
+ ```
103
+
104
+ RDKit, NumPy, and SciPy install automatically. Everything runs on your hardware.
105
+
106
+ ### Command line
107
+
108
+ ```bash
109
+ novomd props "CCO"
110
+ novomd props "CC(=O)OC1=CC=CC=C1C(=O)O" --compact
111
+ novomd batch molecules.smi --out results.csv
112
+ ```
113
+
114
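+ `batch` reads a `.smi` file (one SMILES per line; blank lines and `#` comments are skipped) and writes the results as CSV, TSV, or JSON, chosen by the `--out` file extension. Without `--out`, JSON is printed to stdout.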
115
+
116
+ ### From an AI assistant (MCP)
117
+
118
+ NovoMD exposes a [Model Context Protocol](https://modelcontextprotocol.io/) endpoint, so assistants like Claude can query molecular properties directly.
119
+
120
+ **Endpoint:** `https://quantnexusai-novomd.hf.space/gradio_api/mcp/sse`
121
+
122
+ Add it as a custom connector in Claude (Settings, then Integrations), or point any MCP-compatible client at the same URL. Then ask:
123
+
124
+ - "Calculate the molecular properties of aspirin (CC(=O)OC1=CC=CC=C1C(=O)O)."
125
+ - "What is the dipole moment of caffeine?"
126
+
127
+ The endpoint works with Claude (web and desktop), Cursor, Continue.dev, and any client that speaks the [MCP specification](https://modelcontextprotocol.io/).
128
+
129
+ ### REST service (Docker)
130
+
131
+ For networked or containerized use, run the same core behind FastAPI.
132
+
133
+ ```bash
134
+ # pre-built image
135
+ docker run -d -p 8010:8010 \
136
+ -e NOVOMD_API_KEY="your-secure-api-key" \
137
+ --name novomd \
138
+ ghcr.io/realariharrison/novomd:latest
139
+
140
+ curl http://localhost:8010/health
141
+ ```
142
+
143
+ Or from source:
144
+
145
+ ```bash
146
+ pip install "novomd[server]"
147
+ uvicorn main:app --host 0.0.0.0 --port 8010
148
+ ```
149
+
150
+ ## What you get
151
+
152
+ 32+ descriptors, calculated from an embedded 3D structure:
153
+
154
+ - **Geometry** (7): radius of gyration, asphericity, eccentricity, inertia shape factor, span, principal moments of inertia
155
+ - **Energy** (6): conformer energy, van der Waals, electrostatic, torsion strain, angle strain, optimization delta
156
+ - **Electrostatics** (6): dipole moment, total charge, max and min partial charge, charge span, electrostatic potential
157
+ - **Surface and volume** (4): SASA, molecular volume, globularity, surface-to-volume ratio
158
+ - **Atom counts** (2): total atoms, heavy atoms
159
+ - **Visualization** (5+): full atomic coordinates, atom types, bond connectivity
160
+
161
+ Energy values are rough estimates scaled from the conformer's atom and bond counts, not the output of a force-field simulation. The descriptors are derived from real 3D coordinates, not mocked.
162
+
163
+ ## Library reference
164
+
165
+ ```python
166
+ from novomd import calculate_properties, calculate_properties_batch
167
+
168
+ # one molecule -> descriptor dict
169
+ calculate_properties("CCO", add_hydrogens=True, optimize_3d=True)
170
+
171
+ # many molecules -> list of {smiles, status, properties | error}
172
+ calculate_properties_batch(["CCO", "C"], max_batch_size=1000)
173
+ ```
174
+
175
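+ `calculate_properties` raises `InvalidSMILESError` for unparseable input and `RDKitNotAvailableError` if RDKit is missing. `calculate_properties_batch` isolates per-item failures instead of raising: each failed item comes back with `status: "error"` and its message. It raises `ValueError` only when the input is larger than `max_batch_size`.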
176
+
177
+ ## REST API
178
+
179
+ All endpoints except `/health` require an API key in the `X-API-Key` header.
180
+
181
+ | Endpoint | Method | Description |
182
+ |----------|--------|-------------|
183
+ | `/health` | GET | Health check (no auth) |
184
+ | `/status` | GET | Service status and capabilities |
185
+ | `/smiles-to-omd` | POST | Convert SMILES to OpenMD format and calculate 32+ properties |
186
+ | `/batch` | POST | Calculate properties for many SMILES in one call |
187
+ | `/atom2md` | POST | Convert PDB to OpenMD format |
188
+ | `/force-fields` | GET | List available force fields |
189
+ | `/force-field-types/{ff}` | GET | Atom types for a force field |
190
+
191
+ ```bash
192
+ curl -X POST http://localhost:8010/batch \
193
+ -H "Content-Type: application/json" \
194
+ -H "X-API-Key: your-api-key" \
195
+ -d '{"molecules": ["CCO", "CC(=O)O", "NOT_VALID"]}'
196
+ ```
197
+
198
+ ```json
199
+ {
200
+ "count": 3,
201
+ "succeeded": 2,
202
+ "failed": 1,
203
+ "results": [
204
+ {"smiles": "CCO", "status": "ok", "properties": {"molecular_weight": 46.07, "...": "..."}},
205
+ {"smiles": "CC(=O)O", "status": "ok", "properties": {"...": "..."}},
206
+ {"smiles": "NOT_VALID", "status": "error", "error": "Invalid SMILES string: 'NOT_VALID'"}
207
+ ]
208
+ }
209
+ ```
210
+
211
+ Batches are capped at 1,000 molecules per request and share the service rate limit.
212
+
213
+ ### Notebooks
214
+
215
+ | Notebook | Topic |
216
+ |----------|-------|
217
+ | [01_getting_started.ipynb](examples/01_getting_started.ipynb) | Basic usage and conversion |
218
+ | [02_molecular_properties.ipynb](examples/02_molecular_properties.ipynb) | Property analysis with pandas and matplotlib |
219
+ | [03_visualization.ipynb](examples/03_visualization.ipynb) | 3D visualization with plotly and py3Dmol |
220
+ | [04_batch_processing.ipynb](examples/04_batch_processing.ipynb) | One-call batch processing via the library and the REST endpoint |
221
+
222
+ ## Beyond property calculation
223
+
224
+ NovoMD computes molecular descriptors locally. It does not run full MD trajectories or docking, and it does not perform ADMET prediction or compliance scoring.
225
+
226
+ For those, the same team builds NovoMCP, a computational engine for AI-native discovery: 122M enriched compounds, docking and FEP pipelines, ADMET and compliance scoring, and an immutable audit trail on every step. NovoMD is open and always will be. NovoMCP is the production layer for work that outgrows it.
227
+
228
+ Learn more: [novomcp.com](https://novomcp.com)
229
+
230
+ ## Force fields
231
+
232
+ Supported force fields: `AMBER14`, `AMBER99SB`, `CHARMM36`, `OPLS-AA/M`, and `GROMOS 54A7`. Property values are conformer-derived and force-field-independent; the force field affects only the OpenMD output.
233
+
234
+ ## Configuration
235
+
236
+ Set these in a `.env` file or as environment variables (REST service only).
237
+
238
+ | Variable | Description | Default |
239
+ |----------|-------------|---------|
240
+ | `NOVOMD_API_KEY` | API authentication key (required) | - |
241
+ | `PORT` | Server port | 8010 |
242
+ | `HOST` | Server host | 0.0.0.0 |
243
+ | `LOG_LEVEL` | DEBUG, INFO, WARNING, ERROR | INFO |
244
+ | `CORS_ORIGINS` | Comma-separated origins, or "*" for all | localhost:3000,localhost:8080 |
245
+ | `RATE_LIMIT` | e.g. "100/minute", "1000/hour" | 100/minute |
246
+
247
+ ## Development
248
+
249
+ ```bash
250
+ pip install -e ".[dev,server]" # core + server + dev tools
251
+ pre-commit install
252
+
253
+ pytest tests/ -v
254
+ pytest tests/ --cov=novomd --cov=main --cov-report=term-missing
255
+
256
+ black . && isort . && flake8 .
257
+ mypy novomd main.py auth.py config.py
258
+ bandit -r . -x ./tests
259
+ ```
260
+
261
+ ```
262
+ NovoMD/
263
+ ├── novomd/ # importable library (framework-free core)
264
+ │ ├── core.py # property calculation
265
+ │ ├── batch.py # batch with per-item error isolation
266
+ │ ├── conversion.py # PDB to OpenMD
267
+ │ ├── cli.py # `novomd` command
268
+ │ └── exceptions.py
269
+ ├── main.py # FastAPI service (imports the core)
270
+ ├── config.py # configuration
271
+ ├── auth.py # API-key authentication
272
+ ├── tests/ # unit + integration tests
273
+ ├── examples/ # Jupyter notebooks
274
+ └── .github/workflows/ # CI and PyPI publish
275
+ ```
276
+
277
+ ## Security
278
+
279
+ NovoMD runs locally by default; no molecular data leaves your machine. For the REST service, use a strong `NOVOMD_API_KEY`, deploy behind TLS, and restrict `CORS_ORIGINS`. To report a vulnerability, see [SECURITY.md](SECURITY.md).
280
+
281
+ ## Contributing
282
+
283
+ Contributions are welcome. See [CONTRIBUTING.md](CONTRIBUTING.md).
284
+
285
+ - **Issues**: [GitHub Issues](https://github.com/realariharrison/NovoMD/issues)
286
+ - **Discussions**: [GitHub Discussions](https://github.com/realariharrison/NovoMD/discussions)
287
+
288
+ ## License
289
+
290
+ MIT. See [LICENSE](LICENSE).
291
+
292
+ Built with [FastAPI](https://fastapi.tiangolo.com/) and [RDKit](https://www.rdkit.org/).
293
+
294
+ ## Citation
295
+
296
+ ```bibtex
297
+ @software{novomd2025,
298
+ title = {NovoMD: Local-First Molecular Property Calculation},
299
+ author = {NovoMCP},
300
+ year = {2025},
301
+ url = {https://github.com/realariharrison/NovoMD}
302
+ }
303
+ ```
304
+
305
+ ---
306
+
307
+ <div align="center">
308
+
309
+ Built by the NovoMCP team
310
+
311
+ </div>
@@ -0,0 +1,13 @@
1
+ novomd/__about__.py,sha256=Wec8Imu4VTzXc_61Gus-tyToJZaRGWtgUupGymMEtR8,77
2
+ novomd/__init__.py,sha256=Uy4esheEsjAyw9B2ZG4S2a9bwzX-4WUWM9QUFwOQ-xo,1188
3
+ novomd/batch.py,sha256=OFt9X9HXE3FusO2cRkrj0hjO8aW-a6BJC-bcLpHQ4P8,2255
4
+ novomd/cli.py,sha256=WJx7_CZgHjDqKRjFEw4EmAsqLkCN_160-4O3Yrqu7vY,5371
5
+ novomd/conversion.py,sha256=k8R2035Vwi1Dsmq-ohFbRn6XWEBK-POYmmI33o1-fsk,3634
6
+ novomd/core.py,sha256=i52f3F9yo_IEhv27DSLWJUCsRD923_AiNt6kjTiqW-Y,9995
7
+ novomd/exceptions.py,sha256=G4klJsZ0OANnRx1WFldmhBxpfjRxh9GPM9fxZHI5D84,641
8
+ novomd-1.2.0.dist-info/licenses/LICENSE,sha256=LIuaWTSa_1KTax1i2aAlZLPGJTD6GhqvvWfyUHTW0Bo,1070
9
+ novomd-1.2.0.dist-info/METADATA,sha256=PYryS4Eo9UJ3XzKENha5C99Kflz-eM6sowfHylNtChY,11658
10
+ novomd-1.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ novomd-1.2.0.dist-info/entry_points.txt,sha256=jlSgQig2R_tvCK-KZDUww4hZIv1927VSbY6OSoRi8oY,43
12
+ novomd-1.2.0.dist-info/top_level.txt,sha256=RsP5EuTC6UYgf9TegjXZdYmvNdMKEwXS92L648zCW5A,7
13
+ novomd-1.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ novomd = novomd.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 QuantNexus AI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ novomd