labmate-mcp 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,572 @@
1
+ """
2
+ labmate-mcp chemistry utilities module.
3
+
4
+ Pure-computation tools:
5
+ - Isotope pattern calculation from molecular formula
6
+ - CAS registry number validation
7
+ - Scientific unit conversion
8
+ - Periodic table element lookup
9
+ - Buffer pH / Henderson-Hasselbalch calculations
10
+
11
+ No external API calls — all offline, zero latency.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import math
17
+ import re
18
+ from itertools import product as iter_product
19
+ from typing import Any
20
+
21
+ # =============================================================================
22
+ # Periodic table — element data (Z ≤ 118)
23
+ # =============================================================================
24
+
25
+ ELEMENTS: dict[str, dict[str, Any]] = {
26
+ "H": {"Z": 1, "name": "Hydrogen", "mass": 1.008, "group": 1, "period": 1, "block": "s", "en": 2.20, "config": "1s1", "category": "nonmetal"},
27
+ "He": {"Z": 2, "name": "Helium", "mass": 4.003, "group": 18, "period": 1, "block": "s", "en": None, "config": "1s2", "category": "noble gas"},
28
+ "Li": {"Z": 3, "name": "Lithium", "mass": 6.941, "group": 1, "period": 2, "block": "s", "en": 0.98, "config": "[He] 2s1", "category": "alkali metal"},
29
+ "Be": {"Z": 4, "name": "Beryllium", "mass": 9.012, "group": 2, "period": 2, "block": "s", "en": 1.57, "config": "[He] 2s2", "category": "alkaline earth"},
30
+ "B": {"Z": 5, "name": "Boron", "mass": 10.81, "group": 13, "period": 2, "block": "p", "en": 2.04, "config": "[He] 2s2 2p1", "category": "metalloid"},
31
+ "C": {"Z": 6, "name": "Carbon", "mass": 12.011, "group": 14, "period": 2, "block": "p", "en": 2.55, "config": "[He] 2s2 2p2", "category": "nonmetal"},
32
+ "N": {"Z": 7, "name": "Nitrogen", "mass": 14.007, "group": 15, "period": 2, "block": "p", "en": 3.04, "config": "[He] 2s2 2p3", "category": "nonmetal"},
33
+ "O": {"Z": 8, "name": "Oxygen", "mass": 15.999, "group": 16, "period": 2, "block": "p", "en": 3.44, "config": "[He] 2s2 2p4", "category": "nonmetal"},
34
+ "F": {"Z": 9, "name": "Fluorine", "mass": 18.998, "group": 17, "period": 2, "block": "p", "en": 3.98, "config": "[He] 2s2 2p5", "category": "halogen"},
35
+ "Ne": {"Z": 10, "name": "Neon", "mass": 20.180, "group": 18, "period": 2, "block": "p", "en": None, "config": "[He] 2s2 2p6", "category": "noble gas"},
36
+ "Na": {"Z": 11, "name": "Sodium", "mass": 22.990, "group": 1, "period": 3, "block": "s", "en": 0.93, "config": "[Ne] 3s1", "category": "alkali metal"},
37
+ "Mg": {"Z": 12, "name": "Magnesium", "mass": 24.305, "group": 2, "period": 3, "block": "s", "en": 1.31, "config": "[Ne] 3s2", "category": "alkaline earth"},
38
+ "Al": {"Z": 13, "name": "Aluminium", "mass": 26.982, "group": 13, "period": 3, "block": "p", "en": 1.61, "config": "[Ne] 3s2 3p1", "category": "post-transition metal"},
39
+ "Si": {"Z": 14, "name": "Silicon", "mass": 28.086, "group": 14, "period": 3, "block": "p", "en": 1.90, "config": "[Ne] 3s2 3p2", "category": "metalloid"},
40
+ "P": {"Z": 15, "name": "Phosphorus", "mass": 30.974, "group": 15, "period": 3, "block": "p", "en": 2.19, "config": "[Ne] 3s2 3p3", "category": "nonmetal"},
41
+ "S": {"Z": 16, "name": "Sulfur", "mass": 32.06, "group": 16, "period": 3, "block": "p", "en": 2.58, "config": "[Ne] 3s2 3p4", "category": "nonmetal"},
42
+ "Cl": {"Z": 17, "name": "Chlorine", "mass": 35.45, "group": 17, "period": 3, "block": "p", "en": 3.16, "config": "[Ne] 3s2 3p5", "category": "halogen"},
43
+ "Ar": {"Z": 18, "name": "Argon", "mass": 39.948, "group": 18, "period": 3, "block": "p", "en": None, "config": "[Ne] 3s2 3p6", "category": "noble gas"},
44
+ "K": {"Z": 19, "name": "Potassium", "mass": 39.098, "group": 1, "period": 4, "block": "s", "en": 0.82, "config": "[Ar] 4s1", "category": "alkali metal"},
45
+ "Ca": {"Z": 20, "name": "Calcium", "mass": 40.078, "group": 2, "period": 4, "block": "s", "en": 1.00, "config": "[Ar] 4s2", "category": "alkaline earth"},
46
+ "Sc": {"Z": 21, "name": "Scandium", "mass": 44.956, "group": 3, "period": 4, "block": "d", "en": 1.36, "config": "[Ar] 3d1 4s2", "category": "transition metal"},
47
+ "Ti": {"Z": 22, "name": "Titanium", "mass": 47.867, "group": 4, "period": 4, "block": "d", "en": 1.54, "config": "[Ar] 3d2 4s2", "category": "transition metal"},
48
+ "V": {"Z": 23, "name": "Vanadium", "mass": 50.942, "group": 5, "period": 4, "block": "d", "en": 1.63, "config": "[Ar] 3d3 4s2", "category": "transition metal"},
49
+ "Cr": {"Z": 24, "name": "Chromium", "mass": 51.996, "group": 6, "period": 4, "block": "d", "en": 1.66, "config": "[Ar] 3d5 4s1", "category": "transition metal"},
50
+ "Mn": {"Z": 25, "name": "Manganese", "mass": 54.938, "group": 7, "period": 4, "block": "d", "en": 1.55, "config": "[Ar] 3d5 4s2", "category": "transition metal"},
51
+ "Fe": {"Z": 26, "name": "Iron", "mass": 55.845, "group": 8, "period": 4, "block": "d", "en": 1.83, "config": "[Ar] 3d6 4s2", "category": "transition metal"},
52
+ "Co": {"Z": 27, "name": "Cobalt", "mass": 58.933, "group": 9, "period": 4, "block": "d", "en": 1.88, "config": "[Ar] 3d7 4s2", "category": "transition metal"},
53
+ "Ni": {"Z": 28, "name": "Nickel", "mass": 58.693, "group": 10, "period": 4, "block": "d", "en": 1.91, "config": "[Ar] 3d8 4s2", "category": "transition metal"},
54
+ "Cu": {"Z": 29, "name": "Copper", "mass": 63.546, "group": 11, "period": 4, "block": "d", "en": 1.90, "config": "[Ar] 3d10 4s1", "category": "transition metal"},
55
+ "Zn": {"Z": 30, "name": "Zinc", "mass": 65.38, "group": 12, "period": 4, "block": "d", "en": 1.65, "config": "[Ar] 3d10 4s2", "category": "transition metal"},
56
+ "Ga": {"Z": 31, "name": "Gallium", "mass": 69.723, "group": 13, "period": 4, "block": "p", "en": 1.81, "config": "[Ar] 3d10 4s2 4p1", "category": "post-transition metal"},
57
+ "Ge": {"Z": 32, "name": "Germanium", "mass": 72.63, "group": 14, "period": 4, "block": "p", "en": 2.01, "config": "[Ar] 3d10 4s2 4p2", "category": "metalloid"},
58
+ "As": {"Z": 33, "name": "Arsenic", "mass": 74.922, "group": 15, "period": 4, "block": "p", "en": 2.18, "config": "[Ar] 3d10 4s2 4p3", "category": "metalloid"},
59
+ "Se": {"Z": 34, "name": "Selenium", "mass": 78.96, "group": 16, "period": 4, "block": "p", "en": 2.55, "config": "[Ar] 3d10 4s2 4p4", "category": "nonmetal"},
60
+ "Br": {"Z": 35, "name": "Bromine", "mass": 79.904, "group": 17, "period": 4, "block": "p", "en": 2.96, "config": "[Ar] 3d10 4s2 4p5", "category": "halogen"},
61
+ "Kr": {"Z": 36, "name": "Krypton", "mass": 83.798, "group": 18, "period": 4, "block": "p", "en": 3.00, "config": "[Ar] 3d10 4s2 4p6", "category": "noble gas"},
62
+ "Rb": {"Z": 37, "name": "Rubidium", "mass": 85.468, "group": 1, "period": 5, "block": "s", "en": 0.82, "config": "[Kr] 5s1", "category": "alkali metal"},
63
+ "Sr": {"Z": 38, "name": "Strontium", "mass": 87.62, "group": 2, "period": 5, "block": "s", "en": 0.95, "config": "[Kr] 5s2", "category": "alkaline earth"},
64
+ "Y": {"Z": 39, "name": "Yttrium", "mass": 88.906, "group": 3, "period": 5, "block": "d", "en": 1.22, "config": "[Kr] 4d1 5s2", "category": "transition metal"},
65
+ "Zr": {"Z": 40, "name": "Zirconium", "mass": 91.224, "group": 4, "period": 5, "block": "d", "en": 1.33, "config": "[Kr] 4d2 5s2", "category": "transition metal"},
66
+ "Nb": {"Z": 41, "name": "Niobium", "mass": 92.906, "group": 5, "period": 5, "block": "d", "en": 1.60, "config": "[Kr] 4d4 5s1", "category": "transition metal"},
67
+ "Mo": {"Z": 42, "name": "Molybdenum", "mass": 95.96, "group": 6, "period": 5, "block": "d", "en": 2.16, "config": "[Kr] 4d5 5s1", "category": "transition metal"},
68
+ "Ru": {"Z": 44, "name": "Ruthenium", "mass": 101.07, "group": 8, "period": 5, "block": "d", "en": 2.20, "config": "[Kr] 4d7 5s1", "category": "transition metal"},
69
+ "Rh": {"Z": 45, "name": "Rhodium", "mass": 102.906, "group": 9, "period": 5, "block": "d", "en": 2.28, "config": "[Kr] 4d8 5s1", "category": "transition metal"},
70
+ "Pd": {"Z": 46, "name": "Palladium", "mass": 106.42, "group": 10, "period": 5, "block": "d", "en": 2.20, "config": "[Kr] 4d10", "category": "transition metal"},
71
+ "Ag": {"Z": 47, "name": "Silver", "mass": 107.868, "group": 11, "period": 5, "block": "d", "en": 1.93, "config": "[Kr] 4d10 5s1", "category": "transition metal"},
72
+ "Cd": {"Z": 48, "name": "Cadmium", "mass": 112.411, "group": 12, "period": 5, "block": "d", "en": 1.69, "config": "[Kr] 4d10 5s2", "category": "transition metal"},
73
+ "In": {"Z": 49, "name": "Indium", "mass": 114.818, "group": 13, "period": 5, "block": "p", "en": 1.78, "config": "[Kr] 4d10 5s2 5p1", "category": "post-transition metal"},
74
+ "Sn": {"Z": 50, "name": "Tin", "mass": 118.710, "group": 14, "period": 5, "block": "p", "en": 1.96, "config": "[Kr] 4d10 5s2 5p2", "category": "post-transition metal"},
75
+ "Sb": {"Z": 51, "name": "Antimony", "mass": 121.760, "group": 15, "period": 5, "block": "p", "en": 2.05, "config": "[Kr] 4d10 5s2 5p3", "category": "metalloid"},
76
+ "Te": {"Z": 52, "name": "Tellurium", "mass": 127.60, "group": 16, "period": 5, "block": "p", "en": 2.10, "config": "[Kr] 4d10 5s2 5p4", "category": "metalloid"},
77
+ "I": {"Z": 53, "name": "Iodine", "mass": 126.904, "group": 17, "period": 5, "block": "p", "en": 2.66, "config": "[Kr] 4d10 5s2 5p5", "category": "halogen"},
78
+ "Xe": {"Z": 54, "name": "Xenon", "mass": 131.293, "group": 18, "period": 5, "block": "p", "en": 2.60, "config": "[Kr] 4d10 5s2 5p6", "category": "noble gas"},
79
+ "Cs": {"Z": 55, "name": "Cesium", "mass": 132.905, "group": 1, "period": 6, "block": "s", "en": 0.79, "config": "[Xe] 6s1", "category": "alkali metal"},
80
+ "Ba": {"Z": 56, "name": "Barium", "mass": 137.327, "group": 2, "period": 6, "block": "s", "en": 0.89, "config": "[Xe] 6s2", "category": "alkaline earth"},
81
+ "W": {"Z": 74, "name": "Tungsten", "mass": 183.84, "group": 6, "period": 6, "block": "d", "en": 2.36, "config": "[Xe] 4f14 5d4 6s2", "category": "transition metal"},
82
+ "Re": {"Z": 75, "name": "Rhenium", "mass": 186.207, "group": 7, "period": 6, "block": "d", "en": 1.90, "config": "[Xe] 4f14 5d5 6s2", "category": "transition metal"},
83
+ "Os": {"Z": 76, "name": "Osmium", "mass": 190.23, "group": 8, "period": 6, "block": "d", "en": 2.20, "config": "[Xe] 4f14 5d6 6s2", "category": "transition metal"},
84
+ "Ir": {"Z": 77, "name": "Iridium", "mass": 192.217, "group": 9, "period": 6, "block": "d", "en": 2.20, "config": "[Xe] 4f14 5d7 6s2", "category": "transition metal"},
85
+ "Pt": {"Z": 78, "name": "Platinum", "mass": 195.084, "group": 10, "period": 6, "block": "d", "en": 2.28, "config": "[Xe] 4f14 5d9 6s1", "category": "transition metal"},
86
+ "Au": {"Z": 79, "name": "Gold", "mass": 196.967, "group": 11, "period": 6, "block": "d", "en": 2.54, "config": "[Xe] 4f14 5d10 6s1", "category": "transition metal"},
87
+ "Hg": {"Z": 80, "name": "Mercury", "mass": 200.59, "group": 12, "period": 6, "block": "d", "en": 2.00, "config": "[Xe] 4f14 5d10 6s2", "category": "transition metal"},
88
+ "Tl": {"Z": 81, "name": "Thallium", "mass": 204.38, "group": 13, "period": 6, "block": "p", "en": 1.62, "config": "[Xe] 4f14 5d10 6s2 6p1", "category": "post-transition metal"},
89
+ "Pb": {"Z": 82, "name": "Lead", "mass": 207.2, "group": 14, "period": 6, "block": "p", "en": 2.33, "config": "[Xe] 4f14 5d10 6s2 6p2", "category": "post-transition metal"},
90
+ "Bi": {"Z": 83, "name": "Bismuth", "mass": 208.980, "group": 15, "period": 6, "block": "p", "en": 2.02, "config": "[Xe] 4f14 5d10 6s2 6p3", "category": "post-transition metal"},
91
+ "U": {"Z": 92, "name": "Uranium", "mass": 238.029, "group": None, "period": 7, "block": "f", "en": 1.38, "config": "[Rn] 5f3 6d1 7s2", "category": "actinide"},
92
+ }
93
+
94
+ # Build reverse lookup by atomic number
95
+ _Z_TO_SYMBOL = {v["Z"]: k for k, v in ELEMENTS.items()}
96
+
97
+
98
def lookup_element(query: str) -> dict | list[dict] | None:
    """
    Look up an element by symbol, name, or atomic number.

    Resolution order:
      1. atomic number, when the query consists only of decimal digits;
      2. symbol (queries of up to two characters are matched case-insensitively);
      3. exact element name, case-insensitively;
      4. substring ("fuzzy") match against element names and symbols.

    Returns:
      - a dict (the ELEMENTS record plus a "symbol" key) for a unique match;
      - a list of such dicts when the fuzzy search matches several elements;
      - None when nothing matches or the query is blank.
    """
    q = query.strip()
    if not q:
        # "" is a substring of every name, so without this guard the fuzzy
        # search below would return the entire table.
        return None

    # By atomic number.  isdecimal() is used rather than isdigit() because
    # isdigit() also accepts characters such as "²" that int() cannot parse.
    if q.isdecimal():
        sym = _Z_TO_SYMBOL.get(int(q))
        if sym:
            return {"symbol": sym, **ELEMENTS[sym]}
        return None

    # By symbol (case-insensitive, capitalize first letter)
    sym = q.capitalize() if len(q) <= 2 else q
    if sym in ELEMENTS:
        return {"symbol": sym, **ELEMENTS[sym]}

    # By name
    q_lower = q.lower()
    for sym, data in ELEMENTS.items():
        if data["name"].lower() == q_lower:
            return {"symbol": sym, **data}

    # Fuzzy match: substring of the name or of the symbol
    matches = [
        {"symbol": sym, **data}
        for sym, data in ELEMENTS.items()
        if q_lower in data["name"].lower() or q_lower in sym.lower()
    ]
    if len(matches) == 1:
        return matches[0]
    return matches or None
125
+
126
+
127
+ # =============================================================================
128
+ # CAS registry number validation
129
+ # =============================================================================
130
+
131
+
132
def validate_cas(cas_string: str) -> dict:
    """
    Validate a CAS registry number (format: NNNNNNN-NN-N).

    Accepts the hyphenated form (2–7 digits, 2 digits, 1 check digit) or the
    same digits without hyphens (5–10 digits in total).  Surrounding whitespace
    and embedded spaces are ignored.  Only ASCII digits are accepted.

    The check digit must equal the sum of the other digits, each multiplied by
    its 1-based position counted from the right, modulo 10.

    Returns dict with 'valid', 'cas', 'error' keys.  'cas' is the hyphenated
    form whenever the input could be split into its three segments, otherwise
    the cleaned input.  'error' is None for a valid number.
    """
    cas_string = cas_string.strip().replace(" ", "")

    def _ascii_digits(text: str) -> bool:
        # str.isdigit() alone also accepts characters like "²" that int()
        # rejects, which used to crash the checksum computation.
        return text.isascii() and text.isdigit()

    def _bad_format() -> dict:
        return {"valid": False, "cas": cas_string, "error": "Invalid CAS format"}

    # Accept with or without hyphens
    if "-" in cas_string:
        parts = cas_string.split("-")
        if len(parts) != 3:
            return {"valid": False, "cas": cas_string, "error": "CAS must have format NNNNN-NN-N"}
        part1, part2, check = parts
        if not all(_ascii_digits(part) for part in parts):
            return _bad_format()
        if not 2 <= len(part1) <= 7 or len(part2) != 2 or len(check) != 1:
            return _bad_format()
        digits = part1 + part2
        check_digit = int(check)
    else:
        if not _ascii_digits(cas_string) or not 5 <= len(cas_string) <= 10:
            return _bad_format()
        digits = cas_string[:-1]
        check_digit = int(cas_string[-1])
        # Reconstruct hyphenated form
        cas_string = f"{digits[:-2]}-{digits[-2:]}-{check_digit}"

    if not digits.strip("0"):
        # An all-zero number trivially satisfies the checksum but is not a CAS RN.
        return _bad_format()

    # Validate check digit: sum of (position * digit) mod 10, counting from right
    total = sum(position * int(d) for position, d in enumerate(reversed(digits), start=1))
    computed_check = total % 10

    if computed_check != check_digit:
        return {
            "valid": False,
            "cas": cas_string,
            "error": f"Check digit mismatch: expected {check_digit}, computed {computed_check}",
        }

    return {"valid": True, "cas": cas_string, "error": None}
175
+
176
+
177
+ # =============================================================================
178
+ # Isotope pattern calculation
179
+ # =============================================================================
180
+
181
+ # Natural isotope abundances (mass, abundance) for common elements
182
+ _ISOTOPES: dict[str, list[tuple[float, float]]] = {
183
+ "H": [(1.00783, 0.999885), (2.01410, 0.000115)],
184
+ "C": [(12.00000, 0.9893), (13.00335, 0.0107)],
185
+ "N": [(14.00307, 0.99632), (15.00011, 0.00368)],
186
+ "O": [(15.99491, 0.99757), (16.99913, 0.00038), (17.99916, 0.00205)],
187
+ "S": [(31.97207, 0.9493), (32.97146, 0.0076), (33.96787, 0.0429), (35.96708, 0.0002)],
188
+ "P": [(30.97376, 1.0)],
189
+ "F": [(18.99840, 1.0)],
190
+ "Cl": [(34.96885, 0.7576), (36.96590, 0.2424)],
191
+ "Br": [(78.91834, 0.5069), (80.91629, 0.4931)],
192
+ "I": [(126.90447, 1.0)],
193
+ "Si": [(27.97693, 0.92223), (28.97649, 0.04685), (29.97377, 0.03092)],
194
+ "Se": [(73.92248, 0.0089), (75.91921, 0.0937), (76.91991, 0.0763),
195
+ (77.91731, 0.2377), (79.91652, 0.4961), (81.91670, 0.0873)],
196
+ "Na": [(22.98977, 1.0)],
197
+ "K": [(38.96371, 0.932581), (39.96400, 0.000117), (40.96183, 0.067302)],
198
+ "Fe": [(53.93961, 0.05845), (55.93494, 0.91754), (56.93540, 0.02119), (57.93328, 0.00282)],
199
+ "Cu": [(62.92960, 0.6915), (64.92779, 0.3085)],
200
+ "Zn": [(63.92914, 0.4863), (65.92603, 0.2790), (66.92713, 0.0410),
201
+ (67.92485, 0.1875), (69.92532, 0.0062)],
202
+ }
203
+
204
+ # Regex for molecular formula parsing: e.g., C14H19N3O4, Ca(OH)2, etc.
205
+ _FORMULA_RE = re.compile(r"([A-Z][a-z]?)(\d*)")
206
+
207
+
208
+ def _parse_formula_to_elements(formula: str) -> dict[str, int]:
209
+ """Parse molecular formula string → {element: count}."""
210
+ elements: dict[str, int] = {}
211
+ # Simple parser (handles flat formulas like C9H8O4, not nested parentheses)
212
+ # For parentheses, expand first
213
+ expanded = _expand_parentheses(formula)
214
+ for match in _FORMULA_RE.finditer(expanded):
215
+ elem = match.group(1)
216
+ count = int(match.group(2)) if match.group(2) else 1
217
+ if elem:
218
+ elements[elem] = elements.get(elem, 0) + count
219
+ return elements
220
+
221
+
222
+ def _expand_parentheses(formula: str) -> str:
223
+ """Expand parenthesized groups: Ca(OH)2 → CaO2H2."""
224
+ while "(" in formula:
225
+ # Find innermost parentheses
226
+ m = re.search(r"\(([^()]+)\)(\d*)", formula)
227
+ if not m:
228
+ break
229
+ inner = m.group(1)
230
+ mult = int(m.group(2)) if m.group(2) else 1
231
+ # Multiply each element count
232
+ expanded = ""
233
+ for em in _FORMULA_RE.finditer(inner):
234
+ elem = em.group(1)
235
+ count = int(em.group(2)) if em.group(2) else 1
236
+ if elem:
237
+ expanded += f"{elem}{count * mult}"
238
+ formula = formula[:m.start()] + expanded + formula[m.end():]
239
+ return formula
240
+
241
+
242
def calculate_isotope_pattern(
    formula: str = "",
    smiles: str = "",
    charge: int = 1,
    top_n: int = 10,
    min_abundance: float = 0.001,
) -> dict:
    """
    Calculate isotope distribution from molecular formula or SMILES.

    Args:
        formula: Molecular formula (e.g., 'C9H8O4')
        smiles: SMILES string (alternative to formula, uses RDKit)
        charge: Charge state for m/z calculation (default 1).  m/z is the
            neutral mass divided by |charge|; no electron or adduct mass is
            added or removed.  A charge of 0 reports plain masses.
        top_n: Max number of peaks to return; the most abundant peaks are kept
        min_abundance: Minimum abundance relative to the base peak (0–1)

    Returns dict with formula, charge, monoisotopic_mass, monoisotopic_mz,
    average_mass and pattern (list of {mz, abundance, relative}, sorted by m/z;
    'abundance' is a percentage of all molecules, 'relative' a percentage of
    the base peak).  On failure the dict contains a single 'error' key.

    Elements without an isotope table are treated as a single isotope at their
    average atomic mass from ELEMENTS.
    """
    if smiles and not formula:
        try:
            from rdkit import Chem
            from rdkit.Chem import rdMolDescriptors
        except ImportError:
            return {"error": "RDKit not available; provide formula instead"}
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return {"error": f"Invalid SMILES: {smiles}"}
        formula = rdMolDescriptors.CalcMolFormula(mol)

    if not formula:
        return {"error": "Provide either formula or smiles"}

    elements = _parse_formula_to_elements(formula)
    if not elements:
        return {"error": f"Could not parse formula: {formula}"}

    # Resolve the isotope list of every element up front so that an unknown
    # symbol is reported instead of silently contributing a mass of 0.
    isotopes_by_element: dict[str, list[tuple[float, float]]] = {}
    for elem in elements:
        isotopes = _ISOTOPES.get(elem)
        if not isotopes:
            record = ELEMENTS.get(elem)
            if record is None:
                return {"error": f"Unknown element '{elem}' in formula: {formula}"}
            isotopes = [(record["mass"], 1.0)]
        isotopes_by_element[elem] = isotopes

    # Monoisotopic mass uses the MOST ABUNDANT isotope of each element, which
    # is not always the lightest one (e.g. Fe, Se).  The average mass is the
    # abundance-weighted mean, computed directly so peak pruning cannot skew it.
    mono_mass = 0.0
    avg_mass = 0.0
    for elem, count in elements.items():
        isotopes = isotopes_by_element[elem]
        mono_mass += count * max(isotopes, key=lambda iso: iso[1])[0]
        total_abundance = sum(p for _, p in isotopes)
        avg_mass += count * sum(m * p for m, p in isotopes) / total_abundance

    # Build the distribution by convolving in one atom at a time.
    pattern = [(0.0, 1.0)]  # (mass, probability)
    for elem, count in elements.items():
        isotopes = isotopes_by_element[elem]
        for _ in range(count):
            combined: dict[float, float] = {}
            for mass_a, prob_a in pattern:
                for mass_iso, prob_iso in isotopes:
                    # Rounding lets identical isotopologues share one dict key.
                    combined_mass = round(mass_a + mass_iso, 5)
                    combined[combined_mass] = combined.get(combined_mass, 0.0) + prob_a * prob_iso
            # Prune low-abundance peaks for efficiency
            pattern = [(m, p) for m, p in combined.items() if p > 1e-8]

    pattern.sort(key=lambda peak: peak[0])

    # Aggregate peaks within 0.01 Da of the previous (merged) peak, using the
    # probability-weighted mean mass for the merged peak.
    aggregated: list[tuple[float, float]] = []
    for mass, prob in pattern:
        if aggregated and abs(mass - aggregated[-1][0]) < 0.01:
            old_m, old_p = aggregated[-1]
            total_p = old_p + prob
            aggregated[-1] = ((old_m * old_p + mass * prob) / total_p, total_p)
        else:
            aggregated.append((mass, prob))

    max_prob = max((p for _, p in aggregated), default=1.0)
    z = abs(charge) if charge != 0 else 1

    # Keep the top_n most abundant peaks above the threshold, reported by m/z.
    candidates = [(mass, prob) for mass, prob in aggregated if prob / max_prob >= min_abundance]
    strongest = sorted(candidates, key=lambda peak: peak[1], reverse=True)[:max(top_n, 0)]
    strongest.sort(key=lambda peak: peak[0])

    result_peaks = [
        {
            "mz": round(mass / z, 4),
            "abundance": round(prob * 100, 4),
            "relative": round(prob / max_prob * 100, 2),
        }
        for mass, prob in strongest
    ]

    return {
        "formula": formula,
        "charge": charge,
        "monoisotopic_mass": round(mono_mass, 4),
        "monoisotopic_mz": round(mono_mass / z, 4),
        "average_mass": round(avg_mass, 4),
        "pattern": result_peaks,
    }
339
+
340
+
341
+ # =============================================================================
342
+ # Scientific unit conversion
343
+ # =============================================================================
344
+
345
+ # Conversion factors to SI base units
346
+ _MASS_TO_KG = {
347
+ "kg": 1, "g": 1e-3, "mg": 1e-6, "μg": 1e-9, "ug": 1e-9, "ng": 1e-12,
348
+ "pg": 1e-15, "fg": 1e-18, "lb": 0.453592, "oz": 0.0283495,
349
+ "Da": 1.66054e-27, "kDa": 1.66054e-24, "amu": 1.66054e-27,
350
+ }
351
+ _VOLUME_TO_L = {
352
+ "L": 1, "l": 1, "mL": 1e-3, "ml": 1e-3, "μL": 1e-6, "uL": 1e-6,
353
+ "nL": 1e-9, "pL": 1e-12, "dL": 0.1, "kL": 1000,
354
+ "gal": 3.78541, "qt": 0.946353, "pt": 0.473176, "fl_oz": 0.0295735,
355
+ "cm3": 1e-3, "m3": 1000, "mm3": 1e-6,
356
+ }
357
+ _LENGTH_TO_M = {
358
+ "m": 1, "km": 1000, "cm": 0.01, "mm": 1e-3, "μm": 1e-6, "um": 1e-6,
359
+ "nm": 1e-9, "pm": 1e-12, "Å": 1e-10, "A": 1e-10,
360
+ "in": 0.0254, "ft": 0.3048, "mi": 1609.34,
361
+ }
362
+ _ENERGY_TO_J = {
363
+ "J": 1, "kJ": 1000, "cal": 4.184, "kcal": 4184, "eV": 1.60218e-19,
364
+ "keV": 1.60218e-16, "MeV": 1.60218e-13,
365
+ "kJ/mol": 1000/6.02214e23, "kcal/mol": 4184/6.02214e23,
366
+ "Eh": 4.35975e-18, "hartree": 4.35975e-18,
367
+ "cm-1": 1.98645e-23, "wavenumber": 1.98645e-23,
368
+ }
369
+ _PRESSURE_TO_PA = {
370
+ "Pa": 1, "kPa": 1000, "MPa": 1e6, "bar": 1e5, "mbar": 100,
371
+ "atm": 101325, "torr": 133.322, "mmHg": 133.322, "psi": 6894.76,
372
+ }
373
+ _TIME_TO_S = {
374
+ "s": 1, "ms": 1e-3, "μs": 1e-6, "us": 1e-6, "ns": 1e-9, "ps": 1e-12,
375
+ "fs": 1e-15, "min": 60, "h": 3600, "hr": 3600, "d": 86400, "day": 86400,
376
+ }
377
+ _AMOUNT_TO_MOL = {
378
+ "mol": 1, "mmol": 1e-3, "μmol": 1e-6, "umol": 1e-6,
379
+ "nmol": 1e-9, "pmol": 1e-12, "fmol": 1e-15, "kmol": 1000,
380
+ }
381
+
382
+ _UNIT_CATEGORIES = {
383
+ "mass": _MASS_TO_KG,
384
+ "volume": _VOLUME_TO_L,
385
+ "length": _LENGTH_TO_M,
386
+ "energy": _ENERGY_TO_J,
387
+ "pressure": _PRESSURE_TO_PA,
388
+ "time": _TIME_TO_S,
389
+ "amount": _AMOUNT_TO_MOL,
390
+ }
391
+
392
+
393
def _find_category(unit: str) -> tuple[str, dict] | None:
    """Return (category name, factor table) for the first category listing *unit*, else None."""
    hits = (
        (name, factors)
        for name, factors in _UNIT_CATEGORIES.items()
        if unit in factors
    )
    return next(hits, None)
399
+
400
+
401
def convert_units(value: float, from_unit: str, to_unit: str) -> dict:
    """
    Convert between scientific units.

    Supports: mass (g↔kg↔mg↔Da), volume (L↔mL↔μL), length (m↔nm↔Å),
    energy (J↔kJ↔kcal↔eV↔cm⁻¹↔hartree), pressure (Pa↔atm↔bar↔torr),
    time (s↔ms↔min↔h), amount (mol↔mmol↔μmol), and temperature (°C↔°F↔K).

    Unit names are case-sensitive.  The micro sign (U+00B5) and Greek mu
    (U+03BC) are interchangeable, as are the Ångström sign (U+212B) and "Å".

    Returns a dict with 'value', 'from_unit', 'result', 'to_unit' and
    'category', or a dict with a single 'error' key when a unit is unknown,
    the two units measure different quantities, or a temperature lies below
    absolute zero.  Temperature results are rounded to 6 decimals; all other
    results to 12 significant digits, which removes binary floating-point
    noise without affecting meaningful precision.
    """
    fu = from_unit.strip()
    tu = to_unit.strip()

    # Temperature special case: offsets are involved, so factors do not apply.
    temp_units = {"C", "°C", "F", "°F", "K"}
    if fu in temp_units or tu in temp_units:
        fu_norm = fu.replace("°", "")
        tu_norm = tu.replace("°", "")
        # Convert to Kelvin first
        if fu_norm == "C":
            k = value + 273.15
        elif fu_norm == "F":
            k = (value - 32) * 5/9 + 273.15
        elif fu_norm == "K":
            k = value
        else:
            return {"error": f"Unknown temperature unit: {fu}"}
        # Small tolerance so that exactly 0 K survives float round-off.
        if k < -1e-9:
            return {"error": f"Temperature below absolute zero: {value} {fu}"}
        # Convert from Kelvin
        if tu_norm == "C":
            result = k - 273.15
        elif tu_norm == "F":
            result = (k - 273.15) * 9/5 + 32
        elif tu_norm == "K":
            result = k
        else:
            return {"error": f"Unknown temperature unit: {tu}"}
        return {
            "value": value, "from_unit": from_unit,
            "result": round(result, 6), "to_unit": to_unit,
            "category": "temperature",
        }

    def _resolve(unit: str) -> tuple[str, float] | None:
        """Return (category, factor) for *unit*, trying look-alike spellings."""
        spellings = [unit]
        for a, b in (("\u00b5", "\u03bc"), ("\u212b", "\u00c5")):
            spellings += [unit.replace(a, b), unit.replace(b, a)]
        for spelling in spellings:
            found = _find_category(spelling)
            if found:
                return found[0], found[1][spelling]
        return None

    def _unknown(unit: str) -> dict:
        supported = ", ".join(u for table in _UNIT_CATEGORIES.values() for u in table)
        return {"error": f"Unknown unit: {unit}. Supported: {supported}"}

    source = _resolve(fu)
    if not source:
        return _unknown(fu)
    target = _resolve(tu)
    if not target:
        return _unknown(tu)

    if source[0] != target[0]:
        return {"error": f"Cannot convert between {source[0]} ({fu}) and {target[0]} ({tu})"}

    # Convert via the category's reference unit: value * from_factor / to_factor
    result = value * source[1] / target[1]

    return {
        "value": value, "from_unit": from_unit,
        "result": float(f"{result:.12g}"), "to_unit": to_unit,
        "category": source[0],
    }
461
+
462
+
463
+ # =============================================================================
464
+ # Buffer pH / Henderson-Hasselbalch
465
+ # =============================================================================
466
+
467
# Common buffer pKa values at 25°C, as (key, display name, pKa, acid / base pair).
_BUFFER_ROWS = [
    ("phosphate_1", "Phosphoric acid (pKa₁)", 2.15, "H₃PO₄ / H₂PO₄⁻"),
    ("citrate_1", "Citric acid (pKa₁)", 3.13, "H₃Cit / H₂Cit⁻"),
    ("formate", "Formic acid", 3.75, "HCOOH / HCOO⁻"),
    ("citrate_2", "Citric acid (pKa₂)", 4.76, "H₂Cit⁻ / HCit²⁻"),
    ("acetate", "Acetic acid", 4.76, "CH₃COOH / CH₃COO⁻"),
    ("citrate_3", "Citric acid (pKa₃)", 6.40, "HCit²⁻ / Cit³⁻"),
    ("MES", "MES", 6.15, "MES-H / MES"),
    ("PIPES", "PIPES", 6.76, "PIPES-H / PIPES"),
    ("phosphate_2", "Phosphoric acid (pKa₂)", 7.20, "H₂PO₄⁻ / HPO₄²⁻"),
    ("MOPS", "MOPS", 7.20, "MOPS-H / MOPS"),
    ("HEPES", "HEPES", 7.55, "HEPES-H / HEPES"),
    ("imidazole", "Imidazole", 6.99, "ImH⁺ / Im"),
    ("Tris", "Tris", 8.07, "TrisH⁺ / Tris"),
    ("TAPS", "TAPS", 8.44, "TAPS-H / TAPS"),
    ("borate", "Boric acid", 9.24, "B(OH)₃ / B(OH)₄⁻"),
    ("CHES", "CHES", 9.50, "CHES-H / CHES"),
    ("glycine_amino", "Glycine (amino)", 9.60, "Gly⁺ / Gly⁻"),
    ("CAPS", "CAPS", 10.40, "CAPS-H / CAPS"),
    ("carbonate_1", "Carbonic acid (pKa₁)", 6.35, "H₂CO₃ / HCO₃⁻"),
    ("carbonate_2", "Carbonic acid (pKa₂)", 10.33, "HCO₃⁻ / CO₃²⁻"),
    ("phosphate_3", "Phosphoric acid (pKa₃)", 12.32, "HPO₄²⁻ / PO₄³⁻"),
]

# Lookup table keyed by buffer identifier.  Iteration order follows the rows
# above, which is also the order substring searches over this table see.
BUFFER_PKA: dict[str, dict] = {
    key: {"name": name, "pKa": pka, "species": species}
    for key, name, pka, species in _BUFFER_ROWS
}
491
+
492
+
493
def calculate_buffer_ph(
    pKa: float | None = None,
    buffer_name: str | None = None,
    acid_conc: float | None = None,
    base_conc: float | None = None,
    ratio_base_acid: float | None = None,
    target_ph: float | None = None,
) -> dict:
    """
    Henderson-Hasselbalch calculations.

    Mode 1: Given pKa + concentrations/ratio → calculate pH
    Mode 2: Given pKa + target pH → calculate required ratio
    (Mode 2 takes precedence when target_ph is given.)

    Args:
        pKa: pKa of the buffer (or provide buffer_name to look up)
        buffer_name: Look up pKa from known buffers (e.g., 'Tris', 'HEPES', 'phosphate_2').
            Ignored for the lookup when pKa is given.  Resolution order: exact
            key, case-insensitive exact key or name, then substring match.
        acid_conc: Concentration of acid form (any consistent unit)
        base_conc: Concentration of base form
        ratio_base_acid: [A⁻]/[HA] ratio (alternative to concentrations)
        target_ph: Target pH to calculate required ratio

    Returns a dict with 'pKa', 'buffer', 'species' and 'buffer_range' plus the
    mode-specific keys, a 'warning' when the pH is more than one unit from the
    pKa, and 'other_matches' (list of further matching buffer keys) when the
    buffer name was ambiguous.  Invalid input yields a dict with only 'error'.
    """
    species = ""
    other_matches: list[str] = []

    # Resolve pKa
    if buffer_name and pKa is None:
        buf = BUFFER_PKA.get(buffer_name)
        if not buf:
            q = buffer_name.strip().lower()
            # Prefer a case-insensitive exact hit over a substring hit so that
            # e.g. "tris" can never be captured by an unrelated longer name.
            exact = [k for k, v in BUFFER_PKA.items() if q in (k.lower(), v["name"].lower())]
            partial = [k for k, v in BUFFER_PKA.items() if q in k.lower() or q in v["name"].lower()]
            hits = exact or partial
            if not hits:
                avail = ", ".join(BUFFER_PKA.keys())
                return {"error": f"Buffer '{buffer_name}' not found. Available: {avail}"}
            buf = BUFFER_PKA[hits[0]]
            # The first hit is used; the rest are reported so that callers can
            # notice an ambiguous name such as "phosphate".
            other_matches = hits[1:]
        pKa = buf["pKa"]
        species = buf.get("species", "")

    if pKa is None:
        return {"error": "Provide pKa or buffer_name"}

    result: dict[str, Any] = {
        "pKa": pKa,
        "buffer": buffer_name,
        "species": species,
        "buffer_range": f"{pKa - 1:.1f} – {pKa + 1:.1f}",
    }
    if other_matches:
        result["other_matches"] = other_matches

    # Mode 2: target pH → ratio
    if target_ph is not None:
        try:
            # Float base so an absurd exponent raises OverflowError here
            # instead of producing an unformattable giant integer.
            ratio = 10.0 ** (target_ph - pKa)
        except OverflowError:
            return {"error": f"Target pH {target_ph} is too far from pKa {pKa} to compute a ratio"}
        result["target_pH"] = target_ph
        result["required_ratio_base_acid"] = round(ratio, 4)
        result["note"] = f"Mix [A⁻]/[HA] = {ratio:.4f} to achieve pH {target_ph}"
        if abs(target_ph - pKa) > 1:
            result["warning"] = f"Target pH {target_ph} is outside buffer range ({pKa-1:.1f}–{pKa+1:.1f}). Buffer capacity will be poor."
        return result

    # Mode 1: concentrations/ratio → pH
    if ratio_base_acid is None:
        if acid_conc is None or base_conc is None:
            return {"error": "Provide acid_conc + base_conc, or ratio_base_acid, or target_ph"}
        if acid_conc <= 0:
            return {"error": "acid_conc must be > 0"}
        ratio_base_acid = base_conc / acid_conc

    if ratio_base_acid <= 0:
        return {"error": "Ratio [base]/[acid] must be > 0"}

    ph = pKa + math.log10(ratio_base_acid)
    result["ratio_base_acid"] = round(ratio_base_acid, 4)
    result["calculated_pH"] = round(ph, 2)
    if acid_conc is not None:
        result["acid_concentration"] = acid_conc
    if base_conc is not None:
        result["base_concentration"] = base_conc
    if abs(ph - pKa) > 1:
        result["warning"] = f"pH {ph:.2f} is outside optimal buffer range ({pKa-1:.1f}–{pKa+1:.1f})"

    return result