pychnosz 1.1.4__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. pychnosz/__init__.py +129 -0
  2. pychnosz/biomolecules/__init__.py +29 -0
  3. pychnosz/biomolecules/ionize_aa.py +197 -0
  4. pychnosz/biomolecules/proteins.py +595 -0
  5. pychnosz/core/__init__.py +46 -0
  6. pychnosz/core/affinity.py +1256 -0
  7. pychnosz/core/animation.py +593 -0
  8. pychnosz/core/balance.py +334 -0
  9. pychnosz/core/basis.py +716 -0
  10. pychnosz/core/diagram.py +3336 -0
  11. pychnosz/core/equilibrate.py +813 -0
  12. pychnosz/core/equilibrium.py +554 -0
  13. pychnosz/core/info.py +821 -0
  14. pychnosz/core/retrieve.py +364 -0
  15. pychnosz/core/speciation.py +580 -0
  16. pychnosz/core/species.py +599 -0
  17. pychnosz/core/subcrt.py +1700 -0
  18. pychnosz/core/thermo.py +593 -0
  19. pychnosz/core/unicurve.py +1226 -0
  20. pychnosz/data/__init__.py +11 -0
  21. pychnosz/data/add_obigt.py +327 -0
  22. pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
  23. pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
  24. pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
  25. pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
  26. pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
  27. pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
  28. pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
  29. pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
  30. pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
  31. pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
  32. pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
  33. pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
  34. pychnosz/data/extdata/Berman/sympy.R +99 -0
  35. pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
  36. pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
  37. pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
  38. pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
  39. pychnosz/data/extdata/OBIGT/AD.csv +25 -0
  40. pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
  41. pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
  42. pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
  43. pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
  44. pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
  45. pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
  46. pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
  47. pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
  48. pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
  49. pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
  50. pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
  51. pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
  52. pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
  53. pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
  54. pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
  55. pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
  56. pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
  57. pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
  58. pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
  59. pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
  60. pychnosz/data/extdata/misc/BZA10.csv +5 -0
  61. pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
  62. pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
  63. pychnosz/data/extdata/misc/LA19_test.csv +7 -0
  64. pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
  65. pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
  66. pychnosz/data/extdata/misc/PM90.csv +7 -0
  67. pychnosz/data/extdata/misc/RH95.csv +23 -0
  68. pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
  69. pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
  70. pychnosz/data/extdata/misc/SK95.csv +55 -0
  71. pychnosz/data/extdata/misc/SOJSH.csv +61 -0
  72. pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
  73. pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
  74. pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
  75. pychnosz/data/extdata/misc/bluered.txt +1000 -0
  76. pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
  77. pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
  78. pychnosz/data/extdata/protein/Cas/download.R +34 -0
  79. pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
  80. pychnosz/data/extdata/protein/POLG.csv +12 -0
  81. pychnosz/data/extdata/protein/TBD+05.csv +393 -0
  82. pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
  83. pychnosz/data/extdata/protein/rubisco.csv +28 -0
  84. pychnosz/data/extdata/protein/rubisco.fasta +239 -0
  85. pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
  86. pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
  87. pychnosz/data/extdata/src/README.txt +5 -0
  88. pychnosz/data/extdata/taxonomy/names.dmp +215 -0
  89. pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
  90. pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
  91. pychnosz/data/extdata/thermo/buffer.csv +40 -0
  92. pychnosz/data/extdata/thermo/element.csv +135 -0
  93. pychnosz/data/extdata/thermo/groups.csv +6 -0
  94. pychnosz/data/extdata/thermo/opt.csv +2 -0
  95. pychnosz/data/extdata/thermo/protein.csv +506 -0
  96. pychnosz/data/extdata/thermo/refs.csv +343 -0
  97. pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
  98. pychnosz/data/loader.py +431 -0
  99. pychnosz/data/mod_obigt.py +322 -0
  100. pychnosz/data/obigt.py +471 -0
  101. pychnosz/data/worm.py +228 -0
  102. pychnosz/fortran/__init__.py +16 -0
  103. pychnosz/fortran/h2o92.dll +0 -0
  104. pychnosz/fortran/h2o92_interface.py +527 -0
  105. pychnosz/geochemistry/__init__.py +21 -0
  106. pychnosz/geochemistry/minerals.py +514 -0
  107. pychnosz/geochemistry/redox.py +500 -0
  108. pychnosz/models/__init__.py +47 -0
  109. pychnosz/models/archer_wang.py +165 -0
  110. pychnosz/models/berman.py +309 -0
  111. pychnosz/models/cgl.py +381 -0
  112. pychnosz/models/dew.py +997 -0
  113. pychnosz/models/hkf.py +523 -0
  114. pychnosz/models/hkf_helpers.py +222 -0
  115. pychnosz/models/iapws95.py +1113 -0
  116. pychnosz/models/supcrt92_fortran.py +238 -0
  117. pychnosz/models/water.py +480 -0
  118. pychnosz/utils/__init__.py +27 -0
  119. pychnosz/utils/expression.py +1074 -0
  120. pychnosz/utils/formula.py +830 -0
  121. pychnosz/utils/formula_ox.py +227 -0
  122. pychnosz/utils/reset.py +33 -0
  123. pychnosz/utils/units.py +259 -0
  124. pychnosz-1.1.4.dist-info/METADATA +197 -0
  125. pychnosz-1.1.4.dist-info/RECORD +128 -0
  126. pychnosz-1.1.4.dist-info/WHEEL +5 -0
  127. pychnosz-1.1.4.dist-info/licenses/LICENSE.txt +19 -0
  128. pychnosz-1.1.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,830 @@
1
+ """
2
+ Chemical formula parsing and manipulation utilities.
3
+
4
+ This module provides Python equivalents of the R functions in makeup.R and util.formula.R:
5
+ - makeup(): Parse chemical formulas and return elemental composition
6
+ - Formula validation and parsing
7
+ - Molecular weight and entropy calculations
8
+ - Stoichiometric matrix operations
9
+
10
+ Author: CHNOSZ Python port
11
+ """
12
+
13
+ import pandas as pd
14
+ import numpy as np
15
+ from typing import Union, List, Dict, Any, Optional, Tuple
16
+ import re
17
+ import warnings
18
+
19
+ from ..core.thermo import thermo
20
+
21
+
22
class FormulaError(Exception):
    """Error raised when a chemical formula cannot be parsed or resolved."""
25
+
26
+
27
def makeup(formula: Union[str, int, List[Union[str, int]]],
           multiplier: Union[float, List[float]] = 1.0,
           sum_formulas: bool = False,
           count_zero: bool = False) -> Union[Dict[str, float], List[Dict[str, float]]]:
    """
    Return elemental makeup (counts) of chemical formula(s).

    Handles formulas with parenthetical subformulas, suffixed formulas,
    charges, and fractional coefficients.

    Parameters
    ----------
    formula : str, int, or list
        Chemical formula(s) or species index(es). Python and NumPy integers
        are treated as labels into ``thermo().obigt``. A dict with string
        keys (or a list whose first item is such a dict) is returned
        unchanged.
    multiplier : float or list of float
        Multiplier(s) to apply to formula coefficients. A list, tuple or
        NumPy array must have length 1 or one entry per formula.
    sum_formulas : bool
        If True (and ``formula`` is a list), return the sum of all formulas
    count_zero : bool
        If True (and ``formula`` is a list), include zero counts for all
        elements appearing in any formula

    Returns
    -------
    dict or list of dict
        Elemental composition(s) as {element: count} dictionaries. Counts of
        parsed formulas are floats. A missing formula (None/NaN) yields None,
        or an all-NaN dictionary when ``count_zero`` is set.

    Raises
    ------
    ValueError
        If the number of multipliers matches neither 1 nor the number of
        formulas.
    FormulaError
        If a species index cannot be resolved or a formula cannot be parsed.

    Examples
    --------
    >>> makeup("H2O")
    {'H': 2.0, 'O': 1.0}

    >>> makeup("Ca(OH)2")
    {'O': 2.0, 'H': 2.0, 'Ca': 1.0}

    >>> makeup(["H2O", "CO2"])
    [{'H': 2.0, 'O': 1.0}, {'C': 1.0, 'O': 2.0}]
    """
    # Handle matrix input
    # NOTE(review): rows are passed on as bare 1-D arrays without element
    # names; confirm that callers actually rely on this branch.
    if isinstance(formula, np.ndarray) and formula.ndim == 2:
        return [makeup(formula[i, :]) for i in range(formula.shape[0])]

    # Already-parsed compositions are returned unchanged
    if isinstance(formula, dict) and all(isinstance(k, str) for k in formula.keys()):
        return formula
    if isinstance(formula, list) and len(formula) > 0:
        if isinstance(formula[0], dict) and all(isinstance(k, str) for k in formula[0].keys()):
            return formula

    # Normalise the multiplier to a list; tuples and arrays are sequences of
    # multipliers, not a single value to be wrapped.
    if isinstance(multiplier, np.ndarray):
        multiplier = multiplier.ravel().tolist()
    elif isinstance(multiplier, tuple):
        multiplier = list(multiplier)
    elif not isinstance(multiplier, list):
        multiplier = [multiplier]

    # Handle multiple formulas
    if isinstance(formula, list):
        if len(multiplier) != 1 and len(multiplier) != len(formula):
            raise ValueError("multiplier does not have length = 1 or length = number of formulas")
        if len(multiplier) == 1:
            multiplier = multiplier * len(formula)

        # Each entry is resolved by the single-formula path below, which
        # looks up species indices itself and maps None/NaN to None instead
        # of turning them into the strings "None"/"nan".
        results = [makeup(item, factor) for item, factor in zip(formula, multiplier)]

        # Elements in order of first appearance, so the output is deterministic
        seen_elements = []
        for result in results:
            if result is not None:
                for element in result:
                    if element not in seen_elements:
                        seen_elements.append(element)

        if sum_formulas:
            return {
                element: sum(result.get(element, 0)
                             for result in results if result is not None)
                for element in seen_elements
            }

        if count_zero:
            return [
                {element: np.nan for element in seen_elements} if result is None
                else {element: result.get(element, 0) for element in seen_elements}
                for result in results
            ]

        return results

    # Handle single formula: resolve a species index to its formula string
    if isinstance(formula, (int, np.integer)):
        thermo_obj = thermo()
        if thermo_obj.obigt is None:
            raise FormulaError("Thermodynamic database not initialized")
        # Use .loc for label-based indexing (species indices are 1-based labels)
        if formula not in thermo_obj.obigt.index:
            raise FormulaError(f"Species index {formula} not found in OBIGT database")
        formula = thermo_obj.obigt.loc[formula, 'formula']

    if formula is None or pd.isna(formula):
        return None

    # Parse single formula
    try:
        result = _parse_formula(str(formula))

        # Apply multiplier
        if multiplier[0] != 1.0:
            result = {element: count * multiplier[0] for element, count in result.items()}

        # Validate elements (warns about unknown ones)
        _validate_elements(result)

        return result

    except Exception as e:
        # Chain the cause so the original traceback stays visible
        raise FormulaError(f"Error parsing formula '{formula}': {e}") from e
161
+
162
+
163
def _parse_formula(formula: str) -> Dict[str, float]:
    """
    Parse one formula string into an {element: count} dictionary.

    The trailing charge is split off first and, when non-zero, re-attached
    as a pseudo-element ``Z`` so that it is counted like any other symbol.
    """
    split = _count_charge(formula)
    body, z = split['uncharged'], split['Z']

    # A non-zero charge becomes an explicit "Z<charge>" term
    if z != 0:
        body += f"Z{z}"

    # Parentheses, '*' and ':' mark subformulas that need the complex parser
    has_subformulas = re.search(r'[()*:]', body)
    parser = _parse_complex_formula if has_subformulas else _count_elements
    return parser(body)
179
+
180
+
181
+ def _count_charge(formula: str) -> Dict[str, Any]:
182
+ """Extract charge from formula."""
183
+ Z = 0
184
+ uncharged = formula
185
+
186
+ # Look for charge at end: +, -, +n, -n
187
+ charge_match = re.search(r'([+-])(\d*\.?\d*)$', formula)
188
+ if charge_match:
189
+ sign = 1 if charge_match.group(1) == '+' else -1
190
+ magnitude_str = charge_match.group(2)
191
+
192
+ if magnitude_str == '':
193
+ magnitude = 1
194
+ else:
195
+ magnitude = float(magnitude_str)
196
+
197
+ Z = sign * magnitude
198
+ uncharged = formula[:charge_match.start()]
199
+
200
+ return {'Z': Z, 'uncharged': uncharged}
201
+
202
+
203
+ def _count_elements(formula: str) -> Dict[str, float]:
204
+ """Count elements in a simple chemical formula."""
205
+ if pd.isna(formula) or formula == '':
206
+ return {}
207
+
208
+ # Regular expression for element symbol and coefficient
209
+ element_pattern = r'([A-Z][a-z]*)([+-]?\d*\.?\d*)'
210
+
211
+ # Validate formula format
212
+ if not re.match(r'^(' + element_pattern + r')+$', formula):
213
+ raise FormulaError(f"'{formula}' is not a simple chemical formula")
214
+
215
+ elements = {}
216
+
217
+ # Find all element-coefficient pairs
218
+ matches = re.findall(element_pattern, formula)
219
+
220
+ for element, coeff_str in matches:
221
+ if coeff_str == '' or coeff_str == '+':
222
+ coeff = 1.0
223
+ elif coeff_str == '-':
224
+ coeff = -1.0
225
+ else:
226
+ coeff = float(coeff_str)
227
+
228
+ # Sum if element appears multiple times
229
+ elements[element] = elements.get(element, 0) + coeff
230
+
231
+ return elements
232
+
233
+
234
def _parse_complex_formula(formula: str) -> Dict[str, float]:
    """
    Parse a formula containing parentheses and/or ``*``/``:`` suffixes.

    The formula is split into subformulas with multipliers; each subformula
    is counted as a simple formula and scaled by its multiplier.
    """
    totals = {}

    for piece, times in _count_formulas(formula).items():
        # Empty subformulas contribute nothing
        if not piece:
            continue

        for symbol, amount in _count_elements(piece).items():
            totals[symbol] = totals.get(symbol, 0) + amount * times

    return totals
249
+
250
+
251
+ def _count_formulas(formula: str) -> Dict[str, float]:
252
+ """Count subformulas in a complex chemical formula."""
253
+ subformulas = {}
254
+ remaining = formula
255
+
256
+ # Handle parenthetical terms: Ca(OH)2
257
+ while '(' in remaining:
258
+ # Find matching parentheses
259
+ open_pos = remaining.find('(')
260
+ if open_pos == -1:
261
+ break
262
+
263
+ close_pos = remaining.find(')', open_pos)
264
+ if close_pos == -1:
265
+ raise FormulaError("Unpaired parentheses in formula")
266
+
267
+ # Extract subformula
268
+ subformula = remaining[open_pos + 1:close_pos]
269
+
270
+ # Look for coefficient after closing parenthesis
271
+ after_close = remaining[close_pos + 1:]
272
+ coeff_match = re.match(r'^([+-]?\d*\.?\d*)', after_close)
273
+
274
+ if coeff_match and coeff_match.group(1):
275
+ coeff_str = coeff_match.group(1)
276
+ if coeff_str in ['+', '']:
277
+ coeff = 1.0
278
+ elif coeff_str == '-':
279
+ coeff = -1.0
280
+ else:
281
+ coeff = float(coeff_str)
282
+ coeff_end = coeff_match.end()
283
+ else:
284
+ coeff = 1.0
285
+ coeff_end = 0
286
+
287
+ # Add to subformulas
288
+ subformulas[subformula] = subformulas.get(subformula, 0) + coeff
289
+
290
+ # Remove processed part
291
+ remaining = remaining[:open_pos] + remaining[close_pos + 1 + coeff_end:]
292
+
293
+ # Handle suffixed terms: CaSO4*2H2O or CaSO4:2H2O
294
+ for separator in ['*', ':']:
295
+ if separator in remaining:
296
+ parts = remaining.split(separator)
297
+ main_part = parts[0]
298
+
299
+ for i in range(1, len(parts)):
300
+ suffix_part = parts[i]
301
+
302
+ # Look for leading coefficient
303
+ coeff_match = re.match(r'^([+-]?\d*\.?\d*)', suffix_part)
304
+ if coeff_match and coeff_match.group(1):
305
+ coeff_str = coeff_match.group(1)
306
+ if coeff_str in ['+', '']:
307
+ coeff = 1.0
308
+ elif coeff_str == '-':
309
+ coeff = -1.0
310
+ else:
311
+ coeff = float(coeff_str)
312
+ subformula = suffix_part[coeff_match.end():]
313
+ else:
314
+ coeff = 1.0
315
+ subformula = suffix_part
316
+
317
+ if subformula:
318
+ subformulas[subformula] = subformulas.get(subformula, 0) + coeff
319
+
320
+ remaining = main_part
321
+ break
322
+
323
+ # Add remaining main formula
324
+ if remaining.strip():
325
+ subformulas[remaining.strip()] = subformulas.get(remaining.strip(), 0) + 1
326
+
327
+ return subformulas
328
+
329
+
330
def _validate_elements(composition: Dict[str, float]) -> None:
    """
    Warn about element symbols missing from ``thermo().element``.

    The charge pseudo-element ``Z`` is never reported. Nothing is checked
    when the element table is not loaded.
    """
    table = thermo().element
    if table is None:
        return

    recognised = set(table['element'].tolist())
    strangers = set(composition.keys()) - recognised - {'Z'}

    if strangers:
        warnings.warn(f"element(s) not in thermo().element: {' '.join(strangers)}")
339
+
340
+
341
def get_formula(formula: Union[str, int, List[Union[str, int]]]) -> Union[str, List[str]]:
    """
    Get chemical formulas for species indices or return formula strings.

    Parameters
    ----------
    formula : str, int, or list
        Chemical formula(s) or species index(es). Python and NumPy integers
        are treated as labels into ``thermo().obigt``; strings are returned
        as they are; anything else is converted with ``str()``.

    Returns
    -------
    str or list of str
        Chemical formula(s); a list is returned only for list input.

    Raises
    ------
    FormulaError
        If a species index is requested while the OBIGT table is not loaded,
        or the index is not present in it.
    """
    single_result = not isinstance(formula, list)
    items = [formula] if single_result else formula

    thermo_obj = thermo()
    results = []

    for f in items:
        if isinstance(f, str):
            # Already a formula
            results.append(f)
        elif isinstance(f, (int, np.integer)):
            # Species index (NumPy integers included, e.g. values taken from
            # a DataFrame or array) - look up formula
            if thermo_obj.obigt is None:
                raise FormulaError("Thermodynamic database not initialized")
            # Use .loc for label-based indexing (species indices are 1-based labels)
            if f not in thermo_obj.obigt.index:
                raise FormulaError(f"Species index {f} not found in OBIGT database")
            results.append(thermo_obj.obigt.loc[f, 'formula'])
        else:
            # Try to convert to string
            results.append(str(f))

    return results[0] if single_result else results
388
+
389
+
390
def as_chemical_formula(makeup_dict: Union[Dict[str, float], pd.DataFrame],
                        drop_zero: bool = True) -> Union[str, List[str]]:
    """
    Convert elemental makeup to chemical formula string(s).

    Parameters
    ----------
    makeup_dict : dict or DataFrame
        One composition as a dict, or several as DataFrame rows with
        element symbols as column labels
    drop_zero : bool
        Whether to exclude zero coefficients

    Returns
    -------
    str or list of str
        One formula for a dict, one formula per row for a DataFrame
    """
    # Single composition
    if not isinstance(makeup_dict, pd.DataFrame):
        return _dict_to_formula(makeup_dict, drop_zero)

    # One formula per row
    return [
        _dict_to_formula(makeup_dict.iloc[position].to_dict(), drop_zero)
        for position in range(len(makeup_dict))
    ]
418
+
419
+
420
+ def _dict_to_formula(composition: Dict[str, float], drop_zero: bool) -> str:
421
+ """Convert single composition dictionary to formula string."""
422
+ if drop_zero:
423
+ composition = {k: v for k, v in composition.items() if v != 0}
424
+
425
+ # Put Z (charge) at the end
426
+ elements = [k for k in composition.keys() if k != 'Z']
427
+ if 'Z' in composition:
428
+ elements.append('Z')
429
+
430
+ formula_parts = []
431
+
432
+ for element in elements:
433
+ count = composition[element]
434
+
435
+ if element == 'Z':
436
+ # Handle charge
437
+ if count < 0:
438
+ formula_parts.append(f"{count}")
439
+ elif count > 0:
440
+ formula_parts.append(f"+{count}")
441
+ # count == 0 is omitted
442
+ else:
443
+ # Handle regular elements
444
+ if count == 1:
445
+ formula_parts.append(element)
446
+ elif count == -1:
447
+ formula_parts.append(f"{element}-1")
448
+ else:
449
+ formula_parts.append(f"{element}{count}")
450
+
451
+ formula = ''.join(formula_parts)
452
+
453
+ # Handle special case of negative coefficient at end without charge
454
+ if 'Z' not in composition and len(elements) > 0:
455
+ last_element = elements[-1]
456
+ if composition[last_element] < 0:
457
+ formula += "+0"
458
+
459
+ return formula
460
+
461
+
462
def mass(formula: Union[str, int, List[Union[str, int]]]) -> Union[float, List[float]]:
    """
    Calculate molecular mass of chemical formula(s).

    Parameters
    ----------
    formula : str, int, or list
        Chemical formula(s) or species index(es)

    Returns
    -------
    float or list of float
        Sum of ``count * mass`` over the elements of each formula, using the
        ``mass`` column of ``thermo().element``. A single result is returned
        as a scalar; a missing composition gives NaN.

    Raises
    ------
    RuntimeError
        If the element table is not loaded.
    FormulaError
        If an element of a formula is not in the element table.
    """
    table = thermo().element
    if table is None:
        raise RuntimeError("Element data not available")

    parsed = makeup(formula, count_zero=False)
    if not isinstance(parsed, list):
        parsed = [parsed]

    def _mass_of(comp):
        """Mass of one composition, or NaN when the composition is missing."""
        if comp is None:
            return np.nan

        total = 0.0
        for symbol, amount in comp.items():
            if symbol == 'Z':
                continue  # Charge has no mass

            rows = table[table['element'] == symbol]
            if len(rows) == 0:
                raise FormulaError(f"Element {symbol} not found in element database")

            # The first matching row of the table is used
            total += amount * rows.iloc[0]['mass']
        return total

    masses = [_mass_of(comp) for comp in parsed]
    return masses[0] if len(masses) == 1 else masses
511
+
512
+
513
def entropy(formula: Union[str, int, List[Union[str, int]]]) -> Union[float, List[float]]:
    """
    Calculate standard molal entropy of elements in chemical formulas.

    For each element the per-atom entropy ``s / n`` from ``thermo().element``
    is multiplied by its count and summed; the sum is then multiplied by
    4.184 (cal to J; this assumes the table stores entropies in cal).

    Parameters
    ----------
    formula : str, int, or list
        Chemical formula(s) or species index(es)

    Returns
    -------
    float or list of float
        Entropy(ies) of the elements. A single result is returned as a
        scalar. NaN is returned for a missing composition and for any
        formula in which an element with a non-zero count has no usable
        entropy data, so a partial sum is never reported.

    Raises
    ------
    RuntimeError
        If the element table is not loaded.
    """
    table = thermo().element
    if table is None:
        raise RuntimeError("Element data not available")

    compositions = makeup(formula, count_zero=False)
    if not isinstance(compositions, list):
        compositions = [compositions]

    entropies = []

    for comp in compositions:
        if comp is None:
            entropies.append(np.nan)
            continue

        total_entropy = 0.0
        incomplete = False  # a contributing element lacks entropy data

        for element, count in comp.items():
            rows = table[table['element'] == element]
            if len(rows) == 0:
                warnings.warn(f"Element {element} not available in thermo().element")
                # Only an element that is actually present spoils the sum
                incomplete = incomplete or count != 0
                continue

            element_s = rows.iloc[0]['s']
            element_n = rows.iloc[0]['n']
            if pd.isna(element_s) or pd.isna(element_n):
                incomplete = incomplete or count != 0
                continue

            # Entropy per atom times number of atoms
            total_entropy += count * (element_s / element_n)

        if incomplete:
            entropies.append(np.nan)
        else:
            # Convert to Joules (assuming input is in cal)
            entropies.append(total_entropy * 4.184)

    return entropies[0] if len(entropies) == 1 else entropies
576
+
577
+
578
def species_basis(species: Union[List[int], np.ndarray],
                  makeup_matrix: Optional[np.ndarray] = None,
                  basis_df: Optional[pd.DataFrame] = None) -> np.ndarray:
    """
    Calculate coefficients for formation reactions from basis species.

    For every species the elemental makeup is expressed as a linear
    combination of the basis species by solving ``tbmat @ coeffs = makeup``,
    where ``tbmat`` is the transposed element matrix of the basis.

    Parameters
    ----------
    species : list of int or array
        Species indices in thermo().obigt (1-based)
    makeup_matrix : array, optional
        Pre-calculated makeup matrix (accepted for compatibility; not used
        by this implementation)
    basis_df : pd.DataFrame, optional
        Basis definition to use (if not using global basis). All columns
        other than 'ispecies', 'logact' and 'state' are taken as elements.

    Returns
    -------
    np.ndarray
        Formation reaction coefficients, one row per species and one column
        per basis species

    Raises
    ------
    RuntimeError
        If no basis is defined, a species contains an element that is not
        in the basis, or the basis matrix is singular.
    """
    # Imported here, as in the original, rather than at module level
    from ..core.basis import get_basis
    from ..core.thermo import thermo

    # Get basis dataframe
    if basis_df is None:
        basis_df = get_basis()
    if basis_df is None:
        raise RuntimeError("Basis species not defined")

    # Element columns of the basis definition
    basis_element_names = [col for col in basis_df.columns
                           if col not in ['ispecies', 'logact', 'state']]

    # Rows of basis_df are basis species; transposing puts elements in rows
    # (the counterpart of tbmat in the R implementation)
    tbmat = basis_df[basis_element_names].values.T

    obigt = thermo().obigt

    formation_coeffs = np.zeros((len(species), len(basis_element_names)))

    for i, sp_idx in enumerate(species):
        # NOTE(review): positional lookup assumes the OBIGT rows are in
        # index order starting at 1; makeup()/get_formula() use label-based
        # .loc for the same table - confirm the two always agree.
        formula = obigt.iloc[sp_idx - 1]['formula']
        sp_makeup = makeup([formula], count_zero=True)[0]

        # All species elements must be in basis
        missing_elements = [elem for elem in sp_makeup
                            if elem not in basis_element_names]
        if missing_elements:
            raise RuntimeError(f"element(s) not in the basis: {' '.join(missing_elements)}")

        # Makeup in basis-element order; elements absent from the species are 0
        sp_makeup_ordered = np.array(
            [sp_makeup.get(elem, 0) for elem in basis_element_names], dtype=float)

        # Solve linear system: tbmat @ coeffs = sp_makeup_ordered
        try:
            coeffs = np.linalg.solve(tbmat, sp_makeup_ordered)
        except np.linalg.LinAlgError as err:
            raise RuntimeError(f"Singular basis matrix for species {sp_idx}") from err

        # Round to 7 decimals and zero out numerical noise
        coeffs = np.around(coeffs, decimals=7)
        coeffs[np.abs(coeffs) < 1e-7] = 0

        formation_coeffs[i, :] = coeffs

    return formation_coeffs
679
+
680
+
681
def calculate_ghs(formula: str, G: float = np.nan, H: float = np.nan,
                  S: float = np.nan, T: float = 298.15,
                  E_units: str = "J") -> Dict[str, float]:
    """
    Calculate missing G, H, or S from the other two values.

    Uses ``G = H - T * (S - Se)``, where ``Se`` is the entropy of the
    elements in ``formula``. Only the first missing value (checked in the
    order G, H, S) is filled in.

    Parameters
    ----------
    formula : str
        Chemical formula
    G : float
        Gibbs energy of formation
    H : float
        Enthalpy of formation
    S : float
        Standard entropy
    T : float
        Temperature in K
    E_units : str
        Energy units ("J" or "cal"); for "cal" the elemental entropy is
        divided by 4.184

    Returns
    -------
    dict
        Dictionary with G, H, S values
    """
    # Entropy of the elements, in the requested energy units
    Se = entropy(formula)
    if E_units == "cal":
        Se = Se / 4.184  # Convert J to cal

    values = {"G": G, "H": H, "S": S}

    # Fill in the first missing quantity
    if pd.isna(G):
        values["G"] = H - T * (S - Se)
    elif pd.isna(H):
        values["H"] = G + T * (S - Se)
    elif pd.isna(S):
        values["S"] = (H - G) / T + Se

    return values
721
+
722
+
723
def ZC(formula: Union[str, int, List[Union[str, int]]]) -> Union[float, List[float]]:
    """
    Calculate average oxidation state of carbon in chemical formulas.

    The counts of H, N, O, S and the charge Z are weighted with fixed
    nominal charges and the total is divided by the number of carbon atoms.
    Other elements are left out, with a warning.

    Parameters
    ----------
    formula : str, int, or list
        Chemical formula(s) or species index(es)

    Returns
    -------
    float or list of float
        Average oxidation state(s) of carbon; NaN for a missing composition
        or a formula without carbon. A single result is returned as a
        scalar.
    """
    # Nominal charges of elements
    nominal = {'H': -1, 'N': 3, 'O': 2, 'S': 2, 'Z': 1}

    compositions = makeup(formula, count_zero=False)
    if not isinstance(compositions, list):
        compositions = [compositions]

    results = []

    for comp in compositions:
        if comp is None or 'C' not in comp:
            results.append(np.nan)
            continue

        others = {el: n for el, n in comp.items() if el != 'C'}
        skipped = [el for el in others if el not in nominal]
        total_charge = sum(n * nominal[el] for el, n in others.items() if el in nominal)

        if skipped:
            warnings.warn(f"element(s) {' '.join(skipped)} not in "
                          f"{' '.join(nominal)} so not included in ZC calculation")

        results.append(total_charge / comp['C'])

    return results[0] if len(results) == 1 else results
779
+
780
+
781
+ # Convenience functions for stoichiometric operations
782
def i2A(formula: Union[str, List[str], Dict[str, float]]) -> np.ndarray:
    """
    Convert formula(s) to stoichiometric matrix.

    Parameters
    ----------
    formula : str, list, or dict
        Chemical formula(s) or composition. An array is returned unchanged;
        a dict with string keys becomes a one-row matrix.

    Returns
    -------
    np.ndarray
        Stoichiometric matrix with one row per formula and elements as
        columns in sorted order; rows of missing compositions are zero
    """
    if isinstance(formula, np.ndarray):
        return formula

    if isinstance(formula, dict) and all(isinstance(k, str) for k in formula.keys()):
        # Single composition dictionary
        return np.array([[formula.get(k, 0) for k in sorted(formula.keys())]])

    # Compositions padded with zeros for elements they lack
    compositions = makeup(formula, count_zero=True)
    if not isinstance(compositions, list):
        compositions = [compositions]

    # Column order: every element seen, sorted
    columns = sorted(set().union(*[comp.keys() for comp in compositions
                                   if comp is not None]))

    matrix = np.zeros((len(compositions), len(columns)))
    for row, comp in enumerate(compositions):
        if comp is None:
            continue
        for col, element in enumerate(columns):
            matrix[row, col] = comp.get(element, 0)

    return matrix
823
+
824
+
825
+ # Export main functions
826
__all__ = [
    'makeup',
    'get_formula',
    'as_chemical_formula',
    'mass',
    'entropy',
    'species_basis',
    'calculate_ghs',
    'ZC',
    'i2A',
    'FormulaError',
]