pychnosz 1.1.12__cp310-cp310-macosx_15_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pychnosz/.dylibs/libgcc_s.1.1.dylib +0 -0
- pychnosz/.dylibs/libgfortran.5.dylib +0 -0
- pychnosz/.dylibs/libquadmath.0.dylib +0 -0
- pychnosz/__init__.py +129 -0
- pychnosz/_version.py +34 -0
- pychnosz/biomolecules/__init__.py +29 -0
- pychnosz/biomolecules/ionize_aa.py +197 -0
- pychnosz/biomolecules/proteins.py +595 -0
- pychnosz/core/__init__.py +46 -0
- pychnosz/core/affinity.py +1256 -0
- pychnosz/core/animation.py +593 -0
- pychnosz/core/balance.py +334 -0
- pychnosz/core/basis.py +716 -0
- pychnosz/core/diagram.py +3336 -0
- pychnosz/core/equilibrate.py +813 -0
- pychnosz/core/equilibrium.py +554 -0
- pychnosz/core/info.py +821 -0
- pychnosz/core/retrieve.py +364 -0
- pychnosz/core/speciation.py +580 -0
- pychnosz/core/species.py +599 -0
- pychnosz/core/subcrt.py +1696 -0
- pychnosz/core/thermo.py +593 -0
- pychnosz/core/unicurve.py +1226 -0
- pychnosz/data/__init__.py +11 -0
- pychnosz/data/add_obigt.py +327 -0
- pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
- pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
- pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
- pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
- pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
- pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
- pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
- pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
- pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
- pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
- pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
- pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
- pychnosz/data/extdata/Berman/sympy.R +99 -0
- pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
- pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
- pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
- pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
- pychnosz/data/extdata/OBIGT/AD.csv +25 -0
- pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
- pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
- pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
- pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
- pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
- pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
- pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
- pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
- pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
- pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
- pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
- pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
- pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
- pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
- pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
- pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
- pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
- pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
- pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
- pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
- pychnosz/data/extdata/misc/BZA10.csv +5 -0
- pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
- pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
- pychnosz/data/extdata/misc/LA19_test.csv +7 -0
- pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
- pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
- pychnosz/data/extdata/misc/PM90.csv +7 -0
- pychnosz/data/extdata/misc/RH95.csv +23 -0
- pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
- pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
- pychnosz/data/extdata/misc/SK95.csv +55 -0
- pychnosz/data/extdata/misc/SOJSH.csv +61 -0
- pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
- pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
- pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
- pychnosz/data/extdata/misc/bluered.txt +1000 -0
- pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
- pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
- pychnosz/data/extdata/protein/Cas/download.R +34 -0
- pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
- pychnosz/data/extdata/protein/POLG.csv +12 -0
- pychnosz/data/extdata/protein/TBD+05.csv +393 -0
- pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
- pychnosz/data/extdata/protein/rubisco.csv +28 -0
- pychnosz/data/extdata/protein/rubisco.fasta +239 -0
- pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
- pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
- pychnosz/data/extdata/src/README.txt +5 -0
- pychnosz/data/extdata/taxonomy/names.dmp +215 -0
- pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
- pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
- pychnosz/data/extdata/thermo/buffer.csv +40 -0
- pychnosz/data/extdata/thermo/element.csv +135 -0
- pychnosz/data/extdata/thermo/groups.csv +6 -0
- pychnosz/data/extdata/thermo/opt.csv +2 -0
- pychnosz/data/extdata/thermo/protein.csv +506 -0
- pychnosz/data/extdata/thermo/refs.csv +343 -0
- pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
- pychnosz/data/loader.py +431 -0
- pychnosz/data/mod_obigt.py +322 -0
- pychnosz/data/obigt.py +471 -0
- pychnosz/data/worm.py +228 -0
- pychnosz/fortran/.gitignore +6 -0
- pychnosz/fortran/__init__.py +16 -0
- pychnosz/fortran/h2o92.dylib +0 -0
- pychnosz/fortran/h2o92_interface.py +527 -0
- pychnosz/geochemistry/__init__.py +21 -0
- pychnosz/geochemistry/minerals.py +514 -0
- pychnosz/geochemistry/redox.py +500 -0
- pychnosz/models/__init__.py +47 -0
- pychnosz/models/archer_wang.py +165 -0
- pychnosz/models/berman.py +309 -0
- pychnosz/models/cgl.py +381 -0
- pychnosz/models/dew.py +997 -0
- pychnosz/models/hkf.py +523 -0
- pychnosz/models/hkf_helpers.py +231 -0
- pychnosz/models/iapws95.py +1113 -0
- pychnosz/models/supcrt92_fortran.py +238 -0
- pychnosz/models/water.py +480 -0
- pychnosz/utils/__init__.py +27 -0
- pychnosz/utils/expression.py +1074 -0
- pychnosz/utils/formula.py +830 -0
- pychnosz/utils/formula_ox.py +227 -0
- pychnosz/utils/reset.py +33 -0
- pychnosz/utils/units.py +259 -0
- pychnosz-1.1.12.dist-info/METADATA +197 -0
- pychnosz-1.1.12.dist-info/RECORD +133 -0
- pychnosz-1.1.12.dist-info/WHEEL +5 -0
- pychnosz-1.1.12.dist-info/licenses/LICENSE.txt +19 -0
- pychnosz-1.1.12.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1256 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Affinity calculation module.
|
|
3
|
+
|
|
4
|
+
This module provides Python equivalents of the R functions in affinity.R:
|
|
5
|
+
- affinity(): Calculate chemical affinities of formation reactions
|
|
6
|
+
- Energy calculation utilities and argument processing
|
|
7
|
+
- Variable expansion and multi-dimensional calculations
|
|
8
|
+
|
|
9
|
+
Author: CHNOSZ Python port
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
from typing import Union, List, Optional, Dict, Any, Tuple
|
|
15
|
+
import warnings
|
|
16
|
+
|
|
17
|
+
from .thermo import thermo
|
|
18
|
+
from .basis import get_basis, is_basis_defined
|
|
19
|
+
from .species import get_species, is_species_defined
|
|
20
|
+
from .subcrt import subcrt
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class AffinityError(Exception):
    """Error type raised by this module when an affinity calculation cannot proceed."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def affinity(messages: bool = True, basis: Optional[pd.DataFrame] = None,
             species: Optional[pd.DataFrame] = None, iprotein: Optional[Union[int, List[int], np.ndarray]] = None,
             loga_protein: Union[float, List[float]] = 0.0, **kwargs) -> Dict[str, Any]:
    """
    Calculate affinities of formation reactions.

    This function calculates chemical affinities for the formation reactions of
    species of interest from user-selected basis species. The affinities are
    calculated as A/2.303RT where A is the chemical affinity.

    Parameters
    ----------
    messages : bool, default True
        Whether to print informational messages
    basis : pd.DataFrame, optional
        Basis species definition to use (if not using global basis)
    species : pd.DataFrame, optional
        Species definition to use (if not using global species)
    iprotein : int, list of int, or array, optional
        Build proteins from residues (row numbers in thermo().protein).
        Residue species are added to the global species list for the duration
        of the call and removed again before returning, also on error.
    loga_protein : float or list of float, default 0.0
        Activity of proteins (log scale); a shorter sequence is recycled to
        the number of proteins
    **kwargs : dict
        Variable arguments defining calculation conditions:
        - Basis species names (e.g., CO2=[-60, 20, 5]): Variable basis species activities
        - T : float or list, Temperature in °C
        - P : float or list, Pressure in bar
        - property : str, Property to calculate ("A", "logK", "G", etc.)
        - exceed_Ttr : bool, Allow extrapolation beyond transition temperatures
        - exceed_rhomin : bool, Allow calculations below minimum water density
        - return_buffer : bool, Return buffer activities
        - balance : str, Balance method for protein buffers

        If the first keyword value is the result dict of a previous
        affinity() call, its recorded arguments are reused and updated with
        the remaining keyword arguments (argument recall).

    Returns
    -------
    dict
        Dictionary containing:
        - fun : str, Function name ("affinity")
        - args : dict, Arguments used in calculation
        - sout : dict, Subcrt calculation results
        - property : str, Property calculated
        - basis : pd.DataFrame, Basis species definition
        - species : pd.DataFrame, Species of interest definition
        - T : float or array, Temperature(s) in Kelvin
        - P : float or array, Pressure(s) in bar
        - vars : list, Variable names
        - vals : dict, Variable values
        - values : dict, Calculated affinity values by species

    Raises
    ------
    AffinityError
        If `iprotein` contains NA values or values that are not row numbers
        of thermo().protein, or if basis/species are neither passed nor
        defined globally.

    Examples
    --------
    >>> import pychnosz
    >>> pychnosz.reset()
    >>> pychnosz.basis(["CO2", "H2O", "NH3", "H2S", "H+", "O2"])
    >>> pychnosz.species(["glycine", "tyrosine", "serine", "methionine"])
    >>> result = pychnosz.affinity(CO2=[-60, 20, 5], T=350, P=2000)
    >>> print(result['values'][1566])  # Glycine affinities

    >>> # With proteins
    >>> import pandas as pd
    >>> aa = pd.read_csv("POLG.csv")
    >>> iprotein = pychnosz.add_protein(aa)
    >>> pychnosz.basis("CHNOSe")
    >>> a = pychnosz.affinity(iprotein=iprotein, pH=[2, 14], Eh=[-1, 1])
    """
    args_orig = dict(kwargs)

    # Argument recall: handled before anything touches the global species
    # list, so the recursive call is the only one that adds/removes residues.
    if args_orig:
        first_value = args_orig[next(iter(args_orig))]
        if isinstance(first_value, dict) and first_value.get('fun') == 'affinity':
            # Work on a copy so the previous result's 'args' is not mutated.
            aargs = dict(first_value.get('args', {}))
            aargs.update(list(args_orig.items())[1:])
            # Forward the explicit parameters; recorded arguments take
            # precedence if a hand-built dict happens to contain them.
            forwarded = {
                'messages': messages,
                'basis': basis,
                'species': species,
                'iprotein': iprotein,
                'loga_protein': loga_protein,
            }
            forwarded.update(aargs)
            return affinity(**forwarded)

    # Thermo object, needed for protein validation
    thermo_obj = thermo()

    ires = None          # positions of the residue species in the species list
    species_func = None  # bound below only when residues are added
    if iprotein is not None:
        # Accept scalars, lists, tuples and arrays alike
        iprotein = np.atleast_1d(np.asarray(iprotein))

        # Check all proteins are available
        if np.any(np.isnan(iprotein)):
            raise AffinityError("`iprotein` has some NA values")
        if thermo_obj.protein is None or not np.all(
                (iprotein >= 0) & (iprotein < len(thermo_obj.protein))):
            # Negative values would wrap around in .iloc and yield
            # colliding protein keys, so they are rejected as well.
            raise AffinityError("some value(s) of `iprotein` are not rownumbers of thermo().protein")

        # Amino acids in 3-letter code
        aminoacids_3 = ["Ala", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile", "Lys", "Leu",
                        "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val", "Trp", "Tyr"]

        # Use _RESIDUE notation (matches R CHNOSZ affinity.R line 84)
        resnames_residue = ["H2O_RESIDUE"] + [f"{aa}_RESIDUE" for aa in aminoacids_3]

        # Local import: the `species` parameter shadows the module function
        from .species import species as species_func

        # Add residue species with activity 0 (all in "aq" state)
        species_func(resnames_residue, state="aq", add=True, messages=messages)

        # Locate the residues in the species list
        species_df_temp = get_species()
        found = []
        for name in resnames_residue:
            idx = np.where(species_df_temp['name'] == name)[0]
            if len(idx) > 0:
                found.append(idx[0])
        ires = np.array(found)

    try:
        # Check if basis and species are defined (use provided or global)
        if basis is None:
            if not is_basis_defined():
                raise AffinityError("basis species are not defined")
            basis_df = get_basis()
        else:
            basis_df = basis

        if species is None:
            if not is_species_defined():
                raise AffinityError("species are not defined")
            species_df = get_species()
        else:
            species_df = species

        # Process energy arguments
        args = energy_args(args_orig, messages, basis_df=basis_df)

        # Property to calculate; anything empty falls back to affinity ('A')
        property_name = args.get('what', 'A')
        what = property_name if property_name and property_name != 'A' else 'A'

        # Re-fetched after the species list may have changed (kept from the
        # original flow; presumably returns the same global object).
        thermo_obj = thermo()

        # Single source of truth for the flags, so the recorded arguments
        # match what was actually used in the calculation.
        exceed_Ttr = kwargs.get('exceed_Ttr', True)
        exceed_rhomin = kwargs.get('exceed_rhomin', False)

        energy_result = energy(
            what=what,
            vars=args['vars'],
            vals=args['vals'],
            lims=args['lims'],
            T=args['T'],
            P=args['P'],
            IS=args.get('IS', 0),
            exceed_Ttr=exceed_Ttr,
            exceed_rhomin=exceed_rhomin,
            basis_df=basis_df,
            species_df=species_df,
            messages=messages
        )
        affinity_values = energy_result['a']
        energy_sout = energy_result['sout']

        # Handle protein affinity calculations if iprotein was provided
        if iprotein is not None and ires is not None:
            # Normalize loga_protein to one value per protein (scalars,
            # NumPy scalars and short sequences are recycled)
            loga_protein_arr = np.atleast_1d(np.asarray(loga_protein))
            if len(loga_protein_arr) < len(iprotein):
                loga_protein_arr = np.resize(loga_protein_arr, len(iprotein))

            # ispecies of each residue; affinity_values is keyed by ispecies
            residue_ispecies = get_species().iloc[ires]['ispecies'].values

            # Protein affinity = composition-weighted sum of residue affinities
            protein_affinities = {}
            for ip, iprot in enumerate(iprotein):
                # NOTE(review): iloc[4:24] yields at most 20 values while
                # resnames_residue has 21 entries (H2O + 20 amino acids);
                # confirm the column layout of thermo().protein.
                aa_counts = thermo_obj.protein.iloc[iprot].iloc[4:24].values.astype(float)

                first_residue_key = residue_ispecies[0]
                if first_residue_key not in affinity_values:
                    continue

                # Same shape as the residue affinities
                protein_affinity = np.zeros_like(affinity_values[first_residue_key])
                for i, res_ispecies in enumerate(residue_ispecies):
                    if res_ispecies in affinity_values:
                        protein_affinity = protein_affinity + affinity_values[res_ispecies] * aa_counts[i]

                # Subtract protein activity
                protein_affinity = protein_affinity - loga_protein_arr[ip]

                # Negative of (row number + 1) denotes a protein
                protein_affinities[-(iprot + 1)] = protein_affinity

            # Add ionization affinity if H+ is in basis
            if 'H+' in basis_df.index:
                if messages:
                    print("affinity: ionizing proteins ...")

                from ..biomolecules.proteins import pinfo
                from ..biomolecules.ionize_aa import ionize_aa

                # Amino acid compositions for these proteins
                aa = pinfo(iprotein)

                T_flat, P_flat, pH_flat = _affinity_ionization_conditions(args, basis_df)
                ionization_result = ionize_aa(aa, property="A", T=T_flat, P=P_flat, pH=pH_flat)

                for ip, iprot in enumerate(iprotein):
                    protein_key = -(iprot + 1)
                    ionization_affinity = ionization_result.iloc[:, ip].values
                    formation_affinity = protein_affinities[protein_key]
                    # Reshape to match formation affinity dimensions if needed
                    if (isinstance(formation_affinity, np.ndarray)
                            and formation_affinity.shape != ionization_affinity.shape):
                        ionization_affinity = ionization_affinity.reshape(formation_affinity.shape)
                    protein_affinities[protein_key] = formation_affinity + ionization_affinity

            # Replace affinity_values with protein affinities
            affinity_values = protein_affinities

            # Stoichiometric coefficients of the proteins:
            # (n_proteins x n_residues) @ (n_residues x n_basis).
            # The species list must still contain the residues here.
            basis_cols = list(basis_df.index)
            res_coeffs = get_species().iloc[ires][basis_cols].values.astype(float)
            aa_composition = np.array([
                thermo_obj.protein.iloc[iprot].iloc[4:24].values.astype(float)
                for iprot in iprotein
            ])
            protein_coeffs = aa_composition @ res_coeffs

            # Species table for the proteins: basis coefficients + metadata
            species_data = {basis_sp: protein_coeffs[:, j] for j, basis_sp in enumerate(basis_cols)}

            protein_names = []
            protein_ispecies = []
            for iprot in iprotein:
                prot_row = thermo_obj.protein.iloc[iprot]
                protein_name = f"{prot_row['protein']}_{prot_row['organism']}"
                # Escape underscores for matplotlib/LaTeX diagram labels
                protein_names.append(protein_name.replace('_', r'\_'))
                protein_ispecies.append(-(iprot + 1))

            species_data['ispecies'] = protein_ispecies
            species_data['logact'] = loga_protein_arr[:len(iprotein)]
            species_data['state'] = ['aq'] * len(iprotein)
            species_data['name'] = protein_names

            species_df = pd.DataFrame(species_data)

        # Display names/values for the variables (pH, pe, Eh, T in °C)
        vars_list = args['vars']
        vars_list_display, vals_dict = _affinity_output_vars(args, args_orig)

        # A variable T or P is reported as an empty list; otherwise the
        # processed value from energy_args is passed through.
        T_out = [] if 'T' in vars_list else args['T']
        P_out = [] if 'P' in vars_list else args['P']

        # Build output dictionary matching R CHNOSZ structure
        return {
            'fun': 'affinity',
            'args': {
                **args_orig,
                'property': property_name,
                'exceed_Ttr': exceed_Ttr,
                'exceed_rhomin': exceed_rhomin,
                'return_buffer': kwargs.get('return_buffer', False),
                'balance': kwargs.get('balance', 'PBB')
            },
            'sout': energy_sout,
            'property': property_name,
            'basis': basis_df,
            'species': species_df,
            'T': T_out,
            'P': P_out,
            'vars': vars_list_display,  # display version with 'Eh', 'pH', 'pe'
            'vals': vals_dict,
            'values': affinity_values
        }
    finally:
        # Always remove the temporary residue species, also when the
        # calculation failed, so the global species list is left untouched.
        if ires is not None:
            species_func(ires.tolist(), delete=True, messages=False)


def _affinity_ionization_conditions(args: Dict[str, Any], basis_df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Build the flat T (°C), P and pH arrays passed to ionize_aa()."""

    def scalar(value):
        # Plain numbers pass through; anything else contributes its first element
        return value if isinstance(value, (int, float)) else value[0]

    var_names = args['vars']

    # pH = -log(a_H+), from the variable values or the constant basis activity
    if 'H+' in var_names:
        pH_vals = -np.array(args['vals'][var_names.index('H+')])
    else:
        pH_vals = np.array([-basis_df.loc['H+', 'logact']])

    T_celsius = args['T'] - 273.15
    P_vals = args['P']

    def pressure_column(n):
        # A string pressure (e.g. "Psat") is repeated; otherwise the first value is used
        if isinstance(P_vals, str):
            return np.array([P_vals] * n)
        return np.full(n, scalar(P_vals))

    has_T_var = 'T' in var_names
    has_Hplus_var = 'H+' in var_names

    if len(var_names) >= 2 and has_T_var and has_Hplus_var:
        # Both T and pH vary - full grid, T along the first axis.
        # NOTE(review): assumes the affinity grid is ordered (T, H+); confirm
        # for calls where H+ is given before T.
        T_grid, pH_grid = np.meshgrid(T_celsius, pH_vals, indexing='ij')
        T_flat = T_grid.flatten()
        pH_flat = pH_grid.flatten()
        P_flat = pressure_column(len(T_flat))
    elif len(var_names) >= 2 and has_T_var:
        # Only T varies
        T_flat = T_celsius if isinstance(T_celsius, np.ndarray) else np.array([T_celsius])
        pH_flat = np.full(len(T_flat), pH_vals[0])
        P_flat = pressure_column(len(T_flat))
    elif len(var_names) >= 2 and has_Hplus_var:
        # Only pH varies
        pH_flat = pH_vals
        T_flat = np.full(len(pH_flat), scalar(T_celsius))
        P_flat = pressure_column(len(pH_flat))
    else:
        # Neither T nor pH among two or more variables, or fewer than two variables.
        # NOTE(review): with T as the only variable just the first
        # temperature is used here; confirm this is intended.
        T_flat = np.array([scalar(T_celsius)])
        pH_flat = pH_vals
        P_flat = pressure_column(1)

    return T_flat, P_flat, pH_flat


def _affinity_output_vars(args: Dict[str, Any], args_orig: Dict[str, Any]) -> Tuple[List[str], Dict[str, Any]]:
    """Translate internal variables (H+, e-, T in K) into display names and values."""
    vars_list = args['vars']
    vars_display = vars_list.copy()
    vals = {}

    for i, var in enumerate(vars_list):
        values = args['vals'][i]
        if var == 'H+' and 'pH' in args_orig:
            vars_display[i] = 'pH'
            vals['pH'] = [-val for val in values]
        elif var == 'e-' and 'pe' in args_orig:
            vars_display[i] = 'pe'
            vals['pe'] = [-val for val in values]
        elif var == 'e-' and 'Eh' in args_orig:
            vars_display[i] = 'Eh'
            # Eh = pe * (ln(10) * R * T) / F = -log(a_e-) * T / 5039.76
            T = args['T']
            if isinstance(T, (int, float)):
                T_kelvin = T
            elif hasattr(T, '__len__'):
                T_kelvin = T[0]
            else:
                T_kelvin = 298.15
            conversion_factor = T_kelvin / 5039.76  # volts per pe unit
            vals['Eh'] = [-val * conversion_factor for val in values]
        else:
            vals[var] = values

    # A variable temperature is reported in °C
    if 'T' in vars_list:
        vals['T'] = [t - 273.15 for t in vals['T']]

    return vars_display, vals
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _ea_is_multi(value: Any) -> bool:
    """Return True if *value* is a non-string sequence with more than one element."""
    # Strings have a length too ("Psat" has 4 characters) but never describe a range.
    return not isinstance(value, str) and hasattr(value, '__len__') and len(value) > 1


def _ea_first(value: Any) -> float:
    """Return *value* itself (scalar) or its first element (sequence) as a float."""
    return float(np.ravel(value)[0])


def _ea_expand_axis(raw: Any, offset: float = 0.0):
    """Expand a variable T/P/IS argument into ``(values, [min, max, npoints])``.

    A list/tuple of length 3 is read as ``[min, max, npoints]`` and one of
    length 2 as ``[min, max]`` with 256 points; anything else is taken as an
    explicit set of values.  *offset* is added to the values (273.15 for T).
    """
    if isinstance(raw, (list, tuple)) and len(raw) in (2, 3):
        lo, hi = raw[0], raw[1]
        if offset:
            lo, hi = lo + offset, hi + offset
        n_points = int(raw[2]) if len(raw) == 3 else 256
        return np.linspace(lo, hi, n_points), [lo, hi, n_points]

    values = np.asarray(raw)
    if offset:
        values = values + offset
    return values, [values.min(), values.max(), len(values)]


def _ea_negate(raw: Any):
    """Convert pH or pe input to log activity of H+ / e- (``log a = -pH``, ``-pe``).

    Returns an array for a scalar, a 3-element ``[min, max, npoints]`` range
    or an explicit list of values, and a 2-element list ``[-min, -max]`` for
    a 2-element range (expanded to 256 points by the caller).
    """
    if not hasattr(raw, '__len__'):
        return np.array([-raw])
    if len(raw) == 3:
        return np.linspace(-raw[0], -raw[1], int(raw[2]))
    if len(raw) == 2:
        return [-v for v in raw]
    # One value in a list, or an explicit list of more than three values
    return np.array([-v for v in raw])


def _ea_eh_to_logact(raw: Any, factor: float):
    """Convert Eh (volts) to log activity of e- using ``log a = -Eh * factor``."""
    if not hasattr(raw, '__len__'):
        return -raw * factor
    if len(raw) == 3:
        # [Eh1, Eh2, npoints]
        return np.linspace(-raw[0] * factor, -raw[1] * factor, int(raw[2]))
    if len(raw) == 2:
        # [Eh1, Eh2]: default to 256 points
        return np.linspace(-raw[0] * factor, -raw[1] * factor, 256)
    if len(raw) > 3:
        # Explicit list of Eh values
        return [-v * factor for v in raw]
    # Single value wrapped in a sequence
    return np.array([-v * factor for v in raw])


def energy_args(args: Dict[str, Any], messages: bool = True, basis_df: Optional[pd.DataFrame] = None) -> Dict[str, Any]:
    """
    Process arguments for energy calculations.

    Converts variable arguments into consistent format for multi-dimensional
    calculations, handling T, P, IS and basis species variables.

    Parameters
    ----------
    args : dict
        Raw arguments from affinity() call.  Recognised keys are 'T'
        (treated as degrees Celsius and converted to Kelvin), 'P', 'IS',
        'what', and the names of basis species; 'pH', 'pe' and 'Eh' are
        mapped onto the basis species 'H+' and 'e-'.  A list or tuple of
        length 3 is read as ``[min, max, npoints]``, one of length 2 as
        ``[min, max]`` with 256 points, and any other sequence as explicit
        values.  The keys 'property', 'exceed_Ttr', 'exceed_rhomin',
        'return_buffer' and 'balance' are ignored here.
    messages : bool
        If True, print the fixed T, P and IS and the ranges of the basis
        species variables.
    basis_df : pandas.DataFrame, optional
        Basis definition whose index holds the basis species names.  When
        omitted it is obtained from ``get_basis()``.

    Returns
    -------
    dict
        Processed arguments with consistent variable structure:
        'what' (property, default 'A'), 'vars' (variable names), 'vals'
        (one array of values per variable), 'lims' (``[min, max, npoints]``
        per variable), and 'T' (Kelvin), 'P', 'IS'.  Variable T/P/IS come
        first in 'vars' (in the order they appear in *args*), followed by
        the basis species variables.

    Raises
    ------
    AffinityError
        If an argument is neither one of the recognised keys nor the name
        of a basis species.
    """
    if basis_df is None:
        basis_df = get_basis()

    # Default values
    T = 298.15
    P = "Psat"
    IS = 0
    T_is_var = P_is_var = IS_is_var = False
    tps_lims = {}  # limits of whichever of T, P, IS are variables

    # Process T, P, IS arguments
    if 'T' in args:
        T = args['T']
        T_is_var = _ea_is_multi(T)
        if T_is_var:
            T, tps_lims['T'] = _ea_expand_axis(T, offset=273.15)
        elif hasattr(T, '__len__'):
            # Single value given as a sequence, e.g. [25]
            T = np.asarray(T, dtype=float) + 273.15
        else:
            T = T + 273.15

    if 'P' in args:
        P = args['P']
        P_is_var = _ea_is_multi(P)
        if P_is_var:
            P, tps_lims['P'] = _ea_expand_axis(P)

    if 'IS' in args:
        IS = args['IS']
        IS_is_var = _ea_is_multi(IS)
        if IS_is_var:
            IS, tps_lims['IS'] = _ea_expand_axis(IS)

    # Print status messages
    if messages:
        if not T_is_var:
            T_celsius = _ea_first(T) - 273.15
            print(f'affinity: temperature is {T_celsius:.0f} ºC')

        if not P_is_var:
            if isinstance(P, str) and P == "Psat":
                print("affinity: pressure is Psat")
            else:
                print(f'affinity: pressure is {P} bar')

        if not IS_is_var and IS != 0:
            print(f'affinity: ionic strength is {IS}')

    # Default property
    what = args['what'] if 'what' in args else 'A'

    vars_list = []
    vals_list = []
    lims_list = []

    # Add variable T, P, IS in the order they appear in args
    tps_values = {'T': T, 'P': P, 'IS': IS}
    for arg_name in args:
        if arg_name in tps_lims:
            vars_list.append(arg_name)
            vals_list.append(tps_values[arg_name])
            lims_list.append(tps_lims[arg_name])

    # Process basis species variables
    basis_names = basis_df.index.tolist()
    non_basis_args = ('T', 'P', 'IS', 'what', 'property', 'exceed_Ttr',
                      'exceed_rhomin', 'return_buffer', 'balance')

    for arg_name, arg_value in args.items():
        if arg_name in non_basis_args:
            continue

        # Handle pH -> H+, pe -> e-, Eh -> e-
        var_name = arg_name
        var_values = arg_value

        if arg_name == 'pH':
            var_name = 'H+'
            var_values = _ea_negate(arg_value)
        elif arg_name == 'pe':
            var_name = 'e-'
            var_values = _ea_negate(arg_value)
        elif arg_name == 'Eh':
            var_name = 'e-'
            # pe = Eh * F / (ln(10) * R * T) = Eh * 5039.76 / T, and
            # log(a_e-) = -pe.  A single temperature is used for the
            # conversion: T itself, or its first value when T is an array.
            conversion_factor = 5039.76 / _ea_first(T)
            var_values = _ea_eh_to_logact(arg_value, conversion_factor)

        if var_name not in basis_names:
            # Not a recognized basis species or variable
            raise AffinityError(f"{arg_name} is not one of T, P, or IS, and does not match any basis species")

        vars_list.append(var_name)

        if isinstance(var_values, (list, tuple)):
            if len(var_values) == 3:
                # [min, max, npoints] format
                n_vals = int(var_values[2])
                vals_list.append(np.linspace(var_values[0], var_values[1], n_vals))
                # Store a fresh list with an integer count so the grid
                # dimensions derived from it are always usable as a shape
                lims_list.append([var_values[0], var_values[1], n_vals])
                if messages:
                    print(f'affinity: variable {len(vars_list)} is log10(a_{var_name}) at {n_vals} values from {var_values[0]} to {var_values[1]}')
            elif len(var_values) == 2:
                # [min, max] format - default to 256 points
                vals_list.append(np.linspace(var_values[0], var_values[1], 256))
                lims_list.append([var_values[0], var_values[1], 256])
                if messages:
                    print(f'affinity: variable {len(vars_list)} is log10(a_{var_name}) at 256 values from {var_values[0]} to {var_values[1]}')
            else:
                # Explicit list of values
                vals_list.append(np.array(var_values))
                lims_list.append([min(var_values), max(var_values), len(var_values)])
        else:
            # Scalar, or values already held in an array
            if not hasattr(var_values, '__len__'):
                var_values = [var_values]
            vals_list.append(np.array(var_values))
            lims_list.append([var_values[0], var_values[-1], len(var_values)])

    return {
        'what': what,
        'vars': vars_list,
        'vals': vals_list,
        'lims': lims_list,
        'T': T,
        'P': P,
        'IS': IS
    }
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
def _energy_first(value: Any) -> float:
    """Return *value* itself (scalar) or its first element (sequence) as a float."""
    return float(np.ravel(value)[0])


def _energy_along_axis(values: Any, axis: int, ndim: int) -> np.ndarray:
    """Reshape 1-D *values* so that they vary only along *axis* of an ndim-D grid."""
    shape = [1] * ndim
    shape[axis] = -1
    return np.asarray(values).reshape(shape)


def _energy_frame_logK(frame: pd.DataFrame, mydim: List[int]):
    """Return the 'logK' column of *frame*, reshaped to the grid if it covers it.

    Zeros of the matching shape are returned when the column is missing.
    NaN values are kept so that they propagate to the affinities.
    """
    covers_grid = len(mydim) > 1 and len(frame) == np.prod(mydim)
    if 'logK' in frame.columns:
        logK_vals = frame['logK'].values
        return logK_vals.reshape(mydim) if covers_grid else logK_vals
    return np.zeros(mydim) if covers_grid else np.zeros(len(frame))


def _energy_collect_logK(out: Any, ispecies_order: List, mydim: List[int]) -> Dict:
    """Map each species index to its logK values taken from a subcrt result.

    *out* is either a dict holding a 'species_data' list (one DataFrame per
    entry of *ispecies_order*) or a single DataFrame (attributed to the first
    entry).  Any other structure yields zeros for every species.
    """
    collected = {}
    if isinstance(out, dict) and 'species_data' in out:
        for position, ispec in enumerate(ispecies_order):
            collected[ispec] = _energy_frame_logK(out['species_data'][position], mydim)
    elif isinstance(out, pd.DataFrame):
        collected[ispecies_order[0]] = _energy_frame_logK(out, mydim)
    else:
        for ispec in ispecies_order:
            collected[ispec] = np.zeros(mydim) if len(mydim) > 1 else np.array([0.0])
    return collected


def _energy_single_logK(out: Any):
    """Extract logK from the result of a single-species subcrt call (0.0 if absent)."""
    if isinstance(out, dict) and 'species_data' in out:
        frame = out['species_data'][0]
        if 'logK' in frame.columns:
            return frame['logK'].values
        return np.zeros(len(frame))
    if isinstance(out, pd.DataFrame) and 'logK' in out.columns:
        return out['logK'].values
    return 0.0


def _energy_align_logK(logK: np.ndarray, vars: List[str], mydim: List[int]) -> np.ndarray:
    """Orient a 1-D logK array so that it broadcasts against the full grid.

    logK varies only with the variables handed to subcrt, while logQ varies
    with every variable.  When exactly one of T and P is a variable and its
    length matches, logK is placed on that variable's axis; otherwise the
    array is reshaped if it covers the whole grid, or placed on the first
    axis whose length matches.
    """
    ndim = len(mydim)
    if ndim <= 1 or logK.ndim != 1 or logK.size == 1:
        # Nothing to do: NumPy broadcasting handles these cases directly
        return logK

    n_values = len(logK)
    tp_axes = [axis for axis, name in enumerate(vars) if name in ('T', 'P')]
    if len(tp_axes) == 1 and mydim[tp_axes[0]] == n_values:
        return _energy_along_axis(logK, tp_axes[0], ndim)
    if n_values == np.prod(mydim):
        return logK.reshape(mydim)
    for axis, size in enumerate(mydim):
        if size == n_values:
            return _energy_along_axis(logK, axis, ndim)
    return logK


def energy(what: str, vars: List[str], vals: List, lims: List,
           T: Union[float, np.ndarray] = 298.15,
           P: Union[float, str] = "Psat",
           IS: float = 0,
           sout: Optional[Dict] = None,
           exceed_Ttr: bool = True,
           exceed_rhomin: bool = False,
           basis_df: Optional[pd.DataFrame] = None,
           species_df: Optional[pd.DataFrame] = None,
           messages: bool = True) -> Dict[str, Any]:
    """
    Calculate energy properties over multiple dimensions.

    This is the core calculation function that handles multi-dimensional
    property calculations for basis and formed species.

    Parameters
    ----------
    what : str
        Property to calculate ("A", "logK", "G", "H", "S", "Cp", "V", "E", "kT")
    vars : list of str
        Variable names ('T', 'P', 'IS' or names from the index of basis_df)
    vals : list of arrays
        Variable values, one entry per name in vars
    lims : list of limits
        Variable limits [min, max, npoints]; the npoints entries define the
        shape of the calculation grid
    T : float or array
        Temperature(s) in Kelvin
    P : float or str
        Pressure(s) in bar or "Psat"
    IS : float
        Ionic strength
    sout : dict, optional
        Pre-calculated subcrt results
    exceed_Ttr : bool
        Allow extrapolation beyond transitions
    exceed_rhomin : bool
        Allow below minimum density
    basis_df : pandas.DataFrame, optional
        Basis definition (index: basis species names; columns 'ispecies' and
        'logact').  Obtained from get_basis() when omitted.
    species_df : pandas.DataFrame, optional
        Formed species (columns 'ispecies', 'name', 'logact' and one
        coefficient column per basis species).  Obtained from get_species()
        when omitted.
    messages : bool
        Passed on to subcrt

    Returns
    -------
    dict
        Dictionary with 'sout' (subcrt results) and 'a' (property values,
        keyed by the 'ispecies' value of each formed species).  For
        what == "A", 'sout' is the sout argument passed through unchanged.

    Raises
    ------
    AffinityError
        If what is not one of the supported properties.
    """
    if what not in ('A', 'G', 'H', 'S', 'Cp', 'V', 'E', 'kT', 'logK'):
        raise AffinityError(f"unsupported property '{what}' requested from energy()")

    # Get system data
    if basis_df is None:
        basis_df = get_basis()
    if species_df is None:
        species_df = get_species()

    n_basis = len(basis_df)
    n_species = len(species_df)

    # Determine array dimensions (integers, so they can be used as a shape)
    mydim = [int(lim[2]) for lim in lims] if len(vars) > 0 else [1]
    n_conditions = np.prod(mydim) if len(mydim) > 0 else 1

    if what != 'A':
        # ------------------------------------------------------------------
        # logK and other thermodynamic properties
        # ------------------------------------------------------------------
        if sout is not None:
            sout_data = sout
        else:
            # Species list for subcrt (basis + formed species)
            all_species = basis_df['ispecies'].tolist() + species_df['ispecies'].tolist()

            # T, P, IS for subcrt; variable ones come from vals.
            # T is in Kelvin here and subcrt expects Celsius.
            subcrt_T = (vals[vars.index('T')] if 'T' in vars else T) - 273.15
            subcrt_P = vals[vars.index('P')] if 'P' in vars else P
            subcrt_IS = vals[vars.index('IS')] if 'IS' in vars else IS

            try:
                # With two or more of T, P, IS variable, grid on the first one
                grid_param = None
                if len(vars) > 1:
                    subcrt_vars = [v for v in vars if v in ['T', 'P', 'IS']]
                    if len(subcrt_vars) >= 2:
                        grid_param = subcrt_vars[0]

                sout_result = subcrt(
                    species=all_species,
                    T=subcrt_T,
                    P=subcrt_P,
                    IS=subcrt_IS,
                    property='logK',
                    grid=grid_param,
                    exceed_Ttr=exceed_Ttr,
                    exceed_rhomin=exceed_rhomin,
                    messages=messages,
                    show=False
                )
                sout_data = sout_result.out

            except Exception as e:
                # Best effort: report the failure and continue with NaN data
                warnings.warn(f"subcrt calculation failed: {e}")
                fixed_P = 1.0 if (isinstance(P, str) and P == "Psat") else _energy_first(P)
                sout_data = pd.DataFrame({
                    'T': np.full(n_conditions, _energy_first(T)) - 273.15,
                    'P': np.full(n_conditions, fixed_P),
                    'logK': np.full(n_conditions, np.nan)
                })

        # NOTE(review): values are read from row (n_basis + i) of a DataFrame
        # result; when sout_data is not a DataFrame (for example the dict
        # with 'species_data' handled in the affinity branch) every value is
        # NaN.  Confirm the intended structure against subcrt.
        prop_values = {}
        has_rows = hasattr(sout_data, 'iloc')
        for i in range(n_species):
            species_idx = species_df.iloc[i]['ispecies']
            row = n_basis + i

            if has_rows and len(sout_data) > row and what in sout_data.columns:
                prop_val = sout_data.iloc[row][what]
            else:
                prop_val = np.nan

            # Expand to proper dimensions
            if np.prod(mydim) > 1:
                prop_values[species_idx] = np.full(mydim, prop_val)
            else:
                prop_values[species_idx] = prop_val

        return {
            'sout': sout_data,
            'a': prop_values
        }

    # ----------------------------------------------------------------------
    # Affinities: A/2.303RT = logK - logQ
    # ----------------------------------------------------------------------
    # The affinity branch makes its own batch subcrt call below, so the sout
    # argument is only passed through to the result.
    sout_data = sout
    affinity_values = {}

    basis_names = basis_df.index.tolist()
    basis_ispecies_list = basis_df['ispecies'].tolist()
    species_ispecies_list = species_df['ispecies'].tolist()
    basis_ispecies_set = set(basis_ispecies_list)

    ndim = len(mydim)
    grid_shape = mydim if ndim > 1 else n_conditions

    # Log activity of every basis species.  A variable basis species is laid
    # out along its own axis of the grid (its position in vars), so that it
    # broadcasts correctly whatever the order of the variables; a fixed one
    # is filled from the basis definition.
    logact_basis_arrays = {}
    for j, basis_name in enumerate(basis_names):
        if basis_name in vars:
            axis = vars.index(basis_name)
            values = np.array(vals[axis])
            if ndim > 1:
                values = _energy_along_axis(values, axis, ndim)
            logact_basis_arrays[basis_name] = values
        else:
            try:
                logact_val = float(basis_df.iloc[j]['logact'])
            except (ValueError, TypeError):
                # Non-numeric entry in the basis definition
                logact_val = 0.0
            logact_basis_arrays[basis_name] = np.full(grid_shape, logact_val)

    # logK of the formation reaction of each species from the basis species,
    # obtained from one batch subcrt call for all basis and formed species.
    formation_logK = {}

    # Convert T from Kelvin back to Celsius for subcrt
    T_celsius = T - 273.15

    # Unique species indices, basis species first
    all_species_indices = list(dict.fromkeys(basis_ispecies_list + species_ispecies_list))

    try:
        # Grid parameter for subcrt when there are several variables:
        # T if it is a variable, otherwise P if it is a variable
        grid_param = None
        if len(vars) >= 2:
            if 'T' in vars:
                grid_param = 'T'
            elif 'P' in vars:
                grid_param = 'P'

        # NOTE(review): IS, exceed_Ttr and exceed_rhomin are not forwarded to
        # this call - confirm whether that is intended.
        batch_result = subcrt(all_species_indices, property="logK", T=T_celsius, P=P, grid=grid_param, messages=messages, show=False)

        # Results are keyed by species index rather than by name, so species
        # that share a name (or share it with a basis entry) stay distinct.
        species_logK = _energy_collect_logK(batch_result.out, all_species_indices, mydim)

        for i in range(n_species):
            row = species_df.iloc[i]
            species_idx = row['ispecies']

            if species_idx in basis_ispecies_set:
                # Species is in the basis - formation from basis is trivial
                formation_logK[species_idx] = 0.0
                continue

            # logK_formation = logK_species - sum(coeff_i * logK_basis_i).
            # A new array is built at each step so the per-species arrays
            # held in species_logK are never modified.
            logK_formation_val = species_logK.get(species_idx, 0.0)
            for basis_name, basis_ispec in zip(basis_names, basis_ispecies_list):
                coeff = row[basis_name]
                logK_formation_val = logK_formation_val - coeff * species_logK.get(basis_ispec, 0.0)

            formation_logK[species_idx] = logK_formation_val

    except Exception as e:
        warnings.warn(f"Batch subcrt call failed, falling back to individual calls: {e}")
        # Fallback: one subcrt call per formed species
        for i in range(n_species):
            species_idx = species_df.iloc[i]['ispecies']

            if species_idx in basis_ispecies_set:
                formation_logK[species_idx] = 0.0
                continue

            try:
                species_name = species_df.iloc[i]['name']
                formation_result = subcrt([species_name], [1], T=T_celsius, P=P, messages=messages, show=False)
                formation_logK[species_idx] = _energy_single_logK(getattr(formation_result, 'out', None))
            except Exception as e2:
                warnings.warn(f"Could not get formation logK for species {species_idx}: {e2}")
                formation_logK[species_idx] = 0.0

    # Calculate affinities for each formed species
    for i in range(n_species):
        row = species_df.iloc[i]
        species_idx = row['ispecies']
        logK_formation = formation_logK[species_idx]

        # logQ = +1 * logact_species + sum(-coeff_i * logact_basis_i):
        # the species is the product, the basis species are the reactants
        try:
            species_logact_val = float(row['logact'])
        except (ValueError, TypeError):
            species_logact_val = 0.0

        logQ_arrays = np.full(grid_shape, species_logact_val)
        for basis_name in basis_names:
            logQ_arrays += (-row[basis_name]) * logact_basis_arrays[basis_name]

        # logK varies only with the subcrt variables, logQ with all of them
        if isinstance(logK_formation, np.ndarray) and logK_formation.shape != logQ_arrays.shape:
            logK_formation = _energy_align_logK(logK_formation, vars, mydim)

        affinity_array = logK_formation - logQ_arrays

        # Keep the array structure whenever there are several variables, even
        # with a single grid point, so the dimensionality stays detectable
        if n_conditions == 1 and len(mydim) <= 1:
            # True scalar case: no variables or single variable with 1 point
            affinity_values[species_idx] = affinity_array.item() if hasattr(affinity_array, 'item') else affinity_array
        else:
            affinity_values[species_idx] = affinity_array

    return {
        'sout': sout_data,
        'a': affinity_values
    }
|
|
1251
|
+
|
|
1252
|
+
|
|
1253
|
+
# Names exported by a star-import of this module
__all__ = [
    'affinity',
    'energy_args',
    'energy',
    'AffinityError',
]
|