pychnosz 1.1.4__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pychnosz/__init__.py +129 -0
- pychnosz/biomolecules/__init__.py +29 -0
- pychnosz/biomolecules/ionize_aa.py +197 -0
- pychnosz/biomolecules/proteins.py +595 -0
- pychnosz/core/__init__.py +46 -0
- pychnosz/core/affinity.py +1256 -0
- pychnosz/core/animation.py +593 -0
- pychnosz/core/balance.py +334 -0
- pychnosz/core/basis.py +716 -0
- pychnosz/core/diagram.py +3336 -0
- pychnosz/core/equilibrate.py +813 -0
- pychnosz/core/equilibrium.py +554 -0
- pychnosz/core/info.py +821 -0
- pychnosz/core/retrieve.py +364 -0
- pychnosz/core/speciation.py +580 -0
- pychnosz/core/species.py +599 -0
- pychnosz/core/subcrt.py +1700 -0
- pychnosz/core/thermo.py +593 -0
- pychnosz/core/unicurve.py +1226 -0
- pychnosz/data/__init__.py +11 -0
- pychnosz/data/add_obigt.py +327 -0
- pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
- pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
- pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
- pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
- pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
- pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
- pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
- pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
- pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
- pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
- pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
- pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
- pychnosz/data/extdata/Berman/sympy.R +99 -0
- pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
- pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
- pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
- pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
- pychnosz/data/extdata/OBIGT/AD.csv +25 -0
- pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
- pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
- pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
- pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
- pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
- pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
- pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
- pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
- pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
- pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
- pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
- pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
- pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
- pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
- pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
- pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
- pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
- pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
- pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
- pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
- pychnosz/data/extdata/misc/BZA10.csv +5 -0
- pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
- pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
- pychnosz/data/extdata/misc/LA19_test.csv +7 -0
- pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
- pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
- pychnosz/data/extdata/misc/PM90.csv +7 -0
- pychnosz/data/extdata/misc/RH95.csv +23 -0
- pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
- pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
- pychnosz/data/extdata/misc/SK95.csv +55 -0
- pychnosz/data/extdata/misc/SOJSH.csv +61 -0
- pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
- pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
- pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
- pychnosz/data/extdata/misc/bluered.txt +1000 -0
- pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
- pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
- pychnosz/data/extdata/protein/Cas/download.R +34 -0
- pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
- pychnosz/data/extdata/protein/POLG.csv +12 -0
- pychnosz/data/extdata/protein/TBD+05.csv +393 -0
- pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
- pychnosz/data/extdata/protein/rubisco.csv +28 -0
- pychnosz/data/extdata/protein/rubisco.fasta +239 -0
- pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
- pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
- pychnosz/data/extdata/src/README.txt +5 -0
- pychnosz/data/extdata/taxonomy/names.dmp +215 -0
- pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
- pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
- pychnosz/data/extdata/thermo/buffer.csv +40 -0
- pychnosz/data/extdata/thermo/element.csv +135 -0
- pychnosz/data/extdata/thermo/groups.csv +6 -0
- pychnosz/data/extdata/thermo/opt.csv +2 -0
- pychnosz/data/extdata/thermo/protein.csv +506 -0
- pychnosz/data/extdata/thermo/refs.csv +343 -0
- pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
- pychnosz/data/loader.py +431 -0
- pychnosz/data/mod_obigt.py +322 -0
- pychnosz/data/obigt.py +471 -0
- pychnosz/data/worm.py +228 -0
- pychnosz/fortran/__init__.py +16 -0
- pychnosz/fortran/h2o92.dll +0 -0
- pychnosz/fortran/h2o92_interface.py +527 -0
- pychnosz/geochemistry/__init__.py +21 -0
- pychnosz/geochemistry/minerals.py +514 -0
- pychnosz/geochemistry/redox.py +500 -0
- pychnosz/models/__init__.py +47 -0
- pychnosz/models/archer_wang.py +165 -0
- pychnosz/models/berman.py +309 -0
- pychnosz/models/cgl.py +381 -0
- pychnosz/models/dew.py +997 -0
- pychnosz/models/hkf.py +523 -0
- pychnosz/models/hkf_helpers.py +222 -0
- pychnosz/models/iapws95.py +1113 -0
- pychnosz/models/supcrt92_fortran.py +238 -0
- pychnosz/models/water.py +480 -0
- pychnosz/utils/__init__.py +27 -0
- pychnosz/utils/expression.py +1074 -0
- pychnosz/utils/formula.py +830 -0
- pychnosz/utils/formula_ox.py +227 -0
- pychnosz/utils/reset.py +33 -0
- pychnosz/utils/units.py +259 -0
- pychnosz-1.1.4.dist-info/METADATA +197 -0
- pychnosz-1.1.4.dist-info/RECORD +128 -0
- pychnosz-1.1.4.dist-info/WHEEL +5 -0
- pychnosz-1.1.4.dist-info/licenses/LICENSE.txt +19 -0
- pychnosz-1.1.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,813 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Equilibrate module for calculating equilibrium activities of species.
|
|
3
|
+
|
|
4
|
+
This module provides Python equivalents of the R functions in equilibrate.R:
|
|
5
|
+
- equilibrate(): Calculate equilibrium activities from chemical affinities
|
|
6
|
+
- equil.boltzmann(): Boltzmann distribution method
|
|
7
|
+
- equil.reaction(): Reaction-based equilibration method
|
|
8
|
+
- balance(): Determine balancing coefficients
|
|
9
|
+
- Supporting utilities for species equilibration
|
|
10
|
+
|
|
11
|
+
Author: CHNOSZ Python port
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from typing import Union, List, Optional, Dict, Any, Tuple
|
|
17
|
+
import warnings
|
|
18
|
+
from scipy.optimize import brentq
|
|
19
|
+
|
|
20
|
+
from .thermo import thermo
|
|
21
|
+
from .info import info
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def equilibrate(aout: Dict[str, Any],
                balance: Optional[Union[str, int, List[float]]] = None,
                loga_balance: Optional[Union[float, List[float]]] = None,
                ispecies: Optional[Union[List[int], List[bool]]] = None,
                normalize: Union[bool, List[bool]] = False,
                as_residue: bool = False,
                method: Optional[Union[str, List[str]]] = None,
                tol: float = np.finfo(float).eps ** 0.25,
                messages: bool = True) -> Dict[str, Any]:
    """
    Calculate equilibrium activities of species from affinities.

    This function calculates the equilibrium activities of species in
    (metastable) equilibrium from the affinities of their formation reactions
    from basis species at given activities.

    Parameters
    ----------
    aout : dict
        Output from affinity() containing chemical affinities.
        The dict passed in is not modified; all work is done on a shallow copy.
    balance : str, int, or list of float, optional
        Balancing method:
        - None: Autoselect using which_balance()
        - str: Name of basis species to balance on
        - "length": Balance on protein length (for proteins)
        - "volume": Balance on standard-state volume
        - 1: Balance on one mole of species (formula units)
        - list: User-defined balancing coefficients
    loga_balance : float or list of float, optional
        Logarithm of total activity of the balancing basis species
        If None, calculated from species initial activities and n.balance
    ispecies : list of int or list of bool, optional
        Indices or boolean mask of species to include in equilibration
        Default: all species except those with state "cr" (crystalline)
    normalize : bool or list of bool, default False
        Normalize formulas by balancing coefficients?
    as_residue : bool, default False
        Use residue basis for proteins?
    method : str or list of str, optional
        Equilibration method:
        - "boltzmann": Boltzmann distribution (for n.balance = 1)
        - "reaction": Reaction-based equilibration (general method)
        If None, chooses "boltzmann" if all n.balance == 1, else "reaction"
    tol : float, default np.finfo(float).eps**0.25
        Tolerance for root-finding in reaction method
    messages : bool, default True
        Whether to print informational messages

    Returns
    -------
    dict
        Dictionary containing all aout contents plus:
        - balance : str or list, Balancing description
        - m_balance : list, Molar formula divisors
        - n_balance : list, Balancing coefficients
        - loga_balance : float or array, Log activity of balanced quantity
        - Astar : list of arrays, Normalized affinities
        - loga_equil : list of arrays, Equilibrium log activities

    Raises
    ------
    NotImplementedError
        If ``aout`` is the output of mosaic().
    ValueError
        If ``ispecies`` is empty, all selected species have NA affinities,
        ``loga_balance`` has the wrong length, negative balancing coefficients
        are combined with normalization, or ``method`` is unknown.

    Examples
    --------
    >>> import pychnosz
    >>> pychnosz.basis("CHNOS")
    >>> pychnosz.basis("NH3", -2)
    >>> pychnosz.species(["alanine", "glycine", "serine"])
    >>> a = pychnosz.affinity(NH3=[-80, 60], T=55, P=2000)
    >>> e = pychnosz.equilibrate(a, balance="CO2")

    Notes
    -----
    This follows the R CHNOSZ equilibrate() function.
    - Handles both Boltzmann and reaction-based equilibration
    - Supports normalization and residue basis for proteins
    - Handles crystalline species via predominance diagrams
    """

    # Handle mosaic output (not implemented yet, but keep structure)
    if aout.get('fun') == 'mosaic':
        raise NotImplementedError("mosaic equilibration not yet implemented")

    # R passes arguments by value; Python does not. Work on a shallow copy so
    # that re-assigning 'values' and 'species' below never alters the caller's
    # affinity() output.
    aout = aout.copy()

    # affinity() returns values as a dict with ispecies as keys;
    # convert it to a list ordered like the species dataframe
    if isinstance(aout['values'], dict):
        values_by_id = aout['values']
        values_list = []
        for i in range(len(aout['species'])):
            species_idx = aout['species']['ispecies'].iloc[i]
            if species_idx in values_by_id:
                values_list.append(values_by_id[species_idx])
            else:
                # Species not in values dict - use NaN array
                values_list.append(np.array([np.nan]))
        aout['values'] = values_list

    # Number of possible species
    nspecies = len(aout['values'])

    # Get the balancing coefficients
    bout = _balance(aout, balance, messages)
    n_balance_orig = bout['n_balance'].copy()
    n_balance = bout['n_balance'].copy()
    balance = bout['balance']

    # If solids (cr) species are present, find them on a predominance diagram
    iscr = ['cr' in str(state) for state in aout['species']['state']]
    ncr = sum(iscr)

    # Set default ispecies to exclude cr species (matching R default)
    if ispecies is None:
        ispecies = [not is_cr for is_cr in iscr]

    if ncr > 0:
        # Import diagram here to avoid circular imports
        from .diagram import diagram
        dout = diagram(aout, balance=balance, normalize=normalize,
                       as_residue=as_residue, plot_it=False, limit_water=False,
                       messages=messages)

    if ncr == nspecies:
        # We get here if there are only solids
        m_balance = None
        Astar = None
        loga_equil = []
        for values in aout['values']:
            la = np.array(values, copy=True)
            la[:] = np.nan
            loga_equil.append(la)
    else:
        # We get here if there are any aqueous species
        # Take selected species in 'ispecies'
        if len(ispecies) == 0:
            raise ValueError("the length of ispecies is zero")

        # Convert a boolean mask (Python or NumPy) to indices if needed
        ispecies = _ispecies_to_indices(ispecies)

        # Take out species that have NA affinities
        ina = [all(np.isnan(np.array(x).flatten())) for x in aout['values']]
        ispecies = [i for i in ispecies if not ina[i]]

        if len(ispecies) == 0:
            raise ValueError("all species have NA affinities")

        if ispecies != list(range(nspecies)):
            if messages:
                print(f"equilibrate: using {len(ispecies)} of {nspecies} species")
            aout['species'] = aout['species'].iloc[ispecies].reset_index(drop=True)
            aout['values'] = [aout['values'][i] for i in ispecies]
            n_balance = [n_balance[i] for i in ispecies]

        # Number of species that are left
        nspecies = len(aout['values'])

        # Say what the balancing coefficients are
        if len(n_balance) < 100:
            if messages:
                print(f"equilibrate: n.balance is {', '.join(map(str, n_balance))}")

        # Logarithm of total activity of the balancing basis species
        if loga_balance is None:
            # Sum up the activities, then take absolute value
            # in case n.balance is negative
            logact = np.array([aout['species']['logact'].iloc[i]
                               for i in range(len(aout['species']))])
            sumact = abs(sum(10**logact * n_balance))
            loga_balance = np.log10(sumact)

        # Make loga.balance the same length as the values of affinity
        if isinstance(loga_balance, (int, float)):
            loga_balance = float(loga_balance)
        else:
            loga_balance = np.array(loga_balance).flatten()

        nvalues = len(np.array(aout['values'][0]).flatten())

        if isinstance(loga_balance, float) or len(np.atleast_1d(loga_balance)) == 1:
            # We have a constant loga.balance
            if isinstance(loga_balance, np.ndarray):
                loga_balance = float(loga_balance[0])
            if messages:
                print(f"equilibrate: loga.balance is {loga_balance}")
            loga_balance = np.full(nvalues, loga_balance)
        else:
            # We are using a variable loga.balance (supplied by the user)
            if len(loga_balance) != nvalues:
                raise ValueError(f"length of loga.balance ({len(loga_balance)}) doesn't match "
                                 f"the affinity values ({nvalues})")
            if messages:
                print(f"equilibrate: loga.balance has same length as affinity values ({len(loga_balance)})")

        # Normalize the molar formula by the balance coefficients
        m_balance = n_balance.copy()

        # Handle normalize parameter (note: diagram() above received the
        # caller's original value, before expansion to a per-species list)
        if isinstance(normalize, bool):
            normalize = [normalize] * nspecies
        elif not isinstance(normalize, list):
            normalize = list(normalize)

        if any(normalize) or as_residue:
            if any(n < 0 for n in n_balance):
                raise ValueError("one or more negative balancing coefficients prohibit using normalized molar formulas")

            for i in range(nspecies):
                if normalize[i] or as_residue:
                    n_balance[i] = 1

            if as_residue:
                if messages:
                    print("equilibrate: using 'as.residue' for molar formulas")
            else:
                if messages:
                    print("equilibrate: using 'normalize' for molar formulas")

            # Set the formula divisor (m.balance) to 1 for species whose formulas are *not* normalized
            m_balance = [m_balance[i] if (normalize[i] or as_residue) else 1
                         for i in range(nspecies)]
        else:
            m_balance = [1] * nspecies

        # Astar: the affinities/2.303RT of formation reactions with
        # formed species in their standard-state activities
        Astar = []
        for i in range(nspecies):
            # 'starve' the affinity of the activity of the species,
            # and normalize the value by the molar ratio
            logact_i = aout['species']['logact'].iloc[i]
            Astar.append((np.array(aout['values'][i]) + logact_i) / m_balance[i])

        # Choose a method and compute the equilibrium activities of species
        if method is None:
            if all(n == 1 for n in n_balance):
                method = ["boltzmann"]
            else:
                method = ["reaction"]
        elif isinstance(method, str):
            method = [method]

        if messages:
            print(f"equilibrate: using {method[0]} method")

        if method[0] == "boltzmann":
            loga_equil = equil_boltzmann(Astar, n_balance, loga_balance)
        elif method[0] == "reaction":
            loga_equil = equil_reaction(Astar, n_balance, loga_balance, tol)
        else:
            raise ValueError(f"unknown method: {method[0]}")

        # If we normalized the formulas, get back to activities of species
        if any(normalize) and not as_residue:
            loga_equil = [loga_equil[i] - np.log10(m_balance[i])
                          for i in range(nspecies)]

    # Process cr species
    if ncr > 0:
        # cr species were excluded from equilibrium calculation,
        # so get values back to original lengths
        norig = len(dout['values'])
        n_balance = n_balance_orig

        # Ensure ispecies is in index form (not boolean); when
        # ncr == nspecies it was never converted above
        ispecies = _ispecies_to_indices(ispecies)

        # Match indices back to original: imatch[i] is the position of
        # original species i among the equilibrated species, or None
        position = {}
        for pos, idx in enumerate(ispecies):
            position.setdefault(idx, pos)  # keep the first occurrence
        imatch = [position.get(i) for i in range(norig)]

        # m_balance and Astar are None when only solids are present
        if m_balance is not None:
            m_balance = [m_balance[k] if k is not None else None for k in imatch]
        if Astar is not None:
            Astar = [Astar[k] if k is not None else None for k in imatch]

        # Species that were not equilibrated get -999 everywhere (set to
        # logact below where a cr species is predominant). The shape is taken
        # from the first result; np.full avoids inheriting NaN values.
        template_shape = loga_equil[0].shape
        loga_equil = [loga_equil[k] if k is not None
                      else np.full(template_shape, -999.0)
                      for k in imatch]
        aout['species'] = dout['species']
        aout['values'] = dout['values']

        # Find the grid points where any cr species is predominant
        icr = [i for i in range(len(dout['species']))
               if 'cr' in str(dout['species']['state'].iloc[i])]
        icr_set = set(icr)

        # predominant uses 1-based indexing (1, 2, 3, ...), hence the "+ 1"
        predominant = dout['predominant']
        iscr_mask = np.zeros_like(predominant, dtype=bool)
        for icr_idx in icr:
            iscr_mask |= (predominant == icr_idx + 1)

        # At those grid points, make the aqueous species' activities practically zero
        for i in range(norig):
            if i not in icr_set:
                loga_equil[i] = np.array(loga_equil[i], copy=True)
                loga_equil[i][iscr_mask] = -999

        # Where a cr species predominates, set its loga_equil to its own
        # logact from the species table
        for i in icr:
            ispredom = (predominant == i + 1)
            loga_equil[i] = np.array(loga_equil[i], copy=True)
            loga_equil[i][ispredom] = dout['species']['logact'].iloc[i]

    # Put together the output (aout is already a private copy)
    out = aout
    out['fun'] = 'equilibrate'  # Mark this as equilibrate output
    out['balance'] = balance
    out['m_balance'] = m_balance
    out['n_balance'] = n_balance
    out['loga_balance'] = loga_balance
    out['Astar'] = Astar
    out['loga_equil'] = loga_equil

    return out


def _ispecies_to_indices(ispecies) -> list:
    """Return *ispecies* as a list of positions; a boolean mask selects its True positions."""
    items = list(ispecies)
    if items and isinstance(items[0], (bool, np.bool_)):
        return [pos for pos, keep in enumerate(items) if keep]
    return items
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def equil_boltzmann(Astar: List[np.ndarray],
                    n_balance: List[float],
                    loga_balance: np.ndarray) -> List[np.ndarray]:
    """
    Calculate equilibrium activities using Boltzmann distribution.

    This method works using the Boltzmann distribution:
        A/At = e^(Astar/n.balance) / sum(e^(Astar/n.balance))

    where A is activity of the ith residue and At is total activity of residues.
    Because Astar is in units of 2.303RT, this is evaluated in base 10:
        loga_i = loga.balance + Astar_i - log10(sum_j(10^Astar_j))

    Advantages:
    - Loops over species only - much faster than equil.reaction
    - No root finding - those games might fail at times

    Disadvantage:
    - Only works for per-residue reactions (n.balance = 1)

    The sum is evaluated after subtracting the per-condition maximum of Astar,
    so very large or very small affinities do not overflow/underflow to NaN.

    Parameters
    ----------
    Astar : list of ndarray
        Normalized affinities for each species (all of the same shape)
    n_balance : list of float
        Balancing coefficients (must all be 1)
    loga_balance : ndarray
        Log activity of the balanced quantity, one value per condition

    Returns
    -------
    list of ndarray
        Equilibrium log activities for each species, each with the shape
        of the first element of Astar

    Raises
    ------
    ValueError
        If any balancing coefficient differs from 1.
    """

    if not all(n == 1 for n in n_balance):
        raise ValueError("won't run equil.boltzmann for balance != 1")

    arrays = [np.asarray(a, dtype=float) for a in Astar]

    # Remember the dimensions of elements of Astar; a 0-d array has shape ()
    # and is restored from its single flattened value by reshape(())
    Astardim = arrays[0].shape

    # One row per species, one column per condition
    stacked = np.vstack([a.ravel() for a in arrays])
    loga_balance_vec = np.asarray(loga_balance, dtype=float).ravel()

    # log10(sum_j 10^Astar_j) = peak + log10(sum_j 10^(Astar_j - peak));
    # the shift keeps every exponent <= 0 so 10**x cannot overflow.
    # All n.balance are 1, so they drop out of numerator and denominator.
    peak = np.max(stacked, axis=0)
    shifted = stacked - peak
    log_total = np.log10(np.sum(10.0 ** shifted, axis=0))

    # log10(A_i / At) = (Astar_i - peak) - log10(sum_j 10^(Astar_j - peak))
    loga = loga_balance_vec + shifted - log_total

    # Restore dimensions
    return [row.reshape(Astardim) for row in loga]
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def equil_reaction(Astar: List[np.ndarray],
|
|
431
|
+
n_balance: List[float],
|
|
432
|
+
loga_balance: np.ndarray,
|
|
433
|
+
tol: float = np.finfo(float).eps ** 0.25) -> List[np.ndarray]:
|
|
434
|
+
"""
|
|
435
|
+
Calculate equilibrium activities using reaction-based method.
|
|
436
|
+
|
|
437
|
+
To turn the affinities/RT (A) of formation reactions into
|
|
438
|
+
logactivities of species (logact(things)) at metastable equilibrium.
|
|
439
|
+
|
|
440
|
+
For any reaction stuff = thing,
|
|
441
|
+
A = logK - logQ
|
|
442
|
+
= logK - logact(thing) + logact(stuff)
|
|
443
|
+
given Astar = A + logact(thing),
|
|
444
|
+
given Abar = A / n.balance,
|
|
445
|
+
logact(thing) = Astar - Abar * n.balance [2]
|
|
446
|
+
|
|
447
|
+
where n.balance is the number of the balanced quantity
|
|
448
|
+
(conserved component) in each species.
|
|
449
|
+
|
|
450
|
+
Equilibrium values of logact(thing) satisfy:
|
|
451
|
+
1) Abar is equal for all species
|
|
452
|
+
2) log10(sum of (10^logact(thing) * n.balance)) = loga.balance [1]
|
|
453
|
+
|
|
454
|
+
Because of the logarithms, we can't solve the equations directly.
|
|
455
|
+
Instead, use root-finding to compute Abar satisfying [1].
|
|
456
|
+
|
|
457
|
+
Parameters
|
|
458
|
+
----------
|
|
459
|
+
Astar : list of ndarray
|
|
460
|
+
Normalized affinities for each species
|
|
461
|
+
n_balance : list of float
|
|
462
|
+
Balancing coefficients
|
|
463
|
+
loga_balance : ndarray
|
|
464
|
+
Log activity of the balanced quantity
|
|
465
|
+
tol : float
|
|
466
|
+
Tolerance for root-finding
|
|
467
|
+
|
|
468
|
+
Returns
|
|
469
|
+
-------
|
|
470
|
+
list of ndarray
|
|
471
|
+
Equilibrium log activities for each species
|
|
472
|
+
"""
|
|
473
|
+
|
|
474
|
+
# We can't run on one species
|
|
475
|
+
if len(Astar) == 1:
|
|
476
|
+
raise ValueError("at least two species needed for reaction-based equilibration")
|
|
477
|
+
|
|
478
|
+
# Remember the dimensions and names
|
|
479
|
+
Adim = Astar[0].shape if Astar[0].ndim > 0 else None
|
|
480
|
+
|
|
481
|
+
# Make a matrix out of the list of Astar
|
|
482
|
+
Astar_array = np.array([a.flatten() for a in Astar]).T
|
|
483
|
+
|
|
484
|
+
if len(loga_balance) != Astar_array.shape[0]:
|
|
485
|
+
raise ValueError("length of loga.balance must be equal to the number of conditions for affinity()")
|
|
486
|
+
|
|
487
|
+
# Function definitions:
|
|
488
|
+
def logafun(logact):
|
|
489
|
+
"""Calculate log of activity of balanced quantity from logact(thing) of all species [1]"""
|
|
490
|
+
# Use log-sum-exp trick for numerical stability
|
|
491
|
+
# log10(sum(10^x_i * n_i)) = log10(sum(n_i * 10^x_i))
|
|
492
|
+
# = max(x) + log10(sum(n_i * 10^(x_i - max(x))))
|
|
493
|
+
# This prevents overflow when x_i values are very large or very small
|
|
494
|
+
|
|
495
|
+
logact = np.asarray(logact)
|
|
496
|
+
n_balance_arr = np.asarray(n_balance)
|
|
497
|
+
|
|
498
|
+
# Find maximum for numerical stability
|
|
499
|
+
max_logact = np.max(logact)
|
|
500
|
+
|
|
501
|
+
# Compute sum in log space with shifted values
|
|
502
|
+
# sum(n_i * 10^x_i) = 10^max(x) * sum(n_i * 10^(x_i - max(x)))
|
|
503
|
+
shifted = logact - max_logact
|
|
504
|
+
sum_shifted = np.sum(n_balance_arr * 10**shifted)
|
|
505
|
+
|
|
506
|
+
# Convert back: log10(10^max(x) * sum(...)) = max(x) + log10(sum(...))
|
|
507
|
+
return max_logact + np.log10(sum_shifted)
|
|
508
|
+
|
|
509
|
+
def logactfun(Abar, i):
|
|
510
|
+
"""Calculate logact(thing) from Abar for the ith condition [2]"""
|
|
511
|
+
return Astar_array[i, :] - Abar * np.array(n_balance)
|
|
512
|
+
|
|
513
|
+
def logadiff(Abar, i):
|
|
514
|
+
"""Calculate difference between logafun and loga.balance for the ith condition"""
|
|
515
|
+
return loga_balance[i] - logafun(logactfun(Abar, i))
|
|
516
|
+
|
|
517
|
+
def Abarrange(i):
|
|
518
|
+
"""Calculate a range of Abar that gives negative and positive values of logadiff for the ith condition"""
|
|
519
|
+
# Starting guess of Abar (min/max) from range of Astar / n.balance
|
|
520
|
+
Abar_range = [
|
|
521
|
+
np.min(Astar_array[i, :] / n_balance),
|
|
522
|
+
np.max(Astar_array[i, :] / n_balance)
|
|
523
|
+
]
|
|
524
|
+
|
|
525
|
+
# diff(Abar.range) can't be 0 (dlogadiff.dAbar becomes NaN)
|
|
526
|
+
if Abar_range[1] - Abar_range[0] == 0:
|
|
527
|
+
Abar_range[0] -= 0.1
|
|
528
|
+
Abar_range[1] += 0.1
|
|
529
|
+
|
|
530
|
+
# The range of logadiff
|
|
531
|
+
logadiff_min = logadiff(Abar_range[0], i)
|
|
532
|
+
logadiff_max = logadiff(Abar_range[1], i)
|
|
533
|
+
|
|
534
|
+
# We're out of luck if they're both infinite
|
|
535
|
+
if np.isinf(logadiff_min) and np.isinf(logadiff_max):
|
|
536
|
+
raise ValueError("FIXME: there are no initial guesses for Abar that give "
|
|
537
|
+
"finite values of the differences in logarithm of activity "
|
|
538
|
+
"of the conserved component")
|
|
539
|
+
|
|
540
|
+
# If one of them is infinite we might have a chance
|
|
541
|
+
if np.isinf(logadiff_min):
|
|
542
|
+
# Decrease the Abar range by increasing the minimum
|
|
543
|
+
Abar_range[0] = Abar_range[0] + 0.99 * (Abar_range[1] - Abar_range[0])
|
|
544
|
+
logadiff_min = logadiff(Abar_range[0], i)
|
|
545
|
+
if np.isinf(logadiff_min):
|
|
546
|
+
raise ValueError("FIXME: the second initial guess for Abar.min failed")
|
|
547
|
+
|
|
548
|
+
if np.isinf(logadiff_max):
|
|
549
|
+
# Decrease the Abar range by decreasing the maximum
|
|
550
|
+
Abar_range[1] = Abar_range[1] - 0.99 * (Abar_range[1] - Abar_range[0])
|
|
551
|
+
logadiff_max = logadiff(Abar_range[1], i)
|
|
552
|
+
if np.isinf(logadiff_max):
|
|
553
|
+
raise ValueError("FIXME: the second initial guess for Abar.max failed")
|
|
554
|
+
|
|
555
|
+
iter_count = 0
|
|
556
|
+
while logadiff_min > 0 or logadiff_max < 0:
|
|
557
|
+
# The change of logadiff with Abar
|
|
558
|
+
# It's a weighted mean of the n.balance
|
|
559
|
+
dlogadiff_dAbar = (logadiff_max - logadiff_min) / (Abar_range[1] - Abar_range[0])
|
|
560
|
+
|
|
561
|
+
# Change Abar to center logadiff (min/max) on zero
|
|
562
|
+
logadiff_mean = (logadiff_min + logadiff_max) / 2
|
|
563
|
+
Abar_range[0] -= logadiff_mean / dlogadiff_dAbar
|
|
564
|
+
Abar_range[1] -= logadiff_mean / dlogadiff_dAbar
|
|
565
|
+
|
|
566
|
+
# One iteration is enough for the examples in the package
|
|
567
|
+
# but there might be a case where the range of logadiff doesn't cross zero
|
|
568
|
+
logadiff_min = logadiff(Abar_range[0], i)
|
|
569
|
+
logadiff_max = logadiff(Abar_range[1], i)
|
|
570
|
+
iter_count += 1
|
|
571
|
+
|
|
572
|
+
if iter_count > 5:
|
|
573
|
+
raise ValueError("FIXME: we seem to be stuck! This function (Abarrange() in "
|
|
574
|
+
"equil.reaction()) can't find a range of Abar such that the differences "
|
|
575
|
+
"in logarithm of activity of the conserved component cross zero")
|
|
576
|
+
|
|
577
|
+
return Abar_range
|
|
578
|
+
|
|
579
|
+
def Abarfun(i):
    """Solve for the equilibrium Abar of the ith condition.

    Abarrange(i) supplies a two-element bracket over which logadiff
    changes sign; brentq then locates the root of logadiff inside that
    bracket to an absolute tolerance of ``tol``.
    """
    # Bracket [lower, upper] where logadiff crosses zero
    bracket = Abarrange(i)

    # Root-finding step (brentq plays the role of R's uniroot)
    return brentq(logadiff, bracket[0], bracket[1], args=(i,), xtol=tol)
|
|
587
|
+
|
|
588
|
+
# Calculate the logact(thing) for each condition
|
|
589
|
+
logact = []
|
|
590
|
+
for i in range(Astar_array.shape[0]):
|
|
591
|
+
# Get the equilibrium Abar for each condition
|
|
592
|
+
Abar = Abarfun(i)
|
|
593
|
+
logact.append(logactfun(Abar, i))
|
|
594
|
+
|
|
595
|
+
# Restore the dimensions
|
|
596
|
+
logact = np.array(logact)
|
|
597
|
+
|
|
598
|
+
# Convert back to list of arrays with original dimensions
|
|
599
|
+
result = []
|
|
600
|
+
for i in range(logact.shape[1]):
|
|
601
|
+
thisla = logact[:, i]
|
|
602
|
+
if Adim is not None:
|
|
603
|
+
thisla = thisla.reshape(Adim)
|
|
604
|
+
result.append(thisla)
|
|
605
|
+
|
|
606
|
+
return result
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
def _balance(aout: Dict[str, Any],
|
|
610
|
+
balance: Optional[Union[str, int, List[float]]] = None,
|
|
611
|
+
messages: bool = True) -> Dict[str, Any]:
|
|
612
|
+
"""
|
|
613
|
+
Return balancing coefficients and description.
|
|
614
|
+
|
|
615
|
+
Generate n.balance from user-given or automatically identified basis species.
|
|
616
|
+
|
|
617
|
+
Parameters
|
|
618
|
+
----------
|
|
619
|
+
aout : dict
|
|
620
|
+
Output from affinity()
|
|
621
|
+
balance : str, int, or list of float, optional
|
|
622
|
+
Balance specification:
|
|
623
|
+
- None: autoselect using which_balance
|
|
624
|
+
- name of basis species: balanced on this basis species
|
|
625
|
+
- "length": balanced on sequence length of proteins
|
|
626
|
+
- "volume": standard-state volume listed in thermo()$OBIGT
|
|
627
|
+
- 1: balanced on one mole of species (formula units)
|
|
628
|
+
- numeric vector: user-defined n.balance
|
|
629
|
+
|
|
630
|
+
Returns
|
|
631
|
+
-------
|
|
632
|
+
dict
|
|
633
|
+
Dictionary with keys:
|
|
634
|
+
- n_balance : list, Balancing coefficients
|
|
635
|
+
- balance : str or list, Balancing description
|
|
636
|
+
"""
|
|
637
|
+
|
|
638
|
+
# The index of the basis species that might be balanced
|
|
639
|
+
ibalance = None
|
|
640
|
+
|
|
641
|
+
# Deal with proteins
|
|
642
|
+
isprotein = ['_' in str(name) for name in aout['species']['name']]
|
|
643
|
+
if balance is None and all(isprotein):
|
|
644
|
+
balance = "length"
|
|
645
|
+
|
|
646
|
+
# Try to automatically find a balance
|
|
647
|
+
if balance is None:
|
|
648
|
+
ibalance = which_balance(aout['species'])
|
|
649
|
+
# No shared basis species and balance not specified by user - an error
|
|
650
|
+
if ibalance is None or len(ibalance) == 0:
|
|
651
|
+
raise ValueError("no basis species is present in all formation reactions")
|
|
652
|
+
|
|
653
|
+
# Change "1" to 1 (numeric)
|
|
654
|
+
if balance == "1":
|
|
655
|
+
balance = 1
|
|
656
|
+
|
|
657
|
+
if isinstance(balance, (int, float, list, np.ndarray)):
|
|
658
|
+
# A numeric vector
|
|
659
|
+
if isinstance(balance, (int, float)):
|
|
660
|
+
balance = [balance]
|
|
661
|
+
n_balance = list(balance) * (len(aout['values']) // len(balance) + 1)
|
|
662
|
+
n_balance = n_balance[:len(aout['values'])]
|
|
663
|
+
|
|
664
|
+
msgtxt = f"balance: on supplied numeric argument ({','.join(map(str, balance))})"
|
|
665
|
+
if balance == [1]:
|
|
666
|
+
msgtxt = f"{msgtxt} [1 means balance on formula units]"
|
|
667
|
+
if messages:
|
|
668
|
+
print(msgtxt)
|
|
669
|
+
else:
|
|
670
|
+
# "length" for balancing on protein length
|
|
671
|
+
if balance == "length":
|
|
672
|
+
if not all(isprotein):
|
|
673
|
+
raise ValueError("'length' was the requested balance, but some species are not proteins")
|
|
674
|
+
n_balance = [protein_length(name) for name in aout['species']['name']]
|
|
675
|
+
if messages:
|
|
676
|
+
print("balance: on protein length")
|
|
677
|
+
elif balance == "volume":
|
|
678
|
+
ispecies_list = aout['species']['ispecies'].tolist()
|
|
679
|
+
volumes = info(ispecies_list, check_it=False, messages=messages)['V']
|
|
680
|
+
n_balance = volumes.tolist()
|
|
681
|
+
if messages:
|
|
682
|
+
print("balance: on volume")
|
|
683
|
+
else:
|
|
684
|
+
# Is the balance the name of a basis species?
|
|
685
|
+
if ibalance is None or len(ibalance) == 0:
|
|
686
|
+
# Get basis rownames
|
|
687
|
+
basis_names = list(aout['basis'].index)
|
|
688
|
+
try:
|
|
689
|
+
ibalance = [basis_names.index(balance)]
|
|
690
|
+
except ValueError:
|
|
691
|
+
raise ValueError(f"basis species ({balance}) not available to balance reactions")
|
|
692
|
+
|
|
693
|
+
# The name of the basis species (need this if we got ibalance from which_balance, above)
|
|
694
|
+
balance = list(aout['species'].columns)[ibalance[0]]
|
|
695
|
+
if messages:
|
|
696
|
+
print(f"balance: on moles of {balance} in formation reactions")
|
|
697
|
+
|
|
698
|
+
# The balancing coefficients
|
|
699
|
+
n_balance = aout['species'].iloc[:, ibalance[0]].tolist()
|
|
700
|
+
|
|
701
|
+
# We check that all formation reactions contain this basis species
|
|
702
|
+
if any(n == 0 for n in n_balance):
|
|
703
|
+
raise ValueError(f"some species have no {balance} in the formation reaction")
|
|
704
|
+
|
|
705
|
+
return {'n_balance': n_balance, 'balance': balance}
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
def which_balance(species: pd.DataFrame) -> List[int]:
    """
    Locate the first basis-species column that is non-zero for every species.

    Such a basis species takes part in every formation reaction and can
    therefore be used to balance the system.

    Parameters
    ----------
    species : pd.DataFrame
        Species dataframe from affinity output. The last 4 columns are
        treated as metadata and are not searched.

    Returns
    -------
    list of int
        A one-element list with the positional index of the first
        qualifying column, or an empty list when no column qualifies.
    """
    # Everything except the trailing 4 metadata columns is a basis species
    n_basis_cols = len(species.columns) - 4

    # Only the first match is reported (as in the R version)
    first_match = next(
        (col for col in range(n_basis_cols)
         if all(value != 0 for value in species.iloc[:, col])),
        None,
    )
    return [] if first_match is None else [first_match]
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def protein_length(name: Union[str, List[str]]) -> Union[int, List[int]]:
    """
    Report the sequence length of one or more proteins.

    NOTE: no protein database lookup is performed here; every valid
    name currently yields the fixed placeholder value 100.

    Parameters
    ----------
    name : str or list of str
        Protein name(s) (with underscore separator)

    Returns
    -------
    int or list of int
        Sequence length(s); a list input gives a list of the same length

    Raises
    ------
    ValueError
        If a protein name does not contain an underscore
    """
    # Multiple proteins: handle each entry in turn
    if not isinstance(name, str):
        return [protein_length(entry) for entry in name]

    # Single protein: the name must contain an underscore
    if '_' in name:
        # Placeholder - in R this would look up the actual sequence length
        return 100

    raise ValueError(f"protein name '{name}' does not contain underscore")
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
def moles(eout: Dict[str, Any]) -> Dict[str, np.ndarray]:
    """
    Calculate total moles of elements from equilibrate output.

    Activities (10**loga_equil) of the species are first summed into moles
    of each basis species using the formation-reaction coefficients, and
    those are then summed into moles of each element using the basis
    species formulas.

    Parameters
    ----------
    eout : dict
        Output from equilibrate(). Reads ``eout['loga_equil']`` (one entry
        per species), ``eout['species']`` (row i, column j holds the
        coefficient of basis species j in species i) and ``eout['basis']``
        (row i, column j holds the coefficient of element j in basis
        species i; the leading column names are the element names).

    Returns
    -------
    dict
        Dictionary with element names as keys and mole arrays as values
    """
    species = eout['species']
    basis = eout['basis']
    nbasis = len(basis)

    # Exponentiate loga.equil to get activities. Work in floating point:
    # integer arrays raised to negative integer powers are an error in NumPy.
    act = [np.power(10.0, np.asarray(x, dtype=float)) for x in eout['loga_equil']]

    # Accumulators for moles of basis species. Use true zeros rather than
    # act[0] * 0, which would become NaN wherever act[0] is infinite.
    nbasis_list = [np.zeros_like(act[0]) for _ in range(nbasis)]

    # Loop over species, then over basis species
    for i in range(len(species)):
        for j in range(nbasis):
            # The coefficient of this basis species in the formation reaction of this species
            n = species.iloc[i, j]
            nbasis_list[j] = nbasis_list[j] + act[i] * n

    # Accumulators for moles of elements (same count as basis species)
    nelem = [np.zeros_like(act[0]) for _ in range(nbasis)]

    # Loop over basis species, then over elements
    for i in range(nbasis):
        for j in range(nbasis):
            # The coefficient of this element in the formula of this basis species
            n = basis.iloc[i, j]
            nelem[j] = nelem[j] + nbasis_list[i] * n

    # Label the totals with the element names (leading columns of basis)
    element_names = list(basis.columns)[:nbasis]
    return dict(zip(element_names, nelem))
|