pychnosz 1.1.4__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. pychnosz/__init__.py +129 -0
  2. pychnosz/biomolecules/__init__.py +29 -0
  3. pychnosz/biomolecules/ionize_aa.py +197 -0
  4. pychnosz/biomolecules/proteins.py +595 -0
  5. pychnosz/core/__init__.py +46 -0
  6. pychnosz/core/affinity.py +1256 -0
  7. pychnosz/core/animation.py +593 -0
  8. pychnosz/core/balance.py +334 -0
  9. pychnosz/core/basis.py +716 -0
  10. pychnosz/core/diagram.py +3336 -0
  11. pychnosz/core/equilibrate.py +813 -0
  12. pychnosz/core/equilibrium.py +554 -0
  13. pychnosz/core/info.py +821 -0
  14. pychnosz/core/retrieve.py +364 -0
  15. pychnosz/core/speciation.py +580 -0
  16. pychnosz/core/species.py +599 -0
  17. pychnosz/core/subcrt.py +1700 -0
  18. pychnosz/core/thermo.py +593 -0
  19. pychnosz/core/unicurve.py +1226 -0
  20. pychnosz/data/__init__.py +11 -0
  21. pychnosz/data/add_obigt.py +327 -0
  22. pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
  23. pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
  24. pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
  25. pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
  26. pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
  27. pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
  28. pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
  29. pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
  30. pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
  31. pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
  32. pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
  33. pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
  34. pychnosz/data/extdata/Berman/sympy.R +99 -0
  35. pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
  36. pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
  37. pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
  38. pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
  39. pychnosz/data/extdata/OBIGT/AD.csv +25 -0
  40. pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
  41. pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
  42. pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
  43. pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
  44. pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
  45. pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
  46. pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
  47. pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
  48. pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
  49. pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
  50. pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
  51. pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
  52. pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
  53. pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
  54. pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
  55. pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
  56. pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
  57. pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
  58. pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
  59. pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
  60. pychnosz/data/extdata/misc/BZA10.csv +5 -0
  61. pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
  62. pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
  63. pychnosz/data/extdata/misc/LA19_test.csv +7 -0
  64. pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
  65. pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
  66. pychnosz/data/extdata/misc/PM90.csv +7 -0
  67. pychnosz/data/extdata/misc/RH95.csv +23 -0
  68. pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
  69. pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
  70. pychnosz/data/extdata/misc/SK95.csv +55 -0
  71. pychnosz/data/extdata/misc/SOJSH.csv +61 -0
  72. pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
  73. pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
  74. pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
  75. pychnosz/data/extdata/misc/bluered.txt +1000 -0
  76. pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
  77. pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
  78. pychnosz/data/extdata/protein/Cas/download.R +34 -0
  79. pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
  80. pychnosz/data/extdata/protein/POLG.csv +12 -0
  81. pychnosz/data/extdata/protein/TBD+05.csv +393 -0
  82. pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
  83. pychnosz/data/extdata/protein/rubisco.csv +28 -0
  84. pychnosz/data/extdata/protein/rubisco.fasta +239 -0
  85. pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
  86. pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
  87. pychnosz/data/extdata/src/README.txt +5 -0
  88. pychnosz/data/extdata/taxonomy/names.dmp +215 -0
  89. pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
  90. pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
  91. pychnosz/data/extdata/thermo/buffer.csv +40 -0
  92. pychnosz/data/extdata/thermo/element.csv +135 -0
  93. pychnosz/data/extdata/thermo/groups.csv +6 -0
  94. pychnosz/data/extdata/thermo/opt.csv +2 -0
  95. pychnosz/data/extdata/thermo/protein.csv +506 -0
  96. pychnosz/data/extdata/thermo/refs.csv +343 -0
  97. pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
  98. pychnosz/data/loader.py +431 -0
  99. pychnosz/data/mod_obigt.py +322 -0
  100. pychnosz/data/obigt.py +471 -0
  101. pychnosz/data/worm.py +228 -0
  102. pychnosz/fortran/__init__.py +16 -0
  103. pychnosz/fortran/h2o92.dll +0 -0
  104. pychnosz/fortran/h2o92_interface.py +527 -0
  105. pychnosz/geochemistry/__init__.py +21 -0
  106. pychnosz/geochemistry/minerals.py +514 -0
  107. pychnosz/geochemistry/redox.py +500 -0
  108. pychnosz/models/__init__.py +47 -0
  109. pychnosz/models/archer_wang.py +165 -0
  110. pychnosz/models/berman.py +309 -0
  111. pychnosz/models/cgl.py +381 -0
  112. pychnosz/models/dew.py +997 -0
  113. pychnosz/models/hkf.py +523 -0
  114. pychnosz/models/hkf_helpers.py +222 -0
  115. pychnosz/models/iapws95.py +1113 -0
  116. pychnosz/models/supcrt92_fortran.py +238 -0
  117. pychnosz/models/water.py +480 -0
  118. pychnosz/utils/__init__.py +27 -0
  119. pychnosz/utils/expression.py +1074 -0
  120. pychnosz/utils/formula.py +830 -0
  121. pychnosz/utils/formula_ox.py +227 -0
  122. pychnosz/utils/reset.py +33 -0
  123. pychnosz/utils/units.py +259 -0
  124. pychnosz-1.1.4.dist-info/METADATA +197 -0
  125. pychnosz-1.1.4.dist-info/RECORD +128 -0
  126. pychnosz-1.1.4.dist-info/WHEEL +5 -0
  127. pychnosz-1.1.4.dist-info/licenses/LICENSE.txt +19 -0
  128. pychnosz-1.1.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,813 @@
1
+ """
2
+ Equilibrate module for calculating equilibrium activities of species.
3
+
4
+ This module provides Python equivalents of the R functions in equilibrate.R:
5
+ - equilibrate(): Calculate equilibrium activities from chemical affinities
6
+ - equil.boltzmann(): Boltzmann distribution method
7
+ - equil.reaction(): Reaction-based equilibration method
8
+ - balance(): Determine balancing coefficients
9
+ - Supporting utilities for species equilibration
10
+
11
+ Author: CHNOSZ Python port
12
+ """
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ from typing import Union, List, Optional, Dict, Any, Tuple
17
+ import warnings
18
+ from scipy.optimize import brentq
19
+
20
+ from .thermo import thermo
21
+ from .info import info
22
+
23
+
24
def _ispecies_to_indices(ispecies):
    """Return *ispecies* as a list of integer positions.

    A boolean mask (Python or NumPy booleans) is converted to the positions
    of its true entries; any other sequence is returned as a list of ints.
    """
    selection = list(ispecies)
    if selection and isinstance(selection[0], (bool, np.bool_)):
        return [i for i, keep in enumerate(selection) if keep]
    return [int(i) for i in selection]


def equilibrate(aout: Dict[str, Any],
                balance: Optional[Union[str, int, List[float]]] = None,
                loga_balance: Optional[Union[float, List[float]]] = None,
                ispecies: Optional[Union[List[int], List[bool]]] = None,
                normalize: Union[bool, List[bool]] = False,
                as_residue: bool = False,
                method: Optional[Union[str, List[str]]] = None,
                tol: float = np.finfo(float).eps ** 0.25,
                messages: bool = True) -> Dict[str, Any]:
    """
    Calculate equilibrium activities of species from affinities.

    This function calculates the equilibrium activities of species in
    (metastable) equilibrium from the affinities of their formation reactions
    from basis species at given activities.

    The input dictionary is not modified: all work is done on a shallow copy,
    so the caller's ``aout['values']`` and ``aout['species']`` are left as
    they were passed in.

    Parameters
    ----------
    aout : dict
        Output from affinity() containing chemical affinities
    balance : str, int, or list of float, optional
        Balancing method:
        - None: Autoselect using which_balance()
        - str: Name of basis species to balance on
        - "length": Balance on protein length (for proteins)
        - "volume": Balance on standard-state volume
        - 1: Balance on one mole of species (formula units)
        - list: User-defined balancing coefficients
    loga_balance : float or list of float, optional
        Logarithm of total activity of the balancing basis species
        If None, calculated from species initial activities and n.balance
    ispecies : list of int or list of bool, optional
        Indices or boolean mask of species to include in equilibration
        Default: all species except those with state "cr" (crystalline)
    normalize : bool or list of bool, default False
        Normalize formulas by balancing coefficients?
    as_residue : bool, default False
        Use residue basis for proteins?
    method : str or list of str, optional
        Equilibration method:
        - "boltzmann": Boltzmann distribution (for n.balance = 1)
        - "reaction": Reaction-based equilibration (general method)
        If None, chooses "boltzmann" if all n.balance == 1, else "reaction"
    tol : float, default np.finfo(float).eps**0.25
        Tolerance for root-finding in reaction method
    messages : bool, default True
        Whether to print informational messages

    Returns
    -------
    dict
        Dictionary containing all aout contents plus:
        - balance : str or list, Balancing description
        - m_balance : list, Molar formula divisors
        - n_balance : list, Balancing coefficients
        - loga_balance : float or array, Log activity of balanced quantity
        - Astar : list of arrays, Normalized affinities
        - loga_equil : list of arrays, Equilibrium log activities

    Raises
    ------
    NotImplementedError
        If ``aout`` is the output of mosaic().
    ValueError
        If no species are selected, all selected species have NA affinities,
        ``loga_balance`` has the wrong length, negative balancing coefficients
        are combined with normalization, or ``method`` is unknown.

    Examples
    --------
    >>> import pychnosz
    >>> pychnosz.basis("CHNOS")
    >>> pychnosz.basis("NH3", -2)
    >>> pychnosz.species(["alanine", "glycine", "serine"])
    >>> a = pychnosz.affinity(NH3=[-80, 60], T=55, P=2000)
    >>> e = pychnosz.equilibrate(a, balance="CO2")

    Notes
    -----
    This follows the R CHNOSZ equilibrate() function.
    - Handles both Boltzmann and reaction-based equilibration
    - Supports normalization and residue basis for proteins
    - Handles crystalline species via predominance diagrams
    """

    # Handle mosaic output (not implemented yet, but keep structure)
    if aout.get('fun') == 'mosaic':
        raise NotImplementedError("mosaic equilibration not yet implemented")

    # Work on a shallow copy so that replacing 'values' and 'species' below
    # never leaks back into the dictionary owned by the caller.
    aout = dict(aout)

    # affinity() may return values as a dict keyed by ispecies; convert it to
    # a list ordered like the rows of the species dataframe.
    if isinstance(aout['values'], dict):
        values_by_index = aout['values']
        aout['values'] = [
            values_by_index[idx] if idx in values_by_index
            else np.array([np.nan])  # species missing from the dict
            for idx in aout['species']['ispecies']
        ]

    # Number of possible species
    nspecies = len(aout['values'])

    # Get the balancing coefficients
    bout = _balance(aout, balance, messages)
    n_balance_orig = list(bout['n_balance'])
    n_balance = list(bout['n_balance'])
    balance = bout['balance']

    # If solids (cr) species are present, find them on a predominance diagram
    iscr = ['cr' in str(state) for state in aout['species']['state']]
    ncr = sum(iscr)

    # Set default ispecies to exclude cr species (matching R default)
    if ispecies is None:
        ispecies = [not is_cr for is_cr in iscr]

    if ncr > 0:
        # Import diagram here to avoid circular imports
        from .diagram import diagram
        dout = diagram(aout, balance=balance, normalize=normalize,
                       as_residue=as_residue, plot_it=False,
                       limit_water=False, messages=messages)

    if ncr == nspecies:
        # We get here if there are only solids
        m_balance = None
        Astar = None
        loga_equil = [np.full(np.shape(v), np.nan) for v in aout['values']]
    else:
        # We get here if there are any aqueous species
        # Take selected species in 'ispecies'
        if len(ispecies) == 0:
            raise ValueError("the length of ispecies is zero")

        # Convert boolean mask to indices if needed
        ispecies = _ispecies_to_indices(ispecies)

        # Take out species that have NA affinities
        ina = [bool(np.all(np.isnan(np.asarray(x)))) for x in aout['values']]
        ispecies = [i for i in ispecies if not ina[i]]

        if len(ispecies) == 0:
            raise ValueError("all species have NA affinities")

        if ispecies != list(range(nspecies)):
            if messages:
                print(f"equilibrate: using {len(ispecies)} of {nspecies} species")
            aout['species'] = aout['species'].iloc[ispecies].reset_index(drop=True)
            aout['values'] = [aout['values'][i] for i in ispecies]
            n_balance = [n_balance[i] for i in ispecies]

        # Number of species that are left
        nspecies = len(aout['values'])

        # Say what the balancing coefficients are
        if len(n_balance) < 100 and messages:
            print(f"equilibrate: n.balance is {', '.join(map(str, n_balance))}")

        # Logarithm of total activity of the balancing basis species
        if loga_balance is None:
            # Sum up the activities, then take absolute value
            # in case n.balance is negative.  Coerce to float so that an
            # integer-typed logact column with negative entries does not make
            # the power raise.
            logact = np.asarray(aout['species']['logact'], dtype=float)
            sumact = abs(np.sum(10.0 ** logact * np.asarray(n_balance, dtype=float)))
            loga_balance = np.log10(sumact)

        # Make loga.balance the same length as the values of affinity
        loga_balance = np.atleast_1d(np.asarray(loga_balance, dtype=float)).ravel()
        nvalues = np.asarray(aout['values'][0]).size

        if loga_balance.size == 1:
            # We have a constant loga.balance
            constant = float(loga_balance[0])
            if messages:
                print(f"equilibrate: loga.balance is {constant}")
            loga_balance = np.full(nvalues, constant)
        else:
            # We are using a variable loga.balance (supplied by the user)
            if len(loga_balance) != nvalues:
                raise ValueError(f"length of loga.balance ({len(loga_balance)}) doesn't match "
                                 f"the affinity values ({nvalues})")
            if messages:
                print(f"equilibrate: loga.balance has same length as affinity values ({len(loga_balance)})")

        # Handle normalize parameter
        if isinstance(normalize, (bool, np.bool_)):
            normalize = [bool(normalize)] * nspecies
        elif not isinstance(normalize, list):
            normalize = list(normalize)

        # Normalize the molar formula by the balance coefficients
        if any(normalize) or as_residue:
            if any(n < 0 for n in n_balance):
                raise ValueError("one or more negative balancing coefficients prohibit using normalized molar formulas")

            normalized = [bool(normalize[i] or as_residue) for i in range(nspecies)]

            # The formula divisor (m.balance) is the original coefficient for
            # normalized species and 1 for species that are *not* normalized;
            # normalized species then balance on 1.
            m_balance = [n_balance[i] if normalized[i] else 1 for i in range(nspecies)]
            n_balance = [1 if normalized[i] else n_balance[i] for i in range(nspecies)]

            if messages:
                if as_residue:
                    print("equilibrate: using 'as.residue' for molar formulas")
                else:
                    print("equilibrate: using 'normalize' for molar formulas")
        else:
            m_balance = [1] * nspecies

        # Astar: the affinities/2.303RT of formation reactions with
        # formed species in their standard-state activities.
        # 'Starve' the affinity of the activity of the species,
        # and normalize the value by the molar ratio.
        Astar = [
            (np.array(aout['values'][i]) + aout['species']['logact'].iloc[i]) / m_balance[i]
            for i in range(nspecies)
        ]

        # Choose a method and compute the equilibrium activities of species
        if method is None:
            if all(n == 1 for n in n_balance):
                method = ["boltzmann"]
            else:
                method = ["reaction"]
        elif isinstance(method, str):
            method = [method]

        if messages:
            print(f"equilibrate: using {method[0]} method")

        if method[0] == "boltzmann":
            loga_equil = equil_boltzmann(Astar, n_balance, loga_balance)
        elif method[0] == "reaction":
            loga_equil = equil_reaction(Astar, n_balance, loga_balance, tol)
        else:
            raise ValueError(f"unknown method: {method[0]}")

        # If we normalized the formulas, get back to activities of species
        if any(normalize) and not as_residue:
            loga_equil = [loga_equil[i] - np.log10(m_balance[i])
                          for i in range(nspecies)]

    # Process cr species
    if ncr > 0:
        # cr species were excluded from equilibrium calculation,
        # so get values back to original lengths
        norig = len(dout['values'])
        n_balance = n_balance_orig

        # Ensure ispecies is in index form (not boolean).
        # When ncr == nspecies, ispecies was never converted above.
        ispecies = _ispecies_to_indices(ispecies)

        # Match original positions to positions in the equilibrated subset
        # (first occurrence wins, None when the species was excluded).
        position = {}
        for pos, orig_idx in enumerate(ispecies):
            position.setdefault(orig_idx, pos)
        imatch = [position.get(i) for i in range(norig)]

        # m_balance and Astar are None when only solids are present
        if m_balance is not None:
            m_balance = [m_balance[k] if k is not None else None for k in imatch]
        if Astar is not None:
            Astar = [Astar[k] if k is not None else None for k in imatch]

        # Template from the first loga_equil determines the grid shape.
        # Excluded species get -999 everywhere (np.full with a shape, so no
        # NaN values are inherited); predominant cr species are reset below.
        template_shape = loga_equil[0].shape
        loga_equil = [
            loga_equil[k] if k is not None else np.full(template_shape, -999.0)
            for k in imatch
        ]
        aout['species'] = dout['species']
        aout['values'] = dout['values']

        # Find the grid points where any cr species is predominant
        icr = [i for i in range(len(dout['species']))
               if 'cr' in str(dout['species']['state'].iloc[i])]

        # predominant uses 1-based indexing, hence the "+ 1" comparisons
        predominant = dout['predominant']
        iscr_mask = np.zeros_like(predominant, dtype=bool)
        for icr_idx in icr:
            iscr_mask |= (predominant == icr_idx + 1)

        # At those grid points, make the aqueous species' activities practically zero
        for i in range(norig):
            if i not in icr:
                loga_equil[i] = np.array(loga_equil[i], copy=True)
                loga_equil[i][iscr_mask] = -999

        # Where a cr species predominates, set its loga_equil to its logact
        for i in icr:
            ispredom = (predominant == i + 1)
            loga_equil[i] = np.array(loga_equil[i], copy=True)
            loga_equil[i][ispredom] = dout['species']['logact'].iloc[i]

    # Put together the output
    out = dict(aout)
    out['fun'] = 'equilibrate'  # Mark this as equilibrate output
    out['balance'] = balance
    out['m_balance'] = m_balance
    out['n_balance'] = n_balance
    out['loga_balance'] = loga_balance
    out['Astar'] = Astar
    out['loga_equil'] = loga_equil

    return out
361
+
362
+
363
def equil_boltzmann(Astar: List[np.ndarray],
                    n_balance: List[float],
                    loga_balance: np.ndarray) -> List[np.ndarray]:
    """
    Calculate equilibrium activities using Boltzmann distribution.

    This method works using the Boltzmann distribution:
        A/At = e^(Astar/n.balance) / sum(e^(Astar/n.balance))

    where A is activity of the ith residue and At is total activity of residues.

    The ratio is evaluated in log space with the largest Astar at each
    condition subtracted first, so very large or very small Astar values do
    not overflow or underflow.

    Advantages:
    - Loops over species only - much faster than equil.reaction
    - No root finding

    Disadvantage:
    - Only works for per-residue reactions (n.balance = 1)

    Parameters
    ----------
    Astar : list of ndarray
        Normalized affinities for each species
    n_balance : list of float
        Balancing coefficients (must all be 1)
    loga_balance : ndarray
        Log activity of the balanced quantity

    Returns
    -------
    list of ndarray
        Equilibrium log activities for each species, each with the shape of
        the first element of Astar

    Raises
    ------
    ValueError
        If any balancing coefficient differs from 1.
    """

    if not all(n == 1 for n in n_balance):
        raise ValueError("won't run equil.boltzmann for balance != 1")

    # Remember the dimensions of elements of Astar (() for a 0-d array)
    Astardim = np.shape(Astar[0])

    # Work on flat vectors; n.balance is 1 for every species, so no division
    # of Astar by n.balance is needed.
    flat = [np.asarray(a, dtype=float).ravel() for a in Astar]
    loga_balance_vec = np.asarray(loga_balance, dtype=float).ravel()

    # Largest Astar at each condition, used to shift the exponents
    peak = flat[0]
    for a in flat[1:]:
        peak = np.maximum(peak, a)

    # log10 of the denominator: sum over species of 10^Astar
    # (Astar is in units of 2.303RT, so base 10 is the natural choice)
    shifted_total = np.zeros_like(peak)
    for a in flat:
        shifted_total = shifted_total + 10.0 ** (a - peak)
    log_total = peak + np.log10(shifted_total)

    # Log abundances, restored to the original dimensions
    return [(loga_balance_vec + a - log_total).reshape(Astardim) for a in flat]
428
+
429
+
430
def equil_reaction(Astar: List[np.ndarray],
                   n_balance: List[float],
                   loga_balance: np.ndarray,
                   tol: float = np.finfo(float).eps ** 0.25) -> List[np.ndarray]:
    """
    Calculate equilibrium activities using reaction-based method.

    Turns the affinities/RT (A) of formation reactions into
    logactivities of species (logact(things)) at metastable equilibrium.

    For any reaction stuff = thing,
        A = logK - logQ
          = logK - logact(thing) + logact(stuff)
    given Astar = A + logact(thing),
    given Abar = A / n.balance,
        logact(thing) = Astar - Abar * n.balance  [2]

    where n.balance is the number of the balanced quantity
    (conserved component) in each species.

    Equilibrium values of logact(thing) satisfy:
    1) Abar is equal for all species
    2) log10(sum of (10^logact(thing) * n.balance)) = loga.balance  [1]

    Because of the logarithms the equations are not solved directly;
    root-finding is used to compute the Abar satisfying [1].

    Parameters
    ----------
    Astar : list of ndarray
        Normalized affinities for each species
    n_balance : list of float
        Balancing coefficients
    loga_balance : ndarray
        Log activity of the balanced quantity
    tol : float
        Tolerance for root-finding

    Returns
    -------
    list of ndarray
        Equilibrium log activities for each species

    Raises
    ------
    ValueError
        If only one species is given, if loga_balance does not have one entry
        per condition, or if no bracketing range of Abar can be found.
    """

    # We can't run on one species
    if len(Astar) == 1:
        raise ValueError("at least two species needed for reaction-based equilibration")

    # Remember the dimensions (None for a 0-d first element)
    first = Astar[0]
    Adim = first.shape if first.ndim > 0 else None

    # Matrix of Astar: one row per condition, one column per species
    Astar_array = np.transpose(np.array([a.flatten() for a in Astar]))
    nconditions = Astar_array.shape[0]

    if len(loga_balance) != nconditions:
        raise ValueError("length of loga.balance must be equal to the number of conditions for affinity()")

    # Balancing coefficients as an array, built once for all helpers
    coeffs = np.asarray(n_balance)

    def logafun(logact):
        """Log activity of the balanced quantity from logact(thing) of all species [1]"""
        # Shift by the maximum before exponentiating:
        #   log10(sum(n_i * 10^x_i)) = max(x) + log10(sum(n_i * 10^(x_i - max(x))))
        # which keeps the powers of ten within floating-point range.
        values = np.asarray(logact)
        top = np.max(values)
        scaled_sum = np.sum(coeffs * 10 ** (values - top))
        return top + np.log10(scaled_sum)

    def logactfun(Abar, i):
        """logact(thing) from Abar for the ith condition [2]"""
        return Astar_array[i, :] - Abar * coeffs

    def logadiff(Abar, i):
        """Difference between loga.balance and logafun for the ith condition"""
        return loga_balance[i] - logafun(logactfun(Abar, i))

    def Abarrange(i):
        """Range of Abar giving negative and positive logadiff for the ith condition"""
        # Starting guess of Abar (min/max) from range of Astar / n.balance
        ratios = Astar_array[i, :] / coeffs
        lower = np.min(ratios)
        upper = np.max(ratios)

        # The width of the range can't be 0 (dlogadiff.dAbar becomes NaN)
        if upper - lower == 0:
            lower = lower - 0.1
            upper = upper + 0.1

        # The range of logadiff
        diff_lower = logadiff(lower, i)
        diff_upper = logadiff(upper, i)

        # We're out of luck if they're both infinite
        if np.isinf(diff_lower) and np.isinf(diff_upper):
            raise ValueError("FIXME: there are no initial guesses for Abar that give "
                             "finite values of the differences in logarithm of activity "
                             "of the conserved component")

        # If one of them is infinite we might have a chance
        if np.isinf(diff_lower):
            # Decrease the Abar range by increasing the minimum
            lower = lower + 0.99 * (upper - lower)
            diff_lower = logadiff(lower, i)
            if np.isinf(diff_lower):
                raise ValueError("FIXME: the second initial guess for Abar.min failed")

        if np.isinf(diff_upper):
            # Decrease the Abar range by decreasing the maximum
            upper = upper - 0.99 * (upper - lower)
            diff_upper = logadiff(upper, i)
            if np.isinf(diff_upper):
                raise ValueError("FIXME: the second initial guess for Abar.max failed")

        attempts = 0
        while diff_lower > 0 or diff_upper < 0:
            # The change of logadiff with Abar
            # (a weighted mean of the n.balance)
            slope = (diff_upper - diff_lower) / (upper - lower)

            # Shift the range so that logadiff (min/max) is centered on zero
            shift = ((diff_lower + diff_upper) / 2) / slope
            lower = lower - shift
            upper = upper - shift

            # One iteration is enough for the examples in the package
            # but there might be a case where the range of logadiff doesn't cross zero
            diff_lower = logadiff(lower, i)
            diff_upper = logadiff(upper, i)
            attempts += 1

            if attempts > 5:
                raise ValueError("FIXME: we seem to be stuck! This function (Abarrange() in "
                                 "equil.reaction()) can't find a range of Abar such that the differences "
                                 "in logarithm of activity of the conserved component cross zero")

        return [lower, upper]

    def Abarfun(i):
        """Equilibrium Abar for the ith condition"""
        # Limits of Abar where logadiff brackets zero
        bracket_low, bracket_high = Abarrange(i)

        # Root of logadiff within the bracket (brentq is Python's uniroot)
        return brentq(logadiff, bracket_low, bracket_high, args=(i,), xtol=tol)

    # logact(thing) at the equilibrium Abar of every condition
    logact = np.array([logactfun(Abarfun(i), i) for i in range(nconditions)])

    # Convert back to a list of per-species arrays with original dimensions
    result = []
    for k in range(logact.shape[1]):
        column = logact[:, k]
        result.append(column.reshape(Adim) if Adim is not None else column)

    return result
607
+
608
+
609
+ def _balance(aout: Dict[str, Any],
610
+ balance: Optional[Union[str, int, List[float]]] = None,
611
+ messages: bool = True) -> Dict[str, Any]:
612
+ """
613
+ Return balancing coefficients and description.
614
+
615
+ Generate n.balance from user-given or automatically identified basis species.
616
+
617
+ Parameters
618
+ ----------
619
+ aout : dict
620
+ Output from affinity()
621
+ balance : str, int, or list of float, optional
622
+ Balance specification:
623
+ - None: autoselect using which_balance
624
+ - name of basis species: balanced on this basis species
625
+ - "length": balanced on sequence length of proteins
626
+ - "volume": standard-state volume listed in thermo()$OBIGT
627
+ - 1: balanced on one mole of species (formula units)
628
+ - numeric vector: user-defined n.balance
629
+
630
+ Returns
631
+ -------
632
+ dict
633
+ Dictionary with keys:
634
+ - n_balance : list, Balancing coefficients
635
+ - balance : str or list, Balancing description
636
+ """
637
+
638
+ # The index of the basis species that might be balanced
639
+ ibalance = None
640
+
641
+ # Deal with proteins
642
+ isprotein = ['_' in str(name) for name in aout['species']['name']]
643
+ if balance is None and all(isprotein):
644
+ balance = "length"
645
+
646
+ # Try to automatically find a balance
647
+ if balance is None:
648
+ ibalance = which_balance(aout['species'])
649
+ # No shared basis species and balance not specified by user - an error
650
+ if ibalance is None or len(ibalance) == 0:
651
+ raise ValueError("no basis species is present in all formation reactions")
652
+
653
+ # Change "1" to 1 (numeric)
654
+ if balance == "1":
655
+ balance = 1
656
+
657
+ if isinstance(balance, (int, float, list, np.ndarray)):
658
+ # A numeric vector
659
+ if isinstance(balance, (int, float)):
660
+ balance = [balance]
661
+ n_balance = list(balance) * (len(aout['values']) // len(balance) + 1)
662
+ n_balance = n_balance[:len(aout['values'])]
663
+
664
+ msgtxt = f"balance: on supplied numeric argument ({','.join(map(str, balance))})"
665
+ if balance == [1]:
666
+ msgtxt = f"{msgtxt} [1 means balance on formula units]"
667
+ if messages:
668
+ print(msgtxt)
669
+ else:
670
+ # "length" for balancing on protein length
671
+ if balance == "length":
672
+ if not all(isprotein):
673
+ raise ValueError("'length' was the requested balance, but some species are not proteins")
674
+ n_balance = [protein_length(name) for name in aout['species']['name']]
675
+ if messages:
676
+ print("balance: on protein length")
677
+ elif balance == "volume":
678
+ ispecies_list = aout['species']['ispecies'].tolist()
679
+ volumes = info(ispecies_list, check_it=False, messages=messages)['V']
680
+ n_balance = volumes.tolist()
681
+ if messages:
682
+ print("balance: on volume")
683
+ else:
684
+ # Is the balance the name of a basis species?
685
+ if ibalance is None or len(ibalance) == 0:
686
+ # Get basis rownames
687
+ basis_names = list(aout['basis'].index)
688
+ try:
689
+ ibalance = [basis_names.index(balance)]
690
+ except ValueError:
691
+ raise ValueError(f"basis species ({balance}) not available to balance reactions")
692
+
693
+ # The name of the basis species (need this if we got ibalance from which_balance, above)
694
+ balance = list(aout['species'].columns)[ibalance[0]]
695
+ if messages:
696
+ print(f"balance: on moles of {balance} in formation reactions")
697
+
698
+ # The balancing coefficients
699
+ n_balance = aout['species'].iloc[:, ibalance[0]].tolist()
700
+
701
+ # We check that all formation reactions contain this basis species
702
+ if any(n == 0 for n in n_balance):
703
+ raise ValueError(f"some species have no {balance} in the formation reaction")
704
+
705
+ return {'n_balance': n_balance, 'balance': balance}
706
+
707
+
708
def which_balance(species: pd.DataFrame) -> List[int]:
    """
    Locate the first basis-species column that has no zero coefficients.

    Such a basis species is present in every species of interest and can
    therefore be used to balance the system.

    Parameters
    ----------
    species : pd.DataFrame
        Species dataframe from affinity output. All columns except the
        trailing four are searched; the trailing four are treated as
        metadata and skipped.

    Returns
    -------
    list of int
        A one-element list holding the positional index of the first column
        whose values are all non-zero, or an empty list if there is none.
    """

    # Everything except the trailing four metadata columns is a candidate
    n_candidates = species.shape[1] - 4

    # Scan left to right and stop at the first fully non-zero column
    # (only the first match is reported, as in the R version)
    first_full = next(
        (col for col in range(n_candidates)
         if all(value != 0 for value in species.iloc[:, col])),
        None,
    )

    return [] if first_full is None else [first_full]
738
+
739
+
740
def protein_length(name: Union[str, List[str]]) -> Union[int, List[int]]:
    """
    Get protein sequence length.

    NOTE: no protein database is consulted here; every valid name currently
    yields the fixed placeholder value 100.

    Parameters
    ----------
    name : str or list of str
        Protein name(s) (with underscore separator)

    Returns
    -------
    int or list of int
        Sequence length(s); a list is returned when ``name`` is not a
        single string, with one entry per element.

    Raises
    ------
    ValueError
        If a name does not contain an underscore.
    """

    # Anything that is not a single string is handled element by element
    if not isinstance(name, str):
        return [protein_length(entry) for entry in name]

    if '_' in name:
        # Placeholder: the R version looks up the actual sequence length
        return 100

    raise ValueError(f"protein name '{name}' does not contain underscore")
765
+
766
+
767
def moles(eout: Dict[str, Any]) -> Dict[str, np.ndarray]:
    """
    Calculate total moles of elements from equilibrate output.

    Activities are obtained as 10**loga_equil, combined with the
    formation-reaction coefficients in ``eout['species']`` to give moles of
    each basis species, and then combined with the leading square part of
    ``eout['basis']`` to give moles of each element.

    Parameters
    ----------
    eout : dict
        Output from equilibrate(); the keys 'loga_equil', 'species' and
        'basis' are read.

    Returns
    -------
    dict
        Dictionary with element names (the first len(basis) column names of
        ``eout['basis']``) as keys and mole arrays as values
    """

    basis = eout['basis']
    species = eout['species']
    n_basis = len(basis)
    n_species = len(species)

    # Exponentiate loga.equil to get activities
    activities = [10 ** np.array(loga) for loga in eout['loga_equil']]

    # Moles of each basis species: sum over species of activity times the
    # coefficient of that basis species in the formation reaction
    basis_moles = []
    for col in range(n_basis):
        running = activities[0] * 0
        for row in range(n_species):
            running = running + activities[row] * species.iloc[row, col]
        basis_moles.append(running)

    # Moles of each element: sum over basis species of their moles times the
    # coefficient of that element in the basis species' formula
    element_moles = []
    for col in range(n_basis):
        running = activities[0] * 0
        for row in range(n_basis):
            running = running + basis_moles[row] * basis.iloc[row, col]
        element_moles.append(running)

    # Label the totals with the element names
    labels = list(basis.columns)[:n_basis]
    return dict(zip(labels, element_moles))