pychnosz 1.1.1__cp311-cp311-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. pychnosz/.dylibs/libgcc_s.1.1.dylib +0 -0
  2. pychnosz/.dylibs/libgfortran.5.dylib +0 -0
  3. pychnosz/.dylibs/libquadmath.0.dylib +0 -0
  4. pychnosz/__init__.py +129 -0
  5. pychnosz/biomolecules/__init__.py +29 -0
  6. pychnosz/biomolecules/ionize_aa.py +197 -0
  7. pychnosz/biomolecules/proteins.py +595 -0
  8. pychnosz/core/__init__.py +46 -0
  9. pychnosz/core/affinity.py +1256 -0
  10. pychnosz/core/animation.py +593 -0
  11. pychnosz/core/balance.py +334 -0
  12. pychnosz/core/basis.py +716 -0
  13. pychnosz/core/diagram.py +3336 -0
  14. pychnosz/core/equilibrate.py +813 -0
  15. pychnosz/core/equilibrium.py +554 -0
  16. pychnosz/core/info.py +821 -0
  17. pychnosz/core/retrieve.py +364 -0
  18. pychnosz/core/speciation.py +580 -0
  19. pychnosz/core/species.py +599 -0
  20. pychnosz/core/subcrt.py +1700 -0
  21. pychnosz/core/thermo.py +593 -0
  22. pychnosz/core/unicurve.py +1226 -0
  23. pychnosz/data/__init__.py +11 -0
  24. pychnosz/data/add_obigt.py +327 -0
  25. pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
  26. pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
  27. pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
  28. pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
  29. pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
  30. pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
  31. pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
  32. pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
  33. pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
  34. pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
  35. pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
  36. pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
  37. pychnosz/data/extdata/Berman/sympy.R +99 -0
  38. pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
  39. pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
  40. pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
  41. pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
  42. pychnosz/data/extdata/OBIGT/AD.csv +25 -0
  43. pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
  44. pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
  45. pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
  46. pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
  47. pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
  48. pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
  49. pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
  50. pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
  51. pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
  52. pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
  53. pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
  54. pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
  55. pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
  56. pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
  57. pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
  58. pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
  59. pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
  60. pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
  61. pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
  62. pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
  63. pychnosz/data/extdata/misc/BZA10.csv +5 -0
  64. pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
  65. pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
  66. pychnosz/data/extdata/misc/LA19_test.csv +7 -0
  67. pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
  68. pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
  69. pychnosz/data/extdata/misc/PM90.csv +7 -0
  70. pychnosz/data/extdata/misc/RH95.csv +23 -0
  71. pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
  72. pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
  73. pychnosz/data/extdata/misc/SK95.csv +55 -0
  74. pychnosz/data/extdata/misc/SOJSH.csv +61 -0
  75. pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
  76. pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
  77. pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
  78. pychnosz/data/extdata/misc/bluered.txt +1000 -0
  79. pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
  80. pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
  81. pychnosz/data/extdata/protein/Cas/download.R +34 -0
  82. pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
  83. pychnosz/data/extdata/protein/POLG.csv +12 -0
  84. pychnosz/data/extdata/protein/TBD+05.csv +393 -0
  85. pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
  86. pychnosz/data/extdata/protein/rubisco.csv +28 -0
  87. pychnosz/data/extdata/protein/rubisco.fasta +239 -0
  88. pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
  89. pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
  90. pychnosz/data/extdata/src/README.txt +5 -0
  91. pychnosz/data/extdata/taxonomy/names.dmp +215 -0
  92. pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
  93. pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
  94. pychnosz/data/extdata/thermo/buffer.csv +40 -0
  95. pychnosz/data/extdata/thermo/element.csv +135 -0
  96. pychnosz/data/extdata/thermo/groups.csv +6 -0
  97. pychnosz/data/extdata/thermo/opt.csv +2 -0
  98. pychnosz/data/extdata/thermo/protein.csv +506 -0
  99. pychnosz/data/extdata/thermo/refs.csv +343 -0
  100. pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
  101. pychnosz/data/loader.py +431 -0
  102. pychnosz/data/mod_obigt.py +322 -0
  103. pychnosz/data/obigt.py +471 -0
  104. pychnosz/data/worm.py +228 -0
  105. pychnosz/fortran/__init__.py +16 -0
  106. pychnosz/fortran/h2o92.dylib +0 -0
  107. pychnosz/fortran/h2o92_interface.py +527 -0
  108. pychnosz/geochemistry/__init__.py +21 -0
  109. pychnosz/geochemistry/minerals.py +514 -0
  110. pychnosz/geochemistry/redox.py +500 -0
  111. pychnosz/models/__init__.py +47 -0
  112. pychnosz/models/archer_wang.py +165 -0
  113. pychnosz/models/berman.py +309 -0
  114. pychnosz/models/cgl.py +381 -0
  115. pychnosz/models/dew.py +997 -0
  116. pychnosz/models/hkf.py +523 -0
  117. pychnosz/models/hkf_helpers.py +222 -0
  118. pychnosz/models/iapws95.py +1113 -0
  119. pychnosz/models/supcrt92_fortran.py +238 -0
  120. pychnosz/models/water.py +480 -0
  121. pychnosz/utils/__init__.py +27 -0
  122. pychnosz/utils/expression.py +1074 -0
  123. pychnosz/utils/formula.py +830 -0
  124. pychnosz/utils/formula_ox.py +227 -0
  125. pychnosz/utils/reset.py +33 -0
  126. pychnosz/utils/units.py +259 -0
  127. pychnosz-1.1.1.dist-info/METADATA +197 -0
  128. pychnosz-1.1.1.dist-info/RECORD +131 -0
  129. pychnosz-1.1.1.dist-info/WHEEL +5 -0
  130. pychnosz-1.1.1.dist-info/licenses/LICENSE.txt +19 -0
  131. pychnosz-1.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1256 @@
1
+ """
2
+ Affinity calculation module.
3
+
4
+ This module provides Python equivalents of the R functions in affinity.R:
5
+ - affinity(): Calculate chemical affinities of formation reactions
6
+ - Energy calculation utilities and argument processing
7
+ - Variable expansion and multi-dimensional calculations
8
+
9
+ Author: CHNOSZ Python port
10
+ """
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ from typing import Union, List, Optional, Dict, Any, Tuple
15
+ import warnings
16
+
17
+ from .thermo import thermo
18
+ from .basis import get_basis, is_basis_defined
19
+ from .species import get_species, is_species_defined
20
+ from .subcrt import subcrt
21
+
22
+
23
class AffinityError(Exception):
    """Error signalling that an affinity calculation cannot proceed.

    Raised by the affinity machinery in this module, for example when the
    basis species or the species of interest have not been defined, or when
    the requested protein rows are invalid.
    """
26
+
27
+
28
def affinity(messages: bool = True, basis: Optional[pd.DataFrame] = None,
             species: Optional[pd.DataFrame] = None,
             iprotein: Optional[Union[int, List[int], np.ndarray]] = None,
             loga_protein: Union[float, List[float]] = 0.0, **kwargs) -> Dict[str, Any]:
    """
    Calculate affinities of formation reactions.

    This function calculates chemical affinities for the formation reactions of
    species of interest from user-selected basis species. The affinities are
    calculated as A/2.303RT where A is the chemical affinity.

    Parameters
    ----------
    messages : bool, default True
        Whether to print informational messages
    basis : pd.DataFrame, optional
        Basis species definition to use (if not using global basis)
    species : pd.DataFrame, optional
        Species definition to use (if not using global species)
    iprotein : int, sequence of int, or array, optional
        Build proteins from residues (row numbers in thermo().protein)
    loga_protein : float or list of float, default 0.0
        Activity of proteins (log scale)
    **kwargs : dict
        Variable arguments defining calculation conditions:
        - Basis species names (e.g., CO2=[-60, 20, 5]): Variable basis species activities
        - T : float or list, Temperature in °C
        - P : float or list, Pressure in bar
        - property : str, Property to calculate ("A", "logK", "G", etc.)
        - exceed_Ttr : bool, Allow extrapolation beyond transition temperatures
        - exceed_rhomin : bool, Allow calculations below minimum water density
        - return_buffer : bool, Return buffer activities
        - balance : str, Balance method for protein buffers

        If the value of the first keyword argument is the result of a
        previous affinity() call (a dict whose 'fun' entry is 'affinity'),
        that call's recorded arguments are reused and updated with the
        remaining keyword arguments ("argument recall").

    Returns
    -------
    dict
        Dictionary containing:
        - fun : str, Function name ("affinity")
        - args : dict, Arguments used in calculation
        - sout : dict, Subcrt calculation results
        - property : str, Property calculated
        - basis : pd.DataFrame, Basis species definition
        - species : pd.DataFrame, Species of interest definition
        - T : float or array, Temperature(s) in Kelvin ([] when T is a variable)
        - P : float or array, Pressure(s) in bar ([] when P is a variable)
        - vars : list, Variable names
        - vals : dict, Variable values
        - values : dict, Calculated affinity values by species

    Raises
    ------
    AffinityError
        If basis species or species are not defined, or if `iprotein`
        contains NA values or values that are not row numbers of
        thermo().protein.

    Examples
    --------
    >>> import pychnosz
    >>> pychnosz.reset()
    >>> pychnosz.basis(["CO2", "H2O", "NH3", "H2S", "H+", "O2"])
    >>> pychnosz.species(["glycine", "tyrosine", "serine", "methionine"])
    >>> result = pychnosz.affinity(CO2=[-60, 20, 5], T=350, P=2000)
    >>> print(result['values'][1566])  # Glycine affinities

    >>> # With proteins
    >>> import pandas as pd
    >>> aa = pd.read_csv("POLG.csv")
    >>> iprotein = pychnosz.add_protein(aa)
    >>> pychnosz.basis("CHNOSe")
    >>> a = pychnosz.affinity(iprotein=iprotein, pH=[2, 14], Eh=[-1, 1])

    Notes
    -----
    This is a port of the R CHNOSZ affinity() function:
    - Dynamic basis species parameters are processed by energy_args()
    - Property values are computed by energy()
    - Protein calculations (`iprotein`) temporarily add residue species to
      the global species list; they are removed again before returning,
      including when the calculation raises
    - Argument recall never modifies the previous result that is passed in
    """

    # Argument recall is resolved first, before any global state is touched,
    # so that the recursive call does all of the work exactly once.
    if kwargs:
        first_key = next(iter(kwargs))
        previous = kwargs[first_key]
        if isinstance(previous, dict) and previous.get('fun') == 'affinity':
            # Copy so the caller's previous result is not modified in place
            recalled = dict(previous.get('args', {}))
            # Update with new arguments (skip the first one)
            recalled.update((key, value) for key, value in kwargs.items()
                            if key != first_key)
            # Carry over the explicitly named arguments of this call
            for key, value in (('messages', messages), ('basis', basis),
                               ('species', species), ('iprotein', iprotein),
                               ('loga_protein', loga_protein)):
                recalled.setdefault(key, value)
            return affinity(**recalled)

    # Get thermo object for protein handling
    thermo_obj = thermo()

    # Handle iprotein parameter
    ires = None
    species_func = None
    if iprotein is not None:
        # Accept scalars, lists, tuples and arrays alike
        iprotein = np.atleast_1d(np.asarray(iprotein))

        # Check all proteins are available
        if np.any(np.isnan(iprotein)):
            raise AffinityError("`iprotein` has some NA values")
        if (thermo_obj.protein is None
                or not np.all(iprotein < len(thermo_obj.protein))
                or np.any(iprotein < 0)):
            raise AffinityError("some value(s) of `iprotein` are not rownumbers of thermo().protein")

        # Add protein residues to the species list
        # Amino acids in 3-letter code
        aminoacids_3 = ["Ala", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile", "Lys", "Leu",
                        "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val", "Trp", "Tyr"]

        # Use _RESIDUE notation (matches R CHNOSZ affinity.R line 84)
        resnames_residue = ["H2O_RESIDUE"] + [f"{aa}_RESIDUE" for aa in aminoacids_3]

        # Imported under another name because the `species` parameter
        # shadows the function inside this scope
        from .species import species as species_func

        # Add residue species with activity 0 (all in "aq" state)
        species_func(resnames_residue, state="aq", add=True, messages=messages)

        # Get indices of residues in species list
        species_with_residues = get_species()
        ires = []
        for name in resnames_residue:
            idx = np.where(species_with_residues['name'] == name)[0]
            if len(idx) > 0:
                ires.append(idx[0])
        ires = np.array(ires)

    try:
        # Check if basis and species are defined (use provided or global)
        if basis is None:
            if not is_basis_defined():
                raise AffinityError("basis species are not defined")
            basis_df = get_basis()
        else:
            basis_df = basis

        if species is None:
            if not is_species_defined():
                raise AffinityError("species are not defined")
            species_df = get_species()
        else:
            species_df = species

        # Process energy arguments
        args_orig = dict(kwargs)
        args = energy_args(args_orig, messages, basis_df=basis_df)

        # Get property to calculate
        property_name = args.get('what', 'A')

        # Resolve the options once so that the values recorded in the result
        # are the ones actually used in the calculation
        exceed_Ttr = kwargs.get('exceed_Ttr', True)
        exceed_rhomin = kwargs.get('exceed_rhomin', False)

        # Affinities (A/2.303RT) are the default; any other property name is
        # passed through to energy() unchanged
        energy_result = energy(
            what=property_name or 'A',
            vars=args['vars'],
            vals=args['vals'],
            lims=args['lims'],
            T=args['T'],
            P=args['P'],
            IS=args.get('IS', 0),
            exceed_Ttr=exceed_Ttr,
            exceed_rhomin=exceed_rhomin,
            basis_df=basis_df,
            species_df=species_df,
            messages=messages
        )
        affinity_values = energy_result['a']
        energy_sout = energy_result['sout']

        # Handle protein affinity calculations if iprotein was provided
        if ires is not None:
            affinity_values, species_df = _affinity_proteins_from_residues(
                iprotein=iprotein,
                ires=ires,
                loga_protein=loga_protein,
                residue_affinities=affinity_values,
                protein_table=thermo().protein,
                basis_df=basis_df,
                args=args,
                messages=messages
            )
    finally:
        # Delete residue species from the species list, whether or not the
        # calculation succeeded, so the global species list is not polluted
        if ires is not None and len(ires) > 0:
            species_func(ires.tolist(), delete=True, messages=False)

    # Convert variable names and values for output
    # Important: Keep vars_list with actual basis species names (H+, e-) for internal use
    # but create display versions in vals_dict with user-friendly names (pH, pe, Eh)
    vars_list = args['vars']
    vars_list_display = vars_list.copy()
    vals_dict = {}
    for i, var in enumerate(vars_list):
        # Handle pH, pe, Eh conversions for output
        if var == 'H+' and 'pH' in args_orig:
            vars_list_display[i] = 'pH'
            vals_dict['pH'] = [-val for val in args['vals'][i]]
        elif var == 'e-' and 'pe' in args_orig:
            vars_list_display[i] = 'pe'
            vals_dict['pe'] = [-val for val in args['vals'][i]]
        elif var == 'e-' and 'Eh' in args_orig:
            vars_list_display[i] = 'Eh'
            # Convert from log(a_e-) back to Eh using temperature-dependent formula
            # log(a_e-) = -pe, so pe = -log(a_e-)
            # Eh = pe * (ln(10) * R * T) / F = -log(a_e-) * T / 5039.76
            T_kelvin = (args['T'] if isinstance(args['T'], (int, float))
                        else (args['T'][0] if hasattr(args['T'], '__len__') else 298.15))
            conversion_factor = T_kelvin / 5039.76  # volts per pe unit
            vals_dict['Eh'] = [-val * conversion_factor for val in args['vals'][i]]
        else:
            vals_dict[var] = args['vals'][i]

    # Check if T or P are variables
    if 'T' in vars_list:
        T_out = []  # Variable T
        # Convert back to Celsius for output
        vals_dict['T'] = [T - 273.15 for T in vals_dict['T']]
    else:
        # Kelvin for output (matching R)
        T_out = args['T']

    P_out = [] if 'P' in vars_list else args['P']

    # Build output dictionary matching R CHNOSZ structure
    return {
        'fun': 'affinity',
        'args': {
            **args_orig,
            'property': property_name,
            'exceed_Ttr': exceed_Ttr,
            'exceed_rhomin': exceed_rhomin,
            'return_buffer': kwargs.get('return_buffer', False),
            'balance': kwargs.get('balance', 'PBB')
        },
        'sout': energy_sout,
        'property': property_name,
        'basis': basis_df,
        'species': species_df,
        'T': T_out,
        'P': P_out,
        'vars': vars_list_display,  # Use display version with 'Eh', 'pH', 'pe' for output
        'vals': vals_dict,
        'values': affinity_values
    }


def _affinity_ionization_conditions(args: Dict[str, Any],
                                    basis_df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Build the flat T (°C), P and pH arrays passed to ionize_aa().

    Parameters
    ----------
    args : dict
        Processed arguments from energy_args() ('vars', 'vals', 'T', 'P')
    basis_df : pd.DataFrame
        Basis species definition; must contain a row for 'H+'

    Returns
    -------
    tuple of np.ndarray
        (T_flat, P_flat, pH_flat)
    """

    def first_scalar(value):
        # A plain number is used as-is; otherwise the first element is taken
        return value if isinstance(value, (int, float)) else value[0]

    def constant_pressure(n):
        # A string pressure (e.g. "Psat") is repeated; a number is broadcast
        if isinstance(P_vals, str):
            return np.array([P_vals] * n)
        return np.full(n, first_scalar(P_vals))

    var_names = args['vars']

    # Determine pH values from vars/vals or basis
    if 'H+' in var_names:
        # H+ is a variable - get pH from vals; pH = -log(a_H+)
        pH_vals = -np.array(args['vals'][var_names.index('H+')])
    else:
        # H+ is constant - get from basis
        pH_vals = np.array([-basis_df.loc['H+', 'logact']])

    # args['T'] is in Kelvin at this point
    T_celsius = args['T'] - 273.15
    P_vals = args['P']

    has_T_var = 'T' in var_names
    has_Hplus_var = 'H+' in var_names

    if len(var_names) >= 2 and has_T_var and has_Hplus_var:
        # Both T and pH vary - create meshgrid
        T_grid, pH_grid = np.meshgrid(T_celsius, pH_vals, indexing='ij')
        T_flat = T_grid.flatten()
        pH_flat = pH_grid.flatten()
        P_flat = constant_pressure(len(T_flat))
    elif len(var_names) >= 2 and has_T_var:
        # Only T varies
        T_flat = T_celsius if isinstance(T_celsius, np.ndarray) else np.array([T_celsius])
        pH_flat = np.full(len(T_flat), pH_vals[0])
        P_flat = constant_pressure(len(T_flat))
    elif len(var_names) >= 2 and has_Hplus_var:
        # Only pH varies
        pH_flat = pH_vals
        T_flat = np.full(len(pH_flat), first_scalar(T_celsius))
        P_flat = constant_pressure(len(pH_flat))
    else:
        # No T or pH variables, or fewer than two variables.
        # NOTE(review): with a single variable only the first T value is
        # used here, as in the original code - confirm this is intended.
        T_flat = np.array([first_scalar(T_celsius)])
        pH_flat = pH_vals
        P_flat = constant_pressure(1)

    return T_flat, P_flat, pH_flat


def _affinity_proteins_from_residues(iprotein: np.ndarray, ires: np.ndarray,
                                     loga_protein: Union[float, List[float]],
                                     residue_affinities: Dict[Any, Any],
                                     protein_table: pd.DataFrame,
                                     basis_df: pd.DataFrame,
                                     args: Dict[str, Any],
                                     messages: bool) -> Tuple[Dict[Any, Any], pd.DataFrame]:
    """
    Combine residue affinities into protein affinities (group additivity).

    Must be called while the residue species are still present in the
    global species list.

    Parameters
    ----------
    iprotein : np.ndarray
        Row numbers of the proteins in `protein_table`
    ires : np.ndarray
        Row positions of the residue species in the species list
    loga_protein : float or list of float
        Log activity of the proteins
    residue_affinities : dict
        Values returned by energy(), keyed by ispecies
    protein_table : pd.DataFrame
        thermo().protein
    basis_df : pd.DataFrame
        Basis species definition
    args : dict
        Processed arguments from energy_args()
    messages : bool
        Whether to print informational messages

    Returns
    -------
    tuple
        (protein affinities keyed by -(row number + 1),
         species DataFrame describing the proteins)
    """
    n_proteins = len(iprotein)

    # Normalize loga_protein to match number of proteins
    if isinstance(loga_protein, (int, float, np.number)):
        loga_protein_arr = np.full(n_proteins, loga_protein)
    else:
        loga_protein_arr = np.array(loga_protein)
        if len(loga_protein_arr) < n_proteins:
            loga_protein_arr = np.resize(loga_protein_arr, n_proteins)

    def composition(iprot):
        # Chains and amino acid counts of one protein row.
        # NOTE(review): the slice 4:24 selects 20 columns, while the residue
        # list has 21 entries (H2O + 20 amino acids) and the R code uses
        # columns 5:25 - confirm against the layout of thermo().protein.
        return protein_table.iloc[iprot].iloc[4:24].values.astype(float)

    # Species list including the residues; affinity keys are ispecies indices
    species_with_residues = get_species()
    residue_ispecies = species_with_residues.iloc[ires]['ispecies'].values

    # Calculate affinity for each protein
    protein_affinities = {}
    for ip, iprot in enumerate(iprotein):
        aa_counts = composition(iprot)

        first_residue_key = residue_ispecies[0]
        if first_residue_key not in residue_affinities:
            continue

        # Initialize protein affinity with same shape as residue affinities
        protein_affinity = np.zeros_like(residue_affinities[first_residue_key])

        # Sum up contributions from all residues, weighted by composition
        for i, res_ispecies in enumerate(residue_ispecies):
            if res_ispecies in residue_affinities:
                protein_affinity = protein_affinity + residue_affinities[res_ispecies] * aa_counts[i]

        # Subtract protein activity
        protein_affinity = protein_affinity - loga_protein_arr[ip]

        # Use negative index to denote protein (matches R CHNOSZ convention)
        protein_affinities[-(iprot + 1)] = protein_affinity

    # Add ionization affinity if H+ is in basis (matching R CHNOSZ behavior)
    if 'H+' in basis_df.index:
        if messages:
            print("affinity: ionizing proteins ...")

        from ..biomolecules.proteins import pinfo
        from ..biomolecules.ionize_aa import ionize_aa

        # Get aa compositions for these proteins
        aa = pinfo(iprotein)

        T_flat, P_flat, pH_flat = _affinity_ionization_conditions(args, basis_df)

        # Call ionize_aa to get ionization affinity
        ionization_result = ionize_aa(aa, property="A", T=T_flat, P=P_flat, pH=pH_flat)

        # Add ionization affinity to formation affinity for each protein
        for ip, iprot in enumerate(iprotein):
            protein_key = -(iprot + 1)
            ionization_affinity = ionization_result.iloc[:, ip].values

            # Reshape to match formation affinity dimensions if needed
            formation_affinity = protein_affinities[protein_key]
            if (isinstance(formation_affinity, np.ndarray)
                    and formation_affinity.shape != ionization_affinity.shape):
                ionization_affinity = ionization_affinity.reshape(formation_affinity.shape)

            protein_affinities[protein_key] = formation_affinity + ionization_affinity

    # Calculate stoichiometric coefficients for proteins using matrix multiplication
    # This matches R CHNOSZ: protbasis <- t(t((resspecies[ires, 1:nrow(thermo$basis)])) %*% t((thermo$protein[iprotein, 5:25])))
    basis_cols = list(basis_df.index)  # e.g., ['CO2', 'H2O', 'NH3', 'H2S', 'e-', 'H+']

    # Residue coefficient matrix (n_residues x n_basis)
    res_coeffs = species_with_residues.iloc[ires][basis_cols].values.astype(float)

    # Amino acid composition matrix (n_proteins x n_residues)
    aa_composition = np.array([composition(iprot) for iprot in iprotein])

    # (n_proteins x n_residues) @ (n_residues x n_basis) = (n_proteins x n_basis)
    protein_coeffs = aa_composition @ res_coeffs

    # Create DataFrame for proteins with basis species coefficients
    species_data = {}
    for j, basis_sp in enumerate(basis_cols):
        species_data[basis_sp] = protein_coeffs[:, j]

    # Add metadata columns
    protein_names = []
    protein_ispecies = []
    for iprot in iprotein:
        prot_row = protein_table.iloc[iprot]
        protein_name = f"{prot_row['protein']}_{prot_row['organism']}"
        # Escape underscores for matplotlib/LaTeX compatibility in diagram labels
        protein_names.append(protein_name.replace('_', r'\_'))
        protein_ispecies.append(-(iprot + 1))  # Negative index

    species_data['ispecies'] = protein_ispecies
    species_data['logact'] = loga_protein_arr[:n_proteins]
    species_data['state'] = ['aq'] * n_proteins
    species_data['name'] = protein_names

    return protein_affinities, pd.DataFrame(species_data)
504
+
505
+
506
+ def energy_args(args: Dict[str, Any], messages: bool = True, basis_df: Optional[pd.DataFrame] = None) -> Dict[str, Any]:
507
+ """
508
+ Process arguments for energy calculations.
509
+
510
+ Converts variable arguments into consistent format for multi-dimensional
511
+ calculations, handling T, P, IS and basis species variables.
512
+
513
+ Parameters
514
+ ----------
515
+ args : dict
516
+ Raw arguments from affinity() call
517
+
518
+ Returns
519
+ -------
520
+ dict
521
+ Processed arguments with consistent variable structure
522
+ """
523
+
524
+ thermo_obj = thermo()
525
+ if basis_df is None:
526
+ basis_df = get_basis()
527
+
528
+ # Default values
529
+ T = 298.15
530
+ P = "Psat"
531
+ IS = 0
532
+ T_is_var = P_is_var = IS_is_var = False
533
+
534
+ # Process T, P, IS arguments
535
+ if 'T' in args:
536
+ T = args['T']
537
+ if hasattr(T, '__len__') and len(T) > 1:
538
+ T_is_var = True
539
+ # Convert to Kelvin if needed (assuming Celsius input)
540
+ if T_is_var:
541
+ if isinstance(T, (list, tuple)):
542
+ # Handle [T1, T2, npoints] format or [T1, T2] (default to 256 points)
543
+ if len(T) == 3:
544
+ T = np.linspace(T[0] + 273.15, T[1] + 273.15, int(T[2]))
545
+ elif len(T) == 2:
546
+ # Default resolution: 256 points (R CHNOSZ standard)
547
+ T = np.linspace(T[0] + 273.15, T[1] + 273.15, 256)
548
+ else:
549
+ T = np.array(T) + 273.15
550
+ else:
551
+ T = T + 273.15
552
+ else:
553
+ T = T + 273.15
554
+
555
+ if 'P' in args:
556
+ P = args['P']
557
+ if hasattr(P, '__len__') and len(P) > 1:
558
+ P_is_var = True
559
+ if P_is_var and P != "Psat":
560
+ if isinstance(P, (list, tuple)):
561
+ if len(P) == 3:
562
+ P = np.linspace(P[0], P[1], int(P[2]))
563
+ elif len(P) == 2:
564
+ # Default resolution: 256 points (R CHNOSZ standard)
565
+ P = np.linspace(P[0], P[1], 256)
566
+
567
+ if 'IS' in args:
568
+ IS = args['IS']
569
+ if hasattr(IS, '__len__') and len(IS) > 1:
570
+ IS_is_var = True
571
+ if isinstance(IS, (list, tuple)):
572
+ if len(IS) == 3:
573
+ IS = np.linspace(IS[0], IS[1], int(IS[2]))
574
+ elif len(IS) == 2:
575
+ # Default resolution: 256 points (R CHNOSZ standard)
576
+ IS = np.linspace(IS[0], IS[1], 256)
577
+
578
+ # Print status messages
579
+ if messages:
580
+ if not T_is_var:
581
+ T_celsius = T - 273.15 if isinstance(T, (int, float)) else T[0] - 273.15
582
+ print(f'affinity: temperature is {T_celsius:.0f} ºC')
583
+
584
+ if not P_is_var:
585
+ if P == "Psat":
586
+ print("affinity: pressure is Psat")
587
+ else:
588
+ print(f'affinity: pressure is {P} bar')
589
+
590
+ if not IS_is_var and IS != 0:
591
+ print(f'affinity: ionic strength is {IS}')
592
+
593
+ # Default property
594
+ what = 'A'
595
+ if 'what' in args:
596
+ what = args['what']
597
+
598
+ # Process variable arguments
599
+ # Preserve the order in which variables were specified (R CHNOSZ compatibility)
600
+ vars_list = []
601
+ vals_list = []
602
+ lims_list = []
603
+
604
+ # Track which T/P/IS are variables and process them in the order they appear in args
605
+ tps_vars = {'T': (T_is_var, T), 'P': (P_is_var, P), 'IS': (IS_is_var, IS)}
606
+
607
+ # Add T, P, IS in the order they appear in args (preserves user's specification order)
608
+ for arg_name in args.keys():
609
+ if arg_name in ['T', 'P', 'IS'] and tps_vars[arg_name][0]:
610
+ var_name = arg_name
611
+ var_value = tps_vars[arg_name][1]
612
+
613
+ vars_list.append(var_name)
614
+ vals_list.append(var_value)
615
+
616
+ if isinstance(args[arg_name], (list, tuple)):
617
+ if len(args[arg_name]) == 3:
618
+ # User specified [min, max, npoints]
619
+ if arg_name == 'T':
620
+ lims_list.append([args[arg_name][0] + 273.15, args[arg_name][1] + 273.15, args[arg_name][2]])
621
+ else:
622
+ lims_list.append([args[arg_name][0], args[arg_name][1], args[arg_name][2]])
623
+ elif len(args[arg_name]) == 2:
624
+ # User specified [min, max], default to 256 points
625
+ if arg_name == 'T':
626
+ lims_list.append([args[arg_name][0] + 273.15, args[arg_name][1] + 273.15, 256])
627
+ else:
628
+ lims_list.append([args[arg_name][0], args[arg_name][1], 256])
629
+ else:
630
+ # User provided explicit array of values
631
+ lims_list.append([var_value.min(), var_value.max(), len(var_value)])
632
+ else:
633
+ lims_list.append([var_value.min(), var_value.max(), len(var_value)])
634
+
635
+ # Process basis species variables
636
+ basis_names = basis_df.index.tolist()
637
+
638
+ for arg_name, arg_value in args.items():
639
+ # Skip T, P, IS, and non-basis arguments
640
+ if arg_name in ['T', 'P', 'IS', 'what', 'property', 'exceed_Ttr', 'exceed_rhomin', 'return_buffer', 'balance']:
641
+ continue
642
+
643
+ # Handle pH -> H+, pe -> e-, Eh -> e-
644
+ var_name = arg_name
645
+ var_values = arg_value
646
+
647
+ if arg_name == 'pH':
648
+ var_name = 'H+'
649
+ if hasattr(var_values, '__len__'):
650
+ if len(var_values) >= 3:
651
+ # [pH1, pH2, npoints] -> [-pH1, -pH2, npoints] for H+ (logact)
652
+ # pH and log(a_H+) are related by: pH = -log(a_H+), so log(a_H+) = -pH
653
+ var_values = np.linspace(-var_values[0], -var_values[1], int(var_values[2]))
654
+ elif len(var_values) >= 2:
655
+ var_values = [-v for v in var_values]
656
+ else:
657
+ # Single value in a list [pH]
658
+ var_values = np.array([-var_values[0]])
659
+ else:
660
+ # Scalar value
661
+ var_values = np.array([-var_values])
662
+ elif arg_name == 'pe':
663
+ var_name = 'e-'
664
+ if hasattr(var_values, '__len__'):
665
+ if len(var_values) >= 3:
666
+ # pe = -log(a_e-), so log(a_e-) = -pe
667
+ # For pe range [pe1, pe2], log(a_e-) range is [-pe1, -pe2]
668
+ var_values = np.linspace(-var_values[0], -var_values[1], int(var_values[2]))
669
+ elif len(var_values) >= 2:
670
+ var_values = [-v for v in var_values]
671
+ else:
672
+ # Single value in a list [pe]
673
+ var_values = np.array([-var_values[0]])
674
+ else:
675
+ # Scalar value
676
+ var_values = np.array([-var_values])
677
+ elif arg_name == 'Eh':
678
+ var_name = 'e-'
679
+ # Convert Eh (volts) to log(a_e-) using temperature-dependent formula
680
+ # pe = Eh * F / (ln(10) * R * T) where pe = -log(a_e-)
681
+ # Therefore: log(a_e-) = -pe = -Eh * F / (ln(10) * R * T)
682
+ # where R = 0.00831470 kJ/(mol·K), F = 96.4935 kJ/(V·mol), T in Kelvin
683
+ # This gives: log(a_e-) = -Eh * 96.4935 / (2.303 * 0.00831470 * T)
684
+ # = -Eh * 96.4935 / (0.019145 * T)
685
+ # = -Eh * 5039.76 / T
686
+
687
+ # Get temperature for conversion (default to 25°C if not specified)
688
+ T_kelvin = T if isinstance(T, (int, float)) else T[0] if hasattr(T, '__len__') else 298.15
689
+ conversion_factor = 5039.76 / T_kelvin # pe per volt (need to negate for log(a_e-))
690
+
691
+ if hasattr(var_values, '__len__') and len(var_values) >= 2:
692
+ if len(var_values) == 3:
693
+ # [Eh1, Eh2, npoints] format
694
+ # Convert to log(a_e-) = -pe = -Eh * conversion_factor
695
+ logact_start = -var_values[0] * conversion_factor
696
+ logact_end = -var_values[1] * conversion_factor
697
+ var_values = np.linspace(logact_start, logact_end, int(var_values[2]))
698
+ elif len(var_values) == 2:
699
+ # [Eh1, Eh2] format - default to 256 points like R
700
+ logact_start = -var_values[0] * conversion_factor
701
+ logact_end = -var_values[1] * conversion_factor
702
+ var_values = np.linspace(logact_start, logact_end, 256)
703
+ else:
704
+ # List of explicit Eh values
705
+ var_values = [-v * conversion_factor for v in var_values]
706
+ else:
707
+ # Single value
708
+ var_values = -var_values * conversion_factor
709
+
710
+ # Check if this is a basis species
711
+ if var_name in basis_names:
712
+ vars_list.append(var_name)
713
+
714
+ # Process values
715
+ if isinstance(var_values, (list, tuple)):
716
+ if len(var_values) == 3:
717
+ # [min, max, npoints] format
718
+ vals_array = np.linspace(var_values[0], var_values[1], int(var_values[2]))
719
+ vals_list.append(vals_array)
720
+ lims_list.append(var_values)
721
+
722
+ # Print variable info
723
+ if messages:
724
+ n_vals = int(var_values[2])
725
+ print(f'affinity: variable {len(vars_list)} is log10(a_{var_name}) at {n_vals} values from {var_values[0]} to {var_values[1]}')
726
+
727
+ elif len(var_values) == 2:
728
+ # [min, max] format - default to 256 points (R CHNOSZ behavior)
729
+ vals_array = np.linspace(var_values[0], var_values[1], 256)
730
+ vals_list.append(vals_array)
731
+ lims_list.append([var_values[0], var_values[1], 256])
732
+
733
+ # Print variable info
734
+ if messages:
735
+ print(f'affinity: variable {len(vars_list)} is log10(a_{var_name}) at 256 values from {var_values[0]} to {var_values[1]}')
736
+
737
+ else:
738
+ # Explicit array of values
739
+ vals_list.append(np.array(var_values))
740
+ lims_list.append([min(var_values), max(var_values), len(var_values)])
741
+ else:
742
+ # Single value
743
+ if not hasattr(var_values, '__len__'):
744
+ var_values = [var_values]
745
+ vals_list.append(np.array(var_values))
746
+ lims_list.append([var_values[0], var_values[-1], len(var_values)])
747
+ else:
748
+ # Not a recognized basis species or variable
749
+ raise AffinityError(f"{arg_name} is not one of T, P, or IS, and does not match any basis species")
750
+
751
+ return {
752
+ 'what': what,
753
+ 'vars': vars_list,
754
+ 'vals': vals_list,
755
+ 'lims': lims_list,
756
+ 'T': T,
757
+ 'P': P,
758
+ 'IS': IS
759
+ }
760
+
761
+
762
def _energy_as_float(value: Any, default: float = 0.0) -> float:
    """Return ``float(value)``, or *default* if *value* cannot be converted."""
    try:
        return float(value)
    except (ValueError, TypeError):
        return default


def _energy_grid_logK(sp_data: pd.DataFrame, mydim: List[int]) -> np.ndarray:
    """Return the 'logK' column of one subcrt species table, shaped to the grid.

    When the table has one row per grid point of a multi-dimensional grid the
    values are reshaped to ``mydim``; otherwise they stay 1-D.  A table
    without a 'logK' column yields zeros of the corresponding shape.
    """
    if 'logK' in sp_data.columns:
        # NaN values are deliberately kept so that they propagate to the
        # affinity instead of being silently treated as logK = 0.
        logK_vals = sp_data['logK'].values
        if len(mydim) > 1 and len(logK_vals) == np.prod(mydim):
            logK_vals = logK_vals.reshape(mydim)
        return logK_vals

    n_rows = len(sp_data)
    if len(mydim) > 1 and n_rows == np.prod(mydim):
        return np.zeros(mydim)
    return np.zeros(n_rows)


def _energy_broadcast_logK(logK: Any, mydim: List[int]) -> Any:
    """Expand a 1-D logK array onto a multi-dimensional grid of shape ``mydim``.

    The array is laid along the first grid axis whose length equals
    ``len(logK)`` and broadcast over all other axes; a flattened array of
    ``prod(mydim)`` values is reshaped instead.  Anything else (scalars,
    arrays that are not 1-D, 1-D grids, lengths that match nothing) is
    returned unchanged and left to ordinary NumPy broadcasting.
    """
    if not isinstance(logK, np.ndarray) or logK.ndim != 1 or len(mydim) <= 1:
        return logK

    n_values = len(logK)
    for axis, axis_len in enumerate(mydim):
        if n_values == axis_len:
            # Put the values on `axis` and length-1 placeholders elsewhere so
            # the result is aligned on that axis for any number of dimensions.
            shape = [1] * len(mydim)
            shape[axis] = n_values
            return np.broadcast_to(logK.reshape(shape), tuple(mydim))

    if n_values == np.prod(mydim):
        return logK.reshape(mydim)
    return logK


def energy(what: str, vars: List[str], vals: List, lims: List,
           T: Union[float, np.ndarray] = 298.15,
           P: Union[float, str] = "Psat",
           IS: float = 0,
           sout: Optional[Dict] = None,
           exceed_Ttr: bool = True,
           exceed_rhomin: bool = False,
           basis_df: Optional[pd.DataFrame] = None,
           species_df: Optional[pd.DataFrame] = None,
           messages: bool = True) -> Dict[str, Any]:
    """
    Calculate energy properties over multiple dimensions.

    This is the core calculation function that handles multi-dimensional
    property calculations for basis and formed species.

    Parameters
    ----------
    what : str
        Property to calculate.  "A" computes affinities (logK - logQ of the
        formation reactions); any other value is read as a column of the
        subcrt output.  subcrt is only called here for
        "G", "H", "S", "Cp", "V", "E", "kT" and "logK".
    vars : list of str
        Variable names
    vals : list of arrays
        Variable values, one entry per name in ``vars``
    lims : list of limits
        Variable limits [min, max, npoints]; npoints is converted to int
    T : float or array
        Temperature(s) in Kelvin
    P : float or str
        Pressure(s) in bar or "Psat"
    IS : float
        Ionic strength
    sout : dict, optional
        Pre-calculated subcrt results; when given, subcrt is not called for
        the non-affinity properties
    exceed_Ttr : bool
        Allow extrapolation beyond transitions
    exceed_rhomin : bool
        Allow below minimum density
    basis_df : pandas.DataFrame, optional
        Basis species definition; ``get_basis()`` is used when omitted
    species_df : pandas.DataFrame, optional
        Formed species definition; ``get_species()`` is used when omitted
    messages : bool
        Forwarded to subcrt

    Returns
    -------
    dict
        Dictionary with 'sout' (subcrt results) and 'a' (property values,
        keyed by the ``ispecies`` value of each formed species)
    """
    # Return value is unused; the call is kept from the original code.
    # NOTE(review): presumably this makes sure the thermo system is set up -
    # confirm before removing.
    thermo()
    if basis_df is None:
        basis_df = get_basis()
    if species_df is None:
        species_df = get_species()

    n_basis = len(basis_df)
    n_species = len(species_df)

    # Grid dimensions.  The point counts are cast to int because callers may
    # hand over the user's raw [min, max, npoints] list, and NumPy rejects
    # non-integer shapes.
    if len(vars) == 0:
        mydim = [1]
    else:
        mydim = [int(lim[2]) for lim in lims]
    n_conditions = np.prod(mydim) if len(mydim) > 0 else 1

    # Always bound, so that every branch below can return it.
    sout_data = sout

    # The affinity branch makes its own batch subcrt call, so subcrt is only
    # called here for the plain thermodynamic properties.
    if sout is None and what in ['G', 'H', 'S', 'Cp', 'V', 'E', 'kT', 'logK']:
        # Species list: basis species first, then formed species
        all_species = basis_df['ispecies'].tolist() + species_df['ispecies'].tolist()

        # subcrt is given Celsius; T here is in Kelvin
        subcrt_T = T - 273.15
        subcrt_P = P
        subcrt_IS = IS
        if 'T' in vars:
            subcrt_T = vals[vars.index('T')] - 273.15
        if 'P' in vars:
            subcrt_P = vals[vars.index('P')]
        if 'IS' in vars:
            subcrt_IS = vals[vars.index('IS')]

        try:
            # With two or more of T/P/IS varying, grid on the first of them
            grid_param = None
            if len(vars) > 1:
                subcrt_vars = [v for v in vars if v in ['T', 'P', 'IS']]
                if len(subcrt_vars) >= 2:
                    grid_param = subcrt_vars[0]

            sout_result = subcrt(
                species=all_species,
                T=subcrt_T,
                P=subcrt_P,
                IS=subcrt_IS,
                property='logK',
                grid=grid_param,
                exceed_Ttr=exceed_Ttr,
                exceed_rhomin=exceed_rhomin,
                messages=messages,
                show=False
            )
            sout_data = sout_result.out

        except Exception as e:
            # Deliberate best effort: warn and continue with NaN placeholders
            warnings.warn(f"subcrt calculation failed: {e}")
            if isinstance(P, str) and P == "Psat":
                # isinstance guard: comparing an array P with a string is
                # ambiguous in a boolean context
                dummy_P = 1.0
            else:
                dummy_P = P if isinstance(P, (int, float)) else P[0]
            sout_data = pd.DataFrame({
                'T': np.full(n_conditions, T if isinstance(T, (int, float)) else T[0]) - 273.15,
                'P': np.full(n_conditions, dummy_P),
                'logK': np.full(n_conditions, np.nan)
            })

    if what == 'A':
        # Affinities A/2.303RT = logK - logQ for the formation reaction of
        # each formed species from the basis species.
        affinity_values = {}

        basis_names = basis_df.index.tolist()
        basis_ispecies_list = basis_df['ispecies'].tolist()
        species_ispecies_list = species_df['ispecies'].tolist()
        basis_ispecies_values = basis_df['ispecies'].values

        # --- log activities of the basis species on the grid -------------
        logact_basis_arrays = {}

        if len(vars) > 1:
            # Variable basis species each vary along their own axis
            # (indexing='ij' keeps the axes in the order of `vars`).
            var_names_ordered = [v for v in vars if v in basis_names]
            var_arrays = [np.array(vals[vars.index(v)]) for v in var_names_ordered]
            if var_arrays:
                meshgrids = np.meshgrid(*var_arrays, indexing='ij')
                for var_name, grid_values in zip(var_names_ordered, meshgrids):
                    logact_basis_arrays[var_name] = grid_values

        for j, basis_name in enumerate(basis_names):
            if basis_name in logact_basis_arrays:
                continue
            if basis_name in vars:
                # Single-variable case
                logact_basis_arrays[basis_name] = np.array(vals[vars.index(basis_name)])
            else:
                # Fixed activity from the basis definition (0.0 if the entry
                # is not numeric), repeated over the whole grid
                logact_val = _energy_as_float(basis_df.iloc[j]['logact'])
                if len(mydim) > 1:
                    logact_basis_arrays[basis_name] = np.full(mydim, logact_val)
                else:
                    logact_basis_arrays[basis_name] = np.full(n_conditions, logact_val)

        # --- logK of the formation reactions ------------------------------
        formation_logK = {}

        # subcrt is given Celsius
        T_celsius = T - 273.15

        # Unique ispecies (basis first), and the position of each one in the
        # batch request
        all_species_indices = list(dict.fromkeys(basis_ispecies_list + species_ispecies_list))
        ispecies_to_result_idx = {ispec: idx for idx, ispec in enumerate(all_species_indices)}

        try:
            grid_param = None
            if len(vars) >= 2:
                if 'T' in vars:
                    grid_param = 'T'
                elif 'P' in vars:
                    grid_param = 'P'

            # One subcrt call for every species.
            # NOTE(review): IS, exceed_Ttr and exceed_rhomin are not forwarded
            # here, unlike the non-affinity call above - confirm this is
            # intended.
            batch_result = subcrt(all_species_indices, property="logK", T=T_celsius, P=P,
                                  grid=grid_param, messages=messages, show=False)

            # logK per species, keyed by ispecies rather than by name so that
            # entries sharing a name cannot overwrite each other.
            # Assumes species_data is in the order of the request - TODO confirm.
            logK_by_ispecies = {}
            batch_out = batch_result.out
            if isinstance(batch_out, dict) and 'species_data' in batch_out:
                for ispec, result_idx in ispecies_to_result_idx.items():
                    logK_by_ispecies[ispec] = _energy_grid_logK(
                        batch_out['species_data'][result_idx], mydim)
            elif isinstance(batch_out, pd.DataFrame):
                # A bare DataFrame is attributed to the first requested species
                logK_by_ispecies[all_species_indices[0]] = _energy_grid_logK(batch_out, mydim)
            else:
                # Unrecognised structure: fall back to zeros
                for ispec in all_species_indices:
                    if len(mydim) > 1:
                        logK_by_ispecies[ispec] = np.zeros(mydim)
                    else:
                        logK_by_ispecies[ispec] = np.array([0.0])

            for i in range(n_species):
                species_row = species_df.iloc[i]
                species_idx = species_row['ispecies']

                if species_idx in basis_ispecies_values:
                    # A basis species is trivially formed from itself
                    formation_logK[species_idx] = 0.0
                    continue

                # logK_formation = logK_species - sum(coeff_i * logK_basis_i)
                # Built out of place so the stored per-species arrays are
                # never modified.
                logK_formation_val = logK_by_ispecies.get(species_idx, 0.0)
                for basis_name, basis_ispec in zip(basis_names, basis_ispecies_list):
                    coeff = species_row[basis_name]
                    basis_logK = logK_by_ispecies.get(basis_ispec, 0.0)
                    logK_formation_val = logK_formation_val - coeff * basis_logK

                formation_logK[species_idx] = logK_formation_val

        except Exception as e:
            warnings.warn(f"Batch subcrt call failed, falling back to individual calls: {e}")
            # Fallback: one subcrt call per formed species
            for i in range(n_species):
                species_idx = species_df.iloc[i]['ispecies']

                if species_idx in basis_ispecies_values:
                    formation_logK[species_idx] = 0.0
                    continue

                try:
                    species_name = species_df.iloc[i]['name']
                    formation_result = subcrt([species_name], [1], T=T_celsius, P=P,
                                              messages=messages, show=False)

                    logK_val = 0.0
                    if hasattr(formation_result, 'out'):
                        single_out = formation_result.out
                        if isinstance(single_out, dict) and 'species_data' in single_out:
                            sp_data = single_out['species_data'][0]
                            if 'logK' in sp_data.columns:
                                # NaN values are kept as they are
                                logK_val = sp_data['logK'].values
                            else:
                                logK_val = np.zeros(len(sp_data))
                        elif isinstance(single_out, pd.DataFrame):
                            if 'logK' in single_out.columns:
                                logK_val = single_out['logK'].values
                    formation_logK[species_idx] = logK_val
                except Exception as e2:
                    warnings.warn(f"Could not get formation logK for species {species_idx}: {e2}")
                    formation_logK[species_idx] = 0.0

        # --- affinity of each formed species ------------------------------
        for i in range(n_species):
            species_row = species_df.iloc[i]
            species_idx = species_row['ispecies']

            # logQ = +1 * logact_species + sum(-coeff_i * logact_basis_i):
            # the species is the product, the basis species are reactants.
            species_logact_val = _energy_as_float(species_row['logact'])
            if len(mydim) > 1:
                logQ_arrays = np.full(mydim, species_logact_val)
            else:
                logQ_arrays = np.full(n_conditions, species_logact_val)

            for basis_name in basis_names:
                logQ_arrays += (-species_row[basis_name]) * logact_basis_arrays[basis_name]

            # logK may vary along fewer axes than logQ (it does not depend on
            # the basis-species activities), so expand it onto the grid first.
            logK_formation = _energy_broadcast_logK(formation_logK[species_idx], mydim)

            affinity_array = logK_formation - logQ_arrays

            if n_conditions == 1 and len(mydim) <= 1:
                # Scalar case: no variables, or one variable with one point
                affinity_values[species_idx] = (
                    affinity_array.item() if hasattr(affinity_array, 'item') else affinity_array
                )
            else:
                # Keep the array (even with a single grid point) so the
                # dimensionality of the calculation stays visible
                affinity_values[species_idx] = affinity_array

        return {
            'sout': sout_data,
            'a': affinity_values
        }

    # Any other property (including 'logK') is read from column `what` of the
    # subcrt output, at row n_basis + i for the i-th formed species; NaN is
    # used when the row or column is not available.
    prop_values = {}
    has_rows = hasattr(sout_data, 'iloc')

    for i in range(n_species):
        species_idx = species_df.iloc[i]['ispecies']

        if has_rows and len(sout_data) > n_basis + i:
            prop_val = sout_data.iloc[n_basis + i][what] if what in sout_data.columns else np.nan
        else:
            prop_val = np.nan

        # Expand to the grid dimensions
        if np.prod(mydim) > 1:
            prop_values[species_idx] = np.full(mydim, prop_val)
        else:
            prop_values[species_idx] = prop_val

    return {
        'sout': sout_data,
        'a': prop_values
    }
1251
+
1252
+
1253
# Names re-exported by `from <this module> import *`
__all__ = ['affinity', 'energy_args', 'energy', 'AffinityError']