pychnosz 1.1.12__cp310-cp310-macosx_15_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. pychnosz/.dylibs/libgcc_s.1.1.dylib +0 -0
  2. pychnosz/.dylibs/libgfortran.5.dylib +0 -0
  3. pychnosz/.dylibs/libquadmath.0.dylib +0 -0
  4. pychnosz/__init__.py +129 -0
  5. pychnosz/_version.py +34 -0
  6. pychnosz/biomolecules/__init__.py +29 -0
  7. pychnosz/biomolecules/ionize_aa.py +197 -0
  8. pychnosz/biomolecules/proteins.py +595 -0
  9. pychnosz/core/__init__.py +46 -0
  10. pychnosz/core/affinity.py +1256 -0
  11. pychnosz/core/animation.py +593 -0
  12. pychnosz/core/balance.py +334 -0
  13. pychnosz/core/basis.py +716 -0
  14. pychnosz/core/diagram.py +3336 -0
  15. pychnosz/core/equilibrate.py +813 -0
  16. pychnosz/core/equilibrium.py +554 -0
  17. pychnosz/core/info.py +821 -0
  18. pychnosz/core/retrieve.py +364 -0
  19. pychnosz/core/speciation.py +580 -0
  20. pychnosz/core/species.py +599 -0
  21. pychnosz/core/subcrt.py +1696 -0
  22. pychnosz/core/thermo.py +593 -0
  23. pychnosz/core/unicurve.py +1226 -0
  24. pychnosz/data/__init__.py +11 -0
  25. pychnosz/data/add_obigt.py +327 -0
  26. pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
  27. pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
  28. pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
  29. pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
  30. pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
  31. pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
  32. pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
  33. pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
  34. pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
  35. pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
  36. pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
  37. pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
  38. pychnosz/data/extdata/Berman/sympy.R +99 -0
  39. pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
  40. pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
  41. pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
  42. pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
  43. pychnosz/data/extdata/OBIGT/AD.csv +25 -0
  44. pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
  45. pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
  46. pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
  47. pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
  48. pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
  49. pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
  50. pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
  51. pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
  52. pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
  53. pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
  54. pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
  55. pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
  56. pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
  57. pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
  58. pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
  59. pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
  60. pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
  61. pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
  62. pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
  63. pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
  64. pychnosz/data/extdata/misc/BZA10.csv +5 -0
  65. pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
  66. pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
  67. pychnosz/data/extdata/misc/LA19_test.csv +7 -0
  68. pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
  69. pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
  70. pychnosz/data/extdata/misc/PM90.csv +7 -0
  71. pychnosz/data/extdata/misc/RH95.csv +23 -0
  72. pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
  73. pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
  74. pychnosz/data/extdata/misc/SK95.csv +55 -0
  75. pychnosz/data/extdata/misc/SOJSH.csv +61 -0
  76. pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
  77. pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
  78. pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
  79. pychnosz/data/extdata/misc/bluered.txt +1000 -0
  80. pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
  81. pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
  82. pychnosz/data/extdata/protein/Cas/download.R +34 -0
  83. pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
  84. pychnosz/data/extdata/protein/POLG.csv +12 -0
  85. pychnosz/data/extdata/protein/TBD+05.csv +393 -0
  86. pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
  87. pychnosz/data/extdata/protein/rubisco.csv +28 -0
  88. pychnosz/data/extdata/protein/rubisco.fasta +239 -0
  89. pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
  90. pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
  91. pychnosz/data/extdata/src/README.txt +5 -0
  92. pychnosz/data/extdata/taxonomy/names.dmp +215 -0
  93. pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
  94. pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
  95. pychnosz/data/extdata/thermo/buffer.csv +40 -0
  96. pychnosz/data/extdata/thermo/element.csv +135 -0
  97. pychnosz/data/extdata/thermo/groups.csv +6 -0
  98. pychnosz/data/extdata/thermo/opt.csv +2 -0
  99. pychnosz/data/extdata/thermo/protein.csv +506 -0
  100. pychnosz/data/extdata/thermo/refs.csv +343 -0
  101. pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
  102. pychnosz/data/loader.py +431 -0
  103. pychnosz/data/mod_obigt.py +322 -0
  104. pychnosz/data/obigt.py +471 -0
  105. pychnosz/data/worm.py +228 -0
  106. pychnosz/fortran/.gitignore +6 -0
  107. pychnosz/fortran/__init__.py +16 -0
  108. pychnosz/fortran/h2o92.dylib +0 -0
  109. pychnosz/fortran/h2o92_interface.py +527 -0
  110. pychnosz/geochemistry/__init__.py +21 -0
  111. pychnosz/geochemistry/minerals.py +514 -0
  112. pychnosz/geochemistry/redox.py +500 -0
  113. pychnosz/models/__init__.py +47 -0
  114. pychnosz/models/archer_wang.py +165 -0
  115. pychnosz/models/berman.py +309 -0
  116. pychnosz/models/cgl.py +381 -0
  117. pychnosz/models/dew.py +997 -0
  118. pychnosz/models/hkf.py +523 -0
  119. pychnosz/models/hkf_helpers.py +231 -0
  120. pychnosz/models/iapws95.py +1113 -0
  121. pychnosz/models/supcrt92_fortran.py +238 -0
  122. pychnosz/models/water.py +480 -0
  123. pychnosz/utils/__init__.py +27 -0
  124. pychnosz/utils/expression.py +1074 -0
  125. pychnosz/utils/formula.py +830 -0
  126. pychnosz/utils/formula_ox.py +227 -0
  127. pychnosz/utils/reset.py +33 -0
  128. pychnosz/utils/units.py +259 -0
  129. pychnosz-1.1.12.dist-info/METADATA +197 -0
  130. pychnosz-1.1.12.dist-info/RECORD +133 -0
  131. pychnosz-1.1.12.dist-info/WHEEL +5 -0
  132. pychnosz-1.1.12.dist-info/licenses/LICENSE.txt +19 -0
  133. pychnosz-1.1.12.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1696 @@
1
+ """
2
+ CHNOSZ subcrt() function - Calculate standard molal thermodynamic properties.
3
+
4
+ This module implements the core subcrt() function that calculates standard molal
5
+ thermodynamic properties of species and reactions, maintaining complete fidelity
6
+ to the R CHNOSZ implementation.
7
+
8
+ References:
9
+ - R CHNOSZ package subcrt.R
10
+ - Shock, E. L., Oelkers, E. H., Johnson, J. W., Sverjensky, D. A., & Helgeson, H. C. (1992).
11
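+ Calculation of the thermodynamic properties of aqueous species at high pressures and temperatures: Effective electrostatic radii, dissociation constants and standard partial molal properties to 1000 °C and 5 kbar. Journal of the Chemical Society, Faraday Transactions, 88(6), 803-826.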
12
+ """
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ from typing import Union, List, Optional, Dict, Any, Tuple
17
+ import warnings
18
+
19
+ from ..core.thermo import thermo
20
+ from ..core.info import info
21
+ from ..models.water import water
22
+ from ..utils.formula import makeup
23
+
24
+
25
class SubcrtResult:
    """Result container returned by subcrt().

    All fields are plain instance attributes that subcrt() fills in as the
    calculation proceeds; a freshly constructed object holds no results.
    """

    def __init__(self):
        # Species information DataFrame (one row per requested species).
        self.species = None
        # Calculated properties. subcrt() stores either a DataFrame here or,
        # for the multi-species case, a dict carrying a 'species_data' list.
        self.out = None
        # Reaction summary DataFrame (coefficients, names, states, models).
        self.reaction = None
        # Warning messages collected during the calculation. A new list is
        # created per instance so results never share warning state.
        self.warnings = []

    def __repr__(self):
        if self.out is None:
            return "SubcrtResult (no calculations performed)"
        # len() of the dict form would count its keys, not results, so report
        # the number of per-species tables instead.
        if isinstance(self.out, dict) and 'species_data' in self.out:
            n_tables = len(self.out['species_data'])
            return f"SubcrtResult with {n_tables} species tables calculated"
        return f"SubcrtResult with {len(self.out)} properties calculated"
38
+
39
+
40
def subcrt(species: Union[str, List[str], int, List[int]],
           coeff: Union[int, float, List[Union[int, float]], None] = 1,
           state: Optional[Union[str, List[str]]] = None,
           property: List[str] = ["logK", "G", "H", "S", "V", "Cp"],
           T: Union[float, List[float], np.ndarray] = np.concatenate([[273.16], 273.15 + np.arange(25, 351, 25)]),
           P: Union[float, List[float], np.ndarray, str] = "Psat",
           grid: Optional[str] = None,
           convert: bool = True,
           exceed_Ttr: bool = True,
           exceed_rhomin: bool = False,
           logact: Optional[List[float]] = None,
           autobalance: bool = True,
           use_polymorphs: bool = True,
           IS: Union[float, List[float]] = 0,
           messages: bool = True,
           show: bool = True,
           basis: Optional[pd.DataFrame] = None,
           _recursion_count: int = 0) -> SubcrtResult:
    """
    Calculate standard molal thermodynamic properties of species and reactions.

    Modelled on the R CHNOSZ subcrt() function (argument handling, species
    lookup, auto-balancing and output layout follow the R implementation).

    Parameters
    ----------
    species : str, list of str, int, or list of int
        Species names, formulas, or indices in thermodynamic database
    coeff : int, float, list, or None
        Stoichiometric coefficients for reaction calculation
        If 1 (default), calculate individual species properties
        If a sequence (list, tuple or ndarray), calculate a reaction with the
        given coefficients. A string / sequence of strings in this position
        is interpreted as `state` (R-style argument reordering).
    state : str, list of str, or None
        Physical states ("aq", "cr", "gas", "liq") for species
    property : list of str
        Properties to calculate: "logK", "G", "H", "S", "V", "Cp", "rho", "kT", "E"
    T : float, list, or ndarray
        Temperature(s). With convert=True, user-supplied values are shifted by
        +273.15 (i.e. taken as degrees Celsius); with convert=False they are
        used unchanged. The default grid (273.16 K, then 298.15 to 623.15 K
        by 25 K) is already in Kelvin and is never shifted.
        Caveat: input equal to the leading entries of the default grid
        (e.g. T=273.16) is indistinguishable from the default and is
        therefore also left unshifted.
    P : float, list, ndarray, or "Psat"
        Pressure(s) in bar or "Psat" for saturation pressure
    grid : str or None
        Grid calculation mode: "T", "P", "IS", or None
    convert : bool
        Convert input temperature and output units (default: True)
    exceed_Ttr : bool
        Allow calculations beyond transition temperatures (default: True)
    exceed_rhomin : bool
        Allow calculations below minimum water density (default: False)
    logact : list of float or None
        Logarithms (base 10) of activities, one per species
    autobalance : bool
        Automatically balance reactions using basis species (default: True)
    use_polymorphs : bool
        Include polymorphic phases for minerals (default: True)
    IS : float or list of float
        Ionic strength for activity corrections (default: 0)
    messages : bool, default True
        Whether to print informational messages
    show : bool, default True
        Whether to display result tables in Jupyter notebooks (default: True)
        Set to False when calling subcrt() from other functions
    basis : pd.DataFrame, optional
        Basis species definition to use for autobalancing (if not using global basis)

    Returns
    -------
    SubcrtResult
        Object containing:
        - species: DataFrame with species information
        - out: DataFrame with calculated thermodynamic properties
        - reaction: DataFrame with reaction stoichiometry (if reaction)
        - warnings: List of warning messages

    Raises
    ------
    ValueError
        For invalid property names, mismatched `coeff` / `logact` lengths,
        or when both `coeff` and `state` are non-numeric.

    Examples
    --------
    >>> import pychnosz
    >>> pychnosz.reset()
    >>>
    >>> # Single species properties
    >>> result = subcrt("H2O", T=25, P=1)
    >>> print(result.out[["G", "H", "S", "Cp"]])
    >>>
    >>> # Reaction calculation
    >>> result = subcrt(["H2O", "H+", "OH-"], [-1, 1, 1], T=25, P=1)
    >>> print(f"Water dissociation ΔG° = {result.out.G[0]:.3f} kJ/mol")
    >>>
    >>> # Temperature array
    >>> result = subcrt("quartz", T=[25, 100, 200], P=1)
    >>> print(result.out[["T", "G", "H", "S"]])
    """

    result = SubcrtResult()

    # Prevent infinite recursion in auto-balancing
    if _recursion_count > 5:
        result.warnings.append("Maximum recursion depth reached in auto-balancing")
        return result

    # === Phase 1: Argument Processing and Validation ===

    # Accept tuples / ndarrays wherever a list is accepted. Comparing an
    # ndarray with `!= 1` in a boolean context raises, and a tuple would
    # otherwise be silently treated as "no reaction".
    if isinstance(coeff, np.ndarray):
        coeff = coeff.tolist()
    elif isinstance(coeff, tuple):
        coeff = list(coeff)
    if isinstance(state, np.ndarray):
        state = state.tolist()
    elif isinstance(state, tuple):
        state = list(state)

    # Handle argument reordering if states were passed as the second argument
    states_in_coeff = isinstance(coeff, str) or (
        isinstance(coeff, list) and len(coeff) > 0 and isinstance(coeff[0], str))
    if states_in_coeff:
        passthrough = dict(property=property, T=T, P=P, grid=grid, convert=convert,
                           exceed_Ttr=exceed_Ttr, exceed_rhomin=exceed_rhomin,
                           logact=logact, autobalance=autobalance,
                           use_polymorphs=use_polymorphs, IS=IS, messages=messages,
                           show=show, basis=basis, _recursion_count=_recursion_count)
        if state is None:
            # Only states provided, no coefficients
            return subcrt(species, 1, coeff, **passthrough)
        state_is_numeric = isinstance(state, (int, float)) or (
            isinstance(state, list) and all(isinstance(x, (int, float)) for x in state))
        if not state_is_numeric:
            raise ValueError("If both coeff and state are given, one should be numeric coefficients")
        # Third argument holds the coefficients: swap the two
        return subcrt(species, state, coeff, **passthrough)

    # Determine if this is a reaction calculation
    if isinstance(coeff, list):
        do_reaction = True
    elif isinstance(coeff, (int, float, np.integer, np.floating)):
        do_reaction = bool(coeff != 1)
    else:
        do_reaction = False

    # Convert inputs to consistent formats
    species = [species] if isinstance(species, (str, int, np.integer)) else list(species)
    if state is not None:
        state = [state] if isinstance(state, str) else list(state)
        # Recycle the shorter of species / state so both have the same length
        if len(state) > len(species):
            species = species * (len(state) // len(species) + 1)
            species = species[:len(state)]
        elif len(species) > len(state):
            state = state * (len(species) // len(state) + 1)
            state = state[:len(species)]

    if do_reaction:
        coeff = list(coeff) if isinstance(coeff, list) else [coeff]

    # Validate properties
    allowed_properties = ["rho", "logK", "G", "H", "S", "Cp", "V", "kT", "E"]
    if isinstance(property, str):
        property = [property]

    invalid_props = [p for p in property if p not in allowed_properties]
    if invalid_props:
        if len(invalid_props) == 1:
            raise ValueError(f"invalid property name: {invalid_props[0]}")
        raise ValueError(f"invalid property names: {', '.join(invalid_props)}")

    # Length checking
    if do_reaction and len(species) != len(coeff):
        raise ValueError("the length of 'coeff' must equal the number of species")

    if logact is not None and len(logact) != len(species):
        raise ValueError("the length of 'logact' must equal the number of species")

    # Unit conversion
    T_array = np.atleast_1d(np.asarray(T, dtype=float))
    # The default grid is already in K; anything else is treated as user
    # input and shifted by 273.15 when convert=True.
    default_T = np.concatenate([[273.16], 273.15 + np.arange(25, 351, 25)])
    if convert and not np.array_equal(T_array, default_T[:len(T_array)]):
        T_array = T_array + 273.15

    # Replace exactly 273.15 K by 273.16 K to avoid numerical issues at the
    # freezing point
    T_array = np.where(np.abs(T_array - 273.15) < 1e-10, 273.16, T_array)

    if isinstance(P, str) and P == "Psat":
        P_array = "Psat"
    else:
        # P is assumed to be in bar
        P_array = np.atleast_1d(np.asarray(P, dtype=float))

    # Warning for temperatures above the critical point with Psat
    if isinstance(P, str) and P == "Psat" and np.any(T_array > 647.067):
        n_over = np.sum(T_array > 647.067)
        vtext = "value" if n_over == 1 else "values"
        result.warnings.append(f"P = 'Psat' undefined for T > Tcritical ({n_over} T {vtext})")

    # === Phase 2: Grid Processing ===
    P_is_psat = isinstance(P_array, str)
    if grid == "T":
        # Each temperature is repeated once per pressure; pressures cycle
        n_T = len(T_array)
        if P_is_psat:
            T_array = np.repeat(T_array, 1)
        else:
            n_P = len(P_array)
            T_array = np.repeat(T_array, n_P)
            P_array = list(P_array) * n_T
    elif grid == "P":
        # Each pressure is repeated once per temperature; temperatures cycle
        if not P_is_psat:
            n_T = len(T_array)
            n_P = len(P_array)
            P_array = np.repeat(P_array, n_T)
            T_array = np.tile(T_array, n_P)
    elif grid == "IS":
        # Each ionic strength is repeated over the whole T/P set
        IS_array = np.atleast_1d(np.asarray(IS))
        original_len = max(len(T_array), 1 if P_is_psat else len(P_array))
        new_IS = [ionic_str for ionic_str in IS_array for _ in range(original_len)]
        T_array = np.tile(T_array, len(IS_array))
        if not P_is_psat:
            P_array = np.tile(P_array, len(IS_array))
        IS = new_IS
    elif not P_is_psat:
        # NOTE(review): nesting reconstructed from flattened source. This
        # branch runs when no recognised grid is requested and recycles the
        # shorter of T and P so both have the same length.
        max_len = max(len(T_array), len(P_array))
        if len(T_array) < max_len:
            T_array = np.resize(T_array, max_len)
        if len(P_array) < max_len:
            P_array = np.resize(P_array, max_len)

    # === Phase 3: Species Lookup and Validation ===
    (result.species, result.reaction, iphases, isaq, isH2O, iscgl,
     polymorph_species, ispecies) = _process_species(
        species, state, coeff, do_reaction, use_polymorphs, messages=messages)

    # === Phase 4: Generate Output Message ===
    if (len(species) > 1 or convert) and messages:
        _print_subcrt_message(species, T_array, P_array, isaq.any() or isH2O.any(), messages)

    # === Phase 5: Reaction Balance Check ===
    if do_reaction and autobalance:
        # The balance check uses the species indices from before polymorph
        # expansion; the raw T and P are forwarded so a restart does not
        # convert them twice.
        rebalanced_result = _check_reaction_balance(
            result, species, coeff, state, property,
            T_array, P_array, grid, convert, logact,
            exceed_Ttr, exceed_rhomin, IS, ispecies, _recursion_count,
            basis, T, P, messages, show)
        if rebalanced_result is not None:
            # Reaction was rebalanced and recomputed recursively
            return rebalanced_result

    # === Phase 6: Property Calculations ===
    result.out, calc_warnings = _calculate_properties(
        property, iphases, isaq, isH2O, iscgl,
        T_array, P_array, exceed_rhomin, exceed_Ttr, IS, logact, do_reaction)
    result.warnings.extend(calc_warnings)

    # === Phase 6.5: Polymorph Selection ===
    if use_polymorphs:
        thermo_sys = thermo()
        if do_reaction:
            # For reactions the coefficients and phase list are updated too,
            # and the reaction DataFrame is rebuilt from the selected phases
            result.out, updated_coeff, updated_iphases = _select_stable_polymorphs(
                result.out, iphases, polymorph_species, ispecies, thermo_sys,
                result.reaction['coeff'].tolist(), messages)
            reaction_data = []
            for i, iph in enumerate(updated_iphases):
                row = thermo_sys.obigt.loc[iph]
                model = row.get('model', 'unknown')
                if model == "H2O":
                    water_model = thermo_sys.get_option('water', 'SUPCRT92')
                    model = f"water.{water_model}"
                reaction_data.append({
                    'coeff': updated_coeff[i],
                    'name': row['name'],
                    'formula': row['formula'],
                    'state': row['state'],
                    'ispecies': iph,
                    'model': model
                })
            result.reaction = pd.DataFrame(reaction_data)
        else:
            # For individual species, no coefficient update needed
            result.out, _ = _select_stable_polymorphs(
                result.out, iphases, polymorph_species, ispecies, thermo_sys,
                None, messages)

    # For a single species (non-reaction), unwrap the one-entry dict form.
    # NOTE(review): nesting reconstructed from flattened source; kept at
    # function level so the unwrap does not depend on use_polymorphs -
    # confirm against the upstream file.
    if (not do_reaction and isinstance(result.out, dict)
            and 'species_data' in result.out
            and len(result.out['species_data']) == 1):
        result.out = result.out['species_data'][0]

    # === Phase 7: Reaction Property Summation ===
    if do_reaction:
        result.out = _sum_reaction_properties(result.out, result.reaction['coeff'])

    # === Phase 8: Unit Conversion (convert=True) ===
    if convert:
        result.out = _apply_r_chnosz_conversion(result.out, do_reaction)

        # Recalculate logK from the converted G so both stay consistent.
        # NOTE(review): nesting reconstructed from flattened source; kept
        # under `convert` because R below is expressed in joules.
        if do_reaction and 'logK' in property and 'G' in result.out.columns:
            if not result.out['G'].isna().all():
                R = 8.314462618  # J/(mol·K) - CODATA 2018 value
                T_array = np.atleast_1d(T_array)
                result.out['logK'] = -result.out['G'] / (np.log(10) * R * T_array)

    # Display tables in Jupyter notebooks if show=True
    if show:
        _display_subcrt_result(result)

    # Emit collected warnings through the warnings module
    if result.warnings and messages:
        for warn in result.warnings:
            warnings.warn(warn)

    return result
379
+
380
+
381
def _process_species(species, state, coeff, do_reaction, use_polymorphs, messages=True):
    """Resolve species to database rows and expand mineral polymorphs.

    Parameters
    ----------
    species : list
        Species names/formulas (looked up with info()) or integer row labels
        of the OBIGT table.
    state : list of str or None
        Optional state per species, used only for name lookups.
    coeff : list
        Reaction coefficients; read only when `do_reaction` is true.
    do_reaction : bool
        Whether `coeff` applies; otherwise every coefficient is 1.
    use_polymorphs : bool
        If true, a species whose state is "cr" is replaced by all of its
        "cr".."cr9" entries when more than one exists.
    messages : bool
        Forwarded to info().

    Returns
    -------
    tuple
        (species_df, reaction_df, iphases, isaq, isH2O, iscgl,
        polymorph_species, ispecies). `iphases` lists the expanded row
        labels, and `polymorph_species[k]` is the position in `species`
        that `iphases[k]` came from. The three `is*` values are boolean
        Series aligned with `reaction_df`.

    Raises
    ------
    ValueError
        If an integer label is not in the table or a name cannot be found.
    """
    thermo_sys = thermo()
    obigt = thermo_sys.obigt

    # --- Look up each species ---
    ispecies = []
    newstate = []
    for i, sp in enumerate(species):
        if isinstance(sp, (int, np.integer)):
            # Numeric input is used directly as a row label of the table
            sindex = int(sp)
            if sindex not in obigt.index:
                raise ValueError(f"{sp} is not a valid row number of thermo database")
        else:
            sp_state = state[i] if state and i < len(state) else None
            sindex = info(sp, sp_state, messages=messages)
            # A failed lookup may come back as None or as NaN
            if sindex is None or (isinstance(sindex, float) and np.isnan(sindex)):
                if sp_state:
                    raise ValueError(f"no info found for {sp} {sp_state}")
                raise ValueError(f"no info found for {sp}")
        ispecies.append(sindex)
        newstate.append(obigt.loc[sindex]['state'])

    # --- Polymorph expansion for minerals ---
    polymorph_states = ["cr", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", "cr8", "cr9"]
    iphases = []
    polymorph_species = []
    coeff_new = []
    for i, isp in enumerate(ispecies):
        sp_coeff = coeff[i] if do_reaction else 1
        phases = [isp]

        if newstate[i] == "cr" and use_polymorphs:
            # Filter by name once, then collect entries state by state so the
            # result is ordered cr, cr2, cr3, ...
            same_name = obigt[obigt['name'] == obigt.loc[isp]['name']]
            polymorphs = []
            for poly_state in polymorph_states:
                polymorphs.extend(
                    same_name.index[same_name['state'] == poly_state].tolist())
            if len(polymorphs) > 1:
                phases = polymorphs

        iphases.extend(phases)
        # Track the position i (not the row label) so the same species may
        # appear several times in one reaction
        polymorph_species.extend([i] * len(phases))
        coeff_new.extend([sp_coeff] * len(phases))

    # --- Reaction DataFrame (one row per expanded phase) ---
    reaction_data = []
    for i, iph in enumerate(iphases):
        row = obigt.loc[iph]
        model = row.get('model', 'unknown')
        # Tag H2O with the configured water model
        if model == "H2O":
            water_model = thermo_sys.get_option('water', 'SUPCRT92')
            model = f"water.{water_model}"
        reaction_data.append({
            'coeff': coeff_new[i],
            'name': row['name'],
            'formula': row['formula'],
            'state': row['state'],
            'ispecies': iph,
            'model': model
        })
    # Explicit columns keep the frame usable when no species were given
    reaction_df = pd.DataFrame(
        reaction_data,
        columns=['coeff', 'name', 'formula', 'state', 'ispecies', 'model'])

    # --- Classify rows by model ---
    isaq = reaction_df['model'].str.upper().isin(['HKF', 'AD', 'DEW'])
    # Literal match: as a regex, the '.' would match any character
    isH2O = reaction_df['model'].str.contains('water.', na=False, regex=False)
    iscgl = reaction_df['model'].isin(['CGL', 'CGL_Ttr', 'Berman'])

    # --- Species summary DataFrame (one row per requested species) ---
    species_data = []
    for isp in ispecies:
        row = obigt.loc[isp]
        species_data.append({
            'name': row['name'],
            'formula': row['formula'],
            'state': row['state'],
            'ispecies': isp
        })
    species_df = pd.DataFrame(
        species_data, columns=['name', 'formula', 'state', 'ispecies'])

    return species_df, reaction_df, iphases, isaq, isH2O, iscgl, polymorph_species, ispecies
497
+
498
+
499
def _print_subcrt_message(species, T, P, is_wet, messages=True):
    """Print the one-line subcrt summary of species count, T and P.

    Parameters
    ----------
    species : sequence
        Only its length is reported.
    T : scalar or array-like
        Temperature(s); a single value is printed after subtracting 273.15.
    P : "Psat", scalar, or array-like
        Pressure(s); a single numeric value is printed with two decimals.
    is_wet : bool
        If true, " (wet)" is appended to the pressure text.
    messages : bool
        If false, nothing is printed.
    """
    if not messages:
        return

    # atleast_1d lets scalar input take the same path as length-1 arrays
    T_vals = np.atleast_1d(T)
    if len(T_vals) == 1:
        T_celsius = T_vals[0] - 273.15
        T_text = f"{T_celsius:.0f} ºC"
    else:
        T_text = f"{len(T_vals)} values of T (ºC)"

    if isinstance(P, str):
        # Any string other than "Psat" has no numeric value to report
        P_text = "Psat" if P == "Psat" else "P (bar)"
    else:
        P_vals = np.atleast_1d(P)
        P_text = f"{P_vals[0]:.2f} bar" if len(P_vals) == 1 else "P (bar)"

    if is_wet:
        P_text += " (wet)"

    print(f"subcrt: {len(species)} species at {T_text} and {P_text} [energy units: J]")
523
+
524
+
525
+ def _check_reaction_balance(result, species, coeff, state, property, T, P, grid,
526
+ convert, logact, exceed_Ttr, exceed_rhomin, IS, iphases, recursion_count, basis_arg=None, original_T=None, original_P=None, messages=True, show=True):
527
+ """Check reaction balance and auto-balance if needed."""
528
+
529
+ # Calculate mass balance
530
+ formulas = [result.species.iloc[i]['formula'] for i in range(len(species))]
531
+
532
+ try:
533
+ mass_balance = makeup(iphases, coeff, sum_formulas=True)
534
+
535
+ # Check if balanced (within tolerance) - use smaller tolerance for better precision
536
+ tolerance = 1e-6
537
+ unbalanced_elements = {elem: val for elem, val in mass_balance.items()
538
+ if abs(val) > tolerance}
539
+
540
+ if unbalanced_elements:
541
+ # Reaction is unbalanced - show missing composition
542
+ missing_composition = {elem: -val for elem, val in unbalanced_elements.items()}
543
+ if messages:
544
+ print("subcrt: reaction is not balanced; it is missing this composition:")
545
+ # Format like R CHNOSZ: elements on one line, values on the next
546
+ elem_names = list(missing_composition.keys())
547
+ elem_values = list(missing_composition.values())
548
+ print(" ".join(elem_names))
549
+ print(" ".join([str(val) for val in elem_values]))
550
+
551
+ # Try to balance using basis species
552
+ thermo_sys = thermo()
553
+ # Use provided basis or get from global state
554
+ if basis_arg is not None:
555
+ basis_for_balance = basis_arg
556
+ elif hasattr(thermo_sys, 'basis') and thermo_sys.basis is not None:
557
+ basis_for_balance = thermo_sys.basis
558
+ else:
559
+ basis_for_balance = None
560
+
561
+ if basis_for_balance is not None:
562
+ # Get basis element columns
563
+ basis_elements = [col for col in basis_for_balance.columns
564
+ if col not in ['ispecies', 'logact', 'state']]
565
+
566
+ # Check if all missing elements are in basis
567
+ missing_elements = set(missing_composition.keys())
568
+ if missing_elements.issubset(set(basis_elements)):
569
+
570
+ # Calculate coefficients for missing composition from basis species
571
+ # Create a matrix with the missing composition
572
+ missing_matrix = np.zeros((1, len(basis_elements)))
573
+ for i, elem in enumerate(basis_elements):
574
+ missing_matrix[0, i] = missing_composition.get(elem, 0)
575
+
576
+ try:
577
+ # For multi-species balancing, we need to find the minimal solution
578
+ # R CHNOSZ tends to prefer simple integer solutions
579
+
580
+ # Get basis matrix - need to transpose to match R CHNOSZ behavior
581
+ # In R: tbmat is transposed so that solve(tbmat, x) works correctly
582
+ basis_matrix = basis_for_balance[basis_elements].values.T # Transpose: (elements × basis_species)
583
+
584
+ # Try to find simple integer solutions first
585
+ basis_coeffs = _find_simple_integer_solution(basis_matrix, missing_matrix.flatten(), basis_for_balance.index.tolist(), missing_composition)
586
+
587
+ if basis_coeffs is None:
588
+ # Fall back to linear algebra solution
589
+ basis_coeffs = np.linalg.solve(basis_matrix, missing_matrix.T).flatten()
590
+
591
+ # Apply R CHNOSZ's zapsmall equivalent (digits=7)
592
+ basis_coeffs = np.around(basis_coeffs, decimals=7)
593
+
594
+ # Clean up very small numbers to exactly zero
595
+ basis_coeffs[np.abs(basis_coeffs) < 1e-7] = 0
596
+
597
+ # Get non-zero coefficients and corresponding basis species
598
+ nonzero_indices = np.abs(basis_coeffs) > 1e-6
599
+ if np.any(nonzero_indices):
600
+ # Get basis species info
601
+ basis_indices = basis_for_balance['ispecies'].values[nonzero_indices]
602
+ basis_coeffs_nz = basis_coeffs[nonzero_indices]
603
+ basis_states = basis_for_balance['state'].values[nonzero_indices]
604
+ basis_logacts = basis_for_balance['logact'].values[nonzero_indices]
605
+
606
+ # Create new species list and coefficients
607
+ new_species = list(species) + [int(idx) for idx in basis_indices]
608
+ new_coeff = list(coeff) + list(basis_coeffs_nz)
609
+ new_state = list(state) if state else [None] * len(species)
610
+ new_state.extend(list(basis_states))
611
+
612
+ # Handle logact values - only add if original logact was provided
613
+ new_logact = None
614
+ if logact is not None:
615
+ new_logact = list(logact)
616
+ # Add basis logact values, but only if they are numeric
617
+ for la in basis_logacts:
618
+ try:
619
+ new_logact.append(float(la))
620
+ except (ValueError, TypeError):
621
+ # Non-numeric logact (possibly buffer name)
622
+ if messages:
623
+ print(f"subcrt: logact values of basis species are NA.")
624
+ new_logact.append(0.0) # Default value
625
+
626
+ # Check if this is a trivial reaction (same species)
627
+ thermo_obj = thermo()
628
+ new_formulas = []
629
+ for sp in new_species:
630
+ if isinstance(sp, int):
631
+ new_formulas.append(thermo_obj.obigt.loc[sp]['formula'])
632
+ else:
633
+ # Look up formula from species name
634
+ sp_info = info(sp, messages=messages)
635
+ new_formulas.append(thermo_obj.obigt.loc[sp_info]['formula'])
636
+
637
+ original_formulas = [result.species.iloc[i]['formula'] for i in range(len(species))]
638
+ if set(new_formulas) == set(original_formulas) and set(new_state) == set(state if state else [None] * len(species)):
639
+ if messages:
640
+ print("subcrt: balanced reaction, but it is a non-reaction; restarting...")
641
+ else:
642
+ if messages:
643
+ print("subcrt: adding missing composition from basis definition and restarting...")
644
+
645
+ # Recursively call subcrt with balanced reaction
646
+ # Use original T and P values to avoid double conversion issues
647
+ T_to_use = original_T if original_T is not None else T
648
+ P_to_use = original_P if original_P is not None else P
649
+ return subcrt(species=new_species, coeff=new_coeff, state=new_state,
650
+ property=property, T=T_to_use, P=P_to_use, grid=grid, convert=convert,
651
+ logact=new_logact, exceed_Ttr=exceed_Ttr, exceed_rhomin=exceed_rhomin, IS=IS,
652
+ messages=messages, show=show, basis=basis_arg, _recursion_count=recursion_count + 1)
653
+
654
+ except np.linalg.LinAlgError:
655
+ from ..utils.formula import as_chemical_formula
656
+ missing_formula = as_chemical_formula(missing_composition)
657
+ result.warnings.append(f"reaction among {','.join(species)} was unbalanced, missing {missing_formula}")
658
+ else:
659
+ from ..utils.formula import as_chemical_formula
660
+ missing_formula = as_chemical_formula(missing_composition)
661
+ result.warnings.append(f"reaction among {','.join(species)} was unbalanced, missing {missing_formula}")
662
+ else:
663
+ from ..utils.formula import as_chemical_formula
664
+ missing_formula = as_chemical_formula(missing_composition)
665
+ result.warnings.append(f"reaction among {','.join(species)} was unbalanced, missing {missing_formula}")
666
+
667
+ except Exception as e:
668
+ result.warnings.append(f"could not check reaction balance: {str(e)}")
669
+ import traceback
670
+ traceback.print_exc()
671
+
672
+ return None # Continue with original calculation
673
+
674
+
675
def _select_stable_polymorphs(properties_data, iphases, polymorph_species, ispecies, thermo_sys, reaction_coeff=None, messages=True):
    """
    Collapse the polymorphs of each species into a single entry per species.

    Phases are grouped by the value they have in ``polymorph_species``. A group
    with one (unique) phase is passed through unchanged. For a group with
    several phases, one polymorph is chosen at every T-P condition:

    1. Polymorph ``k`` is temperature-valid when
       ``z.T[k-1] <= T < z.T[k]`` (lower bound 0 for the first polymorph or
       when the previous ``z.T`` is NaN; no upper bound when ``z.T[k]`` is NaN).
    2. Among temperature-valid polymorphs with a non-NaN ``G``, the one with
       the minimum ``G`` is selected.
    3. If none qualifies but some polymorph has a non-NaN ``G``, the one with
       the highest ``z.T`` among those is used (the last of them if every
       ``z.T`` is NaN).
    4. If every ``G`` is NaN the first polymorph is used.

    The original docstring states that this is intended to replicate the
    polymorph selection of R CHNOSZ (``subcrt.R``).

    Parameters
    ----------
    properties_data : dict
        Dictionary with a ``'species_data'`` list (one DataFrame per phase)
        and a ``'T'`` entry whose length gives the number of conditions.
        Anything else is returned unchanged.
    iphases : list
        OBIGT row labels of all phases (includes all polymorphs).
    polymorph_species : list
        Maps each phase (by position) to its original species.
    ispecies : list
        Original species indices. Not used by this function; kept so the
        call signature stays unchanged.
    thermo_sys : object
        Object whose ``obigt`` DataFrame provides ``name`` and ``z.T``.
    reaction_coeff : list, optional
        Per-phase reaction coefficients. When given, a collapsed list (the
        coefficient of each group's first phase) is returned as well.
    messages : bool, default True
        If True, print which polymorphs were found to be stable.

    Returns
    -------
    tuple
        ``(updated_properties, new_iphases)`` when ``reaction_coeff`` is None,
        otherwise ``(updated_properties, new_coefficients, new_iphases)``.
        ``updated_properties`` is a shallow copy of ``properties_data`` with
        ``'species_data'`` and ``'n_species'`` replaced; merged DataFrames gain
        a 1-based ``'polymorph'`` column.
    """
    if not isinstance(properties_data, dict) or 'species_data' not in properties_data:
        if reaction_coeff is not None:
            return properties_data, reaction_coeff, iphases
        return properties_data, iphases

    species_data_list = properties_data['species_data']
    n_conditions = len(properties_data['T'])

    # Group phase positions by original species (dict keeps first-seen order)
    species_groups = {}
    for position, orig_species in enumerate(polymorph_species):
        species_groups.setdefault(orig_species, []).append(position)

    new_species_data = []
    new_iphases = []
    new_coefficients = [] if reaction_coeff is not None else None

    for grouped_positions in species_groups.values():
        # The same phase may be listed more than once for a species; keep only
        # the first occurrence of each OBIGT label so that repeats are not
        # mistaken for distinct polymorphs.
        first_position_by_phase = {}
        for position in grouped_positions:
            first_position_by_phase.setdefault(iphases[position], position)
        phase_indices = list(first_position_by_phase.values())
        representative = phase_indices[0]

        if len(phase_indices) == 1:
            # Single phase - keep its data as-is
            new_species_data.append(species_data_list[representative])
        else:
            species_name = thermo_sys.obigt.loc[iphases[representative]]['name']
            if messages:
                print(f"subcrt: {len(phase_indices)} polymorphs for {species_name} ... ", end="")

            # Gibbs energies (NaN when unavailable) and transition temperatures
            G_columns = []
            z_T_values = []
            for phase_i in phase_indices:
                if phase_i < len(species_data_list) and 'G' in species_data_list[phase_i].columns:
                    G_columns.append(species_data_list[phase_i]['G'].values)
                else:
                    G_columns.append(np.full(n_conditions, np.nan))
                z_T_values.append(thermo_sys.obigt.loc[iphases[phase_i]].get('z.T', np.nan))

            G_array = np.array(G_columns).T  # Shape: (n_conditions, n_polymorphs)
            # Coerce to float so that a missing value stored as None acts like NaN
            z_T_array = np.asarray(z_T_values, dtype=float)

            # Species data carries T in Celsius; z.T is compared in Kelvin
            if species_data_list and len(species_data_list[0]) > 0:
                T_kelvin = species_data_list[0]['T'].values + 273.15
            else:
                T_kelvin = np.full(n_conditions, 298.15)  # Default to 25 degrees C

            stable_polymorph_indices = _pick_stable_polymorph_per_condition(
                G_array, z_T_array, T_kelvin, n_conditions)

            combined_data = _merge_polymorph_rows(
                species_data_list, phase_indices, stable_polymorph_indices)
            # Track which polymorph was selected (1-based like R)
            combined_data['polymorph'] = stable_polymorph_indices + 1
            new_species_data.append(combined_data)

            if messages:
                unique_polymorphs = np.unique(stable_polymorph_indices + 1)
                if len(unique_polymorphs) > 1:
                    word, p_word = "are", "polymorphs"
                else:
                    word, p_word = "is", "polymorph"
                print(f"{p_word} {','.join(map(str, unique_polymorphs))} {word} stable")

        # The first phase of the group represents the species in the output,
        # and its coefficient is the one carried over.
        new_iphases.append(iphases[representative])
        if new_coefficients is not None:
            new_coefficients.append(reaction_coeff[representative])

    # Shallow copy so the caller's dictionary is not modified
    updated_properties = properties_data.copy()
    updated_properties['species_data'] = new_species_data
    updated_properties['n_species'] = len(new_species_data)

    if reaction_coeff is not None:
        return updated_properties, new_coefficients, new_iphases
    return updated_properties, new_iphases


def _pick_stable_polymorph_per_condition(G_array, z_T_array, T_kelvin, n_conditions):
    """Return, for each condition, the position of the selected polymorph.

    ``G_array`` has shape (n_conditions, n_polymorphs); ``z_T_array`` holds one
    transition temperature per polymorph and ``T_kelvin`` one temperature per
    condition. The selection rules are those listed in
    ``_select_stable_polymorphs``.
    """
    n_polymorphs = len(z_T_array)

    # Temperature window of each polymorph; these do not depend on the
    # condition, so they are computed once instead of inside the loop.
    lower_bounds = np.zeros(n_polymorphs)
    if n_polymorphs > 1:
        previous_z_T = z_T_array[:-1]
        lower_bounds[1:] = np.where(np.isnan(previous_z_T), 0.0, previous_z_T)
    upper_bounds = np.where(np.isnan(z_T_array), np.inf, z_T_array)

    stable = np.zeros(n_conditions, dtype=int)
    for j in range(n_conditions):
        G_row = G_array[j, :]
        T_j = T_kelvin[j]

        has_G = ~np.isnan(G_row)
        in_window = (T_j >= lower_bounds) & (T_j < upper_bounds)
        candidates = np.flatnonzero(in_window & has_G)

        if candidates.size:
            # Minimum G among temperature-valid polymorphs with data
            stable[j] = candidates[np.argmin(G_row[candidates])]
        elif has_G.any():
            # No temperature-valid polymorph: fall back to the available one
            # with the highest z.T (or the last one if every z.T is NaN)
            available = np.flatnonzero(has_G)
            z_T_available = z_T_array[available]
            if np.any(~np.isnan(z_T_available)):
                stable[j] = available[np.nanargmax(z_T_available)]
            else:
                stable[j] = available[-1]
        # else: all G are NaN - keep the first polymorph (index 0)

    return stable


def _merge_polymorph_rows(species_data_list, phase_indices, stable_polymorph_indices):
    """Build one DataFrame taking each row from the polymorph selected for it.

    The result starts as a copy of the first polymorph's DataFrame; rows for
    which another polymorph was selected are overwritten, column by column,
    for the columns both DataFrames share.
    """
    combined = species_data_list[phase_indices[0]].copy()

    for selected in np.unique(stable_polymorph_indices):
        if selected == 0:
            continue  # these rows already hold the first polymorph's values
        source_position = phase_indices[selected]
        if source_position >= len(species_data_list):
            continue  # no data for this polymorph; leave rows untouched
        source = species_data_list[source_position]
        rows = np.flatnonzero(stable_polymorph_indices == selected)
        for col in combined.columns:
            if col in source.columns:
                combined.iloc[rows, combined.columns.get_loc(col)] = \
                    source.iloc[rows, source.columns.get_loc(col)].values

    return combined
895
+
896
+
897
def _calculate_properties(property, iphases, isaq, isH2O, iscgl, T, P, exceed_rhomin, exceed_Ttr, IS, logact, do_reaction=True):
    """Calculate thermodynamic properties for all species.

    Aqueous species are evaluated with ``hkf``, water with the water
    properties returned by ``hkf`` (or by ``water`` when no aqueous species is
    present), and the remaining species flagged by ``iscgl`` with ``cgl``.
    G, H, S and Cp returned by ``hkf``/``cgl`` are multiplied by 4.184
    (cal -> J) unless the species is marked ``E_units == 'J'``; Berman
    minerals are always converted (per the note in the original code, ``cgl``
    returns cal/mol for them).

    Parameters
    ----------
    property : list of str
        Requested properties. ``"logK"`` is derived from G and ``"rho"`` from
        the water model; everything else is requested from the EOS models.
    iphases : sequence
        OBIGT row labels (used with ``thermo().obigt.loc``), one per species.
    isaq, isH2O, iscgl : boolean arrays
        Flags, aligned with ``iphases``, selecting the model for each species.
    T : array-like
        Temperatures in Kelvin (output ``T`` columns are ``T - 273.15``).
    P : array-like or str
        Pressures, or ``"Psat"`` to use the water saturation pressure.
    exceed_rhomin : bool
        If false, aqueous properties are set to NaN where the water density
        reported by ``hkf`` is below 350 (kg/m3 per the original comments).
    exceed_Ttr, logact, do_reaction
        Accepted for interface compatibility; not used in this function.
    IS : number
        When non-zero, ``A_DH`` and ``B_DH`` are also requested from ``hkf``.

    Returns
    -------
    tuple
        (result_df, warnings_list) - result data and list of warning messages.
        For a single species ``result_df`` is a DataFrame; otherwise it is a
        dict with keys ``species_data`` (list of DataFrames), ``n_species``,
        ``T``, ``P``, ``properties`` and ``eosprop``.
    """

    from ..models.hkf import hkf
    from ..models.cgl import cgl

    cal_to_joule = 4.184
    energy_props = ('G', 'H', 'S', 'Cp')

    thermo_sys = thermo()
    n_conditions = len(T)

    # Initialize warnings list
    calc_warnings = []

    # Properties to calculate from EOS (exclude logK and rho which are derived)
    eosprop = [p for p in property if p not in ["logK", "rho"]]

    # logK is calculated from G, so make sure G is computed when logK is requested
    if "logK" in property and "G" not in eosprop:
        eosprop.append("G")

    # EOS results keyed by the species' position in iphases
    all_properties = {}

    # Convert P="Psat" to actual pressure values for all calculations
    use_psat = isinstance(P, str) and P == "Psat"
    if use_psat:
        from ..models.water import water
        # Kept separately because the single-species output reports Psat
        # without the offset applied below
        P_sat = np.atleast_1d(water("Psat", T=T))

        # IMPORTANT (from the original code): a small epsilon is added so the
        # water model stays on the liquid side of the saturation curve
        P_calculated = P_sat + 0.0001  # Add 0.1 millibar
    else:
        P_sat = None
        P_calculated = P

    # Calculate aqueous species properties using HKF
    if isaq.any():
        aq_indices = np.where(isaq)[0]
        aq_params = thermo_sys.obigt.loc[[iphases[i] for i in aq_indices]]

        # The same species may appear more than once, which would give
        # duplicate index labels; remember the OBIGT labels and switch to a
        # sequential index for the model call.
        original_obigt_indices = aq_params.index.tolist()
        aq_params = aq_params.reset_index(drop=True)

        try:
            # Water properties needed alongside the HKF calculation
            H2O_props = ["rho"]
            if IS != 0:  # Need additional properties for activity corrections
                H2O_props += ["A_DH", "B_DH"]
            if isH2O.any():  # Water is in the reaction
                H2O_props += eosprop

            for aq_idx in aq_indices:
                all_properties[aq_idx] = {prop: [] for prop in eosprop}

            # One vectorized HKF call for all T/P conditions
            T_array = np.atleast_1d(T)
            P_array = np.atleast_1d(P_calculated)
            aq_results, H2O_data = hkf(property=eosprop, parameters=aq_params,
                                       T=T_array, P=P_array, H2O_props=H2O_props)

            for i, aq_idx in enumerate(aq_indices):
                # Results are keyed by the sequential (reset) index
                species_props = aq_results[i]

                # E_units is looked up with the original OBIGT label
                species_row = thermo_sys.obigt.loc[original_obigt_indices[i]]
                already_in_joules = (species_row.get('E_units', 'cal') == 'J')

                for prop in eosprop:
                    if prop in species_props:
                        values = species_props[prop]
                        if prop in energy_props and not already_in_joules:
                            values = values * cal_to_joule
                        all_properties[aq_idx][prop] = np.atleast_1d(values)

            # Reuse the water properties returned by hkf() instead of calling
            # water() again
            if isH2O.any():
                for h2o_idx in np.where(isH2O)[0]:
                    if h2o_idx not in all_properties:
                        all_properties[h2o_idx] = {}
                    for prop in eosprop:
                        all_properties[h2o_idx][prop] = _water_property_array(
                            H2O_data, prop, n_conditions)

            # Set aqueous properties to NaN below the minimum water density
            if not exceed_rhomin:
                if isinstance(H2O_data, dict) and 'rho' in H2O_data:
                    rho_values = np.atleast_1d(H2O_data['rho'])
                elif hasattr(H2O_data, 'rho'):
                    rho_values = np.atleast_1d(H2O_data.rho)
                else:
                    rho_values = None

                if rho_values is not None:
                    ilowrho = rho_values < 350
                    # NaN densities are not flagged
                    ilowrho = np.where(np.isnan(rho_values), False, ilowrho)

                    if np.any(ilowrho):
                        for aq_idx in aq_indices:
                            for prop in eosprop:
                                if aq_idx in all_properties and prop in all_properties[aq_idx]:
                                    prop_array = np.array(all_properties[aq_idx][prop])
                                    prop_array[ilowrho] = np.nan
                                    all_properties[aq_idx][prop] = prop_array

                        n_lowrho = np.sum(ilowrho)
                        ptext = "pair" if n_lowrho == 1 else "pairs"
                        calc_warnings.append(f"below minimum density for applicability of revised HKF equations ({n_lowrho} T,P {ptext})")

        except Exception as e:
            print(f"Warning: HKF calculation failed: {e}")
            # Fill with NaN for failed aqueous calculations
            for aq_idx in aq_indices:
                all_properties[aq_idx] = {prop: np.full(n_conditions, np.nan) for prop in eosprop}

    # Water present but no aqueous species: HKF was not run, so get the water
    # properties directly
    if isH2O.any() and not isaq.any():
        from ..models.water import water

        h2o_indices = np.where(isH2O)[0]
        try:
            H2O_PT = water(property=["rho"] + eosprop, T=T, P=P)

            for h2o_idx in h2o_indices:
                if h2o_idx not in all_properties:
                    all_properties[h2o_idx] = {}
                for prop in eosprop:
                    all_properties[h2o_idx][prop] = _water_property_array(
                        H2O_PT, prop, n_conditions)

        except Exception as e:
            print(f"Warning: Direct water calculation failed: {e}")
            # Fill with NaN for failed water calculations
            for h2o_idx in h2o_indices:
                all_properties[h2o_idx] = {prop: np.full(n_conditions, np.nan) for prop in eosprop}

    # Calculate crystalline/gas/liquid species properties using CGL
    if iscgl.any():
        cgl_indices = np.where(iscgl)[0]
        cgl_params = thermo_sys.obigt.loc[[iphases[i] for i in cgl_indices]]

        # Same duplicate-label handling as for HKF
        original_cgl_obigt_indices = cgl_params.index.tolist()
        cgl_params = cgl_params.reset_index(drop=True)

        try:
            for cgl_idx in cgl_indices:
                all_properties[cgl_idx] = {prop: [] for prop in eosprop}

            # One vectorized CGL call for all T/P conditions
            T_array = np.atleast_1d(T)
            P_array = np.atleast_1d(P_calculated)
            cgl_result = cgl(property=eosprop, parameters=cgl_params, T=T_array, P=P_array)

            berman_cols = ['a1.a', 'a2.b', 'a3.c', 'a4.d', 'c1.e', 'c2.f', 'omega.lambda', 'z.T']

            for i, cgl_idx in enumerate(cgl_indices):
                species_props = cgl_result[i]
                species_row = thermo_sys.obigt.loc[original_cgl_obigt_indices[i]]

                # A species is treated as a Berman mineral only if it lacks
                # standard G, H, S data and all EOS coefficients are zero/NA
                has_standard_thermo = pd.notna(species_row.get('G', np.nan)) and pd.notna(species_row.get('H', np.nan)) and pd.notna(species_row.get('S', np.nan))
                all_coeffs_zero_or_na = all(pd.isna(species_row.get(col, np.nan)) or species_row.get(col, 0) == 0 for col in berman_cols)
                is_berman = all_coeffs_zero_or_na and not has_standard_thermo

                # Skip the cal -> J conversion only for non-Berman species
                # explicitly marked with E_units='J'
                e_units = species_row.get('E_units', 'cal')
                already_in_joules = (e_units == 'J') and not is_berman

                for prop in eosprop:
                    if prop in species_props:
                        prop_values = species_props[prop]
                        if prop in energy_props and not already_in_joules:
                            prop_values = prop_values * cal_to_joule
                        all_properties[cgl_idx][prop] = np.atleast_1d(prop_values)

        except Exception as e:
            import traceback
            print(f"Warning: CGL calculation failed: {e}")
            print(f"Traceback:")
            traceback.print_exc()
            print(f"CGL species that failed:")
            # cgl_params has a sequential index after reset_index(), so report
            # the saved OBIGT labels and read names from cgl_params directly
            # (looking up the sequential index in OBIGT would hit the wrong
            # row or raise inside this handler).
            for df_index, species_name in zip(original_cgl_obigt_indices, cgl_params['name']):
                print(f" {species_name} (index {df_index})")
            # Fill with NaN for failed CGL calculations
            for cgl_idx in cgl_indices:
                all_properties[cgl_idx] = {prop: np.full(n_conditions, np.nan) for prop in eosprop}

    # rho is added when requested, or when the default property list is used
    # and an aqueous species or water is present
    default_properties = ["logK", "G", "H", "S", "V", "Cp"]
    is_default_property_list = (property == default_properties)
    should_add_rho = ("rho" in property) or (is_default_property_list and (isaq.any() or isH2O.any()))

    # Water density does not depend on the species, so compute it once
    rho_column = None
    if should_add_rho:
        try:
            from ..models.water import water
            rho_result = water('rho', T=T, P=P_calculated)
            # Divide by 1000 (kg/m3 -> g/cm3 per the original comments)
            if np.isscalar(rho_result):
                rho_column = np.full(n_conditions, rho_result / 1000)
            else:
                rho_column = np.atleast_1d(rho_result) / 1000
        except Exception:
            rho_column = np.full(n_conditions, np.nan)

    if len(iphases) == 1:
        # Single species - return properties directly. EOS results are keyed
        # by position in iphases (0 here), not by OBIGT label.
        output_data = {'T': T - 273.15}  # Celsius for output
        output_data['P'] = P_sat if use_psat else P

        # rho goes after P and before the other properties
        if rho_column is not None:
            output_data['rho'] = rho_column

        species_props = all_properties.get(0)
        for prop in property:
            if prop == "rho":
                continue  # already added above
            output_data[prop] = _species_output_column(species_props, prop, T, n_conditions)

        result_df = pd.DataFrame(output_data)

    else:
        # Multiple species - return all properties for reaction summation.
        # G is stored whenever logK is requested.
        props_to_store = list(property)
        if 'logK' in property and 'G' not in props_to_store:
            props_to_store.append('G')

        all_species_data = []
        for i in range(len(iphases)):
            species_data = {'T': T - 273.15}  # Celsius for output
            species_data['P'] = P_calculated if isinstance(P, str) else P

            if rho_column is not None:
                species_data['rho'] = rho_column

            species_props = all_properties.get(i)
            for prop in props_to_store:
                if prop == "rho":
                    continue  # already added above
                species_data[prop] = _species_output_column(species_props, prop, T, n_conditions)

            all_species_data.append(pd.DataFrame(species_data))

        # Return structure that can be used for reaction summation
        result_df = {
            'species_data': all_species_data,
            'n_species': len(iphases),
            'T': T,
            'P': P,
            'properties': property,
            'eosprop': eosprop
        }

    return result_df, calc_warnings


def _water_property_array(water_data, prop, n_conditions):
    """Return ``prop`` from a water result (dict or attribute container).

    Scalars are broadcast to ``n_conditions``; a missing property yields an
    all-NaN array.
    """
    if isinstance(water_data, dict) and prop in water_data:
        value = water_data[prop]
    elif hasattr(water_data, prop):
        value = getattr(water_data, prop)
    else:
        return np.full(n_conditions, np.nan)
    if np.isscalar(value):
        return np.full(n_conditions, value)
    return np.atleast_1d(value)


def _species_output_column(species_props, prop, T, n_conditions):
    """Return the output column for ``prop`` of one species.

    ``species_props`` is the species' dict of EOS results (or None when no
    model produced results for it). ``logK`` is derived from G as
    ``-G / (ln(10) * R * T)`` with T in Kelvin; unavailable values are NaN.
    """
    nan_column = np.full(n_conditions, np.nan)
    if prop == "logK":
        if species_props is None or 'G' not in species_props:
            return nan_column
        G_values = species_props['G']
        if np.all(np.isnan(G_values)):
            return nan_column
        R = 8.314462618  # J/(mol·K) - CODATA 2018 value
        return -G_values / (np.log(10) * R * T)
    if species_props is not None and prop in species_props:
        return species_props[prop]
    return nan_column
1358
+
1359
+
1360
def _sum_reaction_properties(properties_data, coefficients):
    """
    Sum individual species properties to get reaction properties.

    Parameters
    ----------
    properties_data : pd.DataFrame or dict
        A DataFrame is returned untouched (single-species case). A dict is
        expected to carry the keys 'species_data' (list of per-species
        DataFrames), 'T', 'P' and 'properties'; any other object, or a dict
        without 'species_data', is returned untouched.
    coefficients : sequence of numbers
        Reaction coefficients, one per entry of 'species_data'.

    Returns
    -------
    pd.DataFrame or the unchanged input
        Columns are 'T' (the stored T minus 273.15), 'P', 'rho' when the first
        species frame has it, then one column per requested property in the
        order given by 'properties'. If the number of species frames and
        coefficients disagree (or there are no species), only 'T' and 'P' are
        returned.

    Notes
    -----
    A species whose column for a property is entirely NaN is skipped in the
    sum; the reaction value is all-NaN only when no species contributed.
    logK is computed as -dG / (ln(10) * R * T) with R = 8.314462618, which
    presumes G is in J/mol and T in Kelvin.
    """
    if isinstance(properties_data, pd.DataFrame):
        # Single species case - nothing to sum
        return properties_data

    if not isinstance(properties_data, dict) or 'species_data' not in properties_data:
        # Fallback for unexpected format
        return properties_data

    species_data_list = properties_data['species_data']
    T = properties_data['T']
    P = properties_data['P']
    property_list = properties_data['properties']
    n_conditions = len(T)

    # Resolve pressure: a string (e.g. "Psat") means the numeric values have
    # to be read back from the first species frame.
    if not isinstance(P, str):
        P_values = P
    elif species_data_list and 'P' in species_data_list[0].columns:
        P_values = species_data_list[0]['P'].values
    else:
        P_values = np.full(n_conditions, np.nan)

    # Column order: T, P, rho (if present), then properties as requested
    reaction_data = {
        'T': T - 273.15,  # Convert to Celsius for output like R
        'P': P_values
    }

    if not species_data_list or len(species_data_list) != len(coefficients):
        # Mismatch - return only the T/P frame
        return pd.DataFrame(reaction_data)

    if 'rho' in species_data_list[0].columns:
        # rho is taken from the first species frame only
        reaction_data['rho'] = species_data_list[0]['rho'].values

    def _weighted_sum(prop):
        """Return sum(coeff * species[prop]), skipping all-NaN species."""
        total = np.zeros(n_conditions)
        contributed = False
        for species_df, coeff in zip(species_data_list, coefficients):
            if prop not in species_df.columns:
                continue
            species_values = species_df[prop].values
            if np.isnan(species_values).all():
                continue
            total += coeff * species_values
            contributed = True
        return total if contributed else np.full(n_conditions, np.nan)

    R = 8.314462618  # J/(mol·K) - CODATA 2018 value

    for prop in property_list:
        if prop == 'rho':
            # Already added above
            continue

        if prop != 'logK':
            # Regular thermodynamic property - sum weighted by coefficients
            reaction_data[prop] = _weighted_sum(prop)
            continue

        # logK needs the reaction G. Reuse it if it was already requested and
        # summed; otherwise compute it locally WITHOUT storing it, so that a
        # later 'G' entry in property_list keeps its requested column position
        # and an unrequested G never leaks into the output.
        if 'G' in reaction_data:
            G_values = reaction_data['G']
        else:
            G_values = _weighted_sum('G')

        if not np.isnan(G_values).all():
            # logK = -ΔG°/(ln(10)*R*T)
            reaction_data['logK'] = -G_values / (np.log(10) * R * np.atleast_1d(T))
        else:
            reaction_data['logK'] = np.full(n_conditions, np.nan)

    return pd.DataFrame(reaction_data)
1482
+
1483
+
1484
def _apply_r_chnosz_conversion(result_df, do_reaction=True):
    """
    Hook for R CHNOSZ ``convert=TRUE`` handling; currently a pass-through.

    No numeric conversion factor is applied here. The function only decides
    whether to hand back the input object itself or a copy of it.

    Parameters
    ----------
    result_df : pd.DataFrame, dict or None
        Calculated properties, or the dict structure used for multiple
        species (with a 'species_data' list of DataFrames).
    do_reaction : bool
        Whether this is a reaction calculation.

    Returns
    -------
    pd.DataFrame, dict or None
        - None is returned as None.
        - A dict is returned as the same object; when ``do_reaction`` is
          False and it has 'species_data', that entry is rebuilt as a new
          list by passing each element through this function with
          ``do_reaction=True`` (which returns each element unchanged).
        - An empty DataFrame, or any DataFrame when ``do_reaction`` is True,
          is returned as the same object.
        - Otherwise a copy of the DataFrame is returned, values untouched.
    """
    if result_df is None:
        return result_df

    if isinstance(result_df, dict):
        # Multi-species container: for reactions it is handed back untouched;
        # otherwise each species frame is routed through the same hook.
        if not do_reaction and 'species_data' in result_df:
            result_df['species_data'] = [
                _apply_r_chnosz_conversion(frame, True)
                for frame in result_df['species_data']
            ]
        return result_df

    # Nothing to do for empty frames or for reaction results
    if result_df.empty or do_reaction:
        return result_df

    # Non-reaction single-species frame: return an independent copy. Values
    # are left as they are; no cal -> J factor is applied at this stage.
    return result_df.copy()
1558
+
1559
+
1560
def _apply_unit_conversion(result_df):
    """
    Multiply the G, H, S and Cp columns by 4.184 (calories to Joules).

    Parameters
    ----------
    result_df : pd.DataFrame or None
        Frame whose energy columns should be scaled.

    Returns
    -------
    pd.DataFrame or None
        None or an empty frame is returned as-is. Otherwise a new frame is
        returned in which every one of 'G', 'H', 'S', 'Cp' that is present
        has been multiplied by 4.184; all other columns are unchanged and the
        input frame is not modified.
    """
    if result_df is None or result_df.empty:
        return result_df

    joules_per_calorie = 4.184

    # Build the scaled replacements only for the energy columns that exist;
    # assign() works on a copy and keeps existing columns in place.
    scaled_columns = {
        name: result_df[name] * joules_per_calorie
        for name in ('G', 'H', 'S', 'Cp')
        if name in result_df.columns
    }
    return result_df.assign(**scaled_columns)
1581
+
1582
+
1583
def _find_simple_integer_solution(basis_matrix, missing_vector, basis_species_names, missing_composition):
    """
    Search for a small-integer combination of basis species.

    Candidate coefficient vectors ``c`` are tried in a fixed order and the
    first one with ``basis_matrix @ c`` equal to ``missing_vector`` (within
    an absolute tolerance of 1e-10) is returned:

    1. one species with coefficient 1, 2 or 3;
    2. two species, each with coefficient 1 or 2;
    3. three species, each with coefficient 1.

    Parameters
    ----------
    basis_matrix : array-like
        Matrix multiplied with the trial coefficient vector.
    missing_vector : array-like
        Target of the product.
    basis_species_names : sequence
        Only its length is used (number of coefficients).
    missing_composition : any
        Not used by this function.

    Returns
    -------
    np.ndarray or None
        The first matching coefficient vector, or None if none of the simple
        candidates matches.
    """
    n_species = len(basis_species_names)

    def _try(assignments):
        """Build a trial vector from (index, coefficient) pairs and test it."""
        trial = np.zeros(n_species)
        for index, value in assignments:
            trial[index] = value
        if np.allclose(basis_matrix @ trial, missing_vector, atol=1e-10):
            return trial
        return None

    # Stage 1: a single species, coefficient 1..3
    for a in range(n_species):
        for ca in (1, 2, 3):
            hit = _try(((a, ca),))
            if hit is not None:
                return hit

    # Stage 2: two distinct species, coefficient 1..2 each
    for a in range(n_species):
        for b in range(a + 1, n_species):
            for ca in (1, 2):
                for cb in (1, 2):
                    hit = _try(((a, ca), (b, cb)))
                    if hit is not None:
                        return hit

    # Stage 3: three distinct species, coefficient 1 each
    for a in range(n_species):
        for b in range(a + 1, n_species):
            for c in range(b + 1, n_species):
                hit = _try(((a, 1), (b, 1), (c, 1)))
                if hit is not None:
                    return hit

    return None  # No simple solution found
1627
+
1628
+
1629
+ # Update the main __init__.py to use the real implementation
1630
def _update_init_file(init_path="/home/jupyteruser/CHNOSZ-main/python/chnosz/__init__.py"):
    """
    Update __init__.py to import real subcrt instead of placeholder.

    Three edits are applied to the text of the file:

    1. the commented-out ``# from .core.subcrt import subcrt`` line is
       uncommented;
    2. the placeholder ``def subcrt(*args, **kwargs)`` stub that raises
       NotImplementedError is removed;
    3. ``'subcrt'`` is added after the indented ``'reset'`` entry.

    The function is idempotent: running it again on an already-updated file
    changes nothing, and the file is only rewritten when its content changed.

    Parameters
    ----------
    init_path : str, optional
        File to rewrite. The default is the developer-machine path this
        helper was originally written against, kept so that a no-argument
        call behaves as before; pass an explicit path for any other checkout.

    Raises
    ------
    OSError
        If the file cannot be read or written (e.g. it does not exist).
    """
    # Local import: `re` may not be imported at module level in this file.
    import re

    with open(init_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # 1. Replace the commented-out import with the real one
    new_content = content.replace(
        '# from .core.subcrt import subcrt',
        'from .core.subcrt import subcrt'
    )

    # 2. Drop the placeholder stub. Lines are matched with flexible
    #    whitespace between them so the stub's exact indentation is irrelevant.
    placeholder_lines = (
        'def subcrt(*args, **kwargs):',
        '"""Placeholder for subcrt function (not yet implemented)."""',
        'raise NotImplementedError("subcrt function not yet implemented in Python version")',
    )
    placeholder_re = re.compile(r'\s+'.join(re.escape(line) for line in placeholder_lines))
    new_content = placeholder_re.sub('', new_content)

    # 3. Add 'subcrt' after the indented 'reset' entry, reusing that entry's
    #    indentation. The negative lookahead skips entries already followed by
    #    'subcrt', which keeps repeated runs from appending it again.
    new_content = re.sub(
        r"(?m)^([ \t]+)'reset'(?!\s*,\s*'subcrt')",
        r"\1'reset',\n\1'subcrt'",
        new_content
    )

    # Avoid touching the file when there is nothing to change
    if new_content != content:
        with open(init_path, 'w', encoding='utf-8') as f:
            f.write(new_content)
1659
+
1660
+
1661
+ # Call the update function when module is imported
1662
+ # _update_init_file() # Commented out for safety - will do manually
1663
+
1664
+
1665
def _display_subcrt_result(result: SubcrtResult):
    """
    Show the tables of a subcrt result when running under IPython.

    The ``reaction`` and ``out`` attributes of ``result`` are passed, in that
    order, to ``IPython.display.display`` provided ``get_ipython()`` returns
    a shell and the table is neither None nor empty.

    This is best-effort: a missing IPython installation or any other
    exception raised along the way is swallowed and nothing is displayed.

    Parameters
    ----------
    result : SubcrtResult
        The result object from subcrt()
    """
    try:
        from IPython import get_ipython
        from IPython.display import display

        # No interactive shell -> nothing to render
        if get_ipython() is None:
            return

        # Attributes are looked up one at a time so the first table is shown
        # before the second one is even accessed.
        for table_name in ('reaction', 'out'):
            table = getattr(result, table_name)
            if table is not None and not table.empty:
                display(table)
    except Exception:
        # Covers ImportError (IPython not installed) as well as any other
        # failure; display problems must never break the calculation.
        pass