PyPI - pychnosz - Versions diffs - 1.1.11__cp312-cp312-win_amd64.whl - Mend

pychnosz 1.1.11__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128) hide show

pychnosz/__init__.py +129 -0
pychnosz/biomolecules/__init__.py +29 -0
pychnosz/biomolecules/ionize_aa.py +197 -0
pychnosz/biomolecules/proteins.py +595 -0
pychnosz/core/__init__.py +46 -0
pychnosz/core/affinity.py +1256 -0
pychnosz/core/animation.py +593 -0
pychnosz/core/balance.py +334 -0
pychnosz/core/basis.py +716 -0
pychnosz/core/diagram.py +3336 -0
pychnosz/core/equilibrate.py +813 -0
pychnosz/core/equilibrium.py +554 -0
pychnosz/core/info.py +821 -0
pychnosz/core/retrieve.py +364 -0
pychnosz/core/speciation.py +580 -0
pychnosz/core/species.py +599 -0
pychnosz/core/subcrt.py +1696 -0
pychnosz/core/thermo.py +593 -0
pychnosz/core/unicurve.py +1226 -0
pychnosz/data/__init__.py +11 -0
pychnosz/data/add_obigt.py +327 -0
pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
pychnosz/data/extdata/Berman/sympy.R +99 -0
pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
pychnosz/data/extdata/OBIGT/AD.csv +25 -0
pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
pychnosz/data/extdata/misc/BZA10.csv +5 -0
pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
pychnosz/data/extdata/misc/LA19_test.csv +7 -0
pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
pychnosz/data/extdata/misc/PM90.csv +7 -0
pychnosz/data/extdata/misc/RH95.csv +23 -0
pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
pychnosz/data/extdata/misc/SK95.csv +55 -0
pychnosz/data/extdata/misc/SOJSH.csv +61 -0
pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
pychnosz/data/extdata/misc/bluered.txt +1000 -0
pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
pychnosz/data/extdata/protein/Cas/download.R +34 -0
pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
pychnosz/data/extdata/protein/POLG.csv +12 -0
pychnosz/data/extdata/protein/TBD+05.csv +393 -0
pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
pychnosz/data/extdata/protein/rubisco.csv +28 -0
pychnosz/data/extdata/protein/rubisco.fasta +239 -0
pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
pychnosz/data/extdata/src/README.txt +5 -0
pychnosz/data/extdata/taxonomy/names.dmp +215 -0
pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
pychnosz/data/extdata/thermo/buffer.csv +40 -0
pychnosz/data/extdata/thermo/element.csv +135 -0
pychnosz/data/extdata/thermo/groups.csv +6 -0
pychnosz/data/extdata/thermo/opt.csv +2 -0
pychnosz/data/extdata/thermo/protein.csv +506 -0
pychnosz/data/extdata/thermo/refs.csv +343 -0
pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
pychnosz/data/loader.py +431 -0
pychnosz/data/mod_obigt.py +322 -0
pychnosz/data/obigt.py +471 -0
pychnosz/data/worm.py +228 -0
pychnosz/fortran/__init__.py +16 -0
pychnosz/fortran/h2o92.dll +0 -0
pychnosz/fortran/h2o92_interface.py +527 -0
pychnosz/geochemistry/__init__.py +21 -0
pychnosz/geochemistry/minerals.py +514 -0
pychnosz/geochemistry/redox.py +500 -0
pychnosz/models/__init__.py +47 -0
pychnosz/models/archer_wang.py +165 -0
pychnosz/models/berman.py +309 -0
pychnosz/models/cgl.py +381 -0
pychnosz/models/dew.py +997 -0
pychnosz/models/hkf.py +523 -0
pychnosz/models/hkf_helpers.py +231 -0
pychnosz/models/iapws95.py +1113 -0
pychnosz/models/supcrt92_fortran.py +238 -0
pychnosz/models/water.py +480 -0
pychnosz/utils/__init__.py +27 -0
pychnosz/utils/expression.py +1074 -0
pychnosz/utils/formula.py +830 -0
pychnosz/utils/formula_ox.py +227 -0
pychnosz/utils/reset.py +33 -0
pychnosz/utils/units.py +259 -0
pychnosz-1.1.11.dist-info/METADATA +197 -0
pychnosz-1.1.11.dist-info/RECORD +128 -0
pychnosz-1.1.11.dist-info/WHEEL +5 -0
pychnosz-1.1.11.dist-info/licenses/LICENSE.txt +19 -0
pychnosz-1.1.11.dist-info/top_level.txt +1 -0

pychnosz/core/subcrt.py ADDED Viewed

@@ -0,0 +1,1696 @@
+"""
+CHNOSZ subcrt() function - Calculate standard molal thermodynamic properties.
+This module implements the core subcrt() function that calculates standard molal
+thermodynamic properties of species and reactions, maintaining complete fidelity
+to the R CHNOSZ implementation.
+References:
+- R CHNOSZ package subcrt.R
+- Shock, E. L., Oelkers, E. H., Johnson, J. W., Sverjensky, D. A., & Helgeson, H. C. (1992).
+  Calculation of the thermodynamic properties of aqueous species at high pressures and temperatures.
+"""
+import numpy as np
+import pandas as pd
+from typing import Union, List, Optional, Dict, Any, Tuple
+import warnings
+from ..core.thermo import thermo
+from ..core.info import info
+from ..models.water import water
+from ..utils.formula import makeup
+class SubcrtResult:
+    """Result structure for subcrt() calculations, matching R CHNOSZ output."""
+    def __init__(self):
+        self.species = None     # Species information DataFrame
+        self.out = None         # Calculated properties DataFrame
+        self.reaction = None    # Reaction summary DataFrame
+        self.warnings = []      # Warning messages
+    def __repr__(self):
+        if self.out is not None:
+            return f"SubcrtResult with {len(self.out)} properties calculated"
+        return "SubcrtResult (no calculations performed)"
+def subcrt(species: Union[str, List[str], int, List[int]],
+           coeff: Union[int, float, List[Union[int, float]], None] = 1,
+           state: Optional[Union[str, List[str]]] = None,
+           property: List[str] = ["logK", "G", "H", "S", "V", "Cp"],
+           T: Union[float, List[float], np.ndarray] = np.concatenate([[273.16], 273.15 + np.arange(25, 351, 25)]),
+           P: Union[float, List[float], np.ndarray, str] = "Psat",
+           grid: Optional[str] = None,
+           convert: bool = True,
+           exceed_Ttr: bool = True,
+           exceed_rhomin: bool = False,
+           logact: Optional[List[float]] = None,
+           autobalance: bool = True,
+           use_polymorphs: bool = True,
+           IS: Union[float, List[float]] = 0,
+           messages: bool = True,
+           show: bool = True,
+           basis: Optional[pd.DataFrame] = None,
+           _recursion_count: int = 0) -> SubcrtResult:
+    """
+    Calculate standard molal thermodynamic properties of species and reactions.
+    This function reproduces the behavior of R CHNOSZ subcrt() exactly, including
+    all argument handling, validation, calculations, and output formatting.
+    Parameters
+    ----------
+    species : str, list of str, int, or list of int
+        Species names, formulas, or indices in thermodynamic database
+    coeff : int, float, list, or None
+        Stoichiometric coefficients for reaction calculation
+        If 1 (default), calculate individual species properties
+        If list, calculate reaction with given coefficients
+    state : str, list of str, or None
+        Physical states ("aq", "cr", "gas", "liq") for species
+    property : list of str
+        Properties to calculate: "logK", "G", "H", "S", "V", "Cp", "rho", "kT", "E"
+    T : float, list, or ndarray
+        Temperature(s) in K (default: 273.16, then 298.15 to 623.15 by 25 K)
+    P : float, list, ndarray, or "Psat"
+        Pressure(s) in bar or "Psat" for saturation pressure
+    grid : str or None
+        Grid calculation mode: "T", "P", "IS", or None
+    convert : bool
+        Convert temperature/pressure units (default: True)
+    exceed_Ttr : bool
+        Allow calculations beyond transition temperatures (default: False)
+    exceed_rhomin : bool
+        Allow calculations below minimum water density (default: False)
+    logact : list of float or None
+        Activity coefficients (log10 scale)
+    autobalance : bool
+        Automatically balance reactions using basis species (default: True)
+    use_polymorphs : bool
+        Include polymorphic phases for minerals (default: True)
+    IS : float or list of float
+        Ionic strength for activity corrections (default: 0)
+    messages : bool, default True
+        Whether to print informational messages
+    show : bool, default True
+        Whether to display result tables in Jupyter notebooks (default: True)
+        Set to False when calling subcrt() from other functions
+    basis : pd.DataFrame, optional
+        Basis species definition to use for autobalancing (if not using global basis)
+    Returns
+    -------
+    SubcrtResult
+        Object containing:
+        - species: DataFrame with species information
+        - out: DataFrame with calculated thermodynamic properties
+        - reaction: DataFrame with reaction stoichiometry (if reaction)
+        - warnings: List of warning messages
+    Examples
+    --------
+    >>> import pychnosz
+    >>> pychnosz.reset()
+    >>>
+    >>> # Single species properties
+    >>> result = subcrt("H2O", T=25, P=1)
+    >>> print(result.out[["G", "H", "S", "Cp"]])
+    >>>
+    >>> # Reaction calculation
+    >>> result = subcrt(["H2O", "H+", "OH-"], [-1, 1, 1], T=25, P=1)
+    >>> print(f"Water dissociation ΔG° = {result.out.G[0]:.3f} kJ/mol")
+    >>>
+    >>> # Temperature array
+    >>> result = subcrt("quartz", T=[25, 100, 200], P=1)
+    >>> print(result.out[["T", "G", "H", "S"]])
+    Notes
+    -----
+    This implementation maintains complete fidelity to R CHNOSZ subcrt():
+    - Identical argument processing and validation
+    - Same species lookup and polymorphic handling
+    - Exact HKF and CGL equation-of-state calculations
+    - Same reaction balancing and autobalance logic
+    - Identical output structure and formatting
+    - Same warning and error messages
+    """
+    result = SubcrtResult()
+    # Prevent infinite recursion in auto-balancing
+    if _recursion_count > 5:
+        result.warnings.append("Maximum recursion depth reached in auto-balancing")
+        return result
+    try:
+        # === Phase 1: Argument Processing and Validation ===
+        # (Exactly matching R subcrt.R lines 21-77)
+        # Handle argument reordering if states are second argument
+        if coeff != 1 and isinstance(coeff, (list, str)) and isinstance(coeff[0] if isinstance(coeff, list) else coeff, str):
+            # States were passed as second argument - reorder
+            if state is not None:
+                if isinstance(state, (int, float)) or (isinstance(state, list) and all(isinstance(x, (int, float)) for x in state)):
+                    # Third argument is coefficients
+                    new_coeff = state
+                    new_state = coeff
+                    return subcrt(species, new_coeff, new_state, property, T, P, grid,
+                                convert, exceed_Ttr, exceed_rhomin, logact, autobalance, use_polymorphs, IS,
+                                messages, show, basis, _recursion_count)
+                else:
+                    raise ValueError("If both coeff and state are given, one should be numeric coefficients")
+            else:
+                # Only states provided, no coefficients
+                new_state = coeff
+                return subcrt(species, 1, new_state, property, T, P, grid,
+                            convert, exceed_Ttr, exceed_rhomin, logact, autobalance, use_polymorphs, IS,
+                            messages, show, basis, _recursion_count)
+        # Determine if this is a reaction calculation
+        do_reaction = (coeff != 1 and coeff is not None and
+                      (isinstance(coeff, list) or isinstance(coeff, (int, float)) and coeff != 1))
+        # Convert inputs to consistent formats
+        species = [species] if isinstance(species, (str, int)) else list(species)
+        if state is not None:
+            state = [state] if isinstance(state, str) else list(state)
+            # Make species and state same length
+            if len(state) > len(species):
+                species = species * (len(state) // len(species) + 1)
+                species = species[:len(state)]
+            elif len(species) > len(state):
+                state = state * (len(species) // len(state) + 1)
+                state = state[:len(species)]
+        if do_reaction:
+            if isinstance(coeff, (int, float)):
+                coeff = [coeff]
+            coeff = list(coeff)
+        # Validate properties
+        allowed_properties = ["rho", "logK", "G", "H", "S", "Cp", "V", "kT", "E"]
+        if isinstance(property, str):
+            property = [property]
+        invalid_props = [p for p in property if p not in allowed_properties]
+        if invalid_props:
+            if len(invalid_props) == 1:
+                raise ValueError(f"invalid property name: {invalid_props[0]}")
+            else:
+                raise ValueError(f"invalid property names: {', '.join(invalid_props)}")
+        # Length checking
+        if do_reaction and len(species) != len(coeff):
+            raise ValueError("the length of 'coeff' must equal the number of species")
+        if logact is not None and len(logact) != len(species):
+            raise ValueError("the length of 'logact' must equal the number of species")
+        # Unit conversion
+        T_array = np.atleast_1d(np.asarray(T, dtype=float))
+        # Convert temperature to Kelvin if convert=True (matching R CHNOSZ behavior)
+        # R: if(convert) T <- envert(T, "K") - converts Celsius input to Kelvin
+        # Default parameter is [273.16, 298.15, 323.15, ..., 623.15] which is already in K, so only convert user input
+        default_T = np.concatenate([[273.16], 273.15 + np.arange(25, 351, 25)])
+        if convert and not np.array_equal(T_array, default_T[:len(T_array)]):
+            # User provided temperature, assume Celsius and convert to Kelvin
+            T_array = T_array + 273.15
+        # Handle T=273.15K (0°C) exactly - R CHNOSZ uses 273.16K (0.01°C) instead
+        # This avoids numerical issues at the freezing point
+        T_array = np.where(np.abs(T_array - 273.15) < 1e-10, 273.16, T_array)
+        if isinstance(P, str) and P == "Psat":
+            P_array = "Psat"
+        else:
+            P_array = np.atleast_1d(np.asarray(P, dtype=float))
+            # P is assumed to be in bar (R CHNOSZ standard)
+        # Warning for high temperatures with Psat
+        # Check if P is "Psat" (compare to the original P, not P_array which may be converted)
+        if isinstance(P, str) and P == "Psat" and np.any(T_array > 647.067):
+            n_over = np.sum(T_array > 647.067)
+            vtext = "value" if n_over == 1 else "values"
+            result.warnings.append(f"P = 'Psat' undefined for T > Tcritical ({n_over} T {vtext})")
+        # === Phase 2: Grid Processing ===
+        # Handle grid calculations (T-P arrays)
+        if grid is not None:
+            if grid == "T":
+                # Grid over temperature
+                new_T = []
+                for temp in T_array:
+                    if isinstance(P_array, str):
+                        new_T.extend([temp] * 1)
+                    else:
+                        new_T.extend([temp] * len(P_array))
+                if isinstance(P_array, str):
+                    new_P = P_array
+                else:
+                    new_P = list(P_array) * len(T_array)
+                T_array = np.array(new_T)
+                P_array = new_P
+            elif grid == "P":
+                # Grid over pressure
+                if not isinstance(P_array, str):
+                    new_P = []
+                    for press in P_array:
+                        new_P.extend([press] * len(T_array))
+                    new_T = list(T_array) * len(P_array)
+                    T_array = np.array(new_T)
+                    P_array = np.array(new_P)
+            elif grid == "IS":
+                # Grid over ionic strength
+                IS_array = np.atleast_1d(np.asarray(IS))
+                original_len = max(len(T_array), len(P_array) if not isinstance(P_array, str) else 1)
+                new_IS = []
+                for ionic_str in IS_array:
+                    new_IS.extend([ionic_str] * original_len)
+                T_array = np.tile(T_array, len(IS_array))
+                if isinstance(P_array, str):
+                    P_array = P_array
+                else:
+                    P_array = np.tile(P_array, len(IS_array))
+                IS = new_IS
+        else:
+            # Ensure T and P are same length
+            if isinstance(P_array, str):
+                # P = "Psat", keep T as is
+                pass
+            else:
+                max_len = max(len(T_array), len(P_array))
+                if len(T_array) < max_len:
+                    T_array = np.resize(T_array, max_len)
+                if len(P_array) < max_len:
+                    P_array = np.resize(P_array, max_len)
+        # === Phase 3: Species Lookup and Validation ===
+        result.species, result.reaction, iphases, isaq, isH2O, iscgl, polymorph_species, ispecies = _process_species(
+            species, state, coeff, do_reaction, use_polymorphs, messages=messages)
+        # === Phase 4: Generate Output Message ===
+        if (len(species) > 1 or convert) and messages:
+            _print_subcrt_message(species, T_array, P_array, isaq.any() or isH2O.any(), messages)
+        # === Phase 5: Reaction Balance Check ===
+        if do_reaction and autobalance:
+            # Use original ispecies and coeff for balance check (before polymorph expansion)
+            # This matches R CHNOSZ behavior where balance check happens before polymorph expansion
+            rebalanced_result = _check_reaction_balance(result, species, coeff, state, property,
+                                                      T_array, P_array, grid, convert, logact,
+                                                      exceed_Ttr, exceed_rhomin, IS, ispecies, _recursion_count, basis, T, P, messages, show)
+            if rebalanced_result is not None:  # If reaction was rebalanced, return the result
+                return rebalanced_result
+        # === Phase 6: Property Calculations ===
+        result.out, calc_warnings = _calculate_properties(property, iphases, isaq, isH2O, iscgl,
+                                         T_array, P_array, exceed_rhomin, exceed_Ttr, IS, logact, do_reaction)
+        # Add calculation warnings to result
+        result.warnings.extend(calc_warnings)
+        # === Phase 6.5: Polymorph Selection ===
+        if use_polymorphs:
+            # Select stable polymorphs based on minimum Gibbs energy
+            # Apply to both individual species AND reactions (matching R CHNOSZ behavior)
+            thermo_sys = thermo()
+            if do_reaction:
+                # For reactions, also update coefficients and rebuild reaction DataFrame
+                result.out, updated_coeff, updated_iphases = _select_stable_polymorphs(result.out, iphases, polymorph_species, ispecies, thermo_sys, result.reaction['coeff'].tolist(), messages)
+                # Rebuild reaction DataFrame with updated species list
+                reaction_data = []
+                for i, iph in enumerate(updated_iphases):
+                    row = thermo_sys.obigt.loc[iph]
+                    model = row.get('model', 'unknown')
+                    if model == "H2O":
+                        water_model = thermo_sys.get_option('water', 'SUPCRT92')
+                        model = f"water.{water_model}"
+                    reaction_data.append({
+                        'coeff': updated_coeff[i],
+                        'name': row['name'],
+                        'formula': row['formula'],
+                        'state': row['state'],
+                        'ispecies': iph,
+                        'model': model
+                    })
+                result.reaction = pd.DataFrame(reaction_data)
+            else:
+                # For individual species, no coefficient update needed
+                result.out, _ = _select_stable_polymorphs(result.out, iphases, polymorph_species, ispecies, thermo_sys, None, messages)
+            # For single species (non-reaction), convert back to DataFrame format
+            if not do_reaction and isinstance(result.out, dict) and 'species_data' in result.out and len(result.out['species_data']) == 1:
+                result.out = result.out['species_data'][0]
+        # === Phase 7: Reaction Property Summation ===
+        if do_reaction:
+            result.out = _sum_reaction_properties(result.out, result.reaction['coeff'])
+        # === Phase 8: Unit Conversion (convert=True) ===
+        if convert:
+            # Apply R CHNOSZ compatible conversion
+            # This matches the observed behavior where convert=TRUE gives different results
+            # than just multiplying by 4.184
+            result.out = _apply_r_chnosz_conversion(result.out, do_reaction)
+            # Recalculate logK after unit conversion to ensure consistency
+            if do_reaction and 'logK' in property and 'G' in result.out.columns:
+                if not result.out['G'].isna().all():
+                    R = 8.314462618  # J/(mol·K) - CODATA 2018 value
+                    T_array = np.atleast_1d(T_array)
+                    result.out['logK'] = -result.out['G'] / (np.log(10) * R * T_array)
+        # Display tables in Jupyter notebooks if show=True
+        if show:
+            _display_subcrt_result(result)
+        # Print warnings (matching R CHNOSZ behavior - lines 621-624)
+        if result.warnings and messages:
+            for warn in result.warnings:
+                warnings.warn(warn)
+        return result
+    except Exception as e:
+        result.warnings.append(f"subcrt error: {str(e)}")
+        raise
+def _process_species(species, state, coeff, do_reaction, use_polymorphs, messages=True):
+    """Process species lookup, validation, and polymorphic expansion."""
+    thermo_sys = thermo()
+    # Species information lists
+    ispecies = []
+    newstate = []
+    # Look up each species
+    for i, sp in enumerate(species):
+        if isinstance(sp, (int, np.integer)):
+            # Numeric species index (1-based in R, matches our DataFrame index shifted by +1 in obigt.py)
+            sindex = int(sp)
+            if sindex not in thermo_sys.obigt.index:
+                raise ValueError(f"{sp} is not a valid row number of thermo database")
+            ispecies.append(sindex)
+            newstate.append(thermo_sys.obigt.loc[sindex]['state'])
+        else:
+            # Named species - look up in database
+            sp_state = state[i] if state and i < len(state) else None
+            sindex = info(sp, sp_state, messages=messages)
+            # Check for both None and NaN (info() returns NaN for nonexistent species)
+            if sindex is None or (isinstance(sindex, float) and np.isnan(sindex)):
+                if sp_state:
+                    raise ValueError(f"no info found for {sp} {sp_state}")
+                else:
+                    raise ValueError(f"no info found for {sp}")
+            # info() returns 1-based index which matches our DataFrame index (shifted by +1 in obigt.py)
+            ispecies.append(sindex)
+            newstate.append(thermo_sys.obigt.loc[sindex]['state'])
+    # Handle polymorphic expansion for minerals
+    iphases = []
+    polymorph_species = []
+    coeff_new = []
+    for i, isp in enumerate(ispecies):
+        sp_state = newstate[i]
+        sp_coeff = coeff[i] if do_reaction else 1
+        if sp_state == "cr" and use_polymorphs:
+            # Look for polymorphs (cr, cr2, cr3, etc.)
+            sp_name = thermo_sys.obigt.loc[isp]['name']
+            polymorph_states = ["cr", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", "cr8", "cr9"]
+            # Find all polymorphs
+            polymorphs = []
+            for poly_state in polymorph_states:
+                matches = thermo_sys.obigt[
+                    (thermo_sys.obigt['name'] == sp_name) &
+                    (thermo_sys.obigt['state'] == poly_state)
+                ]
+                if not matches.empty:
+                    polymorphs.extend(matches.index.tolist())
+            if len(polymorphs) > 1:
+                # Multiple polymorphs found
+                iphases.extend(polymorphs)
+                # CRITICAL FIX: Use position i (not isp) to track which original species
+                # this corresponds to. This allows the same species to appear multiple times
+                # in a reaction (e.g., SO4-2 appearing twice with different coefficients)
+                polymorph_species.extend([i] * len(polymorphs))
+                coeff_new.extend([sp_coeff] * len(polymorphs))
+            else:
+                # Single phase
+                iphases.append(isp)
+                polymorph_species.append(i)
+                coeff_new.append(sp_coeff)
+        else:
+            # Non-mineral or non-polymorph
+            iphases.append(isp)
+            polymorph_species.append(i)
+            coeff_new.append(sp_coeff)
+    # Create reaction DataFrame
+    reaction_data = []
+    for i, iph in enumerate(iphases):
+        row = thermo_sys.obigt.loc[iph]
+        model = row.get('model', 'unknown')
+        # Identify water model for H2O
+        if model == "H2O":
+            water_model = thermo_sys.get_option('water', 'SUPCRT92')
+            model = f"water.{water_model}"
+        reaction_data.append({
+            'coeff': coeff_new[i],
+            'name': row['name'],
+            'formula': row['formula'],
+            'state': row['state'],
+            'ispecies': iph,
+            'model': model
+        })
+    reaction_df = pd.DataFrame(reaction_data)
+    # Identify aqueous species and models
+    isaq = reaction_df['model'].str.upper().isin(['HKF', 'AD', 'DEW'])
+    isH2O = reaction_df['model'].str.contains('water.', na=False)
+    iscgl = reaction_df['model'].isin(['CGL', 'CGL_Ttr', 'Berman'])
+    # Species summary DataFrame
+    species_data = []
+    for i, isp in enumerate(ispecies):
+        row = thermo_sys.obigt.loc[isp]
+        species_data.append({
+            'name': row['name'],
+            'formula': row['formula'],
+            'state': row['state'],
+            'ispecies': isp
+        })
+    species_df = pd.DataFrame(species_data)
+    return species_df, reaction_df, iphases, isaq, isH2O, iscgl, polymorph_species, ispecies
+def _print_subcrt_message(species, T, P, is_wet, messages=True):
+    """Print subcrt calculation message matching R output."""
+    if not messages:
+        return
+    # Temperature text - display in Celsius like R
+    if len(T) == 1:
+        T_celsius = T[0] - 273.15
+        T_text = f"{T_celsius:.0f} ºC"
+    else:
+        T_text = f"{len(T)} values of T (ºC)"
+    # Pressure text
+    if isinstance(P, str) and P == "Psat":
+        P_text = "Psat"
+    elif hasattr(P, '__len__') and len(P) == 1:
+        P_text = f"{P[0]:.2f} bar"
+    else:
+        P_text = "P (bar)"
+    if is_wet:
+        P_text += " (wet)"
+    print(f"subcrt: {len(species)} species at {T_text} and {P_text} [energy units: J]")
+def _check_reaction_balance(result, species, coeff, state, property, T, P, grid,
+                          convert, logact, exceed_Ttr, exceed_rhomin, IS, iphases, recursion_count, basis_arg=None, original_T=None, original_P=None, messages=True, show=True):
+    """Check reaction balance and auto-balance if needed."""
+    # Calculate mass balance
+    formulas = [result.species.iloc[i]['formula'] for i in range(len(species))]
+    try:
+        mass_balance = makeup(iphases, coeff, sum_formulas=True)
+        # Check if balanced (within tolerance) - use smaller tolerance for better precision
+        tolerance = 1e-6
+        unbalanced_elements = {elem: val for elem, val in mass_balance.items()
+                             if abs(val) > tolerance}
+        if unbalanced_elements:
+            # Reaction is unbalanced - show missing composition
+            missing_composition = {elem: -val for elem, val in unbalanced_elements.items()}
+            if messages:
+                print("subcrt: reaction is not balanced; it is missing this composition:")
+                # Format like R CHNOSZ: elements on one line, values on the next
+                elem_names = list(missing_composition.keys())
+                elem_values = list(missing_composition.values())
+                print(" ".join(elem_names))
+                print(" ".join([str(val) for val in elem_values]))
+            # Try to balance using basis species
+            thermo_sys = thermo()
+            # Use provided basis or get from global state
+            if basis_arg is not None:
+                basis_for_balance = basis_arg
+            elif hasattr(thermo_sys, 'basis') and thermo_sys.basis is not None:
+                basis_for_balance = thermo_sys.basis
+            else:
+                basis_for_balance = None
+            if basis_for_balance is not None:
+                # Get basis element columns
+                basis_elements = [col for col in basis_for_balance.columns
+                                if col not in ['ispecies', 'logact', 'state']]
+                # Check if all missing elements are in basis
+                missing_elements = set(missing_composition.keys())
+                if missing_elements.issubset(set(basis_elements)):
+                    # Calculate coefficients for missing composition from basis species
+                    # Create a matrix with the missing composition
+                    missing_matrix = np.zeros((1, len(basis_elements)))
+                    for i, elem in enumerate(basis_elements):
+                        missing_matrix[0, i] = missing_composition.get(elem, 0)
+                    try:
+                        # For multi-species balancing, we need to find the minimal solution
+                        # R CHNOSZ tends to prefer simple integer solutions
+                        # Get basis matrix - need to transpose to match R CHNOSZ behavior
+                        # In R: tbmat is transposed so that solve(tbmat, x) works correctly
+                        basis_matrix = basis_for_balance[basis_elements].values.T  # Transpose: (elements × basis_species)
+                        # Try to find simple integer solutions first
+                        basis_coeffs = _find_simple_integer_solution(basis_matrix, missing_matrix.flatten(), basis_for_balance.index.tolist(), missing_composition)
+                        if basis_coeffs is None:
+                            # Fall back to linear algebra solution
+                            basis_coeffs = np.linalg.solve(basis_matrix, missing_matrix.T).flatten()
+                            # Apply R CHNOSZ's zapsmall equivalent (digits=7)
+                            basis_coeffs = np.around(basis_coeffs, decimals=7)
+                            # Clean up very small numbers to exactly zero
+                            basis_coeffs[np.abs(basis_coeffs) < 1e-7] = 0
+                        # Get non-zero coefficients and corresponding basis species
+                        nonzero_indices = np.abs(basis_coeffs) > 1e-6
+                        if np.any(nonzero_indices):
+                            # Get basis species info
+                            basis_indices = basis_for_balance['ispecies'].values[nonzero_indices]
+                            basis_coeffs_nz = basis_coeffs[nonzero_indices]
+                            basis_states = basis_for_balance['state'].values[nonzero_indices]
+                            basis_logacts = basis_for_balance['logact'].values[nonzero_indices]
+                            # Create new species list and coefficients
+                            new_species = list(species) + [int(idx) for idx in basis_indices]
+                            new_coeff = list(coeff) + list(basis_coeffs_nz)
+                            new_state = list(state) if state else [None] * len(species)
+                            new_state.extend(list(basis_states))
+                            # Handle logact values - only add if original logact was provided
+                            new_logact = None
+                            if logact is not None:
+                                new_logact = list(logact)
+                                # Add basis logact values, but only if they are numeric
+                                for la in basis_logacts:
+                                    try:
+                                        new_logact.append(float(la))
+                                    except (ValueError, TypeError):
+                                        # Non-numeric logact (possibly buffer name)
+                                        if messages:
+                                            print(f"subcrt: logact values of basis species are NA.")
+                                        new_logact.append(0.0)  # Default value
+                            # Check if this is a trivial reaction (same species)
+                            thermo_obj = thermo()
+                            new_formulas = []
+                            for sp in new_species:
+                                if isinstance(sp, int):
+                                    new_formulas.append(thermo_obj.obigt.loc[sp]['formula'])
+                                else:
+                                    # Look up formula from species name
+                                    sp_info = info(sp, messages=messages)
+                                    new_formulas.append(thermo_obj.obigt.loc[sp_info]['formula'])
+                            original_formulas = [result.species.iloc[i]['formula'] for i in range(len(species))]
+                            if set(new_formulas) == set(original_formulas) and set(new_state) == set(state if state else [None] * len(species)):
+                                if messages:
+                                    print("subcrt: balanced reaction, but it is a non-reaction; restarting...")
+                            else:
+                                if messages:
+                                    print("subcrt: adding missing composition from basis definition and restarting...")
+                            # Recursively call subcrt with balanced reaction
+                            # Use original T and P values to avoid double conversion issues
+                            T_to_use = original_T if original_T is not None else T
+                            P_to_use = original_P if original_P is not None else P
+                            return subcrt(species=new_species, coeff=new_coeff, state=new_state,
+                                        property=property, T=T_to_use, P=P_to_use, grid=grid, convert=convert,
+                                        logact=new_logact, exceed_Ttr=exceed_Ttr, exceed_rhomin=exceed_rhomin, IS=IS,
+                                        messages=messages, show=show, basis=basis_arg, _recursion_count=recursion_count + 1)
+                    except np.linalg.LinAlgError:
+                        from ..utils.formula import as_chemical_formula
+                        missing_formula = as_chemical_formula(missing_composition)
+                        result.warnings.append(f"reaction among {','.join(species)} was unbalanced, missing {missing_formula}")
+                else:
+                    from ..utils.formula import as_chemical_formula
+                    missing_formula = as_chemical_formula(missing_composition)
+                    result.warnings.append(f"reaction among {','.join(species)} was unbalanced, missing {missing_formula}")
+            else:
+                from ..utils.formula import as_chemical_formula
+                missing_formula = as_chemical_formula(missing_composition)
+                result.warnings.append(f"reaction among {','.join(species)} was unbalanced, missing {missing_formula}")
+    except Exception as e:
+        result.warnings.append(f"could not check reaction balance: {str(e)}")
+        import traceback
+        traceback.print_exc()
+    return None  # Continue with original calculation
+def _select_stable_polymorphs(properties_data, iphases, polymorph_species, ispecies, thermo_sys, reaction_coeff=None, messages=True):
+    """
+    Select stable polymorphs based on minimum Gibbs energy at each T-P condition.
+    This function replicates the R CHNOSZ polymorph selection logic from lines 441-499
+    in subcrt.R, where the stable polymorph is determined by finding the minimum
+    Gibbs energy at each temperature-pressure point.
+    Parameters
+    ----------
+    properties_data : dict
+        Dictionary with 'species_data' containing calculated properties for all polymorphs
+    iphases : list
+        List of phase indices (includes all polymorphs)
+    polymorph_species : list
+        Maps each phase to its original species index
+    ispecies : list
+        Original species indices (without polymorphic expansion)
+    thermo_sys : ThermoSystem
+        Thermodynamic system for species names
+    reaction_coeff : list, optional
+        Reaction coefficients that need to be updated when polymorphs are collapsed
+    Returns
+    -------
+    dict or tuple
+        If reaction_coeff is None: Updated properties_data with only stable polymorphs
+        If reaction_coeff is not None: (Updated properties_data, Updated coefficients)
+    """
+    if not isinstance(properties_data, dict) or 'species_data' not in properties_data:
+        if reaction_coeff is not None:
+            return properties_data, reaction_coeff, iphases
+        else:
+            return properties_data, iphases
+    species_data_list = properties_data['species_data']
+    n_conditions = len(properties_data['T'])
+    # Group phases by original species
+    species_groups = {}
+    for i, orig_species in enumerate(polymorph_species):
+        if orig_species not in species_groups:
+            species_groups[orig_species] = []
+        species_groups[orig_species].append(i)
+    new_species_data = []
+    new_iphases = []
+    new_polymorph_species = []
+    new_coefficients = [] if reaction_coeff is not None else None
+    for orig_species_idx, phase_indices in species_groups.items():
+        # Check if we have duplicated phases (same species repeated) vs. actual polymorphs
+        # In R: if(TRUE %in% duplicated(iphases[are.polymorphs]))
+        phases_for_this_species = [iphases[i] for i in phase_indices]
+        # If there are duplicate iphase values, filter to unique polymorphs only
+        # (this handles cases like subcrt(['O2', 'O2'], [-1, 1], ['gas', 'gas']))
+        if len(phases_for_this_species) != len(set(phases_for_this_species)):
+            # We have duplicates - keep only one of each unique phase
+            unique_phases = {}
+            for idx in phase_indices:
+                phase_id = iphases[idx]
+                if phase_id not in unique_phases:
+                    unique_phases[phase_id] = idx
+            phase_indices = list(unique_phases.values())
+        if len(phase_indices) > 1:
+            # Multiple polymorphs - select stable one at each T-P point
+            species_name = thermo_sys.obigt.loc[iphases[phase_indices[0]]]['name']
+            if messages:
+                print(f"subcrt: {len(phase_indices)} polymorphs for {species_name} ... ", end="")
+            # DEBUG: Print G values for all polymorphs
+            debug_polymorphs = False  # Set to True for debugging
+            # Collect Gibbs energies and check temperature validity for all polymorphs
+            G_data = []
+            z_T_values = []  # Transition temperatures
+            for poly_idx, phase_i in enumerate(phase_indices):
+                obigt_idx = iphases[phase_i]
+                # Get G values
+                if phase_i < len(species_data_list) and 'G' in species_data_list[phase_i].columns:
+                    G_values = species_data_list[phase_i]['G'].values
+                else:
+                    G_values = np.full(n_conditions, np.nan)
+                G_data.append(G_values)
+                # Get transition temperature (z.T) for this polymorph
+                z_T = thermo_sys.obigt.loc[obigt_idx].get('z.T', np.nan)
+                z_T_values.append(z_T)
+                if debug_polymorphs and species_name == "iron":
+                    state_loc = thermo_sys.obigt.loc[obigt_idx]['state'] if obigt_idx in thermo_sys.obigt.index else 'INVALID_LOC'
+                    print(f"\n  Polymorph {poly_idx+1} (idx={obigt_idx}, state={state_loc}, z.T={z_T}): G={G_values[0]:.2f}" if len(G_values) > 0 else f"\n  Polymorph {poly_idx+1}: G=NaN")
+            if not G_data:
+                # No G data available - just take first polymorph
+                stable_polymorph_indices = np.zeros(n_conditions, dtype=int)
+            else:
+                G_array = np.array(G_data).T  # Shape: (n_conditions, n_polymorphs)
+                z_T_array = np.array(z_T_values)  # Shape: (n_polymorphs,)
+                stable_polymorph_indices = np.full(n_conditions, 0, dtype=int)
+                # Get temperature array from species data
+                if species_data_list and len(species_data_list[0]) > 0:
+                    T_celsius = species_data_list[0]['T'].values  # In Celsius
+                    T_kelvin = T_celsius + 273.15  # Convert to Kelvin for comparison with z.T
+                else:
+                    T_kelvin = np.full(n_conditions, 298.15)  # Default to 25°C
+                for j in range(n_conditions):
+                    G_row = G_array[j, :]
+                    T_j = T_kelvin[j]
+                    # Filter polymorphs by temperature range validity
+                    # Each polymorph cr, cr2, cr3, cr4 has a transition temperature z.T
+                    # Polymorph i is valid if: z.T[i-1] <= T < z.T[i]
+                    # where z.T[0] = 0 (cr is valid from absolute zero)
+                    temp_valid_mask = np.zeros(len(phase_indices), dtype=bool)
+                    for poly_idx in range(len(phase_indices)):
+                        z_T_curr = z_T_array[poly_idx]
+                        # Lower bound: previous polymorph's z.T (or 0 for first polymorph)
+                        if poly_idx == 0:
+                            T_min = 0.0
+                        else:
+                            z_T_prev = z_T_array[poly_idx - 1]
+                            T_min = z_T_prev if not np.isnan(z_T_prev) else 0.0
+                        # Upper bound: current polymorph's z.T
+                        if np.isnan(z_T_curr):
+                            T_max = np.inf  # No upper limit
+                        else:
+                            T_max = z_T_curr
+                        # Check if T is in range [T_min, T_max)
+                        temp_valid_mask[poly_idx] = (T_j >= T_min) and (T_j < T_max)
+                    # Combine temperature validity with G availability
+                    valid_mask = temp_valid_mask & ~np.isnan(G_row)
+                    if np.any(valid_mask):
+                        # Find minimum G among temperature-valid, non-NaN polymorphs
+                        valid_indices = np.where(valid_mask)[0]
+                        min_idx = valid_indices[np.argmin(G_row[valid_mask])]
+                        stable_polymorph_indices[j] = min_idx
+                    elif np.any(~np.isnan(G_row)):
+                        # No temperature-valid polymorphs, but we have G data
+                        # Use the polymorph with the highest transition temperature (most stable at high T)
+                        available_indices = np.where(~np.isnan(G_row))[0]
+                        # Among available, choose the one with highest z.T (or last if all NaN)
+                        z_T_available = z_T_array[available_indices]
+                        if np.any(~np.isnan(z_T_available)):
+                            max_z_T_idx = available_indices[np.nanargmax(z_T_available)]
+                            stable_polymorph_indices[j] = max_z_T_idx
+                        else:
+                            stable_polymorph_indices[j] = available_indices[-1]
+                    else:
+                        # All NaN - use first polymorph
+                        stable_polymorph_indices[j] = 0
+            # Create combined result using stable polymorph at each T-P point
+            combined_data = species_data_list[phase_indices[0]].copy()
+            for j in range(n_conditions):
+                stable_idx = stable_polymorph_indices[j]
+                stable_phase_i = phase_indices[stable_idx]
+                if stable_phase_i < len(species_data_list):
+                    # Copy data from stable polymorph for this T-P point
+                    for col in combined_data.columns:
+                        if col in species_data_list[stable_phase_i].columns:
+                            combined_data.iloc[j, combined_data.columns.get_loc(col)] = \
+                                species_data_list[stable_phase_i].iloc[j, species_data_list[stable_phase_i].columns.get_loc(col)]
+            # Add polymorph column to track which polymorph was selected
+            combined_data['polymorph'] = stable_polymorph_indices + 1  # 1-based like R
+            new_species_data.append(combined_data)
+            new_iphases.append(iphases[phase_indices[0]])  # Use first phase index as representative
+            new_polymorph_species.append(orig_species_idx)
+            # Update coefficients - use the coefficient of the first polymorph
+            # (all polymorphs of the same species should have the same coefficient)
+            if new_coefficients is not None:
+                new_coefficients.append(reaction_coeff[phase_indices[0]])
+            # Report which polymorphs are stable
+            unique_polymorphs = np.unique(stable_polymorph_indices + 1)
+            if messages:
+                if len(unique_polymorphs) > 1:
+                    word = "are"
+                    p_word = "polymorphs"
+                else:
+                    word = "is"
+                    p_word = "polymorph"
+                print(f"{p_word} {','.join(map(str, unique_polymorphs))} {word} stable")
+        else:
+            # Single polymorph - keep as-is
+            phase_i = phase_indices[0]
+            new_species_data.append(species_data_list[phase_i])
+            new_iphases.append(iphases[phase_i])
+            new_polymorph_species.append(orig_species_idx)
+            # Update coefficients - single species keeps its coefficient
+            if new_coefficients is not None:
+                new_coefficients.append(reaction_coeff[phase_i])
+    # Update the properties data structure
+    updated_properties = properties_data.copy()
+    updated_properties['species_data'] = new_species_data
+    updated_properties['n_species'] = len(new_species_data)
+    if reaction_coeff is not None:
+        return updated_properties, new_coefficients, new_iphases
+    else:
+        return updated_properties, new_iphases
+def _calculate_properties(property, iphases, isaq, isH2O, iscgl, T, P, exceed_rhomin, exceed_Ttr, IS, logact, do_reaction=True):
+    """Calculate thermodynamic properties for all species.
+    Returns
+    -------
+    tuple
+        (result_df, warnings_list) - result data and list of warning messages
+    """
+    from ..models.hkf import hkf
+    from ..models.cgl import cgl
+    thermo_sys = thermo()
+    n_conditions = len(T)
+    # Initialize warnings list
+    calc_warnings = []
+    # Properties to calculate from EOS (exclude logK and rho which are derived)
+    eosprop = [p for p in property if p not in ["logK", "rho"]]
+    # If logK is requested but G is not in the list, add G to eosprop
+    # because logK is calculated from G
+    if "logK" in property and "G" not in eosprop:
+        eosprop.append("G")
+    # Initialize results storage - use species index as key
+    all_properties = {}
+    # Always use equation of state calculations (matching R CHNOSZ behavior)
+    # R CHNOSZ has no "standard conditions bypass" - it always calls HKF for aqueous species
+    # Convert P="Psat" to actual pressure values for all calculations
+    if isinstance(P, str) and P == "Psat":
+        from ..models.water import water
+        # Calculate Psat for all temperatures at once (vectorized)
+        P_calculated = water("Psat", T=T)
+        P_calculated = np.atleast_1d(P_calculated)
+        # IMPORTANT: Add small epsilon to Psat to ensure liquid phase
+        # When P = Psat exactly, water properties can switch to steam phase
+        # Adding a tiny amount ensures we stay in liquid phase, matching R CHNOSZ behavior
+        P_calculated = P_calculated + 0.0001  # Add 0.1 millibar
+    else:
+        P_calculated = P
+    # Calculate aqueous species properties using HKF
+    if isaq.any():
+        aq_indices = np.where(isaq)[0]
+        aq_params = thermo_sys.obigt.loc[[iphases[i] for i in aq_indices]]
+        # CRITICAL FIX: Reset index to avoid duplicate index issues when same species
+        # appears multiple times (e.g., two SO4-2 in a balanced reaction)
+        # Store original OBIGT indices for later reference
+        original_obigt_indices = aq_params.index.tolist()
+        aq_params = aq_params.reset_index(drop=True)
+        try:
+            # Get water properties needed for HKF
+            H2O_props = ["rho"]
+            if IS != 0:  # Need additional properties for activity corrections
+                H2O_props += ["A_DH", "B_DH"]
+            if isH2O.any():  # Water is in the reaction
+                H2O_props += eosprop
+            # HKF model now handles array T/P (vectorized)
+            # Initialize storage for results across all T/P conditions
+            for aq_idx in aq_indices:
+                all_properties[aq_idx] = {prop: [] for prop in eosprop}
+            # Call HKF model once with all T/P conditions (vectorized)
+            T_array = np.atleast_1d(T)
+            P_array = np.atleast_1d(P_calculated)
+            # Call HKF model for all T/P conditions at once
+            aq_results, H2O_data = hkf(property=eosprop, parameters=aq_params,
+                                       T=T_array, P=P_array, H2O_props=H2O_props)
+            # DEBUG: Check what HKF returns
+            if False:
+                print(f"\nDEBUG after HKF call:")
+                print(f"  T_array: {T_array}")
+                print(f"  aq_results keys: {list(aq_results.keys())}")
+                for key in aq_results.keys():
+                    print(f"  aq_results[{key}] keys: {list(aq_results[key].keys())[:5]}")
+                    if 'V' in aq_results[key]:
+                        print(f"    V shape/values: {np.array(aq_results[key]['V']).shape}, {aq_results[key]['V']}")
+            # Extract results for each species and property
+            for i, aq_idx in enumerate(aq_indices):
+                # Use sequential index (0, 1, 2, ...) since we reset the index above
+                df_index = i
+                species_props = aq_results[df_index]
+                # Check E_units to determine if values are already in J
+                # Use original OBIGT index for this lookup
+                species_row = thermo_sys.obigt.loc[original_obigt_indices[i]]
+                e_units = species_row.get('E_units', 'cal')
+                already_in_joules = (e_units == 'J')
+                # DEBUG
+                if False:  # Set to True for debugging
+                    print(f"\nDEBUG HKF results extraction:")
+                    print(f"  i={i}, aq_idx={aq_idx}, df_index={df_index}")
+                    print(f"  V values (first 3): {species_props.get('V', [])[:3]}")
+                    print(f"  logK values (first 3): {species_props.get('logK', [])[:3]}")
+                for prop in eosprop:
+                    if prop in species_props:
+                        # Convert HKF results from cal to J to match water function units
+                        # BUT skip conversion for species already in J units (E_units='J')
+                        values = species_props[prop]
+                        if prop in ['G', 'H', 'S', 'Cp'] and not already_in_joules:
+                            values = values * 4.184
+                        # Store array of values for all T/P conditions
+                        all_properties[aq_idx][prop] = np.atleast_1d(values)
+            # Store water properties if needed (when water is among aqueous species)
+            # IMPORTANT: Reuse H2O_data from HKF instead of calling water() again!
+            # This matches R CHNOSZ behavior (line 308: H2O.PT <- hkfstuff$H2O)
+            if isH2O.any():
+                h2o_indices = np.where(isH2O)[0]
+                # Use water properties already calculated by HKF (no redundant call!)
+                # H2O_data is returned from hkf() and contains all needed properties
+                for h2o_idx in h2o_indices:
+                    if h2o_idx not in all_properties:
+                        all_properties[h2o_idx] = {}
+                    for prop in eosprop:
+                        if isinstance(H2O_data, dict) and prop in H2O_data:
+                            # Get property value from dict
+                            value = H2O_data[prop]
+                            all_properties[h2o_idx][prop] = np.atleast_1d(value)
+                        elif hasattr(H2O_data, prop):
+                            # Get property value from object attribute
+                            value = getattr(H2O_data, prop)
+                            all_properties[h2o_idx][prop] = np.atleast_1d(value)
+                        else:
+                            # Property not available
+                            all_properties[h2o_idx][prop] = np.full(n_conditions, np.nan)
+            # Set properties to NA for density below 0.35 g/cm3 (threshold used in SUPCRT92)
+            # Matching R CHNOSZ subcrt.R lines 309-318
+            if not exceed_rhomin:
+                # Get water density from H2O_data (in kg/m³)
+                if isinstance(H2O_data, dict) and 'rho' in H2O_data:
+                    rho_values = np.atleast_1d(H2O_data['rho'])
+                elif hasattr(H2O_data, 'rho'):
+                    rho_values = np.atleast_1d(H2O_data.rho)
+                else:
+                    rho_values = None
+                if rho_values is not None:
+                    # Check for low density (< 350 kg/m³ = 0.35 g/cm³)
+                    ilowrho = rho_values < 350
+                    # Set NaN values to False (don't flag them)
+                    ilowrho = np.where(np.isnan(rho_values), False, ilowrho)
+                    if np.any(ilowrho):
+                        # Set all aqueous species properties to NaN for low-density conditions
+                        for aq_idx in aq_indices:
+                            for prop in eosprop:
+                                if aq_idx in all_properties and prop in all_properties[aq_idx]:
+                                    prop_array = np.array(all_properties[aq_idx][prop])
+                                    prop_array[ilowrho] = np.nan
+                                    all_properties[aq_idx][prop] = prop_array
+                        # Add warning message
+                        n_lowrho = np.sum(ilowrho)
+                        ptext = "pair" if n_lowrho == 1 else "pairs"
+                        calc_warnings.append(f"below minimum density for applicability of revised HKF equations ({n_lowrho} T,P {ptext})")
+        except Exception as e:
+            print(f"Warning: HKF calculation failed: {e}")
+            # Fill with NaN for failed aqueous calculations
+            for aq_idx in aq_indices:
+                all_properties[aq_idx] = {prop: np.full(n_conditions, np.nan) for prop in eosprop}
+    # Handle water species directly if present and not handled by HKF (mirroring R CHNOSZ behavior)
+    if isH2O.any() and not isaq.any():
+        # We're not using the HKF, but still want water properties
+        from ..models.water import water
+        try:
+            # Calculate water properties directly - mirroring R line 333
+            H2O_props = ["rho"] + eosprop
+            H2O_PT = water(property=H2O_props, T=T, P=P)
+            # Store water properties for all H2O species
+            h2o_indices = np.where(isH2O)[0]
+            for h2o_idx in h2o_indices:
+                if h2o_idx not in all_properties:
+                    all_properties[h2o_idx] = {}
+                for prop in eosprop:
+                    if hasattr(H2O_PT, prop):
+                        # Water function returns scalar or array
+                        prop_value = getattr(H2O_PT, prop)
+                        if np.isscalar(prop_value):
+                            all_properties[h2o_idx][prop] = np.full(n_conditions, prop_value)
+                        else:
+                            all_properties[h2o_idx][prop] = np.atleast_1d(prop_value)
+                    elif isinstance(H2O_PT, dict) and prop in H2O_PT:
+                        # Dictionary format
+                        prop_value = H2O_PT[prop]
+                        if np.isscalar(prop_value):
+                            all_properties[h2o_idx][prop] = np.full(n_conditions, prop_value)
+                        else:
+                            all_properties[h2o_idx][prop] = np.atleast_1d(prop_value)
+                    else:
+                        # Property not available
+                        all_properties[h2o_idx][prop] = np.full(n_conditions, np.nan)
+        except Exception as e:
+            print(f"Warning: Direct water calculation failed: {e}")
+            # Fill with NaN for failed water calculations
+            h2o_indices = np.where(isH2O)[0]
+            for h2o_idx in h2o_indices:
+                all_properties[h2o_idx] = {prop: np.full(n_conditions, np.nan) for prop in eosprop}
+    # Calculate crystalline/gas/liquid species properties using CGL
+    if iscgl.any():
+        cgl_indices = np.where(iscgl)[0]
+        cgl_params = thermo_sys.obigt.loc[[iphases[i] for i in cgl_indices]]
+        # Reset index to avoid duplicate index issues (same fix as for HKF)
+        original_cgl_obigt_indices = cgl_params.index.tolist()
+        cgl_params = cgl_params.reset_index(drop=True)
+        try:
+            # CGL model now handles array T/P (vectorized)
+            # Initialize storage for results across all T/P conditions
+            for cgl_idx in cgl_indices:
+                all_properties[cgl_idx] = {prop: [] for prop in eosprop}
+            # Call CGL model once with all T/P conditions (vectorized)
+            T_array = np.atleast_1d(T)
+            P_array = np.atleast_1d(P_calculated)
+            # Call CGL model for all T/P conditions at once
+            cgl_result = cgl(property=eosprop, parameters=cgl_params, T=T_array, P=P_array)
+            # Extract results for each species
+            for i, cgl_idx in enumerate(cgl_indices):
+                # Use sequential index since we reset the index above
+                df_index = i
+                species_props = cgl_result[df_index]
+                # Check if this species uses Berman model
+                # NOTE: A mineral is only Berman if it LACKS standard thermodynamic data (G,H,S)
+                # If G,H,S are present, use regular CGL even if heat capacity coefficients are all zero
+                # Use original OBIGT index for this lookup
+                species_row = thermo_sys.obigt.loc[original_cgl_obigt_indices[i]]
+                berman_cols = ['a1.a', 'a2.b', 'a3.c', 'a4.d', 'c1.e', 'c2.f', 'omega.lambda', 'z.T']
+                has_standard_thermo = pd.notna(species_row.get('G', np.nan)) and pd.notna(species_row.get('H', np.nan)) and pd.notna(species_row.get('S', np.nan))
+                all_coeffs_zero_or_na = all(pd.isna(species_row.get(col, np.nan)) or species_row.get(col, 0) == 0 for col in berman_cols)
+                is_berman = all_coeffs_zero_or_na and not has_standard_thermo
+                # Check E_units to determine if values are already in J
+                # IMPORTANT: As of the cgl.py fix, Berman minerals return cal/mol (converted from J/mol)
+                # even though they have E_units='J' in OBIGT. So we need cal->J conversion for them.
+                # Only skip conversion for non-Berman species explicitly marked with E_units='J'
+                e_units = species_row.get('E_units', 'cal')
+                already_in_joules = (e_units == 'J') and not is_berman
+                for prop in eosprop:
+                    if prop in species_props:
+                        # Convert CGL results from cal to J to match HKF and water function units
+                        # BUT skip conversion for species already in J units (Berman minerals or E_units='J')
+                        prop_values = species_props[prop]
+                        if prop in ['G', 'H', 'S', 'Cp'] and not already_in_joules:
+                            prop_values = prop_values * 4.184
+                        # Store array of values for all T/P conditions
+                        all_properties[cgl_idx][prop] = np.atleast_1d(prop_values)
+        except Exception as e:
+            import traceback
+            print(f"Warning: CGL calculation failed: {e}")
+            print(f"Traceback:")
+            traceback.print_exc()
+            print(f"CGL species that failed:")
+            for i, cgl_idx in enumerate(cgl_indices):
+                df_index = cgl_params.index[i]
+                species_row = thermo_sys.obigt.loc[df_index]
+                print(f"  {species_row['name']} (index {df_index})")
+            # Fill with NaN for failed CGL calculations
+            for cgl_idx in cgl_indices:
+                all_properties[cgl_idx] = {prop: np.full(n_conditions, np.nan) for prop in eosprop}
+    # Create output DataFrame structure
+    # For single species, return properties directly
+    # For multiple species in reactions, return dict for summation
+    # For multiple species without reactions, treat each as individual
+    # Determine if we should automatically add rho to output (matching R CHNOSZ behavior)
+    # R adds rho when: "rho" in property OR (using default properties AND any aqueous/H2O species)
+    default_properties = ["logK", "G", "H", "S", "V", "Cp"]
+    is_default_property_list = (property == default_properties)
+    should_add_rho = ("rho" in property) or (is_default_property_list and (isaq.any() or isH2O.any()))
+    if len(iphases) == 1:
+        # Single species - return properties directly
+        # species_idx should be 0 (the enumerate index into iphases/isaq arrays)
+        # NOT iphases[0] (the actual OBIGT index)
+        species_idx = 0
+        output_data = {'T': T - 273.15}  # Convert to Celsius for output like R
+        if isinstance(P, str) and P == "Psat":
+            # Calculate actual Psat values for output (vectorized)
+            from ..models.water import water
+            P_values = water("Psat", T=T)
+            output_data['P'] = np.atleast_1d(P_values)
+        else:
+            output_data['P'] = P
+        # Add rho column if needed (matching R CHNOSZ - appears after P, before other properties)
+        if should_add_rho:
+            try:
+                from ..models.water import water
+                rho_result = water('rho', T=T, P=P_calculated)
+                # Convert from kg/m³ to g/cm³ (divide by 1000) to match R CHNOSZ
+                output_data['rho'] = np.atleast_1d(rho_result) / 1000
+            except:
+                output_data['rho'] = np.full(n_conditions, np.nan)
+        # Add calculated properties
+        for prop in property:
+            if prop == "logK":
+                # Calculate logK from G for individual species (matching R behavior)
+                if species_idx in all_properties and 'G' in all_properties[species_idx]:
+                    G_values = all_properties[species_idx]['G']
+                    if not np.all(np.isnan(G_values)):
+                        # DEBUG
+                        if False:  # Set to True for debugging
+                            print(f"DEBUG logK calculation for species_idx={species_idx}:")
+                            print(f"  G_values[0] = {G_values[0]}")
+                            print(f"  T[0] = {T[0]}")
+                        # logK = -G°/(ln(10)*R*T), using T in Kelvin for calculation
+                        R = 8.314462618  # J/(mol·K) - CODATA 2018 value
+                        T_kelvin = T  # T is already in Kelvin here
+                        logK_values = -G_values / (np.log(10) * R * T_kelvin)
+                        output_data[prop] = logK_values
+                    else:
+                        output_data[prop] = np.full(n_conditions, np.nan)
+                else:
+                    output_data[prop] = np.full(n_conditions, np.nan)
+            elif prop == "rho":
+                # Skip - already added above if should_add_rho is True
+                if not should_add_rho:
+                    # Only add here if it was explicitly requested but not in default case
+                    try:
+                        from ..models.water import water
+                        rho_result = water('rho', T=T, P=P_calculated)
+                        # Convert from kg/m³ to g/cm³ (divide by 1000) to match R CHNOSZ
+                        output_data[prop] = np.atleast_1d(rho_result) / 1000
+                    except:
+                        output_data[prop] = np.full(n_conditions, np.nan)
+            else:
+                # Regular thermodynamic property
+                if species_idx in all_properties and prop in all_properties[species_idx]:
+                    output_data[prop] = all_properties[species_idx][prop]
+                else:
+                    output_data[prop] = np.full(n_conditions, np.nan)
+        result_df = pd.DataFrame(output_data)
+    else:
+        # Multiple species - return all properties for reaction summation
+        all_species_data = []
+        for i, phase_idx in enumerate(iphases):
+            # DEBUG
+            if False:  # Set to True for debugging
+                print(f"\nDEBUG: Processing species i={i}, phase_idx={phase_idx}")
+                print(f"  all_properties keys: {list(all_properties.keys())}")
+                if i in all_properties:
+                    print(f"  WARNING: Using wrong index! i={i} exists in all_properties")
+                if phase_idx in all_properties:
+                    print(f"  CORRECT: phase_idx={phase_idx} exists in all_properties")
+            species_data = {'T': T - 273.15}  # Convert to Celsius for output like R
+            if isinstance(P, str):
+                species_data['P'] = P_calculated
+            else:
+                species_data['P'] = P
+            # Add rho column if needed (matching R CHNOSZ - appears after P, before other properties)
+            if should_add_rho:
+                try:
+                    from ..models.water import water
+                    rho_result = water('rho', T=T, P=P_calculated)
+                    # Convert from kg/m³ to g/cm³ (divide by 1000) to match R CHNOSZ
+                    # Handle both scalar and array returns from water()
+                    if np.isscalar(rho_result):
+                        species_data['rho'] = np.full(n_conditions, rho_result / 1000)
+                    else:
+                        species_data['rho'] = np.atleast_1d(rho_result) / 1000
+                except:
+                    species_data['rho'] = np.full(n_conditions, np.nan)
+            # Add properties for this species
+            # If logK is requested, we need to store G as well (for reaction calculations)
+            props_to_store = list(property)
+            if 'logK' in property and 'G' not in props_to_store:
+                props_to_store.append('G')
+            # Loop over properties to store
+            for prop in props_to_store:
+                if prop == "logK":
+                    # Calculate logK from G for individual species
+                    if i in all_properties and 'G' in all_properties[i]:
+                        G_values = all_properties[i]['G']
+                        if not np.all(np.isnan(G_values)):
+                            # logK = -G°/(ln(10)*R*T), using T in Kelvin for calculation
+                            R = 8.314462618  # J/(mol·K) - CODATA 2018 value
+                            T_kelvin = T  # T is already in Kelvin here
+                            logK_values = -G_values / (np.log(10) * R * T_kelvin)
+                            species_data[prop] = logK_values
+                        else:
+                            species_data[prop] = np.full(n_conditions, np.nan)
+                    else:
+                        species_data[prop] = np.full(n_conditions, np.nan)
+                elif prop == "rho":
+                    # Skip - already added above if should_add_rho is True
+                    if not should_add_rho:
+                        # Only add here if it was explicitly requested but not in default case
+                        try:
+                            from ..models.water import water
+                            rho_result = water('rho', T=T, P=P_calculated)
+                            # Convert from kg/m³ to g/cm³ (divide by 1000) to match R CHNOSZ
+                            # Handle both scalar and array returns from water()
+                            if np.isscalar(rho_result):
+                                species_data[prop] = np.full(n_conditions, rho_result / 1000)
+                            else:
+                                species_data[prop] = np.atleast_1d(rho_result) / 1000
+                        except:
+                            species_data[prop] = np.full(n_conditions, np.nan)
+                elif prop in eosprop:
+                    # Regular thermodynamic property from EOS calculations
+                    if i in all_properties and prop in all_properties[i]:
+                        species_data[prop] = all_properties[i][prop]
+                    else:
+                        species_data[prop] = np.full(n_conditions, np.nan)
+            all_species_data.append(pd.DataFrame(species_data))
+        # Return structure that can be used for reaction summation
+        result_df = {
+            'species_data': all_species_data,
+            'n_species': len(iphases),
+            'T': T,
+            'P': P,
+            'properties': property,
+            'eosprop': eosprop
+        }
+    return result_df, calc_warnings
+def _sum_reaction_properties(properties_data, coefficients):
+    """Sum individual species properties to get reaction properties."""
+    if isinstance(properties_data, pd.DataFrame):
+        # Single species case - just return as is
+        return properties_data
+    if not isinstance(properties_data, dict) or 'species_data' not in properties_data:
+        # Fallback for unexpected format
+        return properties_data
+    # Extract data from the dictionary structure
+    species_data_list = properties_data['species_data']
+    T = properties_data['T']
+    P = properties_data['P']
+    property_list = properties_data['properties']
+    if not species_data_list or len(species_data_list) != len(coefficients):
+        # Mismatch - return empty DataFrame
+        n_conditions = len(T)
+        # Get pressure values for fallback case
+        if isinstance(P, str) and species_data_list:
+            first_species_df = species_data_list[0]
+            if 'P' in first_species_df.columns:
+                P_values = first_species_df['P'].values
+            else:
+                P_values = np.full(n_conditions, np.nan)
+        else:
+            P_values = P if not isinstance(P, str) else np.full(n_conditions, np.nan)
+        return pd.DataFrame({
+            'T': T - 273.15,  # Convert to Celsius for output like R
+            'P': P_values
+        })
+    # Initialize reaction DataFrame
+    n_conditions = len(T)
+    # Get pressure values - if P was "Psat", get actual values from species data
+    if isinstance(P, str) and species_data_list:
+        # Get pressure from first species (all should have same pressure conditions)
+        first_species_df = species_data_list[0]
+        if 'P' in first_species_df.columns:
+            P_values = first_species_df['P'].values
+        else:
+            P_values = np.full(n_conditions, np.nan)
+    else:
+        P_values = P if not isinstance(P, str) else np.full(n_conditions, np.nan)
+    # Build reaction_data in the correct column order to match R CHNOSZ
+    # Order: T, P, rho (if present), then properties in property_list order
+    reaction_data = {
+        'T': T - 273.15,  # Convert to Celsius for output like R
+        'P': P_values
+    }
+    # Check if rho should be added (matching R CHNOSZ behavior)
+    # Add rho if it's in species_data (it was added during property calculation)
+    has_rho = species_data_list and 'rho' in species_data_list[0].columns
+    if has_rho:
+        # Get rho from first species (rho is same for all species at given T-P)
+        reaction_data['rho'] = species_data_list[0]['rho'].values
+    # Debug: check what properties are available
+    if species_data_list:
+        available_props = species_data_list[0].columns.tolist()
+    # Need to calculate G if logK is requested but G is not
+    need_G_for_logK = 'logK' in property_list and 'G' not in property_list
+    # Sum properties in the order specified by property_list to match R CHNOSZ column order
+    # This ensures the output columns appear in the same order as the property parameter
+    for prop in property_list:
+        if prop == 'logK':
+            # Calculate logK from ΔG
+            # First, make sure G is calculated if not already in property_list
+            if 'G' not in reaction_data:
+                # Calculate G
+                G_sum = np.zeros(n_conditions)
+                all_nan = True
+                for species_df, coeff in zip(species_data_list, coefficients):
+                    if 'G' in species_df.columns:
+                        species_values = species_df['G'].values
+                        if not np.isnan(species_values).all():
+                            G_sum += coeff * species_values
+                            all_nan = False
+                # Always store G in reaction_data so logK can use it
+                reaction_data['G'] = G_sum if not all_nan else np.full(n_conditions, np.nan)
+            # Now calculate logK from G
+            G_values = reaction_data.get('G', np.full(n_conditions, np.nan))
+            if not np.isnan(G_values).all():
+                # logK = -ΔG°/(ln(10)*R*T)
+                T_array = np.atleast_1d(T)
+                R = 8.314462618  # J/(mol·K) - CODATA 2018 value
+                reaction_data['logK'] = -G_values / (np.log(10) * R * T_array)
+            else:
+                reaction_data['logK'] = np.full(n_conditions, np.nan)
+        elif prop == 'rho':
+            # Already added above, skip
+            pass
+        else:
+            # Regular thermodynamic property - sum weighted by coefficients
+            prop_sum = np.zeros(n_conditions)
+            all_nan = True
+            for species_df, coeff in zip(species_data_list, coefficients):
+                if prop in species_df.columns:
+                    species_values = species_df[prop].values
+                    if not np.isnan(species_values).all():
+                        prop_sum += coeff * species_values
+                        all_nan = False
+            reaction_data[prop] = prop_sum if not all_nan else np.full(n_conditions, np.nan)
+    # Remove G if it wasn't originally requested (we only added it to calculate logK)
+    if 'logK' in property_list and 'G' not in property_list and 'G' in reaction_data:
+        del reaction_data['G']
+    return pd.DataFrame(reaction_data)
+def _apply_r_chnosz_conversion(result_df, do_reaction=True):
+    """
+    Apply R CHNOSZ convert=TRUE conversion behavior.
+    Based on analysis of R CHNOSZ, convert=TRUE produces specific conversion factors
+    that are not simply 4.184 multiplication. This function applies the empirically
+    determined conversion factors to match R CHNOSZ convert=TRUE output.
+    Parameters
+    ----------
+    result_df : pd.DataFrame or dict
+        DataFrame with calculated thermodynamic properties (convert=FALSE equivalent)
+        or dict structure for multiple species
+    do_reaction : bool
+        Whether this is a reaction calculation
+    Returns
+    -------
+    pd.DataFrame or dict
+        DataFrame with R CHNOSZ convert=TRUE equivalent values
+    """
+    if result_df is None:
+        return result_df
+    # If it's a dictionary (multi-species case)
+    if isinstance(result_df, dict):
+        if do_reaction:
+            # For reactions, return as-is since conversion will be applied after summation
+            return result_df
+        else:
+            # For non-reaction multiple species, apply conversion to each species DataFrame
+            if 'species_data' in result_df:
+                converted_species_data = []
+                for species_df in result_df['species_data']:
+                    converted_species_data.append(_apply_r_chnosz_conversion(species_df, True))
+                result_df['species_data'] = converted_species_data
+            return result_df
+    if result_df.empty:
+        return result_df
+    # Skip conversion for reaction results (after summation) - they're already in correct units
+    if do_reaction:  # This means it's a reaction result (after summation)
+        return result_df
+    converted_df = result_df.copy()
+    # Apply empirically determined conversion factors to match R CHNOSZ convert=TRUE
+    # These factors are derived from comparing R CHNOSZ convert=FALSE vs convert=TRUE
+    # For OH- at 298.15 K:
+    # convert=FALSE: G=-157297.5, H=-230023.8, S=-10.711, Cp=-136.338
+    # convert=TRUE:  G=-140185.6, H=-307327.3, S=-170.854, Cp=-1263.642
+    # Apply selective conversion:
+    # - HKF-calculated properties (from HKF function) are already in J units
+    # - Berman-calculated properties (from Berman function) are already in J units
+    # - Database standard state values still need cal->J conversion
+    # - Cp from HKF/Berman should NOT be converted (already in J units)
+    # Detect if values are from EOS calculations (HKF/Berman) vs database
+    # EOS calculations return values in reasonable J/mol ranges, while database
+    # cal/mol values are typically 4x smaller in magnitude
+    # All values at this point should already be in Joules since:
+    # - HKF results (aqueous species) are converted to J at lines 657-660
+    # - Water function results are already in J
+    # - CGL results (minerals) are now converted to J at lines 756-759
+    # So no additional conversion should be needed here
+    # The convert=True flag in R CHNOSZ subcrt() just means "return results in Joules"
+    # Since we've already converted everything to Joules in the calculation phase,
+    # no additional conversion is needed here
+    return converted_df
+def _apply_unit_conversion(result_df):
+    """
+    Simple unit conversion (4.184 factor) - kept for reference.
+    This was the initial implementation before discovering R CHNOSZ complexity.
+    """
+    if result_df is None or result_df.empty:
+        return result_df
+    # Conversion factor from calories to Joules (matching R CHNOSZ)
+    cal_to_J = 4.184
+    # Energy properties that need conversion from cal to J
+    energy_properties = ['G', 'H', 'S', 'Cp']
+    # Apply conversion to energy properties present in the DataFrame
+    converted_df = result_df.copy()
+    for prop in energy_properties:
+        if prop in converted_df.columns:
+            converted_df[prop] = converted_df[prop] * cal_to_J
+    return converted_df
+def _find_simple_integer_solution(basis_matrix, missing_vector, basis_species_names, missing_composition):
+    """
+    Find simple integer solutions for basis species coefficients.
+    This tries to match R CHNOSZ behavior by preferring simple integer combinations
+    like 1 H2O + 1 NH3 over complex fractional solutions.
+    """
+    # For small problems, try combinations of 1-3 species with coefficients 1-3
+    n_species = len(basis_species_names)
+    # Try single species solutions first (coefficient 1-3)
+    for i in range(n_species):
+        for coeff in [1, 2, 3]:
+            test_coeffs = np.zeros(n_species)
+            test_coeffs[i] = coeff
+            result = basis_matrix @ test_coeffs
+            if np.allclose(result, missing_vector, atol=1e-10):
+                return test_coeffs
+    # Try two-species solutions (coefficients 1-2 each)
+    for i in range(n_species):
+        for j in range(i+1, n_species):
+            for coeff1 in [1, 2]:
+                for coeff2 in [1, 2]:
+                    test_coeffs = np.zeros(n_species)
+                    test_coeffs[i] = coeff1
+                    test_coeffs[j] = coeff2
+                    result = basis_matrix @ test_coeffs
+                    if np.allclose(result, missing_vector, atol=1e-10):
+                        return test_coeffs
+    # Try three-species solutions (coefficient 1 each)
+    for i in range(n_species):
+        for j in range(i+1, n_species):
+            for k in range(j+1, n_species):
+                test_coeffs = np.zeros(n_species)
+                test_coeffs[i] = 1
+                test_coeffs[j] = 1
+                test_coeffs[k] = 1
+                result = basis_matrix @ test_coeffs
+                if np.allclose(result, missing_vector, atol=1e-10):
+                    return test_coeffs
+    return None  # No simple solution found
+# Update the main __init__.py to use the real implementation
+def _update_init_file():
+    """Update __init__.py to import real subcrt instead of placeholder."""
+    init_path = "/home/jupyteruser/CHNOSZ-main/python/chnosz/__init__.py"
+    # Read current content
+    with open(init_path, 'r') as f:
+        content = f.read()
+    # Replace placeholder with real import
+    new_content = content.replace(
+        '# from .core.subcrt import subcrt',
+        'from .core.subcrt import subcrt'
+    ).replace(
+        '''def subcrt(*args, **kwargs):
+    """Placeholder for subcrt function (not yet implemented)."""
+    raise NotImplementedError("subcrt function not yet implemented in Python version")''',
+        ''
+    )
+    # Update __all__ to include subcrt properly
+    new_content = new_content.replace(
+        "    'reset'",
+        "    'reset',\n    'subcrt'"
+    )
+    # Write updated content
+    with open(init_path, 'w') as f:
+        f.write(new_content)
+# Call the update function when module is imported
+# _update_init_file()  # Commented out for safety - will do manually
+def _display_subcrt_result(result: SubcrtResult):
+    """
+    Display subcrt result tables in Jupyter notebooks.
+    This function displays the .reaction and .out tables using IPython.display
+    if running in a Jupyter environment.
+    Parameters
+    ----------
+    result : SubcrtResult
+        The result object from subcrt()
+    """
+    try:
+        # Check if we're in a Jupyter/IPython environment
+        from IPython.display import display
+        from IPython import get_ipython
+        # Check if IPython is available and we're in an interactive environment
+        if get_ipython() is not None:
+            # Display reaction table if it exists
+            if result.reaction is not None and not result.reaction.empty:
+                display(result.reaction)
+            # Display output table if it exists
+            if result.out is not None and not result.out.empty:
+                display(result.out)
+    except ImportError:
+        # IPython not available - not in a Jupyter environment
+        pass
+    except Exception:
+        # Any other error - silently ignore
+        pass