pychnosz 1.1.11__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pychnosz/__init__.py +129 -0
- pychnosz/biomolecules/__init__.py +29 -0
- pychnosz/biomolecules/ionize_aa.py +197 -0
- pychnosz/biomolecules/proteins.py +595 -0
- pychnosz/core/__init__.py +46 -0
- pychnosz/core/affinity.py +1256 -0
- pychnosz/core/animation.py +593 -0
- pychnosz/core/balance.py +334 -0
- pychnosz/core/basis.py +716 -0
- pychnosz/core/diagram.py +3336 -0
- pychnosz/core/equilibrate.py +813 -0
- pychnosz/core/equilibrium.py +554 -0
- pychnosz/core/info.py +821 -0
- pychnosz/core/retrieve.py +364 -0
- pychnosz/core/speciation.py +580 -0
- pychnosz/core/species.py +599 -0
- pychnosz/core/subcrt.py +1696 -0
- pychnosz/core/thermo.py +593 -0
- pychnosz/core/unicurve.py +1226 -0
- pychnosz/data/__init__.py +11 -0
- pychnosz/data/add_obigt.py +327 -0
- pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
- pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
- pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
- pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
- pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
- pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
- pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
- pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
- pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
- pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
- pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
- pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
- pychnosz/data/extdata/Berman/sympy.R +99 -0
- pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
- pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
- pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
- pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
- pychnosz/data/extdata/OBIGT/AD.csv +25 -0
- pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
- pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
- pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
- pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
- pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
- pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
- pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
- pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
- pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
- pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
- pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
- pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
- pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
- pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
- pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
- pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
- pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
- pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
- pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
- pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
- pychnosz/data/extdata/misc/BZA10.csv +5 -0
- pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
- pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
- pychnosz/data/extdata/misc/LA19_test.csv +7 -0
- pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
- pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
- pychnosz/data/extdata/misc/PM90.csv +7 -0
- pychnosz/data/extdata/misc/RH95.csv +23 -0
- pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
- pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
- pychnosz/data/extdata/misc/SK95.csv +55 -0
- pychnosz/data/extdata/misc/SOJSH.csv +61 -0
- pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
- pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
- pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
- pychnosz/data/extdata/misc/bluered.txt +1000 -0
- pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
- pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
- pychnosz/data/extdata/protein/Cas/download.R +34 -0
- pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
- pychnosz/data/extdata/protein/POLG.csv +12 -0
- pychnosz/data/extdata/protein/TBD+05.csv +393 -0
- pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
- pychnosz/data/extdata/protein/rubisco.csv +28 -0
- pychnosz/data/extdata/protein/rubisco.fasta +239 -0
- pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
- pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
- pychnosz/data/extdata/src/README.txt +5 -0
- pychnosz/data/extdata/taxonomy/names.dmp +215 -0
- pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
- pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
- pychnosz/data/extdata/thermo/buffer.csv +40 -0
- pychnosz/data/extdata/thermo/element.csv +135 -0
- pychnosz/data/extdata/thermo/groups.csv +6 -0
- pychnosz/data/extdata/thermo/opt.csv +2 -0
- pychnosz/data/extdata/thermo/protein.csv +506 -0
- pychnosz/data/extdata/thermo/refs.csv +343 -0
- pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
- pychnosz/data/loader.py +431 -0
- pychnosz/data/mod_obigt.py +322 -0
- pychnosz/data/obigt.py +471 -0
- pychnosz/data/worm.py +228 -0
- pychnosz/fortran/__init__.py +16 -0
- pychnosz/fortran/h2o92.dll +0 -0
- pychnosz/fortran/h2o92_interface.py +527 -0
- pychnosz/geochemistry/__init__.py +21 -0
- pychnosz/geochemistry/minerals.py +514 -0
- pychnosz/geochemistry/redox.py +500 -0
- pychnosz/models/__init__.py +47 -0
- pychnosz/models/archer_wang.py +165 -0
- pychnosz/models/berman.py +309 -0
- pychnosz/models/cgl.py +381 -0
- pychnosz/models/dew.py +997 -0
- pychnosz/models/hkf.py +523 -0
- pychnosz/models/hkf_helpers.py +231 -0
- pychnosz/models/iapws95.py +1113 -0
- pychnosz/models/supcrt92_fortran.py +238 -0
- pychnosz/models/water.py +480 -0
- pychnosz/utils/__init__.py +27 -0
- pychnosz/utils/expression.py +1074 -0
- pychnosz/utils/formula.py +830 -0
- pychnosz/utils/formula_ox.py +227 -0
- pychnosz/utils/reset.py +33 -0
- pychnosz/utils/units.py +259 -0
- pychnosz-1.1.11.dist-info/METADATA +197 -0
- pychnosz-1.1.11.dist-info/RECORD +128 -0
- pychnosz-1.1.11.dist-info/WHEEL +5 -0
- pychnosz-1.1.11.dist-info/licenses/LICENSE.txt +19 -0
- pychnosz-1.1.11.dist-info/top_level.txt +1 -0
pychnosz/core/species.py
ADDED
|
@@ -0,0 +1,599 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Formed species management module.
|
|
3
|
+
|
|
4
|
+
This module provides Python equivalents of the R functions in species.R:
|
|
5
|
+
- species(): Define and manage species of interest for thermodynamic calculations
|
|
6
|
+
- Formation reaction calculations from basis species
|
|
7
|
+
- Species list management and validation
|
|
8
|
+
|
|
9
|
+
Author: CHNOSZ Python port
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import numpy as np
|
|
14
|
+
from typing import Union, List, Optional, Dict, Any, Tuple
|
|
15
|
+
import warnings
|
|
16
|
+
|
|
17
|
+
from .thermo import thermo
|
|
18
|
+
from .info import info, find_species
|
|
19
|
+
from .basis import get_basis, is_basis_defined
|
|
20
|
+
from ..utils.formula import makeup, species_basis
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SpeciesError(Exception):
    """Error type used by this module for species lookup, update and deletion failures."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def species(species: Optional[Union[str, int, List[Union[str, int]], pd.Series]] = None,
            state: Optional[Union[str, List[str]]] = None,
            delete: bool = False,
            add: bool = False,
            index_return: bool = False,
            global_state: bool = True,
            basis: Optional[pd.DataFrame] = None,
            messages: bool = True) -> Optional[Union[pd.DataFrame, List[int]]]:
    """
    Define species of interest for thermodynamic calculations.

    Parameters
    ----------
    species : str, int, list, tuple, np.ndarray, pd.Series, or None
        Species name(s), formula(s), or index(es).
        Can also be a pandas Series (e.g., from retrieve()); tuples and
        NumPy arrays are converted to lists in the same way.
        If None, returns current species definition.
    state : str, list of str, or None
        Physical state(s) for the species. If the first entry can be
        converted to a float, all entries are treated as log activities
        instead of states.
    delete : bool, default False
        If True, delete species (all if species is None)
    add : bool, default False
        If True, add to existing species instead of replacing
    index_return : bool, default False
        If True, return species indices instead of DataFrame
    global_state : bool, default True
        If True, store species in global thermo().species (default behavior)
        If False, return species definition without storing globally (local state)
    basis : pd.DataFrame, optional
        Basis species definition to use (if not using global basis)
        Required when global_state=False and basis is not defined globally
    messages : bool, default True
        If True, print informational messages

    Returns
    -------
    pd.DataFrame, list of int, or None
        Species definition DataFrame or indices, or None if deleted

    Raises
    ------
    SpeciesError
        If 'species' is NA, if no species are given and none are defined,
        if an empty collection of species is given, or if one of the helper
        functions cannot find or update a species.

    Examples
    --------
    >>> # Define species of interest
    >>> species(["CO2", "HCO3-", "CO3-2"])

    >>> # Add more species
    >>> species(["CH4", "C2H4"], add=True)

    >>> # Delete specific species
    >>> species(["CO2"], delete=True)

    >>> # Delete all species
    >>> species(delete=True)

    >>> # Use output from retrieve()
    >>> zn_species = retrieve("Zn", ["O", "H"], state="aq")
    >>> species(zn_species)
    """
    thermo_obj = thermo()

    # Normalise array-like input to a plain list of Python scalars so that the
    # isinstance(..., str/int) checks below see the elements, not the container.
    if isinstance(species, pd.Series):
        species = species.values.tolist()
    elif isinstance(species, np.ndarray):
        species = species.tolist()
    elif isinstance(species, tuple):
        species = list(species)

    # Handle NA species. An identity test against np.nan misses other NaN
    # objects (e.g. float('nan')), so test float scalars by value.
    if species is pd.NA or (isinstance(species, float) and np.isnan(species)):
        raise SpeciesError("'species' is NA")

    # Handle deletion
    if delete:
        return _delete_species(species, thermo_obj)

    # Return current species if no arguments
    if species is None and state is None:
        if index_return:
            if thermo_obj.species is not None:
                return list(range(1, len(thermo_obj.species) + 1))
            return []
        return thermo_obj.species

    # Use all species indices if species is None but state is given
    if species is None and thermo_obj.species is not None:
        species = list(range(1, len(thermo_obj.species) + 1))

    # Nothing to operate on: fail with a clear error instead of an
    # IndexError / ZeroDivisionError further down.
    if species is None:
        raise SpeciesError("no species definition exists")
    if isinstance(species, list) and not species:
        raise SpeciesError("no species specified")

    # Process state argument
    state = _process_state_argument(state)

    # Make species and state same length
    species, state = _match_argument_lengths(species, state)

    # Handle numeric state (treat as logact); ints and floats are covered by
    # the same float() conversion test as numeric strings.
    logact = None
    if state is not None and len(state) > 0 and _can_be_numeric(state[0]):
        logact = [float(s) for s in state]
        state = None

    # Handle species-state combinations for proteins
    if state is not None:
        species, state = _handle_protein_naming(species, state, thermo_obj)

    # Process species argument
    if isinstance(species[0], str):
        # Check if species are in current definition
        if thermo_obj.species is not None:
            existing_indices = _match_existing_species(species, thermo_obj.species)
            if all(idx is not None for idx in existing_indices) and logact is not None:
                # Update activities of existing species directly
                species_indices = [i + 1 for i in existing_indices]  # Convert to 1-based
                return _update_existing_species(species_indices, None, logact, index_return, thermo_obj)

        # Look up species in database
        iOBIGT = _lookup_species_indices(species, state, messages)
    else:
        # Handle numeric species
        if thermo_obj.species is not None:
            max_current = len(thermo_obj.species)
            if all(isinstance(s, int) and s <= max_current for s in species):
                # Referring to existing species
                return _update_existing_species(species, state, logact, index_return, thermo_obj)

        # Referring to OBIGT indices
        iOBIGT = species

    # Both branches above either returned or produced a list of OBIGT
    # indices, so a new definition is always created at this point.
    return _create_species_definition(iOBIGT, state, logact, add, index_return,
                                      thermo_obj, global_state, basis)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _delete_species(species: Optional[Union[str, int, List]], thermo_obj) -> Optional[pd.DataFrame]:
    """
    Remove entries from ``thermo_obj.species``.

    ``species=None`` clears the whole definition. Otherwise each entry is
    either a name (matched against the 'name' column) or a 1-based row
    number; entries that match nothing produce a warning. The remaining
    definition is re-indexed from 0 and returned, or None when nothing is
    left.

    Raises
    ------
    SpeciesError
        If specific species are requested but no definition exists.
    """
    if species is None:
        # Clearing everything never needs an existing definition.
        thermo_obj.species = None
        return None

    if thermo_obj.species is None:
        raise SpeciesError("nonexistent species definition")

    requested = species if isinstance(species, list) else [species]

    doomed = set()
    for item in requested:
        found = []
        if isinstance(item, str):
            by_name = thermo_obj.species['name'] == item
            found = thermo_obj.species[by_name].index.tolist()
        elif isinstance(item, int) and 1 <= item <= len(thermo_obj.species):
            found = [item - 1]  # 1-based row number -> 0-based label

        if not found:
            warnings.warn(f"species: {item} not present, so cannot be deleted")
            continue
        doomed.update(found)

    if doomed:
        remaining = thermo_obj.species.drop(sorted(doomed))
        thermo_obj.species = remaining.reset_index(drop=True)
        if len(thermo_obj.species) == 0:
            # An empty definition is represented as None, not an empty frame.
            thermo_obj.species = None

    return thermo_obj.species
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _process_state_argument(state) -> Optional[List]:
    """
    Normalise the ``state`` argument to a list (or None).

    None is passed through, lists and tuples are copied into a new list, and
    any other value (including a single string) is wrapped in a one-element
    list.
    """
    if state is None:
        return None
    if isinstance(state, (list, tuple)):
        return list(state)
    # Strings and every other scalar become a single-element list.
    return [state]
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _match_argument_lengths(species, state) -> Tuple[List, Optional[List]]:
    """
    Ensure species and state arguments have compatible lengths.

    ``species`` is turned into a list (a tuple is converted element-wise,
    anything else that is not a list is wrapped). When ``state`` is given,
    the shorter of the two lists is recycled from its start until it is as
    long as the other one.

    If either list is empty there is nothing to recycle, so both are
    returned unchanged (previously this raised ZeroDivisionError).

    Returns
    -------
    tuple
        ``(species, state)`` where ``species`` is a list and ``state`` is a
        list or None.
    """
    if isinstance(species, tuple):
        # Treat tuples like lists, as is already done for 'state'.
        species = list(species)
    elif not isinstance(species, list):
        species = [species]

    if state is None or not species or not state:
        return species, state

    n_species = len(species)
    n_state = len(state)

    if n_species > n_state:
        # Extend state to match species length
        state = (state * (n_species // n_state + 1))[:n_species]
    elif n_state > n_species:
        # Extend species to match state length
        species = (species * (n_state // n_species + 1))[:n_state]

    return species, state
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _can_be_numeric(value) -> bool:
    """Return True when ``float(value)`` succeeds, False on ValueError or TypeError."""
    try:
        float(value)
    except (ValueError, TypeError):
        return False
    else:
        return True
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _handle_protein_naming(species: List, state: List, thermo_obj) -> Tuple[List, List]:
    """
    Interpret unrecognised ``state`` entries as protein organism suffixes.

    If every entry of ``state`` is either a state present in
    ``thermo_obj.obigt['state']`` or convertible to a number, the arguments
    are returned unchanged. Otherwise each species whose name has no
    underscore and whose paired state is non-numeric is renamed to
    ``"<species>_<state>"``, and every state is replaced by the 'state'
    option of ``thermo_obj`` (falling back to 'aq').
    """
    if state is None:
        return species, state

    # States that occur in the database
    known_states = thermo_obj.obigt['state'].unique().tolist()

    def _looks_like_state(candidate) -> bool:
        return candidate in known_states or _can_be_numeric(candidate)

    if all(_looks_like_state(entry) for entry in state):
        return species, state

    # At least one entry is not a real state: build species_organism names.
    renamed = [
        f"{sp}_{st}" if '_' not in str(sp) and not _can_be_numeric(st) else sp
        for sp, st in zip(species, state)
    ]

    fallback_state = thermo_obj.get_option('state', 'aq')
    return renamed, [fallback_state] * len(species)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _match_existing_species(species: List[str], species_df: pd.DataFrame) -> List[Optional[int]]:
    """
    Look up each name in the 'name' column of ``species_df``.

    Returns one entry per requested name: the index label of the first
    matching row, or None when the name does not occur.
    """
    def _first_label(wanted: str) -> Optional[int]:
        labels = species_df.loc[species_df['name'] == wanted].index.tolist()
        return labels[0] if labels else None

    return [_first_label(entry) for entry in species]
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _lookup_species_indices(species: List[str], state: Optional[List[str]], messages: bool = True) -> List[int]:
    """
    Look up species indices in the OBIGT database.

    Each species is passed to ``info()`` together with the state at the same
    position (or None when ``state`` is missing or too short).

    Parameters
    ----------
    species : list of str
        Names or formulas to look up.
    state : list of str or None
        States paired by position with ``species``.
    messages : bool, default True
        Forwarded to ``info()``.

    Returns
    -------
    list
        The values returned by ``info()``, in input order.

    Raises
    ------
    SpeciesError
        If ``info()`` raises or returns NA for a species. When ``info()``
        raised, the original exception is attached as ``__cause__``.
    """
    iOBIGT = []

    for i, sp in enumerate(species):
        sp_state = state[i] if state and i < len(state) else None

        try:
            # Use info function to find species
            idx = info(sp, sp_state, messages=messages)
            # Evaluated inside the try so that a non-scalar result (ambiguous
            # truth value) is still reported as "not available".
            found = not pd.isna(idx)
        except Exception as exc:
            # Chain explicitly so the underlying lookup failure stays visible.
            raise SpeciesError(f"species not available: {sp}") from exc

        if not found:
            raise SpeciesError(f"species not available: {sp}")
        iOBIGT.append(idx)

    return iOBIGT
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _update_existing_species(species: List[int], state, logact, index_return: bool,
                             thermo_obj) -> Union[pd.DataFrame, List[int]]:
    """
    Change log activities and/or states of species already in the definition.

    Parameters
    ----------
    species : list of int
        1-based row numbers into ``thermo_obj.species``.
    state : list or None
        New states, paired by position with ``species``.
    logact : list or None
        New log activities, paired by position with ``species``.
    index_return : bool
        If True, return the 1-based row numbers instead of a DataFrame.
    thermo_obj
        Object holding the ``species`` DataFrame.

    Returns
    -------
    pd.DataFrame or list of int
        With no updates requested: the selected rows (or their numbers).
        After an update: the full species definition (or the numbers).

    Raises
    ------
    SpeciesError
        If no definition exists or an entry is not an int in range.
    """
    if thermo_obj.species is None:
        raise SpeciesError("no species definition exists")

    # Validate and convert to 0-based positions
    n_defined = len(thermo_obj.species)
    rows = []
    for sp in species:
        if not (isinstance(sp, int) and 1 <= sp <= n_defined):
            raise SpeciesError(f"invalid species index: {sp}")
        rows.append(sp - 1)

    one_based = [row + 1 for row in rows]

    # Pure query: nothing to modify
    if state is None and logact is None:
        return one_based if index_return else thermo_obj.species.iloc[rows]

    if logact is not None:
        # zip stops at the shorter sequence, so surplus species keep their value
        for row, value in zip(rows, logact):
            thermo_obj.species.loc[row, 'logact'] = value

    if state is not None:
        _update_species_states(rows, state, thermo_obj)

    # Full definition is returned after an update, like R CHNOSZ
    return one_based if index_return else thermo_obj.species
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _update_species_states(species_indices: List[int], states: List[str], thermo_obj) -> None:
    """
    Switch species in ``thermo_obj.species`` to a different state.

    ``species_indices`` are 0-based rows, paired by position with ``states``;
    rows without a corresponding state are left alone. For each pair the
    species is searched in the new state with ``find_species``: by name, and
    for names without an underscore also by formula if the name lookup raises
    ValueError. On success the 'ispecies', 'state' and 'name' columns of the
    row are rewritten; on ValueError a UserWarning is issued and the row is
    left unchanged.
    """
    for row, target_state in zip(species_indices, states):
        record = thermo_obj.species.iloc[row]
        name = record['name']
        # 'ispecies' is a 1-based OBIGT index
        formula = thermo_obj.obigt.iloc[record['ispecies'] - 1]['formula']

        try:
            try:
                replacement = find_species(name, target_state)
            except ValueError:
                if '_' in name:  # Protein: no formula fallback
                    raise
                replacement = find_species(formula, target_state)

            # Update species data
            thermo_obj.species.loc[row, 'ispecies'] = replacement
            thermo_obj.species.loc[row, 'state'] = target_state
            thermo_obj.species.loc[row, 'name'] = thermo_obj.obigt.iloc[replacement - 1]['name']

        except ValueError:
            warnings.warn(f"can't update state of species {row+1} to {target_state}",
                          category=UserWarning)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _create_species_definition(iOBIGT: List[int], state, logact, add: bool,
                               index_return: bool, thermo_obj, global_state: bool = True,
                               basis_df: Optional[pd.DataFrame] = None) -> Union[pd.DataFrame, List[int]]:
    """
    Create new species definition from OBIGT indices.

    Parameters
    ----------
    iOBIGT : list of int
        1-based row numbers into ``thermo_obj.obigt``.
    state
        Accepted for signature compatibility; not used here (states are read
        from the OBIGT rows).
    logact : list or None
        Log activities. If None, -3.0 is used for 'aq' species and 0.0 for
        all others.
    add : bool
        If True and a definition already exists, append only species whose
        'ispecies' is not yet present; otherwise replace the definition.
    index_return : bool
        If True, return 1-based row numbers instead of a DataFrame.
    thermo_obj
        Object holding ``obigt`` and the ``species`` definition.
    global_state : bool, default True
        If False, the new definition is returned without being stored.
    basis_df : pd.DataFrame, optional
        Basis definition; taken from ``get_basis()`` when omitted.

    Returns
    -------
    pd.DataFrame or list of int
        The species definition, or the 1-based row numbers (plain Python
        ints) of the requested species within it.

    Raises
    ------
    SpeciesError
        If no basis is supplied and none is defined globally.
    """
    # Use provided basis or get from global state
    if basis_df is None:
        if not is_basis_defined():
            raise SpeciesError("basis species are not defined")
        basis_df = get_basis()

    # Calculate formation reactions with the provided basis
    formation_coeffs = species_basis(iOBIGT, basis_df=basis_df)

    # Get species information
    obigt_rows = [thermo_obj.obigt.iloc[idx - 1] for idx in iOBIGT]  # 1-based -> 0-based
    species_states = [row['state'] for row in obigt_rows]
    species_names = [row['name'] for row in obigt_rows]

    # Set default log activities
    if logact is None:
        logact = [-3.0 if state_val == 'aq' else 0.0 for state_val in species_states]

    # Build stoichiometric part: one column per basis species
    stoich_data = {
        formula: formation_coeffs[:, i]
        for i, formula in enumerate(basis_df.index.tolist())
    }

    # Add other columns
    new_data = pd.DataFrame(stoich_data)
    new_data['ispecies'] = iOBIGT
    new_data['logact'] = logact
    new_data['state'] = species_states
    new_data['name'] = species_names

    if not global_state:
        # Local state - just return the dataframe
        if index_return:
            return list(range(1, len(new_data) + 1))
        return new_data

    if thermo_obj.species is None or not add:
        # Create new or replace existing
        thermo_obj.species = new_data
        species_indices = list(range(len(new_data)))
    else:
        # Add to existing - skip species that are already defined
        existing_indices = set(thermo_obj.species['ispecies'].tolist())
        rows_to_add = [
            new_data.iloc[i]
            for i, idx in enumerate(iOBIGT)
            if idx not in existing_indices
        ]

        if rows_to_add:
            thermo_obj.species = pd.concat([thermo_obj.species, pd.DataFrame(rows_to_add)],
                                           ignore_index=True)

        # Find all species indices (including existing ones). Index labels of
        # a filtered frame are NumPy integers; convert to plain int so the
        # result passes isinstance(..., int) checks when handed back to
        # species().
        species_indices = [
            int(thermo_obj.species[thermo_obj.species['ispecies'] == idx].index[0])
            for idx in iOBIGT
        ]

    # Reset index to ensure continuous numbering
    if thermo_obj.species is not None:
        thermo_obj.species.reset_index(drop=True, inplace=True)

    if index_return:
        return [i + 1 for i in species_indices]  # Convert to 1-based
    return thermo_obj.species
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
# Convenience functions
|
|
473
|
+
def get_species() -> Optional[pd.DataFrame]:
    """
    Return the species definition stored on the global thermo() object.

    Returns
    -------
    pd.DataFrame or None
        The value of ``thermo().species``
    """
    current = thermo()
    return current.species
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def is_species_defined() -> bool:
    """
    Report whether a species definition is present.

    Returns
    -------
    bool
        False when ``thermo().species`` is None, True otherwise
    """
    definition = thermo().species
    return definition is not None
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def n_species() -> int:
    """
    Count the rows of the current species definition.

    Returns
    -------
    int
        Number of defined species (0 when there is no definition)
    """
    species_df = get_species()
    if species_df is None:
        return 0
    return len(species_df)
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def species_names() -> List[str]:
    """
    List the 'name' column of the current species definition.

    Returns
    -------
    list of str
        Species names (empty when there is no definition)
    """
    species_df = get_species()
    if species_df is None:
        return []
    return species_df['name'].tolist()
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def species_formulas() -> List[str]:
    """
    List the formulas of the currently defined species.

    Each 'ispecies' value of the definition is used as a 1-based row number
    into ``thermo().obigt`` and the 'formula' entry of that row is collected.

    Returns
    -------
    list of str
        Species formulas (empty when species or OBIGT data are missing)
    """
    thermo_obj = thermo()
    species_df = get_species()

    if species_df is None or thermo_obj.obigt is None:
        return []

    return [
        thermo_obj.obigt.iloc[ispecies - 1]['formula']
        for ispecies in species_df['ispecies']
    ]
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def species_states() -> List[str]:
    """
    List the 'state' column of the current species definition.

    Returns
    -------
    list of str
        Species states (empty when there is no definition)
    """
    species_df = get_species()
    if species_df is None:
        return []
    return species_df['state'].tolist()
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def find_species_index(name: str) -> int:
    """
    Locate a species by name in the current definition.

    Parameters
    ----------
    name : str
        Value to match against the 'name' column

    Returns
    -------
    int
        1-based position of the first matching row

    Raises
    ------
    SpeciesError
        If there is no species definition or no row has this name
    """
    species_df = get_species()
    if species_df is None:
        raise SpeciesError("no species definition exists")

    hits = species_df.loc[species_df['name'] == name].index.tolist()
    if hits:
        return hits[0] + 1  # 0-based label -> 1-based position

    raise SpeciesError(f"species '{name}' not found in current definition")
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
# Export main functions: the names listed here are what
# ``from <this module> import *`` exposes; the underscore-prefixed helpers
# defined above are intentionally left out.
__all__ = [
    'species', 'get_species', 'is_species_defined', 'n_species',
    'species_names', 'species_formulas', 'species_states',
    'find_species_index', 'SpeciesError'
]
|