pychnosz 1.1.1__cp311-cp311-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. pychnosz/.dylibs/libgcc_s.1.1.dylib +0 -0
  2. pychnosz/.dylibs/libgfortran.5.dylib +0 -0
  3. pychnosz/.dylibs/libquadmath.0.dylib +0 -0
  4. pychnosz/__init__.py +129 -0
  5. pychnosz/biomolecules/__init__.py +29 -0
  6. pychnosz/biomolecules/ionize_aa.py +197 -0
  7. pychnosz/biomolecules/proteins.py +595 -0
  8. pychnosz/core/__init__.py +46 -0
  9. pychnosz/core/affinity.py +1256 -0
  10. pychnosz/core/animation.py +593 -0
  11. pychnosz/core/balance.py +334 -0
  12. pychnosz/core/basis.py +716 -0
  13. pychnosz/core/diagram.py +3336 -0
  14. pychnosz/core/equilibrate.py +813 -0
  15. pychnosz/core/equilibrium.py +554 -0
  16. pychnosz/core/info.py +821 -0
  17. pychnosz/core/retrieve.py +364 -0
  18. pychnosz/core/speciation.py +580 -0
  19. pychnosz/core/species.py +599 -0
  20. pychnosz/core/subcrt.py +1700 -0
  21. pychnosz/core/thermo.py +593 -0
  22. pychnosz/core/unicurve.py +1226 -0
  23. pychnosz/data/__init__.py +11 -0
  24. pychnosz/data/add_obigt.py +327 -0
  25. pychnosz/data/extdata/Berman/BDat17_2017.csv +2 -0
  26. pychnosz/data/extdata/Berman/Ber88_1988.csv +68 -0
  27. pychnosz/data/extdata/Berman/Ber90_1990.csv +5 -0
  28. pychnosz/data/extdata/Berman/DS10_2010.csv +6 -0
  29. pychnosz/data/extdata/Berman/FDM+14_2014.csv +2 -0
  30. pychnosz/data/extdata/Berman/Got04_2004.csv +5 -0
  31. pychnosz/data/extdata/Berman/JUN92_1992.csv +3 -0
  32. pychnosz/data/extdata/Berman/SHD91_1991.csv +12 -0
  33. pychnosz/data/extdata/Berman/VGT92_1992.csv +2 -0
  34. pychnosz/data/extdata/Berman/VPT01_2001.csv +3 -0
  35. pychnosz/data/extdata/Berman/VPV05_2005.csv +2 -0
  36. pychnosz/data/extdata/Berman/ZS92_1992.csv +11 -0
  37. pychnosz/data/extdata/Berman/sympy.R +99 -0
  38. pychnosz/data/extdata/Berman/testing/BA96.bib +12 -0
  39. pychnosz/data/extdata/Berman/testing/BA96_Berman.csv +21 -0
  40. pychnosz/data/extdata/Berman/testing/BA96_OBIGT.csv +21 -0
  41. pychnosz/data/extdata/Berman/testing/BA96_refs.csv +6 -0
  42. pychnosz/data/extdata/OBIGT/AD.csv +25 -0
  43. pychnosz/data/extdata/OBIGT/Berman_cr.csv +93 -0
  44. pychnosz/data/extdata/OBIGT/DEW.csv +211 -0
  45. pychnosz/data/extdata/OBIGT/H2O_aq.csv +4 -0
  46. pychnosz/data/extdata/OBIGT/SLOP98.csv +411 -0
  47. pychnosz/data/extdata/OBIGT/SUPCRT92.csv +178 -0
  48. pychnosz/data/extdata/OBIGT/inorganic_aq.csv +729 -0
  49. pychnosz/data/extdata/OBIGT/inorganic_cr.csv +273 -0
  50. pychnosz/data/extdata/OBIGT/inorganic_gas.csv +20 -0
  51. pychnosz/data/extdata/OBIGT/organic_aq.csv +1104 -0
  52. pychnosz/data/extdata/OBIGT/organic_cr.csv +481 -0
  53. pychnosz/data/extdata/OBIGT/organic_gas.csv +268 -0
  54. pychnosz/data/extdata/OBIGT/organic_liq.csv +533 -0
  55. pychnosz/data/extdata/OBIGT/testing/GEMSFIT.csv +43 -0
  56. pychnosz/data/extdata/OBIGT/testing/IGEM.csv +17 -0
  57. pychnosz/data/extdata/OBIGT/testing/Sandia.csv +8 -0
  58. pychnosz/data/extdata/OBIGT/testing/SiO2.csv +4 -0
  59. pychnosz/data/extdata/misc/AD03_Fig1a.csv +69 -0
  60. pychnosz/data/extdata/misc/AD03_Fig1b.csv +43 -0
  61. pychnosz/data/extdata/misc/AD03_Fig1c.csv +89 -0
  62. pychnosz/data/extdata/misc/AD03_Fig1d.csv +30 -0
  63. pychnosz/data/extdata/misc/BZA10.csv +5 -0
  64. pychnosz/data/extdata/misc/HW97_Cp.csv +90 -0
  65. pychnosz/data/extdata/misc/HWM96_V.csv +229 -0
  66. pychnosz/data/extdata/misc/LA19_test.csv +7 -0
  67. pychnosz/data/extdata/misc/Mer75_Table4.csv +42 -0
  68. pychnosz/data/extdata/misc/OBIGT_check.csv +423 -0
  69. pychnosz/data/extdata/misc/PM90.csv +7 -0
  70. pychnosz/data/extdata/misc/RH95.csv +23 -0
  71. pychnosz/data/extdata/misc/RH98_Table15.csv +17 -0
  72. pychnosz/data/extdata/misc/SC10_Rainbow.csv +19 -0
  73. pychnosz/data/extdata/misc/SK95.csv +55 -0
  74. pychnosz/data/extdata/misc/SOJSH.csv +61 -0
  75. pychnosz/data/extdata/misc/SS98_Fig5a.csv +81 -0
  76. pychnosz/data/extdata/misc/SS98_Fig5b.csv +84 -0
  77. pychnosz/data/extdata/misc/TKSS14_Fig2.csv +25 -0
  78. pychnosz/data/extdata/misc/bluered.txt +1000 -0
  79. pychnosz/data/extdata/protein/Cas/Cas_aa.csv +177 -0
  80. pychnosz/data/extdata/protein/Cas/Cas_uniprot.csv +186 -0
  81. pychnosz/data/extdata/protein/Cas/download.R +34 -0
  82. pychnosz/data/extdata/protein/Cas/mkaa.R +34 -0
  83. pychnosz/data/extdata/protein/POLG.csv +12 -0
  84. pychnosz/data/extdata/protein/TBD+05.csv +393 -0
  85. pychnosz/data/extdata/protein/TBD+05_aa.csv +393 -0
  86. pychnosz/data/extdata/protein/rubisco.csv +28 -0
  87. pychnosz/data/extdata/protein/rubisco.fasta +239 -0
  88. pychnosz/data/extdata/protein/rubisco_aa.csv +28 -0
  89. pychnosz/data/extdata/src/H2O92D.f.orig +3457 -0
  90. pychnosz/data/extdata/src/README.txt +5 -0
  91. pychnosz/data/extdata/taxonomy/names.dmp +215 -0
  92. pychnosz/data/extdata/taxonomy/nodes.dmp +63 -0
  93. pychnosz/data/extdata/thermo/Bdot_acirc.csv +60 -0
  94. pychnosz/data/extdata/thermo/buffer.csv +40 -0
  95. pychnosz/data/extdata/thermo/element.csv +135 -0
  96. pychnosz/data/extdata/thermo/groups.csv +6 -0
  97. pychnosz/data/extdata/thermo/opt.csv +2 -0
  98. pychnosz/data/extdata/thermo/protein.csv +506 -0
  99. pychnosz/data/extdata/thermo/refs.csv +343 -0
  100. pychnosz/data/extdata/thermo/stoich.csv.xz +0 -0
  101. pychnosz/data/loader.py +431 -0
  102. pychnosz/data/mod_obigt.py +322 -0
  103. pychnosz/data/obigt.py +471 -0
  104. pychnosz/data/worm.py +228 -0
  105. pychnosz/fortran/__init__.py +16 -0
  106. pychnosz/fortran/h2o92.dylib +0 -0
  107. pychnosz/fortran/h2o92_interface.py +527 -0
  108. pychnosz/geochemistry/__init__.py +21 -0
  109. pychnosz/geochemistry/minerals.py +514 -0
  110. pychnosz/geochemistry/redox.py +500 -0
  111. pychnosz/models/__init__.py +47 -0
  112. pychnosz/models/archer_wang.py +165 -0
  113. pychnosz/models/berman.py +309 -0
  114. pychnosz/models/cgl.py +381 -0
  115. pychnosz/models/dew.py +997 -0
  116. pychnosz/models/hkf.py +523 -0
  117. pychnosz/models/hkf_helpers.py +222 -0
  118. pychnosz/models/iapws95.py +1113 -0
  119. pychnosz/models/supcrt92_fortran.py +238 -0
  120. pychnosz/models/water.py +480 -0
  121. pychnosz/utils/__init__.py +27 -0
  122. pychnosz/utils/expression.py +1074 -0
  123. pychnosz/utils/formula.py +830 -0
  124. pychnosz/utils/formula_ox.py +227 -0
  125. pychnosz/utils/reset.py +33 -0
  126. pychnosz/utils/units.py +259 -0
  127. pychnosz-1.1.1.dist-info/METADATA +197 -0
  128. pychnosz-1.1.1.dist-info/RECORD +131 -0
  129. pychnosz-1.1.1.dist-info/WHEEL +5 -0
  130. pychnosz-1.1.1.dist-info/licenses/LICENSE.txt +19 -0
  131. pychnosz-1.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,364 @@
1
+ """
2
+ Species retrieval by element composition.
3
+
4
+ This module provides Python equivalents of the R functions in retrieve.R:
5
+ - retrieve(): Retrieve species containing specified elements
6
+
7
+ Author: CHNOSZ Python port
8
+ """
9
+
10
+ import pandas as pd
11
+ import numpy as np
12
+ from typing import Union, List, Tuple, Optional, Dict, Any
13
+ import warnings
14
+ import re
15
+
16
+ from .thermo import thermo
17
+ from ..utils.formula import makeup, i2A
18
+
19
+
20
def retrieve(elements: Optional[Union[str, List[str], Tuple[str]]] = None,
             ligands: Optional[Union[str, List[str], Tuple[str]]] = None,
             state: Optional[Union[str, List[str], Tuple[str]]] = None,
             T: Optional[Union[float, List[float]]] = None,
             P: Union[str, float, List[float]] = "Psat",
             add_charge: bool = True,
             hide_groups: bool = True,
             messages: bool = True) -> pd.Series:
    """
    Retrieve species containing specified elements.

    Parameters
    ----------
    elements : str, list of str, or tuple of str, optional
        Elements in a chemical system.

        - str: species containing that element,
          e.g. `retrieve("Au")`.
        - list: species that contain *all* of the listed elements,
          e.g. `retrieve(["Au", "Cl"])`.
        - tuple: a chemical system, i.e. species that contain one or more of
          the listed elements and no other element,
          e.g. `retrieve(("Au", "Cl"))`. Charge ("Z") is added to the system
          automatically unless `add_charge` is False.

        The special name "all" selects every species in OBIGT.
        `None` or an empty list returns an empty Series.

    ligands : str, list of str, or tuple of str, optional
        Elements present in any ligands.

        - If `ligands` is one of the strings 'cr', 'liq', 'gas' or 'aq', it is
          used as the state filter (it replaces `state`).
        - Otherwise the result is the intersection of `retrieve(elements)`
          with the chemical system made of `elements` plus `ligands`.

    state : str, list of str, or tuple of str, optional
        Keep only species in these state(s) ('aq', 'cr', 'gas', 'liq').

    T : float or list of float, optional
        Temperature(s) forwarded to `subcrt`; species whose first returned
        Gibbs energy value is NA are dropped. If the `subcrt` call or the
        processing of its result fails, no species are dropped.

    P : str, float, or list of float, default "Psat"
        Pressure forwarded to `subcrt` together with `T`.

    add_charge : bool, default True
        For chemical systems (tuple input), automatically include charge (Z).

    hide_groups : bool, default True
        Exclude group species (names fully enclosed in brackets, like [CH2]).

    messages : bool, default True
        Forwarded to the stoichiometric-matrix cache, which prints a message
        when the matrix is rebuilt.

    Returns
    -------
    pd.Series
        Values are the index labels of the matching rows of
        `thermo().obigt`; the Series index holds the chemical formulas
        ('e-' is used in place of '(Z-1)'). An empty integer Series is
        returned when nothing matches.

    Raises
    ------
    ValueError
        If an entry of `elements` or `ligands` (other than "all" or a state
        name given as `ligands`) is not a column of the stoichiometric matrix.

    Examples
    --------
    >>> # All species containing Au
    >>> retrieve("Au")

    >>> # All species that have both Au and Cl
    >>> retrieve(["Au", "Cl"])

    >>> # Au-Cl system: species with Au and/or Cl, including charged species
    >>> retrieve(("Au", "Cl"))

    >>> # All Au-bearing species in the Au-Cl system
    >>> retrieve("Au", ("Cl",))

    >>> # All uncharged Au-bearing species in the Au-Cl system
    >>> retrieve("Au", ("Cl",), add_charge=False)

    >>> # Minerals in the system SiO2-MgO-CaO-CO2
    >>> retrieve(("Si", "Mg", "Ca", "C", "O"), state="cr")

    Notes
    -----
    The returned values are index *labels* of the OBIGT table (use them with
    `.loc[]`), not positions. Because the Series index holds formula strings,
    use `.iloc[]` for positional access to the result.
    """
    # Nothing requested (None or an empty list) -> nothing retrieved.
    if elements is None or (isinstance(elements, list) and not elements):
        return _retrieve_empty()

    thermo_obj = thermo()

    # Initialize database if needed
    if not thermo_obj.is_initialized():
        thermo_obj.reset()

    stoich = _get_or_update_stoich(thermo_obj, messages=messages)
    obigt = thermo_obj.obigt

    # A single state name passed as `ligands` is shorthand for `state=...`.
    # It must be recognised *before* validation, otherwise it is rejected
    # as an unknown element and the shorthand can never be used.
    ligands_is_state = isinstance(ligands, str) and ligands in ('cr', 'liq', 'gas', 'aq')

    ## Generate error for missing element(s)
    requested = _retrieve_as_list(elements)
    if ligands is not None and not ligands_is_state:
        requested.extend(_retrieve_as_list(ligands))
    known = set(stoich.columns)
    not_present = [elem for elem in requested if elem != "all" and elem not in known]
    if not_present:
        if len(not_present) == 1:
            raise ValueError(f'"{not_present[0]}" is not an element that is present in any species in the database')
        joined = ", ".join(not_present)
        raise ValueError(f'"{joined}" are not elements that are present in any species in the database')

    ## Species identification
    # Recursive calls use hide_groups=False: the group filter is applied once
    # below, so the caller's `hide_groups` choice is honoured.
    if ligands_is_state:
        state = ligands
        # np.asarray: keep plain labels; passing a Series on to the final
        # pd.Series(..., index=...) would reindex instead of relabel.
        ispecies = np.asarray(
            retrieve(elements, add_charge=add_charge, hide_groups=False, messages=messages)
        )
    elif ligands is not None:
        # The system is defined by the elements together with the ligands
        combined = tuple(_retrieve_as_list(elements)) + tuple(_retrieve_as_list(ligands))
        r1 = retrieve(elements, add_charge=add_charge, hide_groups=False, messages=messages)
        r2 = retrieve(combined, add_charge=add_charge, hide_groups=False, messages=messages)
        ispecies = np.intersect1d(r1, r2)
    else:
        ispecies = _retrieve_match_elements(elements, stoich, obigt.index.to_numpy(), add_charge)

    # Exclude groups
    if hide_groups and len(ispecies):
        group_pattern = re.compile(r'^\[.*\]$')
        names = obigt.loc[ispecies, 'name'].values
        # dtype=bool keeps the mask usable with `~` and as an index
        is_group = np.array([group_pattern.match(str(name)) is not None for name in names], dtype=bool)
        ispecies = ispecies[~is_group]

    # Filter on state
    if state is not None and len(ispecies):
        state_list = [state] if isinstance(state, str) else list(state)
        species_states = obigt.loc[ispecies, 'state'].values
        istate = np.array([s in state_list for s in species_states], dtype=bool)
        ispecies = ispecies[istate]

    # Require non-NA Delta G0 at specific temperature
    if T is not None and len(ispecies):
        ispecies = _retrieve_filter_on_gibbs(ispecies, T, P)

    # Return empty Series if nothing found
    if len(ispecies) == 0:
        return _retrieve_empty()

    # Label the result with formulas; use e- instead of (Z-1) for the electron
    formulas = obigt.loc[ispecies, 'formula'].values
    formulas = np.array([f if f != '(Z-1)' else 'e-' for f in formulas])

    return pd.Series(ispecies, index=formulas)


def _retrieve_empty() -> pd.Series:
    """Return the empty integer Series used for 'no species found'."""
    return pd.Series([], dtype=int)


def _retrieve_as_list(value) -> list:
    """Return a new list from a list/tuple, or wrap any other value in a list."""
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _retrieve_match_elements(elements, stoich, all_species, add_charge):
    """
    Return the index labels of `stoich` rows selected by `elements`.

    A tuple is treated as a chemical system (union of the per-element matches
    minus species containing any other element); a string or list requires
    every element to be present (intersection of the per-element matches).
    `all_species` is what the special element name "all" expands to.
    """
    is_system = isinstance(elements, tuple)
    wanted = [elements] if isinstance(elements, str) else list(elements)

    # Automatically add charge to a system
    if add_charge and is_system and "Z" not in wanted:
        wanted.append("Z")

    # One array of species labels per element; to_numpy() keeps the index
    # dtype even when no species matches.
    per_element = [
        all_species if element == "all"
        else stoich.index[(stoich[element] != 0).to_numpy()].to_numpy()
        for element in wanted
    ]
    if not per_element:
        return np.array([], dtype=int)

    if is_system:
        # All species that contain at least one requested element ...
        ispecies = np.unique(np.concatenate(per_element))
        # ... and none of the remaining elements
        other_columns = [col for col in stoich.columns if col not in wanted]
        if other_columns and len(ispecies):
            has_other = (stoich.loc[ispecies, other_columns] != 0).any(axis=1)
            ispecies = ispecies[~has_other.to_numpy(dtype=bool)]
        return ispecies

    # Species must be present in every per-element array
    ispecies = per_element[0]
    for arr in per_element[1:]:
        ispecies = np.intersect1d(ispecies, arr)
    return ispecies


def _retrieve_filter_on_gibbs(ispecies, T, P):
    """
    Drop species whose first Gibbs energy value from `subcrt` is NA.

    This is a best-effort filter: if `subcrt` raises or its result cannot be
    processed, `ispecies` is returned unchanged.
    """
    from .subcrt import subcrt

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            result = subcrt(ispecies.tolist(), T=T, P=P, messages=False, show=False)
            if result is None or 'out' not in result:
                return ispecies

            G_values = []
            for species_out in result['out']:
                if isinstance(species_out, dict) and 'G' in species_out:
                    G = species_out['G']
                    if isinstance(G, (list, np.ndarray)):
                        # Only the first value is inspected
                        G_values.append(G[0] if len(G) > 0 else np.nan)
                    else:
                        G_values.append(G)
                else:
                    G_values.append(np.nan)

            has_G = np.array([not pd.isna(g) for g in G_values], dtype=bool)
            return ispecies[has_G]
        except Exception:
            # Deliberate best effort: keep all species, but let
            # KeyboardInterrupt/SystemExit propagate.
            return ispecies
283
+
284
+
285
+ def _get_or_update_stoich(thermo_obj, messages: bool = True) -> pd.DataFrame:
286
+ """
287
+ Get or update the stoichiometric matrix.
288
+
289
+ This function manages the stoichiometric matrix cache, updating it
290
+ when the OBIGT database changes.
291
+
292
+ Parameters
293
+ ----------
294
+ thermo_obj : ThermoSystem
295
+ The thermodynamic system object
296
+ messages : bool, default True
297
+ Print informational messages about updating the stoichiometric matrix
298
+
299
+ Returns
300
+ -------
301
+ pd.DataFrame
302
+ Stoichiometric matrix with species indices as index and elements as columns
303
+ """
304
+ obigt = thermo_obj.obigt
305
+ if obigt is None:
306
+ raise RuntimeError("Thermodynamic database not initialized")
307
+
308
+ formula = obigt['formula']
309
+
310
+ # Check if we have a cached stoichiometric DataFrame
311
+ # We'll store it as a private attribute _stoich_df
312
+ if not hasattr(thermo_obj, '_stoich_df'):
313
+ thermo_obj._stoich_df = None
314
+ thermo_obj._stoich_df_formulas = None
315
+
316
+ stoich_df = thermo_obj._stoich_df
317
+ stoich_df_formulas = thermo_obj._stoich_df_formulas
318
+
319
+ # Check if stoichiometric matrix needs updating
320
+ if stoich_df is None or stoich_df_formulas is None or not np.array_equal(stoich_df_formulas, formula.values):
321
+ # Update needed
322
+ if messages:
323
+ print("retrieve: updating stoichiometric matrix")
324
+
325
+ # Calculate stoichiometry for all formulas
326
+ # Use makeup to get stoichiometric matrix
327
+ with warnings.catch_warnings():
328
+ warnings.simplefilter("ignore")
329
+
330
+ # Get makeup for all formulas
331
+ makeups = []
332
+ all_elements = set()
333
+
334
+ for f in formula:
335
+ try:
336
+ m = makeup(str(f))
337
+ if m is not None:
338
+ makeups.append(m)
339
+ all_elements.update(m.keys())
340
+ else:
341
+ makeups.append({})
342
+ except:
343
+ makeups.append({})
344
+
345
+ # Sort elements for consistent column order
346
+ all_elements = sorted(list(all_elements))
347
+
348
+ # Build stoichiometric matrix
349
+ stoich_data = []
350
+ for m in makeups:
351
+ row = [m.get(elem, 0) for elem in all_elements]
352
+ stoich_data.append(row)
353
+
354
+ # Create DataFrame with species indices as index (matching obigt.index)
355
+ stoich_df = pd.DataFrame(stoich_data, columns=all_elements, index=obigt.index)
356
+
357
+ # Store the stoichiometric matrix
358
+ thermo_obj._stoich_df = stoich_df
359
+ thermo_obj._stoich_df_formulas = formula.values.copy()
360
+
361
+ return stoich_df
362
+
363
+
364
+ __all__ = ['retrieve']