geometallurgy 0.4.13__py3-none-any.whl → 0.4.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elphick/geomet/__init__.py +11 -11
- elphick/geomet/base.py +1133 -1133
- elphick/geomet/block_model.py +319 -319
- elphick/geomet/config/__init__.py +1 -1
- elphick/geomet/config/config_read.py +39 -39
- elphick/geomet/config/flowsheet_example_partition.yaml +31 -31
- elphick/geomet/config/flowsheet_example_simple.yaml +25 -25
- elphick/geomet/config/mc_config.yml +35 -35
- elphick/geomet/data/downloader.py +39 -39
- elphick/geomet/data/register.csv +12 -12
- elphick/geomet/datasets/__init__.py +2 -2
- elphick/geomet/datasets/datasets.py +47 -47
- elphick/geomet/datasets/downloader.py +40 -40
- elphick/geomet/datasets/register.csv +12 -12
- elphick/geomet/datasets/sample_data.py +196 -196
- elphick/geomet/extras.py +35 -35
- elphick/geomet/flowsheet/__init__.py +1 -1
- elphick/geomet/flowsheet/flowsheet.py +1216 -1216
- elphick/geomet/flowsheet/loader.py +99 -99
- elphick/geomet/flowsheet/operation.py +256 -256
- elphick/geomet/flowsheet/stream.py +39 -39
- elphick/geomet/interval_sample.py +641 -641
- elphick/geomet/io.py +379 -379
- elphick/geomet/plot.py +147 -147
- elphick/geomet/sample.py +28 -28
- elphick/geomet/utils/amenability.py +49 -49
- elphick/geomet/utils/block_model_converter.py +93 -93
- elphick/geomet/utils/components.py +136 -136
- elphick/geomet/utils/data.py +49 -49
- elphick/geomet/utils/estimates.py +108 -108
- elphick/geomet/utils/interp.py +193 -193
- elphick/geomet/utils/interp2.py +134 -134
- elphick/geomet/utils/layout.py +72 -72
- elphick/geomet/utils/moisture.py +61 -61
- elphick/geomet/utils/pandas.py +378 -378
- elphick/geomet/utils/parallel.py +29 -29
- elphick/geomet/utils/partition.py +63 -63
- elphick/geomet/utils/size.py +51 -51
- elphick/geomet/utils/timer.py +80 -80
- elphick/geomet/utils/viz.py +56 -56
- elphick/geomet/validate.py.hide +176 -176
- {geometallurgy-0.4.13.dist-info → geometallurgy-0.4.15.dist-info}/LICENSE +21 -21
- {geometallurgy-0.4.13.dist-info → geometallurgy-0.4.15.dist-info}/METADATA +2 -3
- geometallurgy-0.4.15.dist-info/RECORD +48 -0
- {geometallurgy-0.4.13.dist-info → geometallurgy-0.4.15.dist-info}/WHEEL +1 -1
- elphick/geomet/utils/output.html +0 -617
- geometallurgy-0.4.13.dist-info/RECORD +0 -49
- {geometallurgy-0.4.13.dist-info → geometallurgy-0.4.15.dist-info}/entry_points.txt +0 -0
elphick/geomet/utils/pandas.py
CHANGED
|
@@ -1,378 +1,378 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Pandas utils
|
|
3
|
-
"""
|
|
4
|
-
import inspect
|
|
5
|
-
import logging
|
|
6
|
-
import tokenize
|
|
7
|
-
from io import StringIO
|
|
8
|
-
from token import STRING
|
|
9
|
-
from typing import List, Dict, Optional, Literal
|
|
10
|
-
|
|
11
|
-
import numpy as np
|
|
12
|
-
import pandas as pd
|
|
13
|
-
|
|
14
|
-
from elphick.geomet.utils.components import is_compositional, get_components
|
|
15
|
-
from elphick.geomet.utils.moisture import solve_mass_moisture, detect_moisture_column
|
|
16
|
-
from elphick.geomet.utils.size import mean_size
|
|
17
|
-
|
|
18
|
-
# Multiplier applied to a component-mass / dry-mass ratio to express it in each supported unit.
composition_factors: dict[str, float] = {'%': 100, 'ppm': 1e6, 'ppb': 1e9}
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def column_prefixes(columns: List[str]) -> Dict[str, List[str]]:
    """Group column names by the text before their first underscore.

    A prefix is only registered by a column that contains an underscore.  Once registered,
    every column whose first underscore-delimited part equals the prefix is listed under it
    (including a column that is exactly the prefix).  Prefixes keep first-seen order.

    Args:
        columns: The column names to group.

    Returns:
        A dict mapping each prefix to the matching column names, in input order.
    """
    grouped: Dict[str, List[str]] = {}

    # First pass: register the prefixes, preserving the order in which they appear.
    for name in columns:
        if '_' in name:
            grouped.setdefault(name.split('_')[0], [])

    # Second pass: attach every column to its (registered) prefix.
    for name in columns:
        head = name.split('_')[0]
        if head in grouped:
            grouped[head].append(name)

    return grouped
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def column_prefix_counts(columns: List[str]) -> Dict[str, int]:
    """Count the columns that share each prefix reported by ``column_prefixes``.

    Args:
        columns: The column names to analyse.

    Returns:
        A dict mapping each prefix to the number of columns grouped under it.
    """
    counts: Dict[str, int] = {}
    for prefix, members in column_prefixes(columns).items():
        counts[prefix] = len(members)
    return counts
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def mass_to_composition(df: pd.DataFrame,
                        mass_wet: Optional[str] = 'mass_wet',
                        mass_dry: str = 'mass_dry',
                        moisture_column_name: Optional[str] = None,
                        component_columns: Optional[list[str]] = None,
                        composition_units: Literal['%', 'ppm', 'ppb'] = '%') -> pd.DataFrame:
    """Express component masses as composition relative to dry mass.

    Columns that are neither mass nor component columns are not carried into the result.

    Args:
        df: Frame holding the dry mass column, optionally the wet mass column, and component masses.
        mass_wet: Name of the wet mass column.  Wet mass and moisture are only returned when this
            name is given and present in ``df``.
        mass_dry: Name of the dry mass column (required); it is the divisor for every component.
        moisture_column_name: Name given to the returned moisture column.  When None, the name is
            whatever ``prepare_columns`` resolves from ``df``.
        component_columns: Component mass columns to convert.  When None they are resolved by
            ``prepare_columns``.
        composition_units: Key into ``composition_factors`` selecting the output unit.

    Returns:
        A DataFrame of the mass column(s), then moisture (only with wet mass), then composition.
        Ratios that are undefined (e.g. zero dry mass) are reported as 0.0.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    wet_available: bool = bool(mass_wet) and mass_wet in df.columns
    mass: pd.DataFrame = df[[mass_wet, mass_dry]] if wet_available else df[[mass_dry]]

    # Zero dry mass is swapped for NaN so the division yields NaN (not inf), which is then zero-filled.
    dry_divisor: pd.Series = mass[mass_dry].replace(0.0, np.nan)
    fractions: pd.DataFrame = df[component_cols].div(dry_divisor, axis=0).fillna(0.0)
    composition: pd.DataFrame = fractions * composition_factors[composition_units]

    pieces: list = [mass]
    if wet_available:
        pieces.append(solve_mass_moisture(mass_wet=mass[mass_wet], mass_dry=mass[mass_dry]).rename(
            moisture_column_name))
    pieces.append(composition)
    return pd.concat(pieces, axis='columns')
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def composition_to_mass(df: pd.DataFrame,
                        mass_wet: Optional[str] = None,
                        mass_dry: str = 'mass_dry',
                        component_columns: Optional[list[str]] = None,
                        moisture_column_name: Optional[str] = None,
                        composition_units: Literal['%', 'ppm', 'ppb'] = '%',
                        return_moisture: bool = False) -> pd.DataFrame:
    """Express composition as component mass using the dry mass.

    Columns that are neither mass nor component columns are not carried into the result.

    Args:
        df: Frame holding the dry mass column, optionally the wet mass column, and composition.
        mass_wet: Name of the wet mass column, optional.  Wet mass is only returned when this
            name is given and present in ``df``.
        mass_dry: Name of the dry mass column (required); every component is multiplied by it.
        component_columns: Composition columns to convert.  When None they are resolved by
            ``prepare_columns``.
        moisture_column_name: Name given to the returned moisture column.  When None, the name is
            whatever ``prepare_columns`` resolves from ``df``.
        composition_units: Key into ``composition_factors`` giving the unit of the input composition.
        return_moisture: If True (and wet mass is available) a moisture column, calculated as
            wet mass minus dry mass, is included.

    Returns:
        A DataFrame of the mass column(s), the optional moisture column, then the component masses.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    wet_available: bool = bool(mass_wet) and mass_wet in df.columns
    mass: pd.DataFrame = df[[mass_wet, mass_dry]] if wet_available else df[[mass_dry]]

    scaled: pd.DataFrame = df[component_cols].mul(mass[mass_dry], axis=0)
    component_mass: pd.DataFrame = scaled / composition_factors[composition_units]

    pieces: list = [mass]
    if wet_available and return_moisture:
        pieces.append((mass[mass_wet] - mass[mass_dry]).rename(moisture_column_name))
    pieces.append(component_mass)
    return pd.concat(pieces, axis='columns')
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
def prepare_columns(df: pd.DataFrame, mass_wet: Optional[str], mass_dry: str, moisture_column_name: Optional[str],
                    component_columns: Optional[list[str]]
                    ) -> tuple[Optional[str], List[Optional[str]], List[str]]:
    """Resolve the moisture, mass and component column names for a mass-composition frame.

    Args:
        df: The frame whose columns are inspected.
        mass_wet: Name of the wet mass column, or None.
        mass_dry: Name of the dry mass column.
        moisture_column_name: Name of the moisture column.  When None, the result of
            ``detect_moisture_column(df.columns)`` is used.
        component_columns: Explicit component columns.  When None, the components are taken from
            ``get_components`` applied to the columns that are not mass/moisture columns.

    Returns:
        A tuple of (moisture column name, [mass_wet, mass_dry, moisture column name], component
        columns).  The name entries are returned as supplied/resolved and so may be None.
    """
    if moisture_column_name is None:
        # No default name is substituted here; presumably the detector returns None when nothing
        # is found, in which case None is passed back to the caller - confirm against the helper.
        moisture_column_name = detect_moisture_column(df.columns)
    mass_moisture_cols = [mass_wet, mass_dry, moisture_column_name]

    if component_columns is None:
        # Compare case-insensitively on BOTH sides.  Previously only the column label was
        # lower-cased, so a mass or moisture name containing upper-case was never excluded.
        excluded = {str(name).lower() for name in mass_moisture_cols if name is not None}
        non_mass_cols: list[str] = [col for col in df.columns if str(col).lower() not in excluded]
        component_cols: list[str] = get_components(df[non_mass_cols], strict=False)
    else:
        component_cols = component_columns

    return moisture_column_name, mass_moisture_cols, component_cols
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def weight_average(df: pd.DataFrame,
                   mass_wet: Optional[str] = None,
                   mass_dry: str = 'mass_dry',
                   moisture_column_name: Optional[str] = None,
                   component_columns: Optional[list[str]] = None,
                   composition_units: Literal['%', 'ppm', 'ppb'] = '%') -> pd.Series:
    """Collapse a mass-composition frame to total mass and mass-weighted composition.

    The frame is converted to component mass and summed over the rows; the weighted composition
    is the total component mass divided by the total dry mass, scaled by the unit factor.

    Args:
        df: Frame holding dry mass, optionally wet mass, and composition columns.
        mass_wet: Name of the optional wet mass column.
        mass_dry: Name of the dry mass column (required).
        moisture_column_name: Name of the moisture column; resolved by ``prepare_columns`` when None.
        component_columns: Composition columns to average; resolved by ``prepare_columns`` when None.
        composition_units: Key into ``composition_factors`` giving the composition unit.

    Returns:
        A pd.Series named 'weight_average' with the total mass column(s), the moisture (only when
        wet mass is available) and the weighted composition.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    component_mass: pd.DataFrame = composition_to_mass(df, mass_wet=mass_wet, mass_dry=mass_dry,
                                                       moisture_column_name=moisture_column_name,
                                                       component_columns=component_columns,
                                                       composition_units=composition_units)
    # Single-row frame of column totals.
    totals: pd.DataFrame = component_mass.sum(axis="index").to_frame().T

    not_components = [mass_wet, mass_dry, 'h2o', 'moisture']
    component_cols = [name for name in component_cols if name.lower() not in not_components]

    weighted: pd.DataFrame = totals[component_cols].div(totals[mass_dry], axis=0)
    weighted = weighted * composition_factors[composition_units]

    if mass_wet and (mass_wet in df.columns):
        moisture: pd.Series = solve_mass_moisture(mass_wet=totals[mass_wet], mass_dry=totals[mass_dry])
        pieces = [totals[[mass_wet, mass_dry]], moisture, weighted]
    else:
        pieces = [totals[[mass_dry]], weighted]

    return pd.concat(pieces, axis=1).iloc[0].rename('weight_average')
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
def calculate_recovery(df: pd.DataFrame,
                       df_ref: pd.DataFrame,
                       mass_wet: str = 'mass_wet',
                       mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Calculate the recovery of one mass-composition frame relative to a reference frame.

    Both frames are converted to component mass with ``composition_to_mass`` (default
    composition units) and the result for ``df`` is divided by the result for ``df_ref``.

    Args:
        df: The mass-composition frame of the stream of interest.
        df_ref: The mass-composition frame that ``df`` is divided by.  Often the feed stream.
        mass_wet: Name of the wet mass column.
        mass_dry: Name of the dry mass column.

    Returns:
        A pd.DataFrame holding the ratio of the mass representation of ``df`` to that of ``df_ref``.
    """
    stream_mass: pd.DataFrame = composition_to_mass(df, mass_wet=mass_wet, mass_dry=mass_dry)
    reference_mass: pd.DataFrame = composition_to_mass(df_ref, mass_wet=mass_wet, mass_dry=mass_dry)
    return stream_mass / reference_mass
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
def calculate_partition(df_feed: pd.DataFrame,
                        df_preferred: pd.DataFrame,
                        col_mass_dry: str = 'mass_dry') -> pd.DataFrame:
    r"""Calculate the partition curve from two streams

    .. math::
        K = \frac{m_{preferred}}{m_{feed}}

    Applicable to the one dimensional case only.
    The interval mean for size comes from ``mean_size``; for any other fraction it is the
    interval mid-point.  The interval mean column is named after the (lower-cased) index name.

    Args:
        df_feed: The pd.DataFrame containing mass-composition representing the fractionated feed.
        df_preferred: The pd.DataFrame containing mass-composition representing the fractionated
            preferred stream.  Its index must be named (e.g. 'size') and, unless it is the size
            index, must expose ``.mid`` (i.e. be a pd.IntervalIndex).
        col_mass_dry: The dry mass column, not optional.

    Returns:
        A pd.DataFrame with the interval mean as the first column and the partition number ``K``.

    Raises:
        ValueError: If the index of ``df_preferred`` has no name.
    """
    index_name = df_preferred.index.name
    if index_name is None:
        # Previously this surfaced as an opaque AttributeError from ``None.lower()``.
        raise ValueError('The index of df_preferred must be named (e.g. "size") to calculate the partition.')
    fraction_name: str = str(index_name).lower()

    res: pd.DataFrame = df_preferred[[col_mass_dry]].div(df_feed[[col_mass_dry]]).rename(columns={col_mass_dry: 'K'})
    if fraction_name == 'size':
        res.insert(loc=0, column='size', value=mean_size(res.index))
    else:
        res.insert(loc=0, column=fraction_name, value=res.index.mid)
    return res
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
def cumulate(mass_data: pd.DataFrame, direction: str) -> pd.DataFrame:
    """Cumulate along the index

    Expected use case is only for Datasets that have been reduced to 1D.

    Args:
        mass_data: The mass data to cumulate - note composition must be represented as mass.
            The index must be a monotonic pd.IntervalIndex.
        direction: 'ascending'|'descending'.  'ascending' accumulates from the lowest interval
            upwards, 'descending' from the highest interval downwards.

    Returns:
        The cumulative sum of ``mass_data``, with rows ordered in the direction of cumulation.
        The input frame is not modified.

    Raises:
        KeyError: If ``direction`` is not one of the valid arguments.
        NotImplementedError: If the index has more than one level or is not a pd.IntervalIndex.
        ValueError: If the index is neither monotonically increasing nor decreasing.
    """
    valid_dirs: List[str] = ['ascending', 'descending']
    if direction not in valid_dirs:
        raise KeyError(f'Invalid direction provided. Valid arguments are: {valid_dirs}')

    # ``Index.ndim`` is 1 even for a MultiIndex, so the level count is what detects the >1D case.
    if mass_data.index.nlevels > 1:
        raise NotImplementedError('DataFrames having indexes > 1D have not been tested.')

    index_var = mass_data.index.name
    if not isinstance(mass_data.index, pd.IntervalIndex):
        raise NotImplementedError(f"The {index_var} of this object is not a pd.Interval. "
                                  f" Only 1D interval objects are valid")

    interval_index = mass_data.index
    if not (interval_index.is_monotonic_increasing or interval_index.is_monotonic_decreasing):
        raise ValueError('Index is not monotonically increasing or decreasing')

    # Sort in the requested direction before accumulating.  The previous lookup table evaluated to
    # True for either direction, so 'descending' silently produced the ascending result.
    ordered: pd.DataFrame = mass_data.sort_index(ascending=(direction == 'ascending'))
    return ordered.cumsum()
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
def _detect_non_float_columns(df):
    """Return the columns of ``df`` that are not selected by the float/int dtype filter.

    Any such columns are reported at INFO level on a logger named after the calling function.

    Args:
        df: The DataFrame to inspect.

    Returns:
        The list of column labels whose dtype is neither float nor int, in column order.
    """
    caller_logger: logging.Logger = logging.getLogger(inspect.stack()[1].function)
    numeric_columns = df.select_dtypes(include=[float, int]).columns
    rejected: List = [name for name in df.columns if name not in numeric_columns]
    if rejected:
        caller_logger.info(f"The following columns are not float columns and will be ignored: {rejected}")
    return rejected
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
def _detect_non_component_columns(df):
    """Return the columns of ``df`` that are not numeric, chemistry or mass/moisture columns.

    A column is kept out of the result when its label is in the float/int dtype selection, in the
    lower-cased values returned by ``is_compositional`` (excluding 'H2O'), or is one of
    'mass_wet', 'mass_dry', 'h2o'.  Remaining columns are reported at INFO level on a logger named
    after the calling function.

    Args:
        df: The DataFrame to inspect.

    Returns:
        The list of remaining column labels, in column order.
    """
    caller_logger: logging.Logger = logging.getLogger(inspect.stack()[1].function)

    chemistry_vars = []
    for component in is_compositional(df.columns, strict=False).values():
        if component not in ['H2O']:
            chemistry_vars.append(component.lower())

    accepted = list(df.select_dtypes(include=[float, int]).columns) + chemistry_vars + [
        'mass_wet', 'mass_dry', 'h2o']
    rejected: List = [name for name in df.columns if name not in accepted]

    if rejected:
        caller_logger.info(f"The following columns are not float columns and will be ignored: {rejected}")
    return rejected
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
class MeanIntervalIndex(pd.IntervalIndex):
    """A pd.IntervalIndex that also reports a mean value for each interval.

    The mean is, in order of precedence: the ``mean_values`` supplied at construction, the result
    of ``mean_size`` when the index is named 'size', otherwise the mid-point of the bounds.
    """

    def __new__(cls, data, mean_values=None):
        # Only ``data`` is forwarded to the pandas constructor; mean_values is stored in __init__.
        return pd.IntervalIndex.__new__(cls, data)

    def __init__(self, data, mean_values=None):
        self.mean_values = mean_values

    @property
    def mean(self):
        """The per-interval mean (see the class docstring for the precedence)."""
        if self.mean_values is not None:
            return self.mean_values
        if self.name == 'size':
            # Delegated to mean_size (presumably the geometric mean - confirm against the helper).
            return mean_size(self)
        # Arithmetic mean of the interval bounds.
        return (self.right + self.left) / 2
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
# class MeanIntervalArray(pd.arrays.IntervalArray):
|
|
329
|
-
# def __init__(self, data, dtype=None, copy=False):
|
|
330
|
-
# super().__init__(data, dtype, copy)
|
|
331
|
-
# if self.name == 'size':
|
|
332
|
-
# # Calculate geometric mean
|
|
333
|
-
# self.mean_values = gmean([self.right, self.left], axis=0)
|
|
334
|
-
# else:
|
|
335
|
-
# # Calculate arithmetic mean
|
|
336
|
-
# self.mean_values = (self.right + self.left) / 2
|
|
337
|
-
#
|
|
338
|
-
# @property
|
|
339
|
-
# def mean(self):
|
|
340
|
-
# if self.mean_values is not None:
|
|
341
|
-
# return self.mean_values
|
|
342
|
-
# elif self.name == 'size':
|
|
343
|
-
# # Calculate geometric mean
|
|
344
|
-
# return gmean([self.right, self.left], axis=0)
|
|
345
|
-
# else:
|
|
346
|
-
# # Calculate arithmetic mean
|
|
347
|
-
# return (self.right + self.left) / 2
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
def parse_vars_from_expr(expr: str) -> list[str]:
    """ Parse variables from a pandas query expression string.

    Backtick-quoted names are returned exactly as written between the backticks (so names holding
    spaces or symbols survive intact).  Outside backticks every Python NAME token is returned,
    except Python keywords (``and``, ``or``, ``not``, ``in``, ``is``, ``True``, ...).  Names used
    as attributes or functions (e.g. ``str`` in ``a.str.len()``) are still reported.

    Args:
        expr: The expression string

    Returns:
        list[str]: The sorted list of unique variables
    """
    import keyword
    import re

    backtick_name = r'`([^`]*)`'
    variables = set(re.findall(backtick_name, expr))

    # Swap each quoted name for a numeric placeholder so its content is not tokenised as Python,
    # then blank any stray (unpaired) backtick, which is not a valid Python token.
    remainder = re.sub(backtick_name, ' 0 ', expr).replace('`', ' ').strip()

    for token in tokenize.generate_tokens(StringIO(remainder).readline):
        if token.type == tokenize.NAME and not keyword.iskeyword(token.string):
            variables.add(token.string)

    # Sorted so the result does not depend on set iteration order.
    return sorted(variables)
|
|
1
|
+
"""
|
|
2
|
+
Pandas utils
|
|
3
|
+
"""
|
|
4
|
+
import inspect
|
|
5
|
+
import logging
|
|
6
|
+
import tokenize
|
|
7
|
+
from io import StringIO
|
|
8
|
+
from token import STRING
|
|
9
|
+
from typing import List, Dict, Optional, Literal
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
from elphick.geomet.utils.components import is_compositional, get_components
|
|
15
|
+
from elphick.geomet.utils.moisture import solve_mass_moisture, detect_moisture_column
|
|
16
|
+
from elphick.geomet.utils.size import mean_size
|
|
17
|
+
|
|
18
|
+
# Multiplier applied to a component-mass / dry-mass ratio to express it in each supported unit.
composition_factors: dict[str, float] = {'%': 100, 'ppm': 1e6, 'ppb': 1e9}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def column_prefixes(columns: List[str]) -> Dict[str, List[str]]:
    """Group column names by the text before their first underscore.

    A prefix is only registered by a column that contains an underscore.  Once registered,
    every column whose first underscore-delimited part equals the prefix is listed under it
    (including a column that is exactly the prefix).  Prefixes keep first-seen order.

    Args:
        columns: The column names to group.

    Returns:
        A dict mapping each prefix to the matching column names, in input order.
    """
    grouped: Dict[str, List[str]] = {}

    # First pass: register the prefixes, preserving the order in which they appear.
    for name in columns:
        if '_' in name:
            grouped.setdefault(name.split('_')[0], [])

    # Second pass: attach every column to its (registered) prefix.
    for name in columns:
        head = name.split('_')[0]
        if head in grouped:
            grouped[head].append(name)

    return grouped
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def column_prefix_counts(columns: List[str]) -> Dict[str, int]:
    """Count the columns that share each prefix reported by ``column_prefixes``.

    Args:
        columns: The column names to analyse.

    Returns:
        A dict mapping each prefix to the number of columns grouped under it.
    """
    counts: Dict[str, int] = {}
    for prefix, members in column_prefixes(columns).items():
        counts[prefix] = len(members)
    return counts
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def mass_to_composition(df: pd.DataFrame,
                        mass_wet: Optional[str] = 'mass_wet',
                        mass_dry: str = 'mass_dry',
                        moisture_column_name: Optional[str] = None,
                        component_columns: Optional[list[str]] = None,
                        composition_units: Literal['%', 'ppm', 'ppb'] = '%') -> pd.DataFrame:
    """Express component masses as composition relative to dry mass.

    Columns that are neither mass nor component columns are not carried into the result.

    Args:
        df: Frame holding the dry mass column, optionally the wet mass column, and component masses.
        mass_wet: Name of the wet mass column.  Wet mass and moisture are only returned when this
            name is given and present in ``df``.
        mass_dry: Name of the dry mass column (required); it is the divisor for every component.
        moisture_column_name: Name given to the returned moisture column.  When None, the name is
            whatever ``prepare_columns`` resolves from ``df``.
        component_columns: Component mass columns to convert.  When None they are resolved by
            ``prepare_columns``.
        composition_units: Key into ``composition_factors`` selecting the output unit.

    Returns:
        A DataFrame of the mass column(s), then moisture (only with wet mass), then composition.
        Ratios that are undefined (e.g. zero dry mass) are reported as 0.0.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    wet_available: bool = bool(mass_wet) and mass_wet in df.columns
    mass: pd.DataFrame = df[[mass_wet, mass_dry]] if wet_available else df[[mass_dry]]

    # Zero dry mass is swapped for NaN so the division yields NaN (not inf), which is then zero-filled.
    dry_divisor: pd.Series = mass[mass_dry].replace(0.0, np.nan)
    fractions: pd.DataFrame = df[component_cols].div(dry_divisor, axis=0).fillna(0.0)
    composition: pd.DataFrame = fractions * composition_factors[composition_units]

    pieces: list = [mass]
    if wet_available:
        pieces.append(solve_mass_moisture(mass_wet=mass[mass_wet], mass_dry=mass[mass_dry]).rename(
            moisture_column_name))
    pieces.append(composition)
    return pd.concat(pieces, axis='columns')
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def composition_to_mass(df: pd.DataFrame,
                        mass_wet: Optional[str] = None,
                        mass_dry: str = 'mass_dry',
                        component_columns: Optional[list[str]] = None,
                        moisture_column_name: Optional[str] = None,
                        composition_units: Literal['%', 'ppm', 'ppb'] = '%',
                        return_moisture: bool = False) -> pd.DataFrame:
    """Express composition as component mass using the dry mass.

    Columns that are neither mass nor component columns are not carried into the result.

    Args:
        df: Frame holding the dry mass column, optionally the wet mass column, and composition.
        mass_wet: Name of the wet mass column, optional.  Wet mass is only returned when this
            name is given and present in ``df``.
        mass_dry: Name of the dry mass column (required); every component is multiplied by it.
        component_columns: Composition columns to convert.  When None they are resolved by
            ``prepare_columns``.
        moisture_column_name: Name given to the returned moisture column.  When None, the name is
            whatever ``prepare_columns`` resolves from ``df``.
        composition_units: Key into ``composition_factors`` giving the unit of the input composition.
        return_moisture: If True (and wet mass is available) a moisture column, calculated as
            wet mass minus dry mass, is included.

    Returns:
        A DataFrame of the mass column(s), the optional moisture column, then the component masses.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    wet_available: bool = bool(mass_wet) and mass_wet in df.columns
    mass: pd.DataFrame = df[[mass_wet, mass_dry]] if wet_available else df[[mass_dry]]

    scaled: pd.DataFrame = df[component_cols].mul(mass[mass_dry], axis=0)
    component_mass: pd.DataFrame = scaled / composition_factors[composition_units]

    pieces: list = [mass]
    if wet_available and return_moisture:
        pieces.append((mass[mass_wet] - mass[mass_dry]).rename(moisture_column_name))
    pieces.append(component_mass)
    return pd.concat(pieces, axis='columns')
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def prepare_columns(df: pd.DataFrame, mass_wet: Optional[str], mass_dry: str, moisture_column_name: Optional[str],
                    component_columns: Optional[list[str]]
                    ) -> tuple[Optional[str], List[Optional[str]], List[str]]:
    """Resolve the moisture, mass and component column names for a mass-composition frame.

    Args:
        df: The frame whose columns are inspected.
        mass_wet: Name of the wet mass column, or None.
        mass_dry: Name of the dry mass column.
        moisture_column_name: Name of the moisture column.  When None, the result of
            ``detect_moisture_column(df.columns)`` is used.
        component_columns: Explicit component columns.  When None, the components are taken from
            ``get_components`` applied to the columns that are not mass/moisture columns.

    Returns:
        A tuple of (moisture column name, [mass_wet, mass_dry, moisture column name], component
        columns).  The name entries are returned as supplied/resolved and so may be None.
    """
    if moisture_column_name is None:
        # No default name is substituted here; presumably the detector returns None when nothing
        # is found, in which case None is passed back to the caller - confirm against the helper.
        moisture_column_name = detect_moisture_column(df.columns)
    mass_moisture_cols = [mass_wet, mass_dry, moisture_column_name]

    if component_columns is None:
        # Compare case-insensitively on BOTH sides.  Previously only the column label was
        # lower-cased, so a mass or moisture name containing upper-case was never excluded.
        excluded = {str(name).lower() for name in mass_moisture_cols if name is not None}
        non_mass_cols: list[str] = [col for col in df.columns if str(col).lower() not in excluded]
        component_cols: list[str] = get_components(df[non_mass_cols], strict=False)
    else:
        component_cols = component_columns

    return moisture_column_name, mass_moisture_cols, component_cols
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def weight_average(df: pd.DataFrame,
                   mass_wet: Optional[str] = None,
                   mass_dry: str = 'mass_dry',
                   moisture_column_name: Optional[str] = None,
                   component_columns: Optional[list[str]] = None,
                   composition_units: Literal['%', 'ppm', 'ppb'] = '%') -> pd.Series:
    """Collapse a mass-composition frame to total mass and mass-weighted composition.

    The frame is converted to component mass and summed over the rows; the weighted composition
    is the total component mass divided by the total dry mass, scaled by the unit factor.

    Args:
        df: Frame holding dry mass, optionally wet mass, and composition columns.
        mass_wet: Name of the optional wet mass column.
        mass_dry: Name of the dry mass column (required).
        moisture_column_name: Name of the moisture column; resolved by ``prepare_columns`` when None.
        component_columns: Composition columns to average; resolved by ``prepare_columns`` when None.
        composition_units: Key into ``composition_factors`` giving the composition unit.

    Returns:
        A pd.Series named 'weight_average' with the total mass column(s), the moisture (only when
        wet mass is available) and the weighted composition.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    component_mass: pd.DataFrame = composition_to_mass(df, mass_wet=mass_wet, mass_dry=mass_dry,
                                                       moisture_column_name=moisture_column_name,
                                                       component_columns=component_columns,
                                                       composition_units=composition_units)
    # Single-row frame of column totals.
    totals: pd.DataFrame = component_mass.sum(axis="index").to_frame().T

    not_components = [mass_wet, mass_dry, 'h2o', 'moisture']
    component_cols = [name for name in component_cols if name.lower() not in not_components]

    weighted: pd.DataFrame = totals[component_cols].div(totals[mass_dry], axis=0)
    weighted = weighted * composition_factors[composition_units]

    if mass_wet and (mass_wet in df.columns):
        moisture: pd.Series = solve_mass_moisture(mass_wet=totals[mass_wet], mass_dry=totals[mass_dry])
        pieces = [totals[[mass_wet, mass_dry]], moisture, weighted]
    else:
        pieces = [totals[[mass_dry]], weighted]

    return pd.concat(pieces, axis=1).iloc[0].rename('weight_average')
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def calculate_recovery(df: pd.DataFrame,
                       df_ref: pd.DataFrame,
                       mass_wet: str = 'mass_wet',
                       mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Calculate the recovery of a mass-composition stream relative to a reference stream.

    Both frames are converted with ``composition_to_mass`` (using that function's
    default units; the original documentation states %w/w) and the converted
    ``df`` is divided element-wise by the converted ``df_ref``.  pandas aligns
    the two frames on index and columns before dividing.

    Args:
        df: The pd.DataFrame containing mass-composition for the stream of interest.
        df_ref: The stream that df will be divided by to calculate the recovery.  Often the feed stream.
        mass_wet: The wet mass column name, forwarded to ``composition_to_mass``.
        mass_dry: The dry mass column name, forwarded to ``composition_to_mass``.

    Returns:
        A pd.DataFrame holding the ratio of the component masses of ``df`` to those of ``df_ref``.
    """
    stream_mass: pd.DataFrame = composition_to_mass(df, mass_wet=mass_wet, mass_dry=mass_dry)
    reference_mass: pd.DataFrame = composition_to_mass(df_ref, mass_wet=mass_wet, mass_dry=mass_dry)
    return stream_mass / reference_mass
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def calculate_partition(df_feed: pd.DataFrame,
                        df_preferred: pd.DataFrame,
                        col_mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Calculate the partition curve from two fractionated streams.

    .. math::
        K = \\frac{m_{preferred}}{m_{feed}}

    Applicable to the one dimensional case only.  ``K`` is the raw mass ratio;
    no clipping is applied, so it lies in [0, 1] only when the preferred mass
    does not exceed the feed mass in every fraction.

    The first column of the result holds a representative value for each
    interval and is named after the (lower-cased) index name of
    ``df_preferred``.  For an index named ``size`` the value comes from
    ``mean_size``; otherwise it is the interval mid-point (``IntervalIndex.mid``).

    Args:
        df_feed: The pd.DataFrame containing mass-composition representing the fractionated feed.
        df_preferred: The pd.DataFrame containing mass-composition representing the fractionated
          preferred stream.  Its index must be named.
        col_mass_dry: The dry mass column, not optional.

    Returns:
        A pd.DataFrame with the interval-mean column followed by the partition number ``K``.

    Raises:
        ValueError: If the index of ``df_preferred`` has no string name, since the name
          is needed to label the interval-mean column.
    """
    index_name = df_preferred.index.name
    if not isinstance(index_name, str):
        # Previously this surfaced as an opaque AttributeError on ``None.lower()``.
        raise ValueError("The index of df_preferred must be named (e.g. 'size') "
                         "to calculate a partition curve.")
    fraction_name: str = index_name.lower()

    res: pd.DataFrame = df_preferred[[col_mass_dry]].div(df_feed[[col_mass_dry]]).rename(
        columns={col_mass_dry: 'K'})

    if fraction_name == 'size':
        res.insert(loc=0, column='size', value=mean_size(res.index))
    else:
        res.insert(loc=0, column=fraction_name, value=res.index.mid)
    return res
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def cumulate(mass_data: pd.DataFrame, direction: str) -> pd.DataFrame:
    """Cumulate along the index

    Expected use case is only for Datasets that have been reduced to 1D.

    The rows are sorted by the interval index in the requested direction and a
    cumulative sum is taken down the rows.  The input frame is not modified.

    Args:
        mass_data: The mass data to cumulate - note composition must be represented as mass.
          Must be indexed by a single-level, monotonic pd.IntervalIndex.
        direction: 'ascending'|'descending' - the index order in which rows are accumulated.

    Returns:
        The cumulated mass data, ordered by the index in the requested direction.

    Raises:
        KeyError: If direction is not one of the valid arguments.
        NotImplementedError: If the index has more than one level or is not a pd.IntervalIndex.
        ValueError: If the index is neither monotonically increasing nor decreasing.
    """

    valid_dirs = ['ascending', 'descending']
    if direction not in valid_dirs:
        raise KeyError(f'Invalid direction provided. Valid arguments are: {valid_dirs}')

    # Index.ndim is always 1, even for a MultiIndex; nlevels is the meaningful check.
    if mass_data.index.nlevels > 1:
        raise NotImplementedError('DataFrames having indexes > 1D have not been tested.')

    index_var = mass_data.index.name
    if not isinstance(mass_data.index, pd.IntervalIndex):
        raise NotImplementedError(f"The {index_var} of this object is not a pd.Interval. "
                                  f" Only 1D interval objects are valid")

    interval_index = mass_data.index
    if not (interval_index.is_monotonic_increasing or interval_index.is_monotonic_decreasing):
        raise ValueError('Index is not monotonically increasing or decreasing')

    # The sort order must follow the requested direction; the previous lookup
    # table evaluated to True for both options, so 'descending' was ignored.
    sort_ascending: bool = direction == 'ascending'
    return mass_data.sort_index(ascending=sort_ascending).cumsum()
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _detect_non_float_columns(df):
    """Return the columns of df whose dtype is neither float nor int.

    Any such columns are reported at INFO level on a logger named after the
    calling function.

    Args:
        df: The pd.DataFrame to inspect.

    Returns:
        A list of the column labels not selected by ``select_dtypes(include=[float, int])``.
    """
    # The logger takes the name of the caller's function, one frame up the stack.
    caller_logger = logging.getLogger(inspect.stack()[1].function)
    numeric_columns = df.select_dtypes(include=[float, int]).columns
    rejected = [label for label in df.columns if label not in numeric_columns]
    if rejected:
        caller_logger.info(f"The following columns are not float columns and will be ignored: {rejected}")
    return rejected
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _detect_non_component_columns(df):
    """Return the columns of df that are not numeric, chemistry or mass/moisture columns.

    A column is kept out of the result when its label is one of:
      * a float/int dtype column,
      * a lower-cased value returned by ``is_compositional(df.columns, strict=False)``
        (excluding 'H2O') - presumably the detected chemical components; verify
        against ``is_compositional``,
      * 'mass_wet', 'mass_dry' or 'h2o'.

    Remaining columns are reported at INFO level on a logger named after the
    calling function.

    Args:
        df: The pd.DataFrame to inspect.

    Returns:
        A list of the column labels that matched none of the categories above.
    """
    # The logger takes the name of the caller's function, one frame up the stack.
    caller_logger = logging.getLogger(inspect.stack()[1].function)
    components = is_compositional(df.columns, strict=False).values()
    chemistry_names = [symbol.lower() for symbol in components if symbol != 'H2O']

    numeric_names = list(df.select_dtypes(include=[float, int]).columns)
    accepted = numeric_names + chemistry_names + ['mass_wet', 'mass_dry', 'h2o']

    rejected = [label for label in df.columns if label not in accepted]
    if rejected:
        # NOTE(review): the message says "float columns" although this check is broader.
        caller_logger.info(f"The following columns are not float columns and will be ignored: {rejected}")
    return rejected
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class MeanIntervalIndex(pd.IntervalIndex):
    """A pd.IntervalIndex that exposes a representative ``mean`` for each interval.

    The mean is, in order of precedence:
      1. the ``mean_values`` supplied at construction,
      2. the result of ``mean_size`` when the index is named 'size'
         (described in this file as the geometric mean),
      3. the arithmetic mean of the interval bounds.
    """

    # Class-level default: instances created without running __init__ would
    # otherwise raise AttributeError when ``mean`` is accessed.
    # NOTE(review): pandas may build derived indexes (e.g. slices) this way - confirm.
    # Such instances carry no custom mean values and fall back to a computed mean.
    mean_values = None

    def __new__(cls, data, mean_values=None):
        # mean_values is accepted here only so the signature matches __init__.
        return pd.IntervalIndex.__new__(cls, data)

    def __init__(self, data, mean_values=None):
        self.mean_values = mean_values

    @property
    def mean(self):
        """The per-interval mean, see the class docstring for the precedence rules."""
        if self.mean_values is not None:
            return self.mean_values
        elif self.name == 'size':
            # Calculate geometric mean
            return mean_size(self)
        else:
            # Calculate arithmetic mean
            return (self.right + self.left) / 2
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# class MeanIntervalArray(pd.arrays.IntervalArray):
|
|
329
|
+
# def __init__(self, data, dtype=None, copy=False):
|
|
330
|
+
# super().__init__(data, dtype, copy)
|
|
331
|
+
# if self.name == 'size':
|
|
332
|
+
# # Calculate geometric mean
|
|
333
|
+
# self.mean_values = gmean([self.right, self.left], axis=0)
|
|
334
|
+
# else:
|
|
335
|
+
# # Calculate arithmetic mean
|
|
336
|
+
# self.mean_values = (self.right + self.left) / 2
|
|
337
|
+
#
|
|
338
|
+
# @property
|
|
339
|
+
# def mean(self):
|
|
340
|
+
# if self.mean_values is not None:
|
|
341
|
+
# return self.mean_values
|
|
342
|
+
# elif self.name == 'size':
|
|
343
|
+
# # Calculate geometric mean
|
|
344
|
+
# return gmean([self.right, self.left], axis=0)
|
|
345
|
+
# else:
|
|
346
|
+
# # Calculate arithmetic mean
|
|
347
|
+
# return (self.right + self.left) / 2
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def parse_vars_from_expr(expr: str) -> list[str]:
    """ Parse variables from a pandas query expression string.

    The expression is tokenized with the Python tokenizer.  Every NAME token
    that is not a Python keyword (``and``, ``or``, ``not``, ``in``, ``is``,
    ``True``, ...) is reported, as is the literal text of every backtick-quoted
    name, e.g. ```my col 1```.

    Args:
        expr: The expression string

    Returns:
        list[str]: The list of unique variables, in no particular order
    """
    import keyword  # function-scope import: only needed by this parser

    source_lines = expr.split('\n')
    variables = set()
    opening_tick = None  # the token of the currently open backtick, if any
    quoted_parts = []  # fallback pieces, used only if a quoted name spans lines

    for token in tokenize.generate_tokens(StringIO(expr).readline):
        if token.string == '`':
            if opening_tick is None:
                opening_tick = token
                quoted_parts = []
                continue
            # Closing backtick: take the quoted text verbatim from the source so
            # that digits, punctuation and spacing inside the name are preserved.
            if opening_tick.end[0] == token.start[0]:
                line = source_lines[token.start[0] - 1]
                variables.add(line[opening_tick.end[1]:token.start[1]])
            else:
                variables.add(' '.join(quoted_parts))
            opening_tick = None
        elif opening_tick is not None:
            if token.type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING):
                quoted_parts.append(token.string)
        elif token.type == tokenize.NAME and not keyword.iskeyword(token.string):
            variables.add(token.string)

    return list(variables)
|