geometallurgy 0.4.12__py3-none-any.whl → 0.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. elphick/geomet/__init__.py +11 -11
  2. elphick/geomet/base.py +1133 -1133
  3. elphick/geomet/block_model.py +319 -358
  4. elphick/geomet/config/__init__.py +1 -1
  5. elphick/geomet/config/config_read.py +39 -39
  6. elphick/geomet/config/flowsheet_example_partition.yaml +31 -31
  7. elphick/geomet/config/flowsheet_example_simple.yaml +25 -25
  8. elphick/geomet/config/mc_config.yml +35 -35
  9. elphick/geomet/data/downloader.py +39 -39
  10. elphick/geomet/data/register.csv +12 -12
  11. elphick/geomet/datasets/__init__.py +2 -2
  12. elphick/geomet/datasets/datasets.py +47 -47
  13. elphick/geomet/datasets/downloader.py +40 -40
  14. elphick/geomet/datasets/register.csv +12 -12
  15. elphick/geomet/datasets/sample_data.py +196 -196
  16. elphick/geomet/extras.py +35 -35
  17. elphick/geomet/flowsheet/__init__.py +1 -1
  18. elphick/geomet/flowsheet/flowsheet.py +1216 -1216
  19. elphick/geomet/flowsheet/loader.py +99 -99
  20. elphick/geomet/flowsheet/operation.py +256 -256
  21. elphick/geomet/flowsheet/stream.py +39 -39
  22. elphick/geomet/interval_sample.py +641 -641
  23. elphick/geomet/io.py +379 -379
  24. elphick/geomet/plot.py +147 -147
  25. elphick/geomet/sample.py +28 -28
  26. elphick/geomet/utils/amenability.py +49 -49
  27. elphick/geomet/utils/block_model_converter.py +93 -93
  28. elphick/geomet/utils/components.py +136 -136
  29. elphick/geomet/utils/data.py +49 -49
  30. elphick/geomet/utils/estimates.py +108 -108
  31. elphick/geomet/utils/interp.py +193 -193
  32. elphick/geomet/utils/interp2.py +134 -134
  33. elphick/geomet/utils/layout.py +72 -72
  34. elphick/geomet/utils/moisture.py +61 -61
  35. elphick/geomet/utils/output.html +617 -0
  36. elphick/geomet/utils/pandas.py +378 -378
  37. elphick/geomet/utils/parallel.py +29 -29
  38. elphick/geomet/utils/partition.py +63 -63
  39. elphick/geomet/utils/size.py +51 -51
  40. elphick/geomet/utils/timer.py +80 -80
  41. elphick/geomet/utils/viz.py +56 -56
  42. elphick/geomet/validate.py.hide +176 -176
  43. {geometallurgy-0.4.12.dist-info → geometallurgy-0.4.13.dist-info}/LICENSE +21 -21
  44. {geometallurgy-0.4.12.dist-info → geometallurgy-0.4.13.dist-info}/METADATA +7 -5
  45. geometallurgy-0.4.13.dist-info/RECORD +49 -0
  46. {geometallurgy-0.4.12.dist-info → geometallurgy-0.4.13.dist-info}/WHEEL +1 -1
  47. geometallurgy-0.4.12.dist-info/RECORD +0 -48
  48. {geometallurgy-0.4.12.dist-info → geometallurgy-0.4.13.dist-info}/entry_points.txt +0 -0
@@ -1,378 +1,378 @@
1
- """
2
- Pandas utils
3
- """
4
- import inspect
5
- import logging
6
- import tokenize
7
- from io import StringIO
8
- from token import STRING
9
- from typing import List, Dict, Optional, Literal
10
-
11
- import numpy as np
12
- import pandas as pd
13
-
14
- from elphick.geomet.utils.components import is_compositional, get_components
15
- from elphick.geomet.utils.moisture import solve_mass_moisture, detect_moisture_column
16
- from elphick.geomet.utils.size import mean_size
17
-
18
# Multipliers that convert a mass fraction into the named composition unit.
# Annotated as float because the ppm/ppb factors are float literals (1e6, 1e9).
composition_factors: dict[str, float] = {'%': 100, 'ppm': 1e6, 'ppb': 1e9}
19
-
20
-
21
def column_prefixes(columns: List[str]) -> Dict[str, List[str]]:
    """Group column names by the text before their first underscore.

    Only prefixes that occur on at least one underscore-containing column are
    reported, in order of first appearance among those columns.  A column with
    no underscore is still listed under a reported prefix when its whole name
    equals that prefix (e.g. ``mass`` alongside ``mass_wet``).

    Args:
        columns: The column names to inspect.

    Returns:
        A mapping of prefix to the columns (in input order) sharing that prefix.
    """
    by_head: Dict[str, List[str]] = {}
    prefixes: Dict[str, None] = {}  # dict used as an insertion-ordered set
    for col in columns:
        # Single pass: split each name once instead of once per prefix.
        head, sep, _ = col.partition('_')
        by_head.setdefault(head, []).append(col)
        if sep:
            prefixes.setdefault(head, None)
    return {prefix: by_head[prefix] for prefix in prefixes}
24
-
25
-
26
def column_prefix_counts(columns: List[str]) -> Dict[str, int]:
    """Count how many columns fall under each prefix found by ``column_prefixes``.

    Args:
        columns: The column names to inspect.

    Returns:
        A mapping of prefix to the number of columns grouped under it.
    """
    counts: Dict[str, int] = {}
    for prefix, members in column_prefixes(columns).items():
        counts[prefix] = len(members)
    return counts
28
-
29
-
30
def mass_to_composition(df: pd.DataFrame,
                        mass_wet: Optional[str] = 'mass_wet',
                        mass_dry: str = 'mass_dry',
                        moisture_column_name: Optional[str] = None,
                        component_columns: Optional[list[str]] = None,
                        composition_units: Literal['%', 'ppm', 'ppb'] = '%') -> pd.DataFrame:
    """Convert component masses to composition relative to dry mass.

    Columns that are neither mass nor selected components are not returned.

    Args:
        df: The pd.DataFrame holding the mass columns and the component masses.
        mass_wet: The wet mass column, optional.  It is only used when it names a column present in df.
        mass_dry: The dry mass column, required.  Component masses are divided by this column.
        moisture_column_name: The name given to the returned moisture column.  If None, the name is
            resolved by prepare_columns.
        component_columns: The component columns to convert.  If None, they are resolved by prepare_columns.
        composition_units: Selects the factor from composition_factors applied to the mass fraction.

    Returns:
        A pd.DataFrame of the mass column(s), the moisture (only when wet mass is available) and the composition.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    has_wet_mass: bool = bool(mass_wet) and mass_wet in df.columns
    mass: pd.DataFrame = df[[mass_wet, mass_dry]] if has_wet_mass else df[[mass_dry]]

    # Zero dry mass is swapped for NaN to avoid dividing by zero; every NaN in the
    # quotient is then reported as 0.0.
    safe_dry_mass: pd.Series = mass[mass_dry].replace(0.0, np.nan)
    fractions: pd.DataFrame = df[component_cols].div(safe_dry_mass, axis=0).fillna(0.0)
    composition: pd.DataFrame = fractions * composition_factors[composition_units]

    if not has_wet_mass:
        return pd.concat([mass, composition], axis=1)

    moisture: pd.Series = solve_mass_moisture(mass_wet=mass[mass_wet],
                                              mass_dry=mass[mass_dry]).rename(moisture_column_name)
    return pd.concat([mass, moisture, composition], axis='columns')
74
-
75
-
76
def composition_to_mass(df: pd.DataFrame,
                        mass_wet: Optional[str] = None,
                        mass_dry: str = 'mass_dry',
                        component_columns: Optional[list[str]] = None,
                        moisture_column_name: Optional[str] = None,
                        composition_units: Literal['%', 'ppm', 'ppb'] = '%',
                        return_moisture: bool = False) -> pd.DataFrame:
    """Convert composition to component mass using the dry mass.

    Columns that are neither mass nor selected components are not returned.

    Args:
        df: The pd.DataFrame holding the mass columns and the composition.
        mass_wet: The wet mass column, optional.  It is only used when it names a column present in df.
        mass_dry: The dry mass column, required.  Composition is multiplied by this column.
        component_columns: The composition columns to convert.  If None, they are resolved by prepare_columns.
        moisture_column_name: The name given to the returned moisture column.  If None, the name is
            resolved by prepare_columns.
        composition_units: Selects the factor from composition_factors that the product is divided by.
        return_moisture: If True and wet mass is available, a moisture column computed as
            wet mass minus dry mass is included.

    Returns:
        A pd.DataFrame of the mass column(s), the optional moisture mass and the component masses.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    wet_available: bool = bool(mass_wet) and mass_wet in df.columns
    mass: pd.DataFrame = df[[mass_wet, mass_dry]] if wet_available else df[[mass_dry]]

    weighted: pd.DataFrame = df[component_cols].mul(mass[mass_dry], axis=0)
    component_mass: pd.DataFrame = weighted / composition_factors[composition_units]

    frames: list = [mass]
    if wet_available and return_moisture:
        # Moisture is expressed here as a mass: the difference between wet and dry mass.
        frames.append((mass[mass_wet] - mass[mass_dry]).rename(moisture_column_name))
    frames.append(component_mass)
    return pd.concat(frames, axis=1)
120
-
121
-
122
def prepare_columns(df: pd.DataFrame, mass_wet: Optional[str], mass_dry: str, moisture_column_name: Optional[str],
                    component_columns: Optional[list[str]]) -> tuple[Optional[str], List[Optional[str]], List[str]]:
    """Resolve the moisture column name and the component columns for a mass-composition frame.

    Args:
        df: The pd.DataFrame to inspect.
        mass_wet: The wet mass column name, or None.
        mass_dry: The dry mass column name.
        moisture_column_name: The moisture column name.  If None, detect_moisture_column is asked to
            find one in df.columns; the result may still be None.
        component_columns: The component columns.  If None, get_components is applied (strict=False) to
            the columns that are not mass or moisture columns.

    Returns:
        A tuple of (moisture column name, [mass_wet, mass_dry, moisture column name], component columns).
    """
    if moisture_column_name is None:
        moisture_column_name = detect_moisture_column(df.columns)
    mass_moisture_cols = [mass_wet, mass_dry, moisture_column_name]

    if component_columns is None:
        # Compare case-insensitively on BOTH sides; previously only the column label was
        # lower-cased, so a mass column such as 'Mass_Dry' was never excluded.
        excluded = {str(name).lower() for name in mass_moisture_cols if name is not None}
        non_mass_cols: list[str] = [col for col in df.columns if str(col).lower() not in excluded]
        component_cols: list[str] = get_components(df[non_mass_cols], strict=False)
    else:
        component_cols = component_columns

    return moisture_column_name, mass_moisture_cols, component_cols
137
-
138
-
139
def weight_average(df: pd.DataFrame,
                   mass_wet: Optional[str] = None,
                   mass_dry: str = 'mass_dry',
                   moisture_column_name: Optional[str] = None,
                   component_columns: Optional[list[str]] = None,
                   composition_units: Literal['%', 'ppm', 'ppb'] = '%') -> pd.Series:
    """Weight Average a DataFrame containing mass-composition

    The composition is converted to component mass, summed over the rows, and divided by the
    summed dry mass.

    Args:
        df: The pd.DataFrame containing mass-composition.
        mass_wet: The optional wet mass column.  It is only used when it names a column present in df.
        mass_dry: The dry mass column, not optional.
        moisture_column_name: if wet mass is available, the resultant moisture is returned with this name.
            If None, the name resolved by prepare_columns is used; if that is also None the name
            returned by solve_mass_moisture is kept.
        component_columns: The composition columns to be used for the calculation.  If None, they are
            resolved by prepare_columns.
        composition_units: selects the factor from composition_factors.

    Returns:
        A pd.Series named 'weight_average' containing the total mass and weight averaged composition.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    mass_sum: pd.DataFrame = df.pipe(composition_to_mass, mass_wet=mass_wet, mass_dry=mass_dry,
                                     moisture_column_name=moisture_column_name,
                                     component_columns=component_columns,
                                     composition_units=composition_units).sum(axis="index").to_frame().T

    # Mass and moisture are never averaged as components.  Both sides are lower-cased so that
    # mixed-case mass column names are excluded too.
    excluded = {str(name).lower() for name in (mass_wet, mass_dry) if name is not None} | {'h2o', 'moisture'}
    component_cols = [col for col in component_cols if str(col).lower() not in excluded]

    weighted_composition: pd.DataFrame = mass_sum[component_cols].div(mass_sum[mass_dry], axis=0) * \
        composition_factors[composition_units]

    if mass_wet and (mass_wet in df.columns):
        moisture: pd.Series = solve_mass_moisture(mass_wet=mass_sum[mass_wet], mass_dry=mass_sum[mass_dry])
        if moisture_column_name is not None:
            # Honour the documented column name, consistent with mass_to_composition.
            moisture = moisture.rename(moisture_column_name)
        return pd.concat([mass_sum[[mass_wet, mass_dry]], moisture, weighted_composition], axis=1).iloc[0].rename(
            'weight_average')
    return pd.concat([mass_sum[[mass_dry]], weighted_composition], axis=1).iloc[0].rename('weight_average')
182
-
183
-
184
def calculate_recovery(df: pd.DataFrame,
                       df_ref: pd.DataFrame,
                       mass_wet: str = 'mass_wet',
                       mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Calculate recovery of mass-composition for two DataFrames

    Both frames are converted to component mass with composition_to_mass and the
    result for df is divided by the result for df_ref.

    Args:
        df: The pd.DataFrame containing mass-composition for the stream of interest.
        df_ref: The stream that df will be divided by to calculate the recovery. Often the feed stream.
        mass_wet: The wet mass column name passed to composition_to_mass.
        mass_dry: The dry mass column name passed to composition_to_mass.

    Returns:
        A pd.DataFrame holding the ratio of the masses in df to the masses in df_ref.
    """
    stream_mass: pd.DataFrame = composition_to_mass(df, mass_wet=mass_wet, mass_dry=mass_dry)
    reference_mass: pd.DataFrame = composition_to_mass(df_ref, mass_wet=mass_wet, mass_dry=mass_dry)
    return stream_mass / reference_mass
205
-
206
-
207
def calculate_partition(df_feed: pd.DataFrame,
                        df_preferred: pd.DataFrame,
                        col_mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Calculate the partition curve from two streams

    .. math::
        K = \\frac{{m_{preferred}}}{{m_{feed}}}

    Applicable to the one dimensional case only.
    The interval mean is inserted as the first column and is named after the (lower-cased)
    index name.  When the index is named `size` the mean comes from mean_size, otherwise it
    is the interval mid-point.

    Args:
        df_feed: The pd.DataFrame containing mass-composition representing the fractionated feed.
        df_preferred: The pd.DataFrame containing mass-composition representing the fractionated preferred stream.
        col_mass_dry: The dry mass column, not optional.

    Returns:
        A pd.DataFrame containing the interval mean and the partition number `K`.

    Raises:
        ValueError: If the index of df_preferred has no name.
    """
    index_name = df_preferred.index.name
    if index_name is None:
        # Previously this surfaced as an opaque AttributeError on None.lower().
        raise ValueError('The index of df_preferred must be named (e.g. "size") to calculate the partition.')
    axis_label: str = index_name.lower()

    res: pd.DataFrame = df_preferred[[col_mass_dry]].div(df_feed[[col_mass_dry]]).rename(columns={col_mass_dry: 'K'})
    if axis_label == 'size':
        res.insert(loc=0, column='size', value=mean_size(res.index))
    else:
        res.insert(loc=0, column=axis_label, value=res.index.mid)
    return res
235
-
236
-
237
def cumulate(mass_data: pd.DataFrame, direction: str) -> pd.DataFrame:
    """Cumulate along the index

    Expected use case is only for Datasets that have been reduced to 1D.

    Args:
        mass_data: The mass data to cumulate - note composition must be represented as mass
        direction: 'ascending'|'descending'.  'ascending' accumulates from the lowest interval
            upwards, 'descending' from the highest interval downwards.

    Returns:
        The cumulative sums, with rows in the same order as mass_data.  The input is not modified.

    Raises:
        KeyError: If direction is not one of the valid arguments.
        NotImplementedError: If the index is not a single-level pd.IntervalIndex.
        ValueError: If the index is not monotonic.
    """
    valid_dirs: List[str] = ['ascending', 'descending']
    if direction not in valid_dirs:
        raise KeyError(f'Invalid direction provided. Valid arguments are: {valid_dirs}')

    # nlevels (not ndim, which is always 1 for an Index) identifies a multi-level index.
    if mass_data.index.nlevels > 1:
        raise NotImplementedError('DataFrames having indexes > 1D have not been tested.')

    index_var: str = mass_data.index.name
    if not isinstance(mass_data.index, pd.IntervalIndex):
        raise NotImplementedError(f"The {index_var} of this object is not a pd.Interval. "
                                  f" Only 1D interval objects are valid")

    interval_index = mass_data.index
    if not (interval_index.is_monotonic_increasing or interval_index.is_monotonic_decreasing):
        raise ValueError('Index is not monotonically increasing or decreasing')

    # The index is monotonic, so sorting to the requested direction is either a no-op or a
    # reversal.  The earlier lookup table always evaluated to True, so 'descending' was
    # silently cumulated in ascending order.
    want_ascending: bool = direction == 'ascending'
    in_data_ascending: bool = bool(interval_index.is_monotonic_increasing)
    if in_data_ascending == want_ascending:
        return mass_data.cumsum()

    # Reverse, cumulate, then reverse back so the result keeps the caller's row order.
    return mass_data.iloc[::-1].cumsum().iloc[::-1]
284
-
285
-
286
- def _detect_non_float_columns(df):
287
- _logger: logging.Logger = logging.getLogger(inspect.stack()[1].function)
288
- non_float_cols: List = [col for col in df.columns if col not in df.select_dtypes(include=[float, int]).columns]
289
- if len(non_float_cols) > 0:
290
- _logger.info(f"The following columns are not float columns and will be ignored: {non_float_cols}")
291
- return non_float_cols
292
-
293
-
294
def _detect_non_component_columns(df):
    """Return the columns of df that are not numeric, chemistry, or mass/moisture columns.

    A column is kept out of the result when it is selected by
    ``select_dtypes(include=[float, int])``, when it equals one of the lower-cased values
    returned by is_compositional (excluding 'H2O'), or when it is 'mass_wet', 'mass_dry' or 'h2o'.
    The columns found are reported at INFO level on a logger named after the calling function.

    Args:
        df: The pd.DataFrame to inspect.

    Returns:
        The list of remaining column labels, in column order.
    """
    _logger: logging.Logger = logging.getLogger(inspect.stack()[1].function)
    chemistry_vars = [col.lower() for col in is_compositional(df.columns, strict=False).values() if col not in ['H2O']]
    # NOTE(review): chemistry_vars are lower-cased but compared case-sensitively against the
    # raw column labels below - confirm this is intended.

    # Build the lookup once rather than re-running select_dtypes for every column.
    recognised = set(df.select_dtypes(include=[float, int]).columns) | set(chemistry_vars) | {
        'mass_wet', 'mass_dry', 'h2o'}
    non_component_cols: List = [col for col in df.columns if col not in recognised]
    if non_component_cols:
        _logger.info(f"The following columns are not float columns and will be ignored: {non_component_cols}")
    return non_component_cols
304
-
305
-
306
class MeanIntervalIndex(pd.IntervalIndex):
    """MeanIntervalIndex is a subclass of pd.IntervalIndex that calculates the mean of the interval bounds."""

    def __new__(cls, data, mean_values=None):
        # mean_values is accepted here only so the constructor signature matches __init__.
        obj = pd.IntervalIndex.__new__(cls, data)
        return obj

    def __init__(self, data, mean_values=None):
        # Optional caller-supplied means that take precedence over the calculated ones.
        self.mean_values = mean_values

    @property
    def mean(self):
        """The interval means.

        Returns the supplied mean_values when present; otherwise mean_size(self) for an index
        named 'size', and the arithmetic mid-point of the bounds for any other name.
        """
        # Instances created without __init__ have no mean_values attribute, so read it
        # defensively instead of raising AttributeError.
        stored = getattr(self, 'mean_values', None)
        if stored is not None:
            return stored
        if self.name == 'size':
            # Calculate geometric mean
            return mean_size(self)
        # Calculate arithmetic mean
        return (self.right + self.left) / 2
326
-
327
-
328
- # class MeanIntervalArray(pd.arrays.IntervalArray):
329
- # def __init__(self, data, dtype=None, copy=False):
330
- # super().__init__(data, dtype, copy)
331
- # if self.name == 'size':
332
- # # Calculate geometric mean
333
- # self.mean_values = gmean([self.right, self.left], axis=0)
334
- # else:
335
- # # Calculate arithmetic mean
336
- # self.mean_values = (self.right + self.left) / 2
337
- #
338
- # @property
339
- # def mean(self):
340
- # if self.mean_values is not None:
341
- # return self.mean_values
342
- # elif self.name == 'size':
343
- # # Calculate geometric mean
344
- # return gmean([self.right, self.left], axis=0)
345
- # else:
346
- # # Calculate arithmetic mean
347
- # return (self.right + self.left) / 2
348
-
349
-
350
def parse_vars_from_expr(expr: str) -> list[str]:
    """ Parse variables from a pandas query expression string.

    Every NAME token outside back-ticks is treated as a variable, except the reserved
    words ``and``, ``or``, ``not``, ``in``, ``is``, ``True``, ``False`` and ``None``.
    Text enclosed in back-ticks is returned verbatim as a single variable.

    Args:
        expr: The expression string

    Returns:
        list[str]: The list of variables (unique, in no particular order)
    """
    reserved = {'and', 'or', 'not', 'in', 'is', 'True', 'False', 'None'}
    variables = set()
    inside_backticks = False
    opened_at = None  # (row, col) immediately after the opening back-tick
    current_var = []

    for token in tokenize.generate_tokens(StringIO(expr).readline):
        if token.string == '`':
            if inside_backticks:
                # End of backtick-enclosed variable
                if opened_at is not None and opened_at[0] == token.start[0]:
                    # Slice the raw text so digits, punctuation and spacing survive.
                    variables.add(token.line[opened_at[1]:token.start[1]])
                else:
                    # Name spans several physical lines: fall back to the collected tokens.
                    variables.add(' '.join(current_var))
                current_var = []
            else:
                opened_at = token.end
            inside_backticks = not inside_backticks
        elif inside_backticks:
            if token.type in {tokenize.NAME, STRING}:
                current_var.append(token.string)
        elif token.type == tokenize.NAME and token.string not in reserved:
            variables.add(token.string)

    return list(variables)
1
+ """
2
+ Pandas utils
3
+ """
4
+ import inspect
5
+ import logging
6
+ import tokenize
7
+ from io import StringIO
8
+ from token import STRING
9
+ from typing import List, Dict, Optional, Literal
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from elphick.geomet.utils.components import is_compositional, get_components
15
+ from elphick.geomet.utils.moisture import solve_mass_moisture, detect_moisture_column
16
+ from elphick.geomet.utils.size import mean_size
17
+
18
# Multipliers that convert a mass fraction into the named composition unit.
# Annotated as float because the ppm/ppb factors are float literals (1e6, 1e9).
composition_factors: dict[str, float] = {'%': 100, 'ppm': 1e6, 'ppb': 1e9}
19
+
20
+
21
def column_prefixes(columns: List[str]) -> Dict[str, List[str]]:
    """Group column names by the text before their first underscore.

    Only prefixes that occur on at least one underscore-containing column are
    reported, in order of first appearance among those columns.  A column with
    no underscore is still listed under a reported prefix when its whole name
    equals that prefix (e.g. ``mass`` alongside ``mass_wet``).

    Args:
        columns: The column names to inspect.

    Returns:
        A mapping of prefix to the columns (in input order) sharing that prefix.
    """
    by_head: Dict[str, List[str]] = {}
    prefixes: Dict[str, None] = {}  # dict used as an insertion-ordered set
    for col in columns:
        # Single pass: split each name once instead of once per prefix.
        head, sep, _ = col.partition('_')
        by_head.setdefault(head, []).append(col)
        if sep:
            prefixes.setdefault(head, None)
    return {prefix: by_head[prefix] for prefix in prefixes}
24
+
25
+
26
def column_prefix_counts(columns: List[str]) -> Dict[str, int]:
    """Count how many columns fall under each prefix found by ``column_prefixes``.

    Args:
        columns: The column names to inspect.

    Returns:
        A mapping of prefix to the number of columns grouped under it.
    """
    counts: Dict[str, int] = {}
    for prefix, members in column_prefixes(columns).items():
        counts[prefix] = len(members)
    return counts
28
+
29
+
30
def mass_to_composition(df: pd.DataFrame,
                        mass_wet: Optional[str] = 'mass_wet',
                        mass_dry: str = 'mass_dry',
                        moisture_column_name: Optional[str] = None,
                        component_columns: Optional[list[str]] = None,
                        composition_units: Literal['%', 'ppm', 'ppb'] = '%') -> pd.DataFrame:
    """Convert component masses to composition relative to dry mass.

    Columns that are neither mass nor selected components are not returned.

    Args:
        df: The pd.DataFrame holding the mass columns and the component masses.
        mass_wet: The wet mass column, optional.  It is only used when it names a column present in df.
        mass_dry: The dry mass column, required.  Component masses are divided by this column.
        moisture_column_name: The name given to the returned moisture column.  If None, the name is
            resolved by prepare_columns.
        component_columns: The component columns to convert.  If None, they are resolved by prepare_columns.
        composition_units: Selects the factor from composition_factors applied to the mass fraction.

    Returns:
        A pd.DataFrame of the mass column(s), the moisture (only when wet mass is available) and the composition.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    has_wet_mass: bool = bool(mass_wet) and mass_wet in df.columns
    mass: pd.DataFrame = df[[mass_wet, mass_dry]] if has_wet_mass else df[[mass_dry]]

    # Zero dry mass is swapped for NaN to avoid dividing by zero; every NaN in the
    # quotient is then reported as 0.0.
    safe_dry_mass: pd.Series = mass[mass_dry].replace(0.0, np.nan)
    fractions: pd.DataFrame = df[component_cols].div(safe_dry_mass, axis=0).fillna(0.0)
    composition: pd.DataFrame = fractions * composition_factors[composition_units]

    if not has_wet_mass:
        return pd.concat([mass, composition], axis=1)

    moisture: pd.Series = solve_mass_moisture(mass_wet=mass[mass_wet],
                                              mass_dry=mass[mass_dry]).rename(moisture_column_name)
    return pd.concat([mass, moisture, composition], axis='columns')
74
+
75
+
76
def composition_to_mass(df: pd.DataFrame,
                        mass_wet: Optional[str] = None,
                        mass_dry: str = 'mass_dry',
                        component_columns: Optional[list[str]] = None,
                        moisture_column_name: Optional[str] = None,
                        composition_units: Literal['%', 'ppm', 'ppb'] = '%',
                        return_moisture: bool = False) -> pd.DataFrame:
    """Convert composition to component mass using the dry mass.

    Columns that are neither mass nor selected components are not returned.

    Args:
        df: The pd.DataFrame holding the mass columns and the composition.
        mass_wet: The wet mass column, optional.  It is only used when it names a column present in df.
        mass_dry: The dry mass column, required.  Composition is multiplied by this column.
        component_columns: The composition columns to convert.  If None, they are resolved by prepare_columns.
        moisture_column_name: The name given to the returned moisture column.  If None, the name is
            resolved by prepare_columns.
        composition_units: Selects the factor from composition_factors that the product is divided by.
        return_moisture: If True and wet mass is available, a moisture column computed as
            wet mass minus dry mass is included.

    Returns:
        A pd.DataFrame of the mass column(s), the optional moisture mass and the component masses.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    wet_available: bool = bool(mass_wet) and mass_wet in df.columns
    mass: pd.DataFrame = df[[mass_wet, mass_dry]] if wet_available else df[[mass_dry]]

    weighted: pd.DataFrame = df[component_cols].mul(mass[mass_dry], axis=0)
    component_mass: pd.DataFrame = weighted / composition_factors[composition_units]

    frames: list = [mass]
    if wet_available and return_moisture:
        # Moisture is expressed here as a mass: the difference between wet and dry mass.
        frames.append((mass[mass_wet] - mass[mass_dry]).rename(moisture_column_name))
    frames.append(component_mass)
    return pd.concat(frames, axis=1)
120
+
121
+
122
def prepare_columns(df: pd.DataFrame, mass_wet: Optional[str], mass_dry: str, moisture_column_name: Optional[str],
                    component_columns: Optional[list[str]]) -> tuple[Optional[str], List[Optional[str]], List[str]]:
    """Resolve the moisture column name and the component columns for a mass-composition frame.

    Args:
        df: The pd.DataFrame to inspect.
        mass_wet: The wet mass column name, or None.
        mass_dry: The dry mass column name.
        moisture_column_name: The moisture column name.  If None, detect_moisture_column is asked to
            find one in df.columns; the result may still be None.
        component_columns: The component columns.  If None, get_components is applied (strict=False) to
            the columns that are not mass or moisture columns.

    Returns:
        A tuple of (moisture column name, [mass_wet, mass_dry, moisture column name], component columns).
    """
    if moisture_column_name is None:
        moisture_column_name = detect_moisture_column(df.columns)
    mass_moisture_cols = [mass_wet, mass_dry, moisture_column_name]

    if component_columns is None:
        # Compare case-insensitively on BOTH sides; previously only the column label was
        # lower-cased, so a mass column such as 'Mass_Dry' was never excluded.
        excluded = {str(name).lower() for name in mass_moisture_cols if name is not None}
        non_mass_cols: list[str] = [col for col in df.columns if str(col).lower() not in excluded]
        component_cols: list[str] = get_components(df[non_mass_cols], strict=False)
    else:
        component_cols = component_columns

    return moisture_column_name, mass_moisture_cols, component_cols
137
+
138
+
139
def weight_average(df: pd.DataFrame,
                   mass_wet: Optional[str] = None,
                   mass_dry: str = 'mass_dry',
                   moisture_column_name: Optional[str] = None,
                   component_columns: Optional[list[str]] = None,
                   composition_units: Literal['%', 'ppm', 'ppb'] = '%') -> pd.Series:
    """Weight Average a DataFrame containing mass-composition

    The composition is converted to component mass, summed over the rows, and divided by the
    summed dry mass.

    Args:
        df: The pd.DataFrame containing mass-composition.
        mass_wet: The optional wet mass column.  It is only used when it names a column present in df.
        mass_dry: The dry mass column, not optional.
        moisture_column_name: if wet mass is available, the resultant moisture is returned with this name.
            If None, the name resolved by prepare_columns is used; if that is also None the name
            returned by solve_mass_moisture is kept.
        component_columns: The composition columns to be used for the calculation.  If None, they are
            resolved by prepare_columns.
        composition_units: selects the factor from composition_factors.

    Returns:
        A pd.Series named 'weight_average' containing the total mass and weight averaged composition.
    """
    moisture_column_name, _, component_cols = prepare_columns(df, mass_wet, mass_dry,
                                                              moisture_column_name, component_columns)

    mass_sum: pd.DataFrame = df.pipe(composition_to_mass, mass_wet=mass_wet, mass_dry=mass_dry,
                                     moisture_column_name=moisture_column_name,
                                     component_columns=component_columns,
                                     composition_units=composition_units).sum(axis="index").to_frame().T

    # Mass and moisture are never averaged as components.  Both sides are lower-cased so that
    # mixed-case mass column names are excluded too.
    excluded = {str(name).lower() for name in (mass_wet, mass_dry) if name is not None} | {'h2o', 'moisture'}
    component_cols = [col for col in component_cols if str(col).lower() not in excluded]

    weighted_composition: pd.DataFrame = mass_sum[component_cols].div(mass_sum[mass_dry], axis=0) * \
        composition_factors[composition_units]

    if mass_wet and (mass_wet in df.columns):
        moisture: pd.Series = solve_mass_moisture(mass_wet=mass_sum[mass_wet], mass_dry=mass_sum[mass_dry])
        if moisture_column_name is not None:
            # Honour the documented column name, consistent with mass_to_composition.
            moisture = moisture.rename(moisture_column_name)
        return pd.concat([mass_sum[[mass_wet, mass_dry]], moisture, weighted_composition], axis=1).iloc[0].rename(
            'weight_average')
    return pd.concat([mass_sum[[mass_dry]], weighted_composition], axis=1).iloc[0].rename('weight_average')
182
+
183
+
184
def calculate_recovery(df: pd.DataFrame,
                       df_ref: pd.DataFrame,
                       mass_wet: str = 'mass_wet',
                       mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Calculate recovery of mass-composition for two DataFrames

    Both frames are converted to component mass with composition_to_mass and the
    result for df is divided by the result for df_ref.

    Args:
        df: The pd.DataFrame containing mass-composition for the stream of interest.
        df_ref: The stream that df will be divided by to calculate the recovery. Often the feed stream.
        mass_wet: The wet mass column name passed to composition_to_mass.
        mass_dry: The dry mass column name passed to composition_to_mass.

    Returns:
        A pd.DataFrame holding the ratio of the masses in df to the masses in df_ref.
    """
    stream_mass: pd.DataFrame = composition_to_mass(df, mass_wet=mass_wet, mass_dry=mass_dry)
    reference_mass: pd.DataFrame = composition_to_mass(df_ref, mass_wet=mass_wet, mass_dry=mass_dry)
    return stream_mass / reference_mass
205
+
206
+
207
def calculate_partition(df_feed: pd.DataFrame,
                        df_preferred: pd.DataFrame,
                        col_mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Calculate the partition curve from two streams

    .. math::
        K = \\frac{{m_{preferred}}}{{m_{feed}}}

    Applicable to the one dimensional case only.
    The interval mean is inserted as the first column and is named after the (lower-cased)
    index name.  When the index is named `size` the mean comes from mean_size, otherwise it
    is the interval mid-point.

    Args:
        df_feed: The pd.DataFrame containing mass-composition representing the fractionated feed.
        df_preferred: The pd.DataFrame containing mass-composition representing the fractionated preferred stream.
        col_mass_dry: The dry mass column, not optional.

    Returns:
        A pd.DataFrame containing the interval mean and the partition number `K`.

    Raises:
        ValueError: If the index of df_preferred has no name.
    """
    index_name = df_preferred.index.name
    if index_name is None:
        # Previously this surfaced as an opaque AttributeError on None.lower().
        raise ValueError('The index of df_preferred must be named (e.g. "size") to calculate the partition.')
    axis_label: str = index_name.lower()

    res: pd.DataFrame = df_preferred[[col_mass_dry]].div(df_feed[[col_mass_dry]]).rename(columns={col_mass_dry: 'K'})
    if axis_label == 'size':
        res.insert(loc=0, column='size', value=mean_size(res.index))
    else:
        res.insert(loc=0, column=axis_label, value=res.index.mid)
    return res
235
+
236
+
237
def cumulate(mass_data: pd.DataFrame, direction: str) -> pd.DataFrame:
    """Cumulate along the index

    Expected use case is only for Datasets that have been reduced to 1D.

    Args:
        mass_data: The mass data to cumulate - note composition must be represented as mass
        direction: 'ascending'|'descending'.  'ascending' accumulates from the lowest interval
            upwards, 'descending' from the highest interval downwards.

    Returns:
        The cumulative sums, with rows in the same order as mass_data.  The input is not modified.

    Raises:
        KeyError: If direction is not one of the valid arguments.
        NotImplementedError: If the index is not a single-level pd.IntervalIndex.
        ValueError: If the index is not monotonic.
    """
    valid_dirs: List[str] = ['ascending', 'descending']
    if direction not in valid_dirs:
        raise KeyError(f'Invalid direction provided. Valid arguments are: {valid_dirs}')

    # nlevels (not ndim, which is always 1 for an Index) identifies a multi-level index.
    if mass_data.index.nlevels > 1:
        raise NotImplementedError('DataFrames having indexes > 1D have not been tested.')

    index_var: str = mass_data.index.name
    if not isinstance(mass_data.index, pd.IntervalIndex):
        raise NotImplementedError(f"The {index_var} of this object is not a pd.Interval. "
                                  f" Only 1D interval objects are valid")

    interval_index = mass_data.index
    if not (interval_index.is_monotonic_increasing or interval_index.is_monotonic_decreasing):
        raise ValueError('Index is not monotonically increasing or decreasing')

    # The index is monotonic, so sorting to the requested direction is either a no-op or a
    # reversal.  The earlier lookup table always evaluated to True, so 'descending' was
    # silently cumulated in ascending order.
    want_ascending: bool = direction == 'ascending'
    in_data_ascending: bool = bool(interval_index.is_monotonic_increasing)
    if in_data_ascending == want_ascending:
        return mass_data.cumsum()

    # Reverse, cumulate, then reverse back so the result keeps the caller's row order.
    return mass_data.iloc[::-1].cumsum().iloc[::-1]
284
+
285
+
286
+ def _detect_non_float_columns(df):
287
+ _logger: logging.Logger = logging.getLogger(inspect.stack()[1].function)
288
+ non_float_cols: List = [col for col in df.columns if col not in df.select_dtypes(include=[float, int]).columns]
289
+ if len(non_float_cols) > 0:
290
+ _logger.info(f"The following columns are not float columns and will be ignored: {non_float_cols}")
291
+ return non_float_cols
292
+
293
+
294
def _detect_non_component_columns(df):
    """Return the columns of df that are neither numeric, chemistry, nor mass/moisture columns.

    A column is kept out of the result when its label is one of:
      - a column selected by ``select_dtypes(include=[float, int])``,
      - a lower-cased value returned by ``is_compositional(df.columns, strict=False)``
        (excluding 'H2O'),
      - 'mass_wet', 'mass_dry' or 'h2o'.

    Args:
        df: The pd.DataFrame to inspect.

    Returns:
        The list of remaining column labels, in column order. An info message listing
        them is logged when the list is not empty.
    """
    # The logger is named after the calling function, so the message is attributed to it.
    _logger: logging.Logger = logging.getLogger(inspect.stack()[1].function)
    # NOTE(review): is_compositional is assumed to return a mapping whose values are
    # component names - confirm against its definition.
    chemistry_vars = [col.lower() for col in is_compositional(df.columns, strict=False).values() if col not in ['H2O']]

    # Build the accepted labels once rather than once per column.
    accepted_cols = set(df.select_dtypes(include=[float, int]).columns)
    accepted_cols.update(chemistry_vars)
    accepted_cols.update(['mass_wet', 'mass_dry', 'h2o'])

    non_float_cols: List = [col for col in df.columns if col not in accepted_cols]
    if non_float_cols:
        _logger.info(f"The following columns are not float columns and will be ignored: {non_float_cols}")
    return non_float_cols
304
+
305
+
306
class MeanIntervalIndex(pd.IntervalIndex):
    """A pd.IntervalIndex that also exposes a mean value for each interval.

    The mean is either supplied by the caller via ``mean_values`` or derived from the
    interval bounds on access (see ``mean``).
    """

    def __new__(cls, data, mean_values=None):
        # mean_values is accepted here only so the signature matches __init__;
        # it is stored by __init__, not used during construction.
        return pd.IntervalIndex.__new__(cls, data)

    def __init__(self, data, mean_values=None):
        # Optional caller-supplied means; when None the means are calculated from the bounds.
        self.mean_values = mean_values

    @property
    def mean(self):
        """The interval means.

        Returns the supplied ``mean_values`` when present. Otherwise an index named
        'size' delegates to ``mean_size`` (geometric mean per the original comment),
        and any other index returns the arithmetic mean of the left and right bounds.
        """
        if self.mean_values is not None:
            return self.mean_values
        if self.name == 'size':
            return mean_size(self)
        return (self.right + self.left) / 2
326
+
327
+
328
+ # class MeanIntervalArray(pd.arrays.IntervalArray):
329
+ # def __init__(self, data, dtype=None, copy=False):
330
+ # super().__init__(data, dtype, copy)
331
+ # if self.name == 'size':
332
+ # # Calculate geometric mean
333
+ # self.mean_values = gmean([self.right, self.left], axis=0)
334
+ # else:
335
+ # # Calculate arithmetic mean
336
+ # self.mean_values = (self.right + self.left) / 2
337
+ #
338
+ # @property
339
+ # def mean(self):
340
+ # if self.mean_values is not None:
341
+ # return self.mean_values
342
+ # elif self.name == 'size':
343
+ # # Calculate geometric mean
344
+ # return gmean([self.right, self.left], axis=0)
345
+ # else:
346
+ # # Calculate arithmetic mean
347
+ # return (self.right + self.left) / 2
348
+
349
+
350
def parse_vars_from_expr(expr: str) -> list[str]:
    """ Parse variables from a pandas query expression string.

    Names enclosed in backticks are returned exactly as written between the backticks
    (spaces, digits and punctuation included). Outside backticks every NAME token that
    is not a Python keyword (and, or, not, in, is, True, ...) is returned.

    Args:
        expr: The expression string

    Returns:
        list[str]: The unique variables, in order of first appearance

    Raises:
        tokenize.TokenError: Presumably propagated from the tokenizer when expr cannot
            be tokenised (e.g. unbalanced brackets) - not handled here.
    """
    import keyword  # local import so the block does not depend on a new module-level import

    # Absolute offset of the start of each line, so that a token's (row, col) position
    # can be turned into an index into expr. StringIO is used so the lines are split
    # exactly as the tokenizer's readline splits them.
    line_offsets = [0]
    for line in StringIO(expr):
        line_offsets.append(line_offsets[-1] + len(line))

    def _offset(position) -> int:
        row, col = position
        return line_offsets[row - 1] + col

    # A dict is used as an ordered set: unique names, first-appearance order.
    variables: dict[str, None] = {}
    quoted_start = None  # offset just after the opening backtick, while inside backticks

    for token in tokenize.generate_tokens(StringIO(expr).readline):
        if token.string == '`':
            if quoted_start is None:
                quoted_start = _offset(token.end)
            else:
                # Closing backtick: take the raw text so the name is preserved verbatim,
                # rather than rebuilding it from whichever tokens it happens to contain.
                variables[expr[quoted_start:_offset(token.start)]] = None
                quoted_start = None
        elif quoted_start is not None:
            # Tokens inside backticks are part of the quoted name, handled on closing.
            continue
        elif token.type == tokenize.NAME and not keyword.iskeyword(token.string):
            variables[token.string] = None

    return list(variables)