geometallurgy-0.4.12-py3-none-any.whl → geometallurgy-0.4.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. elphick/geomet/__init__.py +11 -11
  2. elphick/geomet/base.py +1133 -1133
  3. elphick/geomet/block_model.py +319 -358
  4. elphick/geomet/config/__init__.py +1 -1
  5. elphick/geomet/config/config_read.py +39 -39
  6. elphick/geomet/config/flowsheet_example_partition.yaml +31 -31
  7. elphick/geomet/config/flowsheet_example_simple.yaml +25 -25
  8. elphick/geomet/config/mc_config.yml +35 -35
  9. elphick/geomet/data/downloader.py +39 -39
  10. elphick/geomet/data/register.csv +12 -12
  11. elphick/geomet/datasets/__init__.py +2 -2
  12. elphick/geomet/datasets/datasets.py +47 -47
  13. elphick/geomet/datasets/downloader.py +40 -40
  14. elphick/geomet/datasets/register.csv +12 -12
  15. elphick/geomet/datasets/sample_data.py +196 -196
  16. elphick/geomet/extras.py +35 -35
  17. elphick/geomet/flowsheet/__init__.py +1 -1
  18. elphick/geomet/flowsheet/flowsheet.py +1216 -1216
  19. elphick/geomet/flowsheet/loader.py +99 -99
  20. elphick/geomet/flowsheet/operation.py +256 -256
  21. elphick/geomet/flowsheet/stream.py +39 -39
  22. elphick/geomet/interval_sample.py +641 -641
  23. elphick/geomet/io.py +379 -379
  24. elphick/geomet/plot.py +147 -147
  25. elphick/geomet/sample.py +28 -28
  26. elphick/geomet/utils/amenability.py +49 -49
  27. elphick/geomet/utils/block_model_converter.py +93 -93
  28. elphick/geomet/utils/components.py +136 -136
  29. elphick/geomet/utils/data.py +49 -49
  30. elphick/geomet/utils/estimates.py +108 -108
  31. elphick/geomet/utils/interp.py +193 -193
  32. elphick/geomet/utils/interp2.py +134 -134
  33. elphick/geomet/utils/layout.py +72 -72
  34. elphick/geomet/utils/moisture.py +61 -61
  35. elphick/geomet/utils/output.html +617 -0
  36. elphick/geomet/utils/pandas.py +378 -378
  37. elphick/geomet/utils/parallel.py +29 -29
  38. elphick/geomet/utils/partition.py +63 -63
  39. elphick/geomet/utils/size.py +51 -51
  40. elphick/geomet/utils/timer.py +80 -80
  41. elphick/geomet/utils/viz.py +56 -56
  42. elphick/geomet/validate.py.hide +176 -176
  43. {geometallurgy-0.4.12.dist-info → geometallurgy-0.4.13.dist-info}/LICENSE +21 -21
  44. {geometallurgy-0.4.12.dist-info → geometallurgy-0.4.13.dist-info}/METADATA +7 -5
  45. geometallurgy-0.4.13.dist-info/RECORD +49 -0
  46. {geometallurgy-0.4.12.dist-info → geometallurgy-0.4.13.dist-info}/WHEEL +1 -1
  47. geometallurgy-0.4.12.dist-info/RECORD +0 -48
  48. {geometallurgy-0.4.12.dist-info → geometallurgy-0.4.13.dist-info}/entry_points.txt +0 -0
elphick/geomet/interval_sample.py (the 641 removed and 641 added lines are identical; the file content is shown once)
@@ -1,641 +1,641 @@
from __future__ import annotations

import functools
import uuid
from pathlib import Path
from typing import Optional, Literal, Callable, Union, Iterable, TYPE_CHECKING

import numpy as np
import pandas as pd
from pandas import IntervalIndex
from pandas.core.indexes.frozen import FrozenList

import plotly.graph_objects as go
import plotly.express as px

import elphick.geomet.flowsheet.stream
from elphick.geomet.utils.amenability import amenability_index
from elphick.geomet.utils.interp import mass_preserving_interp
from elphick.geomet.utils.interp2 import mass_preserving_interp_2d
from elphick.geomet.utils.pandas import MeanIntervalIndex, weight_average, calculate_recovery, calculate_partition, \
    cumulate, mass_to_composition

from elphick.geomet.base import MassComposition

if TYPE_CHECKING:
    from elphick.geomet.flowsheet.stream import Stream


class IntervalSample(MassComposition):
    """
    A class to represent a sample of data with an interval index.
    This exposes methods to split the sample by a partition definition.
    """

    def __init__(self,
                 data: Optional[pd.DataFrame] = None,
                 name: Optional[str] = None,
                 moisture_in_scope: bool = True,
                 mass_wet_var: Optional[str] = None,
                 mass_dry_var: Optional[str] = None,
                 moisture_var: Optional[str] = None,
                 component_vars: Optional[list[str]] = None,
                 composition_units: Literal['%', 'ppm', 'ppb'] = '%',
                 components_as_symbols: bool = True,
                 ranges: Optional[dict[str, list]] = None,
                 config_file: Optional[Path] = None):

        super().__init__(data=data, name=name, moisture_in_scope=moisture_in_scope,
                         mass_wet_var=mass_wet_var, mass_dry_var=mass_dry_var,
                         moisture_var=moisture_var, component_vars=component_vars,
                         composition_units=composition_units, components_as_symbols=components_as_symbols,
                         ranges=ranges, config_file=config_file)

        # manage the interval indexes
        if self.data is not None:
            self.data = self._create_interval_indexes(data)

    def _create_interval_indexes(self, data: pd.DataFrame) -> pd.DataFrame:
        original_indexes = data.index.names
        interval_indexes = []
        for pair in self.config['intervals']['suffixes']:
            if data.index.names != FrozenList([None]):
                suffix_candidates: dict = {n: n.split('_')[-1].lower() for n in data.index.names}
                suffixes: dict = {k: v for k, v in suffix_candidates.items() if v in pair}
                if suffixes:
                    data.reset_index(list(suffixes.keys()), inplace=True)
                    num_interval_indexes: int = int(len(suffixes.keys()) / 2)
                    for i in range(0, num_interval_indexes * 2, 2):  # iterate the suffixed keys in (left, right) pairs
                        keys = list(suffixes.keys())[i: i + 2]
                        base_name: str = '_'.join(keys[0].split('_')[:-1])
                        index = IntervalIndex.from_arrays(left=data[keys[0]], right=data[keys[1]],
                                                          closed=self.config['intervals']['closed'])
                        index.name = base_name
                        # left and right names are only preserved for a single interval index.
                        # when a multiindex is used, the names are not preserved.
                        index.left.name = keys[0].split('_')[-1]
                        index.right.name = keys[1].split('_')[-1]
                        interval_indexes.append(index)

                        # drop the index columns from the dataframe columns
                        data.drop(columns=keys, inplace=True)

        if interval_indexes:
            new_indexes = {}  # Use dict to preserve order and uniqueness
            # we need to set the index to include the new interval index, but respect the order of the original.
            for i in original_indexes:
                if i.split('_')[0] not in [ii.name for ii in interval_indexes]:
                    new_indexes[i] = data.index.get_level_values(i)
                else:
                    # Find the corresponding interval index and add it to the new_indexes dict
                    for ii in interval_indexes:
                        if ii.name == i.split('_')[0]:
                            new_indexes[ii.name] = ii
                            break

            if len(new_indexes) > 1:
                data.index = pd.MultiIndex.from_frame(pd.DataFrame(new_indexes.values()).T, names=new_indexes.keys())
            else:
                data.index = list(new_indexes.values())[0]

        return data

    def split_by_partition(self, partition_definition: Union[pd.Series, Callable], name_1: str = 'preferred',
                           name_2: str = 'complement'):
        """
        Split the sample into two samples based on the partition definition.

        .. math::
            K = \\frac{{m_{preferred}}}{{m_{feed}}}

        :param partition_definition: A function that takes the fraction mean values and returns the partition
            number (K) as a series of values in the range [0, 1]. A 1D function must have an argument that
            matches the dimension of the interval index. A 2D function must have two arguments that match
            the dimensions of the interval index.
        :param name_1: The name of the first sample.
        :param name_2: The name of the second sample.
        :return: A tuple of two IntervalSamples.
        """

        # Check that the partition definition has the correct number of arguments and that the names match
        sample_fraction_dims = [col for col in self.mass_data.index.names if
                                isinstance(self.mass_data.index.get_level_values(col), pd.IntervalIndex)]
        fraction_means: pd.DataFrame = self.mass_data.index.to_frame()[sample_fraction_dims].apply(
            lambda x: MeanIntervalIndex(x).mean, axis=0)

        # Get the function from the partial object if necessary
        if isinstance(partition_definition, Callable):
            partition_func = partition_definition.func if isinstance(partition_definition,
                                                                     functools.partial) else partition_definition
            # Check that the required argument names are present in the IntervalIndex levels
            required_args = [col for col in partition_func.__code__.co_varnames if col in sample_fraction_dims]
            pn: pd.Series = pd.Series(partition_definition(**fraction_means[required_args]), name='K',
                                      index=self.mass_data.index)
        elif isinstance(partition_definition, pd.Series):
            required_args = partition_definition.index.names
            pn: pd.Series = partition_definition
        else:
            raise TypeError(f"The partition definition must be a function or a pandas Series:"
                            f" type = {type(partition_definition)}")
        for arg, dim in zip(required_args, sample_fraction_dims):
            if arg != dim:
                raise ValueError(f"The partition definition argument name does not match the index name. "
                                 f"Expected {dim}, found {arg}")

        self.to_stream()
        self: 'Stream'

        sample_1 = self.create_congruent_object(name=name_1).to_stream()
        sample_1.mass_data = self.mass_data.copy().multiply(pn, axis=0)
        sample_1.set_nodes([self.nodes[1], uuid.uuid4()])
        sample_2 = self.create_congruent_object(name=name_2)
        sample_2.mass_data = self.mass_data.copy().multiply((1 - pn), axis=0)
        sample_2.set_nodes([self.nodes[1], uuid.uuid4()])

        return sample_1, sample_2

    def is_2d_grid(self):
        """
        Check if the sample is a 2d grid.
        :return: True if the sample has 2 levels of intervals, False otherwise.
        """
        res = False
        if self.mass_data is not None and self.mass_data.index.nlevels >= 2:
            # get the type of the index levels
            level_types = [type(level) for level in self.mass_data.index.levels]
            # get the counts of each type
            level_counts = {level_type: level_types.count(level_type) for level_type in set(level_types)}
            # check if there are 2 interval levels
            res = level_counts.get(pd.IntervalIndex, 0) == 2

        return res

    @property
    def is_rectilinear_grid(self):
        """If rectilinear we can plot with a simple heatmap"""
        res = False
        if self.mass_data is not None and self.mass_data.index.nlevels >= 2:
            # Get the midpoints of the intervals for X and Y
            x_midpoints = self.mass_data.index.get_level_values(0).mid
            y_midpoints = self.mass_data.index.get_level_values(1).mid

            # Get unique midpoints for X and Y
            unique_x_midpoints = set(x_midpoints)
            unique_y_midpoints = set(y_midpoints)

            # Check if the grid is full (i.e., no steps in the lines that define the grid edges)
            # todo: fix this logic - it is not correct
            if len(unique_x_midpoints) == len(x_midpoints) and len(unique_y_midpoints) == len(y_midpoints):
                res = True
        return res

    def ideal_incremental_separation(self, discard_from: Literal["lowest", "highest"] = "lowest") -> pd.DataFrame:
        """Incrementally separate a fractionated sample.

        This method sorts in the provided direction, then incrementally removes and discards the first of the
        remaining fractions and recalculates the mass-composition and recovery of the portion remaining.
        This is equivalent to incrementally applying a perfect separation (partition) at every interval edge.

        This method is only applicable to a 1D object where the single dimension is a pd.Interval type.

        See also: ideal_incremental_composition, ideal_incremental_recovery.

        Args:
            discard_from: Defines the discarded direction. discard_from = "lowest" will discard the lowest value
                first, then the next lowest, etc.

        Returns:
            A pandas DataFrame
        """
        self._check_one_dim_interval()

        sample: pd.DataFrame = self.data

        is_decreasing: bool = sample.index.is_monotonic_decreasing
        if discard_from == "lowest":
            sample.sort_index(ascending=True, inplace=True)
            new_index: pd.Index = pd.Index(sample.index.left)
        else:
            sample.sort_index(ascending=False, inplace=True)
            new_index: pd.Index = pd.Index(sample.index.right)
        new_index.name = f"{sample.index.name}_cut-point"

        aggregated_chunks: list = []
        recovery_chunks: list = []
        head: pd.Series = sample.pipe(weight_average)

        for i, indx in enumerate(sample.index):
            tmp_composition: pd.DataFrame = sample.iloc[i:, :].pipe(weight_average).to_frame().T
            aggregated_chunks.append(tmp_composition)
            recovery_chunks.append(tmp_composition.pipe(calculate_recovery, df_ref=head.to_frame().T))

        res_composition: pd.DataFrame = pd.concat(aggregated_chunks).assign(attribute="composition").set_index(
            new_index)
        res_recovery: pd.DataFrame = pd.concat(recovery_chunks).assign(attribute="recovery").set_index(
            new_index)

        if is_decreasing:
            res_composition.sort_index(ascending=False, inplace=True)
            res_recovery.sort_index(ascending=False, inplace=True)

        res: pd.DataFrame = pd.concat([res_composition, res_recovery]).reset_index().set_index(
            [new_index.name, 'attribute'])

        return res

    def _check_one_dim_interval(self):
        if len(self.mass_data.index.names) != 1:
            raise NotImplementedError(f"This object is {self.mass_data.index.nlevels} dimensional. "
                                      f"Only 1D interval objects are valid")
        index_var: str = self.mass_data.index.name
        if not isinstance(self.mass_data.index, pd.IntervalIndex):
            raise NotImplementedError(f"The {index_var} of this object is not a pd.Interval. "
                                      f"Only 1D interval objects are valid")

    def _check_two_dim_interval(self):
        if len(self.mass_data.index.names) != 2:
            raise NotImplementedError(f"This object is {self.mass_data.index.nlevels} dimensional. "
                                      f"Only 2D interval objects are valid")
        for indx in self.mass_data.index.levels:
            if not isinstance(indx, pd.IntervalIndex):
                raise NotImplementedError(f"The {indx.name} of this object is not a pd.Interval. "
                                          f"Only 2D interval objects are valid")

    def ideal_incremental_composition(self, discard_from: Literal["lowest", "highest"] = "lowest") -> pd.DataFrame:
        """Incrementally separate a fractionated sample.

        This method sorts in the provided direction, then incrementally removes and discards the first of the
        remaining fractions and recalculates the mass-composition of the portion remaining.
        This is equivalent to incrementally applying a perfect separation (partition) at every interval edge.

        This method is only applicable to a 1D object where the single dimension is a pd.Interval type.

        See also: ideal_incremental_separation, ideal_incremental_recovery.

        Args:
            discard_from: Defines the discarded direction. discard_from = "lowest" will discard the lowest value
                first, then the next lowest, etc.

        Returns:
            A pandas DataFrame
        """
        df: pd.DataFrame = self.ideal_incremental_separation(discard_from=discard_from).query(
            'attribute=="composition"').droplevel('attribute')
        return df

    def ideal_incremental_recovery(self, discard_from: Literal["lowest", "highest"] = "lowest",
                                   apply_closure: bool = True) -> pd.DataFrame:
        """Incrementally separate a fractionated sample.

        This method sorts in the provided direction, then incrementally removes and discards the first of the
        remaining fractions and recalculates the recovery of the portion remaining.
        This is equivalent to incrementally applying a perfect separation (partition) at every interval edge.

        This method is only applicable to a 1D object where the single dimension is a pd.Interval type.

        See also: ideal_incremental_separation, ideal_incremental_composition.

        Args:
            discard_from: Defines the discarded direction. discard_from = "lowest" will discard the lowest value
                first, then the next lowest, etc.
            apply_closure: If True, add the missing record (zero recovery) that closes the recovery envelope.

        Returns:
            A pandas DataFrame
        """
        columns_to_drop: list[str] = ['mass_wet', 'H2O'] if self.moisture_in_scope else []
        df: pd.DataFrame = self.ideal_incremental_separation(discard_from=discard_from).query(
            'attribute=="recovery"').droplevel('attribute').rename(columns={'mass_dry': 'mass'}).drop(
            columns=columns_to_drop)
        if apply_closure:
            # add zero recovery record to close the envelope.
            indx = np.inf if df.index.min() == 0.0 else 0.0
            indx_name: str = df.index.name
            df = pd.concat([df, pd.Series(0, index=df.columns, name=indx).to_frame().T]).sort_index(ascending=True)
            df.index.name = indx_name
        return df

    def plot_heatmap(self, components: list[str], **kwargs):
        """
        Plot the sample as a heatmap.
        :param components: The list of components to plot.
        :param kwargs: Additional keyword arguments to pass to the plot method.
        :return: The plotly figure with the heatmap.
        """
        # if not self.is_rectilinear_grid:
        #     raise ValueError('The sample is not a rectilinear grid.')

        # convert IntervalIndex to nominal values df.index = df.index.map(lambda x: x.mid)

        x_label = self.mass_data.index.names[0]
        y_label = self.mass_data.index.names[1]
        z_label = self.mass_data.columns[0]

        # create a pivot table for the heatmap
        pivot_df = self.mass_data[components].copy().unstack()

        # Get the midpoints of the intervals for X and Y
        x_midpoints = [interval.mid for interval in self.mass_data.index.get_level_values(x_label)]
        y_midpoints = [interval.mid for interval in self.mass_data.index.get_level_values(y_label)]

        # Get interval edges for x and y axes
        x_edges = self._get_unique_edges(self.mass_data.index.get_level_values(x_label))
        y_edges = self._get_unique_edges(self.mass_data.index.get_level_values(y_label))

        # Create hover text
        hover_text = [[f"{x_label}: {x_mid}, {y_label}: {y_mid}, {z_label}: {z_val}"
                       for x_mid, z_val in zip(x_midpoints, z_values)]
                      for y_mid, z_values in zip(y_midpoints, pivot_df.values)]

        # plot the heatmap
        fig = go.Figure(data=go.Heatmap(
            z=pivot_df.values,
            x=x_edges,
            y=y_edges,
            text=hover_text,
            hoverinfo='text'))

        # update the layout to use a logarithmic axis for the size dimension
        if x_label == 'size':
            fig.update_layout(xaxis_type="log")
        elif y_label == 'size':
            fig.update_layout(yaxis_type="log")

        # set the title and x and y labels dynamically
        fig.update_layout(title=f'{self.name} Heatmap',
                          xaxis_title=self.mass_data.index.names[0],
                          yaxis_title=self.mass_data.index.names[1])

        return fig

    def plot_intervals(self,
                       variables: list[str],
                       cumulative: bool = True,
                       direction: str = 'descending',
                       show_edges: bool = True,
                       min_x: Optional[float] = None) -> go.Figure:
        """Plot the grade-tonnage curve.

        Mass and grade by bins for a cut-off variable.

        Args:
            variables: List of variables to include in the plot
            cumulative: If True, the results are cumulative weight averaged.
            direction: 'ascending'|'descending', if cumulative is True, the direction of accumulation
            show_edges: If True, show the edges on the plot. Applicable to cumulative plots only.
            min_x: Optional minimum value for the x-axis, useful to set a reasonable visual range with a log
                scaled x-axis when plotting size data
        """

        res: pd.DataFrame = self.data[variables]

        plot_kwargs: dict = dict(line_shape='vh')
        if cumulative:
            res = self.mass_data.pipe(cumulate, direction=direction).pipe(mass_to_composition)
            plot_kwargs = dict(line_shape='spline')

        interval_data: pd.DataFrame = res

        # Get the first IntervalIndex - TODO: specify or check...
        interval_index: Optional[pd.IntervalIndex] = None
        for level in range(interval_data.index.nlevels):
            if isinstance(interval_data.index.get_level_values(level), pd.IntervalIndex):
                interval_index = interval_data.index.get_level_values(level)
                break
        if interval_index is None:
            raise ValueError("No IntervalIndex found in the index levels")
        left_name: str = interval_index.left.name if interval_index.left.name else 'left'
        right_name: str = interval_index.right.name if interval_index.right.name else 'right'
        left: pd.Series = pd.Series(interval_index.left, name=left_name, index=interval_index)
        right: pd.Series = pd.Series(interval_index.right, name=right_name, index=interval_index)
        df_intervals = pd.concat([left, right, interval_data], axis='columns')
        x_var: str = interval_data.index.name
        if not cumulative:
            # append on the largest fraction right edge for display purposes
            is_ascending: bool = interval_index.is_monotonic_increasing
            df_end: pd.DataFrame = df_intervals.loc[df_intervals.index.max(), :].to_frame().T
            df_end[left_name] = df_end[right_name]
            df_end[right_name] = np.inf
            df = pd.concat([df_end.reset_index(drop=True), df_intervals], axis='index')
            df[interval_data.index.name] = df[left_name]
            df = df.sort_values(by=interval_data.index.name, ascending=is_ascending)
        else:
            if direction == 'ascending':
                x_var = right_name
            elif direction == 'descending':
                x_var = left_name
            df = df_intervals

        if res.index.name.lower() == 'size':
            if not min_x:
                min_x = interval_data.index.min().right / 2.0
            # set zero to the minimum x value (for display only) to enable the tooltips on that point.
            df.loc[df[x_var] == df[x_var].min(), x_var] = min_x
            hover_data = {'component': True,  # add other column, default formatting
                          x_var: ':.3f',  # add other column, customized formatting
                          'value': ':.2f'
                          }
            plot_kwargs = {**plot_kwargs,
                           **dict(log_x=True,
                                  range_x=[min_x, interval_data.index.max().right],
                                  hover_data=hover_data)}

        df = df[[x_var] + variables].melt(id_vars=[x_var], var_name='component')

        if cumulative and show_edges:
            plot_kwargs['markers'] = True

        fig = px.line(df, x=x_var, y='value', facet_row='component', **plot_kwargs)
        fig.for_each_annotation(lambda a: a.update(text=a.text.replace("component=", "")))
        fig.update_yaxes(matches=None)
        fig.update_layout(title=self.name)

        return fig

    @staticmethod
    def _get_unique_edges(interval_index):
        # Get the left and right edges of the intervals
        left_edges = interval_index.left.tolist()
        right_edges = interval_index.right.tolist()

        # Concatenate the two lists
        all_edges = left_edges + right_edges

        # Get the unique edges
        unique_edges = np.unique(all_edges)

        return unique_edges

    def plot_grade_recovery(self, target_analyte,
                            discard_from: Literal["lowest", "highest"] = "lowest",
                            title: Optional[str] = None,
                            ) -> go.Figure:
        """The grade-recovery plot.

        The grade-recovery curve is generated by assuming an ideal separation (for the chosen property, or dimension)
        at each fractional interval. It defines the theoretical maximum performance, which can only be improved if
        liberation is improved by comminution.

        This method is only applicable to a 1D object where the single dimension is a pd.Interval type.

        Args:
            target_analyte: The analyte of value.
            discard_from: Defines the discarded direction. discard_from = "lowest" will discard the lowest value
                first, then the next lowest, etc.
            title: Optional plot title

        Returns:
            A plotly.graph_objects figure
        """
        title = title if title is not None else 'Ideal Grade - Recovery'
        cols_to_drop: list[str] = ['mass_wet', 'mass_dry', 'H2O'] if self.moisture_in_scope else ['mass_dry']

        df: pd.DataFrame = self.ideal_incremental_separation(discard_from=discard_from)
        df_recovery: pd.DataFrame = df.loc[(slice(None), 'recovery'), [target_analyte, 'mass_dry']].droplevel(
            'attribute').rename(
            columns={'mass_dry': 'Yield', target_analyte: f"{target_analyte}_recovery"})
        df_composition: pd.DataFrame = df.loc[(slice(None), 'composition'), :].droplevel('attribute').drop(
            columns=cols_to_drop)

        df_plot: pd.DataFrame = pd.concat([df_recovery, df_composition], axis=1).reset_index()
        fig = px.line(df_plot, x=target_analyte,
                      y=f"{target_analyte}_recovery",
                      hover_data=df_plot.columns,
                      title=title)
        # fig.update_layout(xaxis_title=f"Grade of {target_analyte}", yaxis_title=f"Recovery of {target_analyte}",
        #                   title=title)

        return fig

    def plot_amenability(self, target_analyte: str,
                         discard_from: Literal["lowest", "highest"] = "lowest",
                         gangue_analytes: Optional[list[str]] = None,
                         title: Optional[str] = None,
                         ) -> go.Figure:
        """The yield-recovery plot.

        The yield-recovery curve provides an understanding of the amenability of a sample.

        This method is only applicable to a 1D object where the single dimension is a pd.Interval type.

        Args:
            target_analyte: The analyte of value.
            discard_from: Defines the discarded direction. discard_from = "lowest" will discard the lowest value
                first, then the next lowest, etc.
            gangue_analytes: The analytes to be rejected
            title: Optional plot title

        Returns:
            A plotly.graph_objects figure
        """
        title = title if title is not None else 'Amenability Plot'
        df: pd.DataFrame = self.ideal_incremental_recovery(discard_from=discard_from)
        amenability_indices: pd.Series = amenability_index(df, col_target=target_analyte, col_mass_recovery='mass')

        analytes = [col for col in df.columns if col != "mass"] if gangue_analytes is None else [
            target_analyte] + list(gangue_analytes)

        mass_rec: pd.Series = df["mass"]
        df = df[analytes]

        fig = go.Figure()
        for analyte in analytes:
            fig.add_trace(
                go.Scatter(x=mass_rec, y=df[analyte], mode="lines",
                           name=f"{analyte} ({round(amenability_indices[analyte], 2)})",
                           customdata=df.index.values,
                           hovertemplate='<b>Recovery: %{y:.3f}</b><br>Cut-point: %{customdata:.3f} '))
        fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode="lines", name='y=x',
                                 line=dict(shape='linear', color='gray', dash='dash'),
                                 ))
        fig.update_layout(xaxis_title='Yield (Mass Recovery)', yaxis_title='Recovery', title=title,
                          hovermode='x')
        return fig

    def calculate_partition(self, preferred: 'MassComposition') -> pd.DataFrame:
        """Calculate the partition number (K) [0, 1] of the preferred stream relative to self.

        .. math::
            K = \\frac{{m_{preferred}}}{{m_{feed}}}

        """
        self._check_one_dim_interval()
        return calculate_partition(df_feed=self.data, df_preferred=preferred.data,
                                   col_mass_dry='mass_dry')

    def resample_1d(self, interval_edges: Union[Iterable, int],
                    precision: Optional[int] = None,
                    include_original_edges: bool = False) -> 'IntervalSample':
        """Resample a 1D fractional dim/index.

        Args:
            interval_edges: The values of the new grid (interval edges). If an int, will up-sample by that factor;
                for example, a value of 10 will automatically define edges that create 10 x the resolution
                (up-sampled).
            precision: Optional integer for the number of decimal places to round the grid values to.
            include_original_edges: If True, include the original edges in the grid.

        Returns:
            A new IntervalSample object interpolated onto the new grid
        """

        # TODO: add support for supplementary variables

        # check that the index contains a single interval index
        self._check_one_dim_interval()

        df_upsampled: pd.DataFrame = mass_preserving_interp(self.mass_data,
                                                            interval_edges=interval_edges, precision=precision,
                                                            include_original_edges=include_original_edges,
                                                            mass_wet=None, mass_dry=self.mass_dry_var,
                                                            interval_data_as_mass=True)

        obj: IntervalSample = IntervalSample(df_upsampled, name=self.name, moisture_in_scope=False,
                                             mass_dry_var=self.mass_dry_var)
        obj.status.ranges = self.status.ranges
        return obj

    def resample_2d(self, interval_edges: dict[str, Iterable],
                    precision: Optional[int] = None) -> 'IntervalSample':
        """Resample a 2D fractional dim/index.

        Args:
            interval_edges: A dict keyed by index name containing the grid the data is resampled to.
            precision: Optional integer for the number of decimal places to round the grid values to.

        Returns:
            A new IntervalSample object interpolated onto the new grid
        """

        # TODO: add support for supplementary variables

        # check that the index contains two interval indexes
        self._check_two_dim_interval()

        df_upsampled_specific_mass: pd.DataFrame = mass_preserving_interp_2d(self._specific_mass(),
                                                                             interval_edges=interval_edges,
                                                                             precision=precision,
                                                                             mass_dry=self.mass_dry_var)

        # convert from specific mass to mass
        df_upsampled = df_upsampled_specific_mass.mul(self.mass_data[self.mass_dry_var].sum(), axis=0)
        df_upsampled[self.composition_columns] = df_upsampled[self.composition_columns].div(
            df_upsampled[self.mass_dry_var], axis=0).mul(self.composition_factor, axis=0)

        obj: IntervalSample = IntervalSample(df_upsampled, name=self.name, moisture_in_scope=False,
                                             mass_dry_var=self.mass_dry_var)
        if hasattr(obj, 'nodes'):
            obj.nodes = self.nodes
        obj.status.ranges = self.status.ranges
        return obj

    def _specific_mass(self) -> Optional[pd.DataFrame]:
        """Calculate the specific mass of the sample.

        Specific mass is the mass of the sample fractions divided by the mass of all fractions.
        The sum of the specific mass (for mass_dry) is 1.0 by definition.
        """
        res = None
        if self.data is not None:
            res = self.mass_data.div(self.mass_data[self.mass_dry_var].sum(), axis=0)
            if self.moisture_in_scope:
                res.drop(columns=[self.mass_wet_var], inplace=True)
        return res
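
For orientation, a few hedged usage sketches follow. They are minimal examples, not from the package documentation: the column names (mass_dry, Fe), the size grid, and the partition function are illustrative assumptions; the only requirements taken from the source above are a 1D pd.IntervalIndex and, for a callable partition, an argument name that matches the index name.

import numpy as np
import pandas as pd
from functools import partial

from elphick.geomet.interval_sample import IntervalSample

# a small 1D sample: dry mass and Fe grade by size fraction (values are illustrative)
df = pd.DataFrame({'mass_dry': [10.0, 20.0, 30.0, 40.0], 'Fe': [62.0, 60.0, 58.0, 55.0]},
                  index=pd.IntervalIndex.from_breaks([0.0, 0.5, 1.0, 2.0, 4.0],
                                                     closed='left', name='size'))
sample = IntervalSample(df, name='feed', moisture_in_scope=False, mass_dry_var='mass_dry')


def partition_coarse(size: np.ndarray, d50: float, ep: float) -> np.ndarray:
    # partition number K in [0, 1]: the fraction of each interval reporting to the
    # preferred stream; the first argument name must match the interval index name.
    # This Whiten-style efficiency curve is an assumed form, not the package's own.
    return 1.0 / (1.0 + np.exp(1.099 * (d50 - size) / ep))


coarse, fine = sample.split_by_partition(partial(partition_coarse, d50=1.0, ep=0.2),
                                         name_1='coarse', name_2='fine')

Any callable returning values in [0, 1], or an equivalently indexed pd.Series, plays the same role.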
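The ideal-separation methods operate on the same kind of 1D interval sample; a sketch under the same assumptions (the analyte name Fe is illustrative):

# mass and analyte recovery by cut-point, discarding the lowest fractions first
rec = sample.ideal_incremental_recovery(discard_from='lowest')

# the amenability (yield-recovery) plot for the value analyte
fig = sample.plot_amenability(target_analyte='Fe')
fig.show()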
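The grade-tonnage style view in plot_intervals can be sketched the same way; the variables and min_x below are assumptions suited to a log-scaled size axis:

# cumulative weight-averaged curves of dry mass and Fe across the size fractions
fig = sample.plot_intervals(variables=['mass_dry', 'Fe'], cumulative=True,
                            direction='descending', min_x=0.1)
fig.show()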
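Finally, a resampling sketch: per the resample_1d docstring, an integer up-samples by that factor, while an iterable supplies explicit interval edges (the edges below are illustrative):

# up-sample the size grid 10x, retaining the original edges
finer = sample.resample_1d(interval_edges=10, precision=3, include_original_edges=True)

# or interpolate onto an explicit grid of interval edges
custom = sample.resample_1d(interval_edges=[0.0, 0.25, 0.5, 1.0, 2.0, 4.0], precision=3)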