geometallurgy 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elphick/geomet/__init__.py +11 -11
- elphick/geomet/base.py +1133 -1133
- elphick/geomet/block_model.py +319 -358
- elphick/geomet/config/__init__.py +1 -1
- elphick/geomet/config/config_read.py +39 -39
- elphick/geomet/config/flowsheet_example_partition.yaml +31 -31
- elphick/geomet/config/flowsheet_example_simple.yaml +25 -25
- elphick/geomet/config/mc_config.yml +35 -35
- elphick/geomet/data/downloader.py +39 -39
- elphick/geomet/data/register.csv +12 -12
- elphick/geomet/datasets/__init__.py +2 -2
- elphick/geomet/datasets/datasets.py +47 -47
- elphick/geomet/datasets/downloader.py +40 -40
- elphick/geomet/datasets/register.csv +12 -12
- elphick/geomet/datasets/sample_data.py +196 -196
- elphick/geomet/extras.py +35 -35
- elphick/geomet/flowsheet/__init__.py +1 -1
- elphick/geomet/flowsheet/flowsheet.py +1216 -1193
- elphick/geomet/flowsheet/loader.py +99 -99
- elphick/geomet/flowsheet/operation.py +256 -256
- elphick/geomet/flowsheet/stream.py +39 -38
- elphick/geomet/interval_sample.py +641 -641
- elphick/geomet/io.py +379 -379
- elphick/geomet/plot.py +147 -147
- elphick/geomet/sample.py +28 -28
- elphick/geomet/utils/amenability.py +49 -49
- elphick/geomet/utils/block_model_converter.py +93 -93
- elphick/geomet/utils/components.py +136 -136
- elphick/geomet/utils/data.py +49 -49
- elphick/geomet/utils/estimates.py +108 -108
- elphick/geomet/utils/interp.py +193 -193
- elphick/geomet/utils/interp2.py +134 -134
- elphick/geomet/utils/layout.py +72 -72
- elphick/geomet/utils/moisture.py +61 -61
- elphick/geomet/utils/output.html +617 -0
- elphick/geomet/utils/pandas.py +378 -378
- elphick/geomet/utils/parallel.py +29 -29
- elphick/geomet/utils/partition.py +63 -63
- elphick/geomet/utils/size.py +51 -51
- elphick/geomet/utils/timer.py +80 -80
- elphick/geomet/utils/viz.py +56 -56
- elphick/geomet/validate.py.hide +176 -176
- {geometallurgy-0.4.11.dist-info → geometallurgy-0.4.13.dist-info}/LICENSE +21 -21
- {geometallurgy-0.4.11.dist-info → geometallurgy-0.4.13.dist-info}/METADATA +7 -5
- geometallurgy-0.4.13.dist-info/RECORD +49 -0
- {geometallurgy-0.4.11.dist-info → geometallurgy-0.4.13.dist-info}/WHEEL +1 -1
- elphick/geomet/utils/sampling.py +0 -5
- geometallurgy-0.4.11.dist-info/RECORD +0 -49
- {geometallurgy-0.4.11.dist-info → geometallurgy-0.4.13.dist-info}/entry_points.txt +0 -0
|
@@ -1,99 +1,99 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from typing import Dict, Optional, List, Union, Iterable, Tuple
|
|
3
|
-
|
|
4
|
-
import numpy as np
|
|
5
|
-
import pandas as pd
|
|
6
|
-
from joblib import delayed
|
|
7
|
-
from tqdm import tqdm
|
|
8
|
-
|
|
9
|
-
from elphick.geomet import Sample
|
|
10
|
-
from elphick.geomet.flowsheet.stream import Stream
|
|
11
|
-
# from elphick.geomet.utils.interp import _upsample_grid_by_factor
|
|
12
|
-
from elphick.geomet.utils.parallel import TqdmParallel
|
|
13
|
-
from elphick.geomet.utils.pandas import column_prefix_counts, column_prefixes
|
|
14
|
-
|
|
15
|
-
logger = logging.getLogger(__name__)
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def create_stream(stream_data: Tuple[Union[int, str], pd.DataFrame],
|
|
19
|
-
interval_edges: Optional[Union[Iterable, int]] = None) -> list[Stream]:
|
|
20
|
-
stream, data = stream_data
|
|
21
|
-
res = None
|
|
22
|
-
try:
|
|
23
|
-
if interval_edges is not None:
|
|
24
|
-
res = Stream(data=data, name=stream).resample_1d(interval_edges=interval_edges)
|
|
25
|
-
else:
|
|
26
|
-
res = Stream(data=data, name=stream)
|
|
27
|
-
except Exception as e:
|
|
28
|
-
logger.error(f"Error creating Sample object for {stream}: {e}")
|
|
29
|
-
|
|
30
|
-
return res
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def streams_from_dataframe(df: pd.DataFrame,
|
|
34
|
-
mc_name_col: Optional[str] = None,
|
|
35
|
-
interval_edges: Optional[Union[Iterable, int]] = None,
|
|
36
|
-
n_jobs=1) -> List[Sample]:
|
|
37
|
-
"""Objects from a DataFrame
|
|
38
|
-
|
|
39
|
-
Args:
|
|
40
|
-
df: The DataFrame
|
|
41
|
-
mc_name_col: The column specified contains the names of objects to create.
|
|
42
|
-
If None the DataFrame is assumed to be wide and the mc objects will be extracted from column prefixes.
|
|
43
|
-
interval_edges: The values of the new grid (interval edges). If an int, will up-sample by that factor, for
|
|
44
|
-
example the value of 10 will automatically define edges that create 10 x the resolution (up-sampled).
|
|
45
|
-
Applicable only to 1d interval indexes.
|
|
46
|
-
n_jobs: The number of parallel jobs to run. If -1, will use all available cores.
|
|
47
|
-
|
|
48
|
-
Returns:
|
|
49
|
-
List of Stream objects
|
|
50
|
-
"""
|
|
51
|
-
stream_data: Dict[str, pd.DataFrame] = {}
|
|
52
|
-
index_names: List[str] = []
|
|
53
|
-
if mc_name_col:
|
|
54
|
-
logger.debug("Creating Stream objects by name column.")
|
|
55
|
-
if mc_name_col in df.index.names:
|
|
56
|
-
index_names = df.index.names
|
|
57
|
-
df.reset_index(mc_name_col, inplace=True)
|
|
58
|
-
if mc_name_col not in df.columns:
|
|
59
|
-
raise KeyError(f'{mc_name_col} is not in the columns or indexes.')
|
|
60
|
-
names = df[mc_name_col].unique()
|
|
61
|
-
for obj_name in tqdm(names, desc='Preparing Stream data'):
|
|
62
|
-
stream_data[obj_name] = df.query(f'{mc_name_col} == @obj_name')[
|
|
63
|
-
[col for col in df.columns if col != mc_name_col]]
|
|
64
|
-
if index_names: # reinstate the index on the original dataframe
|
|
65
|
-
df.reset_index(inplace=True)
|
|
66
|
-
df.set_index(index_names, inplace=True)
|
|
67
|
-
else:
|
|
68
|
-
logger.debug("Creating Stream objects by column prefixes.")
|
|
69
|
-
# wide case - find prefixes where there are at least 3 columns
|
|
70
|
-
prefix_counts = column_prefix_counts(df.columns)
|
|
71
|
-
prefix_cols = column_prefixes(df.columns)
|
|
72
|
-
for prefix, n in tqdm(prefix_counts.items(), desc='Preparing Stream data by column prefixes'):
|
|
73
|
-
if n >= 3: # we need at least 3 columns to create a Stream object
|
|
74
|
-
logger.info(f"Creating object for {prefix}")
|
|
75
|
-
cols = prefix_cols[prefix]
|
|
76
|
-
stream_data[prefix] = df[[col for col in df.columns if col in cols]].rename(
|
|
77
|
-
columns={col: col.replace(f'{prefix}_', '') for col in df.columns})
|
|
78
|
-
|
|
79
|
-
if interval_edges is not None:
|
|
80
|
-
logger.debug("Resampling Stream objects to new interval edges.")
|
|
81
|
-
# unify the edges - this will also interp missing grades
|
|
82
|
-
if not isinstance(df.index, pd.IntervalIndex):
|
|
83
|
-
raise NotImplementedError(f"The index `{df.index}` of the dataframe is not a pd.Interval. "
|
|
84
|
-
f" Only 1D interval indexes are valid")
|
|
85
|
-
if isinstance(interval_edges, int):
|
|
86
|
-
raise NotImplementedError("Needs work on interp to convert from xr to pd")
|
|
87
|
-
all_edges = []
|
|
88
|
-
for strm_data in stream_data.values():
|
|
89
|
-
all_edges.extend(list(np.sort(np.unique(list(strm_data.index.left) + list(strm_data.index.right)))))
|
|
90
|
-
all_edges = list(set(all_edges))
|
|
91
|
-
all_edges.sort()
|
|
92
|
-
indx = pd.IntervalIndex.from_arrays(left=all_edges[0:-1], right=all_edges[1:])
|
|
93
|
-
interval_edges = _upsample_grid_by_factor(indx=indx, factor=interval_edges)
|
|
94
|
-
|
|
95
|
-
with TqdmParallel(desc="Creating Stream objects", n_jobs=n_jobs,
|
|
96
|
-
prefer=None, total=len(stream_data)) as p:
|
|
97
|
-
res = p(delayed(create_stream)(stream_data, interval_edges) for stream_data in stream_data.items())
|
|
98
|
-
|
|
99
|
-
return res
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, Optional, List, Union, Iterable, Tuple
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from joblib import delayed
|
|
7
|
+
from tqdm import tqdm
|
|
8
|
+
|
|
9
|
+
from elphick.geomet import Sample
|
|
10
|
+
from elphick.geomet.flowsheet.stream import Stream
|
|
11
|
+
# from elphick.geomet.utils.interp import _upsample_grid_by_factor
|
|
12
|
+
from elphick.geomet.utils.parallel import TqdmParallel
|
|
13
|
+
from elphick.geomet.utils.pandas import column_prefix_counts, column_prefixes
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def create_stream(stream_data: Tuple[Union[int, str], pd.DataFrame],
                  interval_edges: Optional[Union[Iterable, int]] = None) -> Optional[Stream]:
    """Create a single Stream from a (name, data) pair.

    Args:
        stream_data: Tuple of the stream name and the DataFrame holding its data.
        interval_edges: Optional new grid (interval edges) to resample to.  Applicable
            only when the data has a 1D interval index.  If None, no resampling occurs.

    Returns:
        The created Stream, or None if construction failed (the error is logged,
        not raised).
    """
    stream, data = stream_data
    res = None
    try:
        if interval_edges is not None:
            res = Stream(data=data, name=stream).resample_1d(interval_edges=interval_edges)
        else:
            res = Stream(data=data, name=stream)
    except Exception as e:
        # Deliberate best-effort: log and return None rather than abort the batch.
        # Message fixed to say "Stream" - this function creates Stream, not Sample, objects.
        logger.error(f"Error creating Stream object for {stream}: {e}")

    return res
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def streams_from_dataframe(df: pd.DataFrame,
                           mc_name_col: Optional[str] = None,
                           interval_edges: Optional[Union[Iterable, int]] = None,
                           n_jobs: int = 1) -> List[Stream]:
    """Create Stream objects from a DataFrame.

    Args:
        df: The DataFrame.
        mc_name_col: The column specified contains the names of objects to create.
            If None the DataFrame is assumed to be wide and the mc objects will be
            extracted from column prefixes.
        interval_edges: The values of the new grid (interval edges). If an int, will
            up-sample by that factor, for example the value of 10 will automatically
            define edges that create 10 x the resolution (up-sampled).  Applicable only
            to 1d interval indexes.  NOTE: the int form is not yet implemented.
        n_jobs: The number of parallel jobs to run. If -1, will use all available cores.

    Raises:
        KeyError: If ``mc_name_col`` is not found in the columns or indexes of ``df``.
        NotImplementedError: If resampling is requested on a non-interval index, or if
            ``interval_edges`` is an int (factor up-sampling is not yet supported).

    Returns:
        List of Stream objects.  Entries may be None for streams that failed to build
        (see ``create_stream``).
    """
    stream_data: Dict[str, pd.DataFrame] = {}
    index_names: List[str] = []
    if mc_name_col:
        logger.debug("Creating Stream objects by name column.")
        if mc_name_col in df.index.names:
            # Temporarily move the name level out of the index so it can be queried.
            index_names = df.index.names
            df.reset_index(mc_name_col, inplace=True)
        if mc_name_col not in df.columns:
            raise KeyError(f'{mc_name_col} is not in the columns or indexes.')
        names = df[mc_name_col].unique()
        for obj_name in tqdm(names, desc='Preparing Stream data'):
            stream_data[obj_name] = df.query(f'{mc_name_col} == @obj_name')[
                [col for col in df.columns if col != mc_name_col]]
        if index_names:  # reinstate the index on the original dataframe
            df.reset_index(inplace=True)
            df.set_index(index_names, inplace=True)
    else:
        logger.debug("Creating Stream objects by column prefixes.")
        # wide case - find prefixes where there are at least 3 columns
        prefix_counts = column_prefix_counts(df.columns)
        prefix_cols = column_prefixes(df.columns)
        for prefix, n in tqdm(prefix_counts.items(), desc='Preparing Stream data by column prefixes'):
            if n >= 3:  # we need at least 3 columns to create a Stream object
                logger.info(f"Creating object for {prefix}")
                cols = prefix_cols[prefix]
                # rename ignores mapping keys absent from the subset, so mapping over
                # all df.columns is safe here.
                stream_data[prefix] = df[[col for col in df.columns if col in cols]].rename(
                    columns={col: col.replace(f'{prefix}_', '') for col in df.columns})

    if interval_edges is not None:
        logger.debug("Resampling Stream objects to new interval edges.")
        # unify the edges - this will also interp missing grades
        if not isinstance(df.index, pd.IntervalIndex):
            raise NotImplementedError(f"The index `{df.index}` of the dataframe is not a pd.Interval. "
                                      f" Only 1D interval indexes are valid")
        if isinstance(interval_edges, int):
            # Factor-based up-sampling depended on _upsample_grid_by_factor, whose
            # import is commented out at the top of this module.  The unreachable
            # edge-unification code that previously followed this raise has been
            # removed to avoid a latent NameError should the raise ever be dropped.
            raise NotImplementedError("Needs work on interp to convert from xr to pd")

    # Loop variable renamed (was `stream_data`, shadowing the dict) for clarity.
    with TqdmParallel(desc="Creating Stream objects", n_jobs=n_jobs,
                      prefer=None, total=len(stream_data)) as p:
        res = p(delayed(create_stream)(item, interval_edges) for item in stream_data.items())

    return res
|