aspect-stable 0.5.0.tar.gz → 0.7.dev1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/PKG-INFO +1 -1
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/pyproject.toml +17 -7
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/aspect.toml +4 -4
- aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v10_model.joblib +0 -0
- aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v10_model.toml +27 -0
- aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.joblib +0 -0
- aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.toml +27 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/plots.py +32 -4
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/tools.py +29 -11
- aspect_stable-0.7.dev1/src/aspect/trainer.py +214 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/workflow.py +21 -12
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/PKG-INFO +1 -1
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/SOURCES.txt +4 -1
- aspect_stable-0.5.0/MANIFEST.in +0 -6
- aspect_stable-0.5.0/src/aspect/trainer.py +0 -104
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/README.rst +0 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/setup.cfg +0 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/__init__.py +0 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/changelog.txt +0 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/io.py +0 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/dependency_links.txt +0 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/requires.txt +0 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/top_level.txt +0 -0
- {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/tests/test_tools.py +0 -0
{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/pyproject.toml
RENAMED

@@ -1,6 +1,6 @@
 [project]
 name = "aspect-stable"
-version = "0.5.0"
+version = "0.7.dev1"
 readme = "README.rst"
 requires-python = ">=3.11"
 license = "GPL-3.0-or-later"

@@ -19,6 +19,22 @@ classifiers = ["Programming Language :: Python :: 3",
 requires = ["setuptools>=61.0.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.setuptools.package-data]
+"aspect" = ["aspect.toml",
+            "changelog.txt",
+            "models/*.toml",
+            "models/*.joblib"]
+
+[tool.pytest.ini_options]
+pythonpath = ["src"]
+mpl-baseline-path = 'tests/baseline'
+mpl-results-path = 'tests/outputs'
+mpl-results-always = false
+addopts = "-p no:asdf_schema_tester"
+
 [project.optional-dependencies]
 docs = ["sphinx-rtd-theme~=3.0",
         "ipympl~=0.9",

@@ -28,9 +44,3 @@ tests = ["pytest~=8.4",
         "pytest-cov~=7.0",
         "pytest-mpl~=0.17"]
 
-[tool.pytest.ini_options]
-pythonpath = ["src"]
-mpl-baseline-path = 'tests/baseline'
-mpl-results-path = 'tests/outputs'
-mpl-results-always = false
-addopts = "-p no:asdf_schema_tester"
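The new [tool.setuptools.packages.find] and [tool.setuptools.package-data] tables replace the deleted MANIFEST.in and are what ship the trained model files inside the wheel. A minimal sketch (standard library plus joblib only; aspect's actual loader is not part of this diff) of how such bundled files can be located at runtime:

# Sketch: locating data files shipped via [tool.setuptools.package-data].
# Uses only importlib.resources and joblib; aspect's own loading code may differ.
from importlib.resources import files

import joblib

model_dir = files("aspect") / "models"
model_path = model_dir / "aspect_min-max-log_12_pixels_v12_randomforest_model.joblib"

with model_path.open("rb") as handle:
    classifier = joblib.load(handle)  # the pickled RandomForestClassifier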
{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/aspect.toml
RENAMED

@@ -1,6 +1,6 @@
 [metadata]
 name = 'aspect-stable'
-version = '0.5.0'
+version = '0.7.dev1'
 category_order = ['undefined', 'white-noise', 'continuum', 'emission', 'cosmic-ray', 'broad', 'doublet-em', 'peak',
                   'absorption', 'dead-pixel', 'doublet-abs', 'trough']
 

@@ -24,7 +24,7 @@ white-noise = '#C41E3A' # Red
 continuum = '#F48CBA' # Pink
 emission = '#00FF98' # Spring Green
 cosmic-ray= '#FFF468' # Yellow
-broad = '#0070DD'
+broad = '#0070DD' # Blue
 doublet-em = '#3FC7EB' # Light blue
 peak = '#C69B6D' # Tan
 absorption = '#FF7C0A' # Orange

@@ -68,10 +68,10 @@ time_labels = ['Current detection', 'Past detection']
 time = [[2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], #undefined
         [0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], #white-noise
         [0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1], #continuum
-        [0, 0, 0, 2, 0, 0,
+        [0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0], #emission
         [0, 0, 0, 1, 2, 1, 1, 1, 1, 0, 1, 1], #cosmic-ray
         [0, 0, 0, 1, 0, 2, 1, 0, 1, 0, 0, 0], #broad
-        [0, 0, 0,
+        [0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0], #doublet_em
         [0, 0, 0, 1, 0, 1, 1, 2, 1, 0, 0, 1], #peak
         [0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0], #absorption
         [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 0, 1], #dead-pixel
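aspect.toml pairs every category in category_order with a colour and a row in each decision matrix (workflow.py loads them via cfg['decision_matrices']). A short sketch of reading the file with the standard-library tomllib (Python >= 3.11, matching requires-python), assuming only the table names visible in this diff:

# Sketch: reading aspect.toml with the standard library (Python >= 3.11).
import tomllib
from importlib.resources import files

with (files("aspect") / "aspect.toml").open("rb") as handle:
    cfg = tomllib.load(handle)

# Map each category to its plotting colour, e.g. 'broad' -> '#0070DD' (Blue).
color_map = {cat: cfg["colors"][cat] for cat in cfg["metadata"]["category_order"]}

# The decision matrices are nested lists with one row per category.
time_dm = cfg["decision_matrices"]["time"]
assert len(time_dm) == len(cfg["metadata"]["category_order"])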
aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v10_model.joblib
ADDED

Binary file

aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v10_model.toml
ADDED

@@ -0,0 +1,27 @@
+[resuts]
+f1 = 0.9640995326458678
+precision = 0.9657992561722317
+Recall = 0.9644108394108395
+confusion_matrix = [ [ "np.float64(0.12337337337337337)", "np.float64(4.369448813893258e-05)", "np.float64(0.00013306957751402197)", "np.float64(0.00013306957751402197)", "np.float64(0.00036345869679203015)", "np.float64(0.0003614725836948059)", "np.float64(0.00019662519662519662)", "np.float64(0.00039523650634761746)",], [ "np.float64(3.9722261944484165e-06)", "np.float64(0.12423931868376313)", "np.float64(0.00025422247644469866)", "np.float64(0.00017676406565295455)", "np.float64(0.0)", "np.float64(0.00025422247644469866)", "np.float64(7.15000715000715e-05)", "np.float64(0.0)",], [ "np.float64(0.00029394473838918284)", "np.float64(0.004957338290671624)", "np.float64(0.10877742822187267)", "np.float64(0.010516468849802183)", "np.float64(0.00045481989926434373)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(5.1638940527829415e-05)", "np.float64(0.0010248343581676915)", "np.float64(0.0006057644946533835)", "np.float64(0.1233177622066511)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.000562070006514451)", "np.float64(0.0)", "np.float64(6.156950601395046e-05)", "np.float64(0.0)", "np.float64(0.12437437437437437)", "np.float64(1.9861130972242082e-06)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.0003594864705975817)", "np.float64(0.0009414176080842747)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.11313098813098812)", "np.float64(0.010228482450704674)", "np.float64(0.0003396253396253396)",], [ "np.float64(3.9722261944484165e-06)", "np.float64(4.965282743060521e-05)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0018828352161685494)", "np.float64(0.1230635397302064)", "np.float64(0.0)",], [ "np.float64(0.0006276117387228499)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.000238333571666905)", "np.float64(0.0)", "np.float64(0.12413405468961025)",],]
+fit_time = "np.float64(0.009)"
+
+[properties]
+box_size = 12
+sample_size = 600000
+test_sample_size_fraction = 0.1
+categories = [ "white-noise", "continuum", "cosmic-ray", "emission", "doublet-em", "dead-pixel", "absorption", "doublet-abs",]
+scale = "min-max-log"
+
+[properties.estimator]
+module = "sklearn.ensemble"
+class = "RandomForestClassifier"
+
+[properties.estimator_params]
+random_state = 42
+n_estimators = 60
+max_depth = 8
+max_features = "sqrt"
+verbose = 0
+n_jobs = 10
+min_samples_split = 2000
+min_samples_leaf = 2000
aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.joblib
ADDED

Binary file

aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.toml
ADDED

@@ -0,0 +1,27 @@
+[resuts]
+f1 = 0.9606625147234599
+precision = 0.9621003125623686
+Recall = 0.9607928952544678
+confusion_matrix = [ [ "np.float64(0.1212956695176403)", "np.float64(0.0027323889380246754)", "np.float64(7.246842270830965e-05)", "np.float64(1.9764115284084453e-05)", "np.float64(0.0004891618532810902)", "np.float64(9.717356681341523e-05)", "np.float64(2.3058134498098526e-05)", "np.float64(0.00027010957554915417)",], [ "np.float64(0.003073319926675132)", "np.float64(0.11908538262503685)", "np.float64(0.0006620978620168292)", "np.float64(0.000436457545856865)", "np.float64(0.0002898736908332386)", "np.float64(0.0007971526497914062)", "np.float64(0.00030799079651031605)", "np.float64(0.0003475190270784849)",], [ "np.float64(0.0)", "np.float64(0.0037535348943690387)", "np.float64(0.11137902467385093)", "np.float64(0.009842529411474057)", "np.float64(2.4705144105105566e-05)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.0)", "np.float64(0.0004677507283899987)", "np.float64(0.0005616302759893999)", "np.float64(0.12397041311941973)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(2.964617292612668e-05)", "np.float64(0.0007823295633283429)", "np.float64(1.8117105677077413e-05)", "np.float64(0.0)", "np.float64(0.12415817221461853)", "np.float64(1.1529067249049263e-05)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.0)", "np.float64(0.0018446507598478823)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.11339167041361352)", "np.float64(0.009694298546843423)", "np.float64(6.917440349429558e-05)",], [ "np.float64(0.0)", "np.float64(2.6352153712112603e-05)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0017590062602835163)", "np.float64(0.12321608271941051)", "np.float64(0.0)",], [ "np.float64(9.882057642042226e-06)", "np.float64(0.0006390397275187306)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(5.435131703123224e-05)", "np.float64(0.0)", "np.float64(0.12429652102160713)",],]
+fit_time = "np.float64(3.37)"
+
+[properties]
+box_size = 12
+sample_size = 700000
+test_sample_size_fraction = 0.1
+categories = [ "white-noise", "continuum", "cosmic-ray", "emission", "doublet-em", "dead-pixel", "absorption", "doublet-abs",]
+scale = "min-max-log"
+
+[properties.estimator]
+module = "sklearn.ensemble"
+class = "RandomForestClassifier"
+
+[properties.estimator_params]
+random_state = 42
+n_estimators = 60
+max_depth = 8
+max_features = "sqrt"
+verbose = 0
+n_jobs = 10
+min_samples_split = 2000
+min_samples_leaf = 2000
{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/plots.py
RENAMED

@@ -45,7 +45,8 @@ def decision_matrix_plot(matrix_arr, output_address=None, categories=None, exclu
     axes_labels = None if matrix_name is None else cfg['decision_matrices'][f'{matrix_name}_labels']
 
     # Start the figure
-
+    theme.set_style('dark')
+    with rc_context(theme.fig_defaults(cfg_fig)):
 
         # Define colors for values
         cmap = colors.ListedColormap(['white', decision_colors[0], decision_colors[1]])

@@ -110,7 +111,7 @@ def decision_matrix_plot(matrix_arr, output_address=None, categories=None, exclu
     return
 
 def scatter_plot(fig, ax, x_arr, y_arr, labels_arr, feature_list, color_dict, alpha=0.5, idx_target=None,
-                 detection_range=None, ratio_color=None):
+                 detection_range=None, ratio_color=None, sn_limits=None):
 
     # Input user diagnostic coloring
     if ratio_color is not None:

@@ -138,6 +139,9 @@ def scatter_plot(fig, ax, x_arr, y_arr, labels_arr, feature_list, color_dict, al
     if detection_range is not None:
         ax.plot(detection_range, detection_function(detection_range))
 
+    if sn_limits is not None:
+        ax.set_ylim(sn_limits)
+
     return
 
 def parse_fig_cfg(fig_cfg=None, ax_diag=None, ax_line=None, dtype=None):

@@ -242,9 +246,29 @@ def ax_wording(ax, ax_cfg=None, legend_cfg=None, yscale=None):
     return
 
 
-def plot_comps_detect(
+def plot_comps_detect(x_arr, y_arr, b_pixels, idx, counts, model, out_type, seg_pred, old_pred):
+
+    x_sect = x_arr[idx:idx + b_pixels]
+    y_norm = y_arr[idx, -b_pixels:, 0]
+
+    min_max_arr = np.power(10, y_arr[idx, 0, :] * 4)
+    std_arr = np.std(y_arr[idx, :, :] * min_max_arr, axis=0)
+    msg_scale = f'min_max = {min_max_arr.mean():.1f}±{min_max_arr.std():.1f}, std = {std_arr.mean():.1f}±{std_arr.std():.1f}'
 
-
+    # x_arr[idx:idx + self.medium.b_pixels],
+    # y_arr[idx, -self.medium.b_pixels:, 0],
+    # idx, counts, self.medium,
+    # new_pred[0],
+    # pred_arr[idx:idx + self.medium.b_pixels],
+    # self.seg_pred[:]
+
+    # print(f'Idx "{idx}"; counts: {counts}; Output: {model.number_feature_dict[out_type]} ({out_type})')
+    msg = f'Idx "{idx}"; counts:'
+    for i, value in enumerate(counts):
+        if value > 0:
+            msg += f'{' ,' if msg[-1] != ':' else ' '} {model.number_feature_dict[i]} {value}'
+    msg += f' -> Output: {model.number_feature_dict[out_type]} ({out_type})'
+    print(msg)
 
     colors_old = [cfg['colors'][model.number_feature_dict[val]] for val in old_pred]
     colors_new = [cfg['colors'][model.number_feature_dict[val]] for val in seg_pred]

@@ -255,6 +279,7 @@ def plot_comps_detect(x_sect, y_norm, idx, counts, model, out_type, seg_pred, ol
     ax.scatter(x_sect, np.zeros(x_sect.size), color=colors_old, label='Old prediction')
     ax.scatter(x_sect, np.ones(x_sect.size), color=colors_new, label='New prediction')
     ax.set_xlabel(r'Wavelength $(\AA)$')
+    ax.set_title(msg_scale)
 
     ax_secondary = ax.twinx() # Creates a twin y-axis on the right
     ax_secondary.set_ylim(ax.get_ylim()) # Match the primary y-axis limits

@@ -266,6 +291,7 @@ def plot_comps_detect(x_sect, y_norm, idx, counts, model, out_type, seg_pred, ol
 
     return
 
+
 def plot_steps_backUP(spec, y_norm, idx, counts, model_mgr, out_type, seg_pred, old_pred):
 
     print(idx)

@@ -294,6 +320,7 @@ def plot_steps_backUP(spec, y_norm, idx, counts, model_mgr, out_type, seg_pred,
 
     return
 
+
 def plot_comps_detect_new(spec, theme, idx, y_norm, counts, model_mgr, out_type, old_pred, seg_pred, **kwargs):
 
     # Clear previous figure

@@ -351,6 +378,7 @@ def plot_comps_detect_new(spec, theme, idx, y_norm, counts, model_mgr, out_type,
 
     return
 
+
 class CheckSample:
 
     def __init__(self, in_data_arr, in_pred_arr, idx_features, fig_cfg=None, ax_diag=None, ax_line=None, base=10000,
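plot_comps_detect now draws the old and new predictions on one axis and mirrors its limits onto a twin axis; a standalone sketch of that secondary-axis pattern with stand-in data (the wavelength values and labels below are illustrative, not aspect's):

# Sketch: the twin-axis pattern used in plot_comps_detect, in isolation.
import numpy as np
from matplotlib import pyplot as plt

fig, ax = plt.subplots()
x = np.linspace(4000, 7000, 12)           # stand-in wavelength section
ax.scatter(x, np.zeros(x.size), label='Old prediction')
ax.scatter(x, np.ones(x.size), label='New prediction')
ax.set_xlabel(r'Wavelength $(\AA)$')
ax.legend()

ax_secondary = ax.twinx()                 # twin y-axis on the right
ax_secondary.set_ylim(ax.get_ylim())      # keep both axes aligned
plt.show()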
{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/tools.py
RENAMED

@@ -2,6 +2,7 @@ import logging
 import numpy as np
 from .io import Aspect_Error
 from lime.fitting.lines import gaussian_model
+from matplotlib import pyplot as plt
 
 # Log variable
 _logger = logging.getLogger('aspect')

@@ -34,7 +35,7 @@ def scale_min_max_orig(data, axis=None):
 
 def scale_min_max(data, box_size, axis=None, scale_parameter='min-max'):
 
-    # Norm the scale features
+    # Norm the scale features # TODO this gives error if the error is 0 and the data is 0
     data_min_array = data[:, -box_size:].min(axis=axis, keepdims=True)
     data_max_array = data[:, -box_size:].max(axis=axis, keepdims=True)
     data[:, -box_size:] = (data[:, -box_size:] - data_min_array) / (data_max_array - data_min_array)

@@ -46,15 +47,6 @@ def scale_min_max(data, box_size, axis=None, scale_parameter='min-max'):
     if scale_parameter == 'min-max-log':
         data[:, -box_size - 1] = (np.log10(data_max_array - data_min_array)/4)[:,0]
 
-    # # Norm the scale features
-    # data_min_array = data[:, -box_size:].min(axis=axis, keepdims=True)
-    # data_max_array = data[:, -box_size:].max(axis=axis, keepdims=True)
-    # data[:, -box_size:] = (data[:, -box_size:] - data_min_array) / (data_max_array - data_min_array)
-    #
-    # # Save the scaling parameters
-    # data[:, -box_size - 1] = ((data_max_array - data_min_array)/10000)[:,0]
-    # data[:, -box_size - 1] = ((data_max_array - data_min_array)/10000)[:,0]
-
     return
 
 def scale_log(data, log_base, axis=None):

@@ -111,7 +103,33 @@ def broad_component_function(intensity_ratio):
     return np.sqrt(1 + np.log(intensity_ratio)/np.log(2))
 
 
-def doublet_model(wave_arr, noise_arr, cont_arr, amp, mu_line, sigma,
+def doublet_model(wave_arr, noise_arr, cont_arr, amp, mu_line, sigma, doublet_em_sep_max,
+                  doublet_int_min, doublet_int_max, lower_limit, upper_limit, sign=1):
+
+    # Generate intensities
+    int_diff = np.random.uniform(doublet_int_min, doublet_int_max)
+    amp1, amp2 = amp, amp * int_diff
+
+    # Clip for intensity limits
+    amp2 = np.clip(np.abs(amp2), lower_limit, upper_limit)
+
+    r = max(amp1, amp2)/min(amp1, amp2)
+    # sep_min = 1.2 + 0.15*(r - 1)
+    # sep_min = 1.3 + 0.15*(r - 1)
+    sep_min = 1.5 + 0.15*(r - 1)
+    sep = np.random.uniform(sep_min, doublet_em_sep_max)
+
+    # Generate the profiles
+    mu1 = mu_line - sep
+    mu2 = mu_line + sep
+    sigma1, sigma2 = sigma, sigma * 1
+    gauss1 = gaussian_model(wave_arr, sign*amp1, mu1, sigma1)
+    gauss2 = gaussian_model(wave_arr, sign*amp2, mu2, sigma2)
+    flux_arr = gauss1 + gauss2 + noise_arr + cont_arr
+
+    return flux_arr
+
+def doublet_model_orig(wave_arr, noise_arr, cont_arr, amp, mu_line, sigma, doublet_em_sep_min, doublet_em_sep_max,
                   doublet_int_min, doublet_int_max, lower_limit, upper_limit):
 
     # Compute the doublet
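For the 'min-max-log' scale, scale_min_max stores log10(max - min)/4 in the column just before the pixel features, and plot_comps_detect inverts it with np.power(10, value * 4). A standalone round-trip sketch on toy data (not the package API), including the zero-range hazard flagged by the new TODO:

# Sketch: the 'min-max-log' round trip implied by scale_min_max (tools.py)
# and its inverse in plot_comps_detect (plots.py).
import numpy as np

box_size = 12
flux = np.random.uniform(10.0, 500.0, size=(1, box_size))
row = np.concatenate([np.zeros((1, 1)), flux], axis=1)  # one scale slot + pixels

# Forward: normalise pixels to [0, 1]; store log10(range)/4 in the scale slot.
fmin = row[:, -box_size:].min(axis=1, keepdims=True)
fmax = row[:, -box_size:].max(axis=1, keepdims=True)
row[:, -box_size:] = (row[:, -box_size:] - fmin) / (fmax - fmin)  # divides by zero if fmax == fmin (the TODO in the diff)
row[:, -box_size - 1] = (np.log10(fmax - fmin) / 4)[:, 0]

# Inverse, as used for the diagnostic title in plot_comps_detect:
recovered_range = np.power(10, row[:, -box_size - 1] * 4)
assert np.allclose(recovered_range, (fmax - fmin)[:, 0])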
aspect_stable-0.7.dev1/src/aspect/trainer.py
ADDED

@@ -0,0 +1,214 @@
+import importlib
+import numpy as np
+import joblib
+import toml
+from matplotlib import pyplot as plt
+from sklearn.model_selection import cross_val_score, cross_val_predict
+from sklearn.metrics import confusion_matrix
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
+from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, median_absolute_error
+from time import time
+from pathlib import Path
+from .io import cfg as aspect_cfg
+
+
+def get_training_test_sets(x_arr, y_arr, test_fraction, n_pixel_features=None, n_scale_features=None, random_state=None, classification=True):
+
+    # Split into training and testing:
+
+
+    if classification:
+
+        print(f'\nSplitting sample with categories:')
+        print(np.unique(y_arr))
+        sss = StratifiedShuffleSplit(n_splits=1, train_size=int(y_arr.size * (1 - test_fraction)),
+                                     test_size=int(y_arr.size * test_fraction), random_state=random_state)
+
+        # Equal splits
+        for train_index, test_index in sss.split(x_arr, y_arr):
+            X_train, X_test = x_arr[train_index, :], x_arr[test_index, :]
+            y_train, y_test = y_arr[train_index], y_arr[test_index]
+
+        # Convert strings to integers
+        y_train = np.vectorize(aspect_cfg['shape_number'].get)(y_train)
+        y_test = np.vectorize(aspect_cfg['shape_number'].get)(y_test)
+
+    else:
+        X_train, X_test, y_train, y_test = train_test_split(x_arr, y_arr, test_size=test_fraction,
+                                                            random_state=random_state, shuffle=True)
+        y_train, y_test = np.log10(y_train), np.log10(y_test)
+
+    # Crop the database if requested
+    if n_pixel_features and n_scale_features:
+        X_train, X_test = X_train[:, -n_pixel_features - n_scale_features:], X_test[:, -n_pixel_features - n_scale_features:]
+
+    return X_train, y_train, X_test, y_test
+
+
+def components_trainer(model_label, x_arr, y_arr, fit_cfg, list_labels, output_folder=None, test_fraction=0.1,
+                       random_state=None, classification=True):
+
+    # Preparing the estimator:
+    print(f'\nLoading estimator: {fit_cfg["estimator"]["class"]}')
+    estimator = getattr(importlib.import_module(fit_cfg['estimator']["module"]), fit_cfg['estimator']["class"])
+    estimator_params = fit_cfg.get('estimator_params', {})
+
+    # Split into training and testing:
+    data_train, y_train, data_test, y_test = get_training_test_sets(x_arr, y_arr, test_fraction,
+                                                                    random_state=random_state, classification=classification)
+
+    # Select just the features
+    feature_slice = -fit_cfg['box_size'] - 1
+    X_train, X_test = data_train[:, feature_slice:], data_test[:, feature_slice:]
+
+    # Run the training
+    if classification:
+        print(f'\nClassification: {y_train.size/len(fit_cfg["categories"]):.0f} * {len(fit_cfg["categories"])} = {y_train.size} points ({model_label})')
+        print(f'- Settings: {fit_cfg["estimator_params"]}\n')
+        print(f'- Data set size: {X_train.shape}\n')
+    else:
+        print(f'Regression range: [{y_train.min():.3f}, {y_train.max():.3f}]')
+        print(f'- Settings: {fit_cfg["estimator_params"]}')
+        print(f'- Data set size: {X_train.shape}\n')
+
+    start_time = time()
+    ml_function = estimator(**estimator_params)
+    ml_function.fit(X_train, y_train)
+    end_time = np.round((time()-start_time)/60, 2)
+    print(f'- completed ({end_time} minutes)')
+
+    # Save the trained model and configuration
+    output_folder = Path(output_folder)/'results'
+    output_folder.mkdir(parents=True, exist_ok=True)
+
+    model_address = output_folder/f'{model_label}.joblib'
+    joblib.dump(ml_function, model_address)
+
+    if classification:
+
+        # Run initial diagnostics
+        print(f'\nReloading model from: {model_address}')
+        start_time = time()
+        ml_function = joblib.load(model_address)
+        fit_time = np.round((time()-start_time), 3)
+        print(f'- completed ({fit_time} seconds)')
+
+        print(f'\nRuning prediction on test set ({y_test.size} points)')
+        start_time = time()
+        y_pred = ml_function.predict(X_test)
+        print(f'- completed ({(time()-start_time):0.1f} seconds)')
+
+        # Testing confussion matrix
+        print(f'\nConfusion matrix in test set ({y_test.size} points)')
+        start_time = time()
+        conf_matrix_test = confusion_matrix(y_test, y_pred, normalize="all")
+        print(f'- completed ({(time()-start_time):0.1f} seconds)')
+
+        # Precision, recall and f1:
+        print(f'\nF1, Precision and recall diagnostics ({y_test.size} points)')
+        start_time = time()
+        pres = precision_score(y_test, y_pred, average='macro')
+        recall = recall_score(y_test, y_pred, average='macro')
+        f1 = f1_score(y_test, y_pred, average='macro')
+        print(f'- completed ({(time()-start_time):0.1f} seconds)')
+
+        print(f'\nModel outputs')
+        print(f'- F1: \n {f1}')
+        print(f'- Precision: \n {pres}')
+        print(f'- Recall: \n {recall}')
+        print(f'- Testing confusion matrix: \n {conf_matrix_test}')
+        print(f'- Fitting time (seconds): \n {float(fit_time)}')
+
+        # Save results into a TOML file
+        toml_path = output_folder/f'{model_label}.toml'
+        output_dict = {'resuts': {'f1':f1, 'precision':pres, 'Recall':recall, 'confusion_matrix':conf_matrix_test,
+                                  'fit_time': end_time}, 'properties': fit_cfg,}
+        with open(toml_path, 'w') as f:
+            toml.dump(output_dict, f)
+
+    else:
+
+        # Reload model
+        print(f'\nReloading model from: {model_address}')
+        start_time = time()
+        ml_function = joblib.load(model_address)
+        fit_time = np.round((time() - start_time), 3)
+        print(f'- completed ({fit_time} seconds)')
+
+        # Prediction
+        print(f'\nRunning prediction on test set ({y_test.size} points)')
+        start_time = time()
+        y_pred = ml_function.predict(X_test)
+        pred_time = np.round((time() - start_time), 3)
+        print(f'- completed ({pred_time} seconds)')
+
+        # Core regression metrics
+        print(f'\nRegression diagnostics ({y_test.size} points)')
+        start_time = time()
+
+        mse = mean_squared_error(y_test, y_pred)
+        rmse = np.sqrt(mse)
+        mae = mean_absolute_error(y_test, y_pred)
+        medae = median_absolute_error(y_test, y_pred)
+        r2 = r2_score(y_test, y_pred)
+
+        # Normalized errors (scale-independent)
+        y_range = y_test.max() - y_test.min()
+        nrmse = rmse / y_range if y_range > 0 else np.nan
+        nmae = mae / y_range if y_range > 0 else np.nan
+
+        print(f'- completed ({(time() - start_time):0.1f} seconds)')
+
+        # Outputs
+        print(f'\nModel outputs')
+        print(f'- R²: \n {r2}')
+        print(f'- RMSE: \n {rmse}')
+        print(f'- MAE: \n {mae}')
+        print(f'- Median AE: \n {medae}')
+        print(f'- Normalized RMSE: \n {nrmse}')
+        print(f'- Normalized MAE: \n {nmae}')
+        print(f'- Fit time (seconds): \n {float(fit_time)}')
+
+        # Save results to TOML
+        toml_path = output_folder / f'{model_label}.toml'
+        output_dict = {
+            'results': {
+                'r2': float(r2),
+                'rmse': float(rmse),
+                'mae': float(mae),
+                'median_ae': float(medae),
+                'nrmse': float(nrmse),
+                'nmae': float(nmae),
+                'fit_time': float(end_time),
+                'prediction_time': float(pred_time),
+            },
+            'properties': fit_cfg,
+        }
+
+        # Scatter plot
+        fig, ax = plt.subplots()
+
+        idcs_limit = 5000
+        ycoords, xcoords = data_test[:, 0], data_test[:, 1]
+        error = y_test - y_pred # signed error
+        abs_error = np.abs(error)
+        rel_error = error / y_test
+        limit = np.percentile(rel_error, 95)
+
+        # Set the color limits
+
+        sc = ax.scatter(xcoords[:idcs_limit], ycoords[:idcs_limit], c=rel_error[:idcs_limit], s=8, cmap='viridis')
+        sc.set_clim(-limit, limit)
+
+        cbar = fig.colorbar(sc, ax=ax, label='|Prediction error|')
+        ax.set_yscale('log')
+        plt.tight_layout()
+        plt.show()
+
+        with open(toml_path, 'w') as f:
+            toml.dump(output_dict, f)
+
+
+    return
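A hedged usage sketch for the new components_trainer, with a fit_cfg mirroring the [properties] tables of the shipped model cards. The x_arr/y_arr arrays below are hypothetical stand-ins for the enboxed training sample, which is built outside this diff, and the call assumes aspect's shape_number config covers these category names:

# Sketch: invoking components_trainer; synthetic data for illustration only.
import numpy as np
from aspect.trainer import components_trainer

fit_cfg = {
    'box_size': 12,
    'scale': 'min-max-log',
    'categories': ['white-noise', 'continuum', 'cosmic-ray', 'emission',
                   'doublet-em', 'dead-pixel', 'absorption', 'doublet-abs'],
    'estimator': {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier'},
    'estimator_params': {'random_state': 42, 'n_estimators': 60, 'max_depth': 8,
                         'max_features': 'sqrt', 'n_jobs': 10,
                         'min_samples_split': 2000, 'min_samples_leaf': 2000},
}

# Hypothetical arrays: one scale feature plus 12 pixel features per row,
# labels drawn from the category list above.
x_arr = np.random.rand(1000, 13)
y_arr = np.random.choice(fit_cfg['categories'], size=1000)

components_trainer('demo_model', x_arr, y_arr, fit_cfg, list_labels=None,
                   output_folder='.', test_fraction=0.1, random_state=42)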
{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/workflow.py
RENAMED

@@ -5,9 +5,11 @@ from aspect.plots import plot_comps_detect
 # from matplotlib import pyplot as plt
 from pathlib import Path
 
+
 CHOICE_DM = np.array(cfg['decision_matrices']['choice'])
 TIME_DM = np.array(cfg['decision_matrices']['time'])
 
+
 def flux_to_image(flux_array, approximation, model_2D):
 
     if model_2D is not None:

@@ -35,7 +37,10 @@ def flux_to_image(flux_array, approximation, model_2D):
 def unpack_spec_flux(spectrum, rest_wl_lim):
 
     # Extract the mask if masked array
-    pixel_mask = ~spectrum.flux.mask
+    pixel_mask = (~spectrum.flux.mask) & (spectrum.flux.data != 0)
+
+    if spectrum.err_flux is not None:
+        pixel_mask = pixel_mask & (spectrum.err_flux.data != 0)
 
     # Limit to region if requested # TODO warning negative entries
     if rest_wl_lim is not None:

@@ -59,6 +64,7 @@ def enbox_spectrum(input_flux, box_size, range_box, n_scale_features):
     n_rows = input_flux.size - box_size
 
     # Container for the data
+    # box_containter = np.zeros((n_rows, n_columns))
     box_containter = np.empty((n_rows, n_columns))
 
     # Assign values

@@ -111,8 +117,6 @@ def detection_revision(seg_pred, box_size, new_type, new_confidence):
     return idcs_pred, new_pred, new_conf
 
 
-
-
 class DetectionModel:
 
     def __init__(self, model_address=None, n_jobs=None, verbose=0):

@@ -139,7 +143,7 @@ class DetectionModel:
 
 class ModelManager:
 
-    def __init__(self, model_address=None,):
+    def __init__(self, model_address=None, n_jobs=4):
 
         # Global parameters
         self.n_mc = 100

@@ -148,10 +152,10 @@ class ModelManager:
         self.n_scale_features = 1
 
         # Default values
-        model_address = DEFAULT_MODEL_ADDRESS if model_address is None else model_address
+        self.model_address = DEFAULT_MODEL_ADDRESS if model_address is None else Path(model_address)
 
         # Load the model
-        self.medium = DetectionModel(model_address)
+        self.medium = DetectionModel(self.model_address, n_jobs)
         self.large = None
 
         # Largest reference model parameters

@@ -212,8 +216,8 @@ class ModelManager:
                               out_confidence)
 
        # Only pass if more than half
-        # half_check = idcs_pred[
-        half_check = idcs_pred[
+        # half_check = idcs_pred[5:].sum() > 6
+        half_check = np.all(idcs_pred[3:9])
        if half_check:
            idcs_pred = np.flatnonzero(idcs_pred)
            self.seg_pred[idcs_pred] = new_pred[idcs_pred]

@@ -223,13 +227,20 @@ class ModelManager:
        self.seg_conf[:] = conf_arr[idx:idx + self.medium.b_pixels]
 
        if plot_steps:
-            plot_comps_detect(x_arr
-                              y_arr[idx, -self.medium.b_pixels:, 0],
+            plot_comps_detect(x_arr, y_arr, self.medium.b_pixels,
                               idx, counts, self.medium,
                               new_pred[0],
                               pred_arr[idx:idx + self.medium.b_pixels],
                               self.seg_pred[:])
 
+            # plot_comps_detect(x_arr[idx:idx + self.medium.b_pixels],
+            #                   y_arr[idx, -self.medium.b_pixels:, 0],
+            #                   idx, counts, self.medium,
+            #                   new_pred[0],
+            #                   pred_arr[idx:idx + self.medium.b_pixels],
+            #                   self.seg_pred[:])
+
+
        # Assign new categories and confidence
        pred_arr[idx:idx + self.medium.b_pixels] = self.seg_pred[:]
        conf_arr[idx:idx + self.medium.b_pixels] = self.seg_conf[:]

@@ -241,8 +252,6 @@ class ModelManager:
 model_mgr = ModelManager()
 
 
-
-
 class ComponentsDetector:
 
     def __init__(self, spectrum, model_address=None):
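The tightened mask in unpack_spec_flux now drops zero-flux pixels and, when an uncertainty array exists, zero-error pixels as well. A toy-array sketch of that logic (the arrays below are invented for illustration):

# Sketch: the tightened pixel mask from unpack_spec_flux, on toy arrays.
import numpy as np

flux = np.ma.masked_array(data=[1.2, 0.0, 3.4, 5.6],
                          mask=[False, False, True, False])
err_flux = np.ma.masked_array(data=[0.1, 0.2, 0.3, 0.0])

pixel_mask = (~flux.mask) & (flux.data != 0)   # drop masked and zero-flux pixels
pixel_mask &= (err_flux.data != 0)             # drop zero-uncertainty pixels
print(pixel_mask)  # [ True False False False]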
{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/SOURCES.txt
RENAMED

@@ -1,4 +1,3 @@
-MANIFEST.in
 README.rst
 pyproject.toml
 src/aspect/__init__.py

@@ -9,6 +8,10 @@ src/aspect/plots.py
 src/aspect/tools.py
 src/aspect/trainer.py
 src/aspect/workflow.py
+src/aspect/models/aspect_min-max-log_12_pixels_v10_model.joblib
+src/aspect/models/aspect_min-max-log_12_pixels_v10_model.toml
+src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.joblib
+src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.toml
 src/aspect_stable.egg-info/PKG-INFO
 src/aspect_stable.egg-info/SOURCES.txt
 src/aspect_stable.egg-info/dependency_links.txt
aspect_stable-0.5.0/MANIFEST.in
DELETED
aspect_stable-0.5.0/src/aspect/trainer.py
DELETED

@@ -1,104 +0,0 @@
-import importlib
-import numpy as np
-import joblib
-import toml
-from sklearn.model_selection import cross_val_score, cross_val_predict
-from sklearn.metrics import confusion_matrix
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import StratifiedShuffleSplit
-from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
-from time import time
-from pathlib import Path
-from .io import cfg as aspect_cfg
-
-
-def get_training_test_sets(x_arr, y_arr, test_fraction, n_pixel_features, n_scale_features, random_state=None):
-
-    # Split into training and testing:
-    print(f'\nSplitting sample with categories:')
-    print(np.unique(y_arr))
-    sss = StratifiedShuffleSplit(n_splits=1, train_size=int(y_arr.size * (1 - test_fraction)),
-                                 test_size=int(y_arr.size * test_fraction), random_state=random_state)
-
-    for train_index, test_index in sss.split(x_arr, y_arr):
-        X_train, X_test = x_arr[train_index, -n_pixel_features-n_scale_features:], x_arr[test_index, -n_pixel_features-n_scale_features:]
-        y_train, y_test = y_arr[train_index], y_arr[test_index]
-
-    # Convert strings to integers
-    y_train = np.vectorize(aspect_cfg['shape_number'].get)(y_train)
-    y_test = np.vectorize(aspect_cfg['shape_number'].get)(y_test)
-
-    return X_train, y_train, X_test, y_test
-
-
-def components_trainer(model_label, x_arr, y_arr, fit_cfg, list_labels, output_folder=None, test_fraction=0.1,
-                       random_state=None):
-
-    # Preparing the estimator:
-    print(f'\nLoading estimator: {fit_cfg["estimator"]["class"]}')
-    estimator = getattr(importlib.import_module(fit_cfg['estimator']["module"]), fit_cfg['estimator']["class"])
-    estimator_params = fit_cfg.get('estimator_params', {})
-
-    # Split into training and testing:
-    print(f'\nSplitting sample with categories:')
-    X_train, y_train, X_test, y_test = get_training_test_sets(x_arr, y_arr, test_fraction,
-                                                              n_pixel_features=fit_cfg['box_size'], n_scale_features=1,
-                                                              random_state=random_state)
-
-    # Run the training
-    print(f'\nTraining: {y_train.size/len(fit_cfg["categories"]):.0f} * {len(fit_cfg["categories"])} = {y_train.size} points ({model_label})')
-    print(f'- Settings: {fit_cfg["estimator_params"]}\n')
-    start_time = time()
-    ml_function = estimator(**estimator_params)
-    ml_function.fit(X_train, y_train)
-    end_time = np.round((time()-start_time)/60, 2)
-    print(f'- completed ({end_time} minutes)')
-
-    # Save the trained model and configuration
-    output_folder = Path(output_folder)/'results'
-    output_folder.mkdir(parents=True, exist_ok=True)
-
-    model_address = output_folder/f'{model_label}.joblib'
-    joblib.dump(ml_function, model_address)
-
-    # Run initial diagnostics
-    print(f'\nReloading model from: {model_address}')
-    start_time = time()
-    ml_function = joblib.load(model_address)
-    fit_time = np.round((time()-start_time), 3)
-    print(f'- completed ({fit_time} seconds)')
-
-    print(f'\nRuning prediction on test set ({y_test.size} points)')
-    start_time = time()
-    y_pred = ml_function.predict(X_test)
-    print(f'- completed ({(time()-start_time):0.1f} seconds)')
-
-    # Testing confussion matrix
-    print(f'\nConfusion matrix in test set ({y_test.size} points)')
-    start_time = time()
-    conf_matrix_test = confusion_matrix(y_test, y_pred, normalize="all")
-    print(f'- completed ({(time()-start_time):0.1f} seconds)')
-
-    # Precision, recall and f1:
-    print(f'\nF1, Precision and recall diagnostics ({y_test.size} points)')
-    start_time = time()
-    pres = precision_score(y_test, y_pred, average='macro')
-    recall = recall_score(y_test, y_pred, average='macro')
-    f1 = f1_score(y_test, y_pred, average='macro')
-    print(f'- completed ({(time()-start_time):0.1f} seconds)')
-
-    print(f'\nModel outputs')
-    print(f'- F1: \n {f1}')
-    print(f'- Precision: \n {pres}')
-    print(f'- Recall: \n {recall}')
-    print(f'- Testing confusion matrix: \n {conf_matrix_test}')
-    print(f'- Fitting time (seconds): \n {float(fit_time)}')
-
-    # Save results into a TOML file
-    toml_path = output_folder/f'{model_label}.toml'
-    output_dict = {'resuts': {'f1':f1, 'precision':pres, 'Recall':recall, 'confusion_matrix':conf_matrix_test,
-                              'fit_time': fit_time}, 'properties': fit_cfg,}
-    with open(toml_path, 'w') as f:
-        toml.dump(output_dict, f)
-
-    return
{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/README.rst
RENAMED
File without changes

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/setup.cfg
RENAMED
File without changes

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/__init__.py
RENAMED
File without changes

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/changelog.txt
RENAMED
File without changes

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/io.py
RENAMED
File without changes

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/dependency_links.txt
RENAMED
File without changes

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/requires.txt
RENAMED
File without changes

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/top_level.txt
RENAMED
File without changes

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/tests/test_tools.py
RENAMED
File without changes