aspect-stable 0.5.0__tar.gz → 0.7.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/PKG-INFO +1 -1
  2. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/pyproject.toml +17 -7
  3. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/aspect.toml +4 -4
  4. aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v10_model.joblib +0 -0
  5. aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v10_model.toml +27 -0
  6. aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.joblib +0 -0
  7. aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.toml +27 -0
  8. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/plots.py +32 -4
  9. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/tools.py +29 -11
  10. aspect_stable-0.7.dev1/src/aspect/trainer.py +214 -0
  11. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/workflow.py +21 -12
  12. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/PKG-INFO +1 -1
  13. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/SOURCES.txt +4 -1
  14. aspect_stable-0.5.0/MANIFEST.in +0 -6
  15. aspect_stable-0.5.0/src/aspect/trainer.py +0 -104
  16. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/README.rst +0 -0
  17. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/setup.cfg +0 -0
  18. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/__init__.py +0 -0
  19. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/changelog.txt +0 -0
  20. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/io.py +0 -0
  21. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/dependency_links.txt +0 -0
  22. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/requires.txt +0 -0
  23. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/top_level.txt +0 -0
  24. {aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/tests/test_tools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aspect-stable
3
- Version: 0.5.0
3
+ Version: 0.7.dev1
4
4
  Summary: Automatic SPEctra Components Tagging
5
5
  Author-email: Vital Fernández <vgf@stsci.edu>
6
6
  License-Expression: GPL-3.0-or-later
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "aspect-stable"
3
- version = "0.5.0"
3
+ version = "0.7.dev1"
4
4
  readme = "README.rst"
5
5
  requires-python = ">=3.11"
6
6
  license = "GPL-3.0-or-later"
@@ -19,6 +19,22 @@ classifiers = ["Programming Language :: Python :: 3",
19
19
  requires = ["setuptools>=61.0.0", "wheel"]
20
20
  build-backend = "setuptools.build_meta"
21
21
 
22
+ [tool.setuptools.packages.find]
23
+ where = ["src"]
24
+
25
+ [tool.setuptools.package-data]
26
+ "aspect" = ["aspect.toml",
27
+ "changelog.txt",
28
+ "models/*.toml",
29
+ "models/*.joblib"]
30
+
31
+ [tool.pytest.ini_options]
32
+ pythonpath = ["src"]
33
+ mpl-baseline-path = 'tests/baseline'
34
+ mpl-results-path = 'tests/outputs'
35
+ mpl-results-always = false
36
+ addopts = "-p no:asdf_schema_tester"
37
+
22
38
  [project.optional-dependencies]
23
39
  docs = ["sphinx-rtd-theme~=3.0",
24
40
  "ipympl~=0.9",
@@ -28,9 +44,3 @@ tests = ["pytest~=8.4",
28
44
  "pytest-cov~=7.0",
29
45
  "pytest-mpl~=0.17"]
30
46
 
31
- [tool.pytest.ini_options]
32
- pythonpath = ["src"]
33
- mpl-baseline-path = 'tests/baseline'
34
- mpl-results-path = 'tests/outputs'
35
- mpl-results-always = false
36
- addopts = "-p no:asdf_schema_tester"
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = 'aspect-stable'
3
- version = '0.5.0'
3
+ version = '0.7.dev1'
4
4
  category_order = ['undefined', 'white-noise', 'continuum', 'emission', 'cosmic-ray', 'broad', 'doublet-em', 'peak',
5
5
  'absorption', 'dead-pixel', 'doublet-abs', 'trough']
6
6
 
@@ -24,7 +24,7 @@ white-noise = '#C41E3A' # Red
24
24
  continuum = '#F48CBA' # Pink
25
25
  emission = '#00FF98' # Spring Green
26
26
  cosmic-ray= '#FFF468' # Yellow
27
- broad = '#0070DD' # Blue
27
+ broad = '#0070DD' # Blue
28
28
  doublet-em = '#3FC7EB' # Light blue
29
29
  peak = '#C69B6D' # Tan
30
30
  absorption = '#FF7C0A' # Orange
@@ -68,10 +68,10 @@ time_labels = ['Current detection', 'Past detection']
68
68
  time = [[2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], #undefined
69
69
  [0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], #white-noise
70
70
  [0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1], #continuum
71
- [0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0], #emission
71
+ [0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0], #emission
72
72
  [0, 0, 0, 1, 2, 1, 1, 1, 1, 0, 1, 1], #cosmic-ray
73
73
  [0, 0, 0, 1, 0, 2, 1, 0, 1, 0, 0, 0], #broad
74
- [0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0], #doublet_em
74
+ [0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0], #doublet_em
75
75
  [0, 0, 0, 1, 0, 1, 1, 2, 1, 0, 0, 1], #peak
76
76
  [0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0], #absorption
77
77
  [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 0, 1], #dead-pixel
@@ -0,0 +1,27 @@
1
+ [resuts]
2
+ f1 = 0.9640995326458678
3
+ precision = 0.9657992561722317
4
+ Recall = 0.9644108394108395
5
+ confusion_matrix = [ [ "np.float64(0.12337337337337337)", "np.float64(4.369448813893258e-05)", "np.float64(0.00013306957751402197)", "np.float64(0.00013306957751402197)", "np.float64(0.00036345869679203015)", "np.float64(0.0003614725836948059)", "np.float64(0.00019662519662519662)", "np.float64(0.00039523650634761746)",], [ "np.float64(3.9722261944484165e-06)", "np.float64(0.12423931868376313)", "np.float64(0.00025422247644469866)", "np.float64(0.00017676406565295455)", "np.float64(0.0)", "np.float64(0.00025422247644469866)", "np.float64(7.15000715000715e-05)", "np.float64(0.0)",], [ "np.float64(0.00029394473838918284)", "np.float64(0.004957338290671624)", "np.float64(0.10877742822187267)", "np.float64(0.010516468849802183)", "np.float64(0.00045481989926434373)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(5.1638940527829415e-05)", "np.float64(0.0010248343581676915)", "np.float64(0.0006057644946533835)", "np.float64(0.1233177622066511)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.000562070006514451)", "np.float64(0.0)", "np.float64(6.156950601395046e-05)", "np.float64(0.0)", "np.float64(0.12437437437437437)", "np.float64(1.9861130972242082e-06)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.0003594864705975817)", "np.float64(0.0009414176080842747)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.11313098813098812)", "np.float64(0.010228482450704674)", "np.float64(0.0003396253396253396)",], [ "np.float64(3.9722261944484165e-06)", "np.float64(4.965282743060521e-05)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0018828352161685494)", "np.float64(0.1230635397302064)", "np.float64(0.0)",], [ "np.float64(0.0006276117387228499)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.000238333571666905)", "np.float64(0.0)", "np.float64(0.12413405468961025)",],]
6
+ fit_time = "np.float64(0.009)"
7
+
8
+ [properties]
9
+ box_size = 12
10
+ sample_size = 600000
11
+ test_sample_size_fraction = 0.1
12
+ categories = [ "white-noise", "continuum", "cosmic-ray", "emission", "doublet-em", "dead-pixel", "absorption", "doublet-abs",]
13
+ scale = "min-max-log"
14
+
15
+ [properties.estimator]
16
+ module = "sklearn.ensemble"
17
+ class = "RandomForestClassifier"
18
+
19
+ [properties.estimator_params]
20
+ random_state = 42
21
+ n_estimators = 60
22
+ max_depth = 8
23
+ max_features = "sqrt"
24
+ verbose = 0
25
+ n_jobs = 10
26
+ min_samples_split = 2000
27
+ min_samples_leaf = 2000
@@ -0,0 +1,27 @@
1
+ [resuts]
2
+ f1 = 0.9606625147234599
3
+ precision = 0.9621003125623686
4
+ Recall = 0.9607928952544678
5
+ confusion_matrix = [ [ "np.float64(0.1212956695176403)", "np.float64(0.0027323889380246754)", "np.float64(7.246842270830965e-05)", "np.float64(1.9764115284084453e-05)", "np.float64(0.0004891618532810902)", "np.float64(9.717356681341523e-05)", "np.float64(2.3058134498098526e-05)", "np.float64(0.00027010957554915417)",], [ "np.float64(0.003073319926675132)", "np.float64(0.11908538262503685)", "np.float64(0.0006620978620168292)", "np.float64(0.000436457545856865)", "np.float64(0.0002898736908332386)", "np.float64(0.0007971526497914062)", "np.float64(0.00030799079651031605)", "np.float64(0.0003475190270784849)",], [ "np.float64(0.0)", "np.float64(0.0037535348943690387)", "np.float64(0.11137902467385093)", "np.float64(0.009842529411474057)", "np.float64(2.4705144105105566e-05)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.0)", "np.float64(0.0004677507283899987)", "np.float64(0.0005616302759893999)", "np.float64(0.12397041311941973)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(2.964617292612668e-05)", "np.float64(0.0007823295633283429)", "np.float64(1.8117105677077413e-05)", "np.float64(0.0)", "np.float64(0.12415817221461853)", "np.float64(1.1529067249049263e-05)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.0)", "np.float64(0.0018446507598478823)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.11339167041361352)", "np.float64(0.009694298546843423)", "np.float64(6.917440349429558e-05)",], [ "np.float64(0.0)", "np.float64(2.6352153712112603e-05)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0017590062602835163)", "np.float64(0.12321608271941051)", "np.float64(0.0)",], [ "np.float64(9.882057642042226e-06)", "np.float64(0.0006390397275187306)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(5.435131703123224e-05)", "np.float64(0.0)", "np.float64(0.12429652102160713)",],]
6
+ fit_time = "np.float64(3.37)"
7
+
8
+ [properties]
9
+ box_size = 12
10
+ sample_size = 700000
11
+ test_sample_size_fraction = 0.1
12
+ categories = [ "white-noise", "continuum", "cosmic-ray", "emission", "doublet-em", "dead-pixel", "absorption", "doublet-abs",]
13
+ scale = "min-max-log"
14
+
15
+ [properties.estimator]
16
+ module = "sklearn.ensemble"
17
+ class = "RandomForestClassifier"
18
+
19
+ [properties.estimator_params]
20
+ random_state = 42
21
+ n_estimators = 60
22
+ max_depth = 8
23
+ max_features = "sqrt"
24
+ verbose = 0
25
+ n_jobs = 10
26
+ min_samples_split = 2000
27
+ min_samples_leaf = 2000
@@ -45,7 +45,8 @@ def decision_matrix_plot(matrix_arr, output_address=None, categories=None, exclu
45
45
  axes_labels = None if matrix_name is None else cfg['decision_matrices'][f'{matrix_name}_labels']
46
46
 
47
47
  # Start the figure
48
- with rc_context(cfg_fig):
48
+ theme.set_style('dark')
49
+ with rc_context(theme.fig_defaults(cfg_fig)):
49
50
 
50
51
  # Define colors for values
51
52
  cmap = colors.ListedColormap(['white', decision_colors[0], decision_colors[1]])
@@ -110,7 +111,7 @@ def decision_matrix_plot(matrix_arr, output_address=None, categories=None, exclu
110
111
  return
111
112
 
112
113
  def scatter_plot(fig, ax, x_arr, y_arr, labels_arr, feature_list, color_dict, alpha=0.5, idx_target=None,
113
- detection_range=None, ratio_color=None):
114
+ detection_range=None, ratio_color=None, sn_limits=None):
114
115
 
115
116
  # Input user diagnostic coloring
116
117
  if ratio_color is not None:
@@ -138,6 +139,9 @@ def scatter_plot(fig, ax, x_arr, y_arr, labels_arr, feature_list, color_dict, al
138
139
  if detection_range is not None:
139
140
  ax.plot(detection_range, detection_function(detection_range))
140
141
 
142
+ if sn_limits is not None:
143
+ ax.set_ylim(sn_limits)
144
+
141
145
  return
142
146
 
143
147
  def parse_fig_cfg(fig_cfg=None, ax_diag=None, ax_line=None, dtype=None):
@@ -242,9 +246,29 @@ def ax_wording(ax, ax_cfg=None, legend_cfg=None, yscale=None):
242
246
  return
243
247
 
244
248
 
245
- def plot_comps_detect(x_sect, y_norm, idx, counts, model, out_type, seg_pred, old_pred):
249
+ def plot_comps_detect(x_arr, y_arr, b_pixels, idx, counts, model, out_type, seg_pred, old_pred):
250
+
251
+ x_sect = x_arr[idx:idx + b_pixels]
252
+ y_norm = y_arr[idx, -b_pixels:, 0]
253
+
254
+ min_max_arr = np.power(10, y_arr[idx, 0, :] * 4)
255
+ std_arr = np.std(y_arr[idx, :, :] * min_max_arr, axis=0)
256
+ msg_scale = f'min_max = {min_max_arr.mean():.1f}±{min_max_arr.std():.1f}, std = {std_arr.mean():.1f}±{std_arr.std():.1f}'
246
257
 
247
- print(f'Idx "{idx}"; counts: {counts}; Output: {model.number_feature_dict[out_type]} ({out_type})')
258
+ # x_arr[idx:idx + self.medium.b_pixels],
259
+ # y_arr[idx, -self.medium.b_pixels:, 0],
260
+ # idx, counts, self.medium,
261
+ # new_pred[0],
262
+ # pred_arr[idx:idx + self.medium.b_pixels],
263
+ # self.seg_pred[:]
264
+
265
+ # print(f'Idx "{idx}"; counts: {counts}; Output: {model.number_feature_dict[out_type]} ({out_type})')
266
+ msg = f'Idx "{idx}"; counts:'
267
+ for i, value in enumerate(counts):
268
+ if value > 0:
269
+ msg += f'{' ,' if msg[-1] != ':' else ' '} {model.number_feature_dict[i]} {value}'
270
+ msg += f' -> Output: {model.number_feature_dict[out_type]} ({out_type})'
271
+ print(msg)
248
272
 
249
273
  colors_old = [cfg['colors'][model.number_feature_dict[val]] for val in old_pred]
250
274
  colors_new = [cfg['colors'][model.number_feature_dict[val]] for val in seg_pred]
@@ -255,6 +279,7 @@ def plot_comps_detect(x_sect, y_norm, idx, counts, model, out_type, seg_pred, ol
255
279
  ax.scatter(x_sect, np.zeros(x_sect.size), color=colors_old, label='Old prediction')
256
280
  ax.scatter(x_sect, np.ones(x_sect.size), color=colors_new, label='New prediction')
257
281
  ax.set_xlabel(r'Wavelength $(\AA)$')
282
+ ax.set_title(msg_scale)
258
283
 
259
284
  ax_secondary = ax.twinx() # Creates a twin y-axis on the right
260
285
  ax_secondary.set_ylim(ax.get_ylim()) # Match the primary y-axis limits
@@ -266,6 +291,7 @@ def plot_comps_detect(x_sect, y_norm, idx, counts, model, out_type, seg_pred, ol
266
291
 
267
292
  return
268
293
 
294
+
269
295
  def plot_steps_backUP(spec, y_norm, idx, counts, model_mgr, out_type, seg_pred, old_pred):
270
296
 
271
297
  print(idx)
@@ -294,6 +320,7 @@ def plot_steps_backUP(spec, y_norm, idx, counts, model_mgr, out_type, seg_pred,
294
320
 
295
321
  return
296
322
 
323
+
297
324
  def plot_comps_detect_new(spec, theme, idx, y_norm, counts, model_mgr, out_type, old_pred, seg_pred, **kwargs):
298
325
 
299
326
  # Clear previous figure
@@ -351,6 +378,7 @@ def plot_comps_detect_new(spec, theme, idx, y_norm, counts, model_mgr, out_type,
351
378
 
352
379
  return
353
380
 
381
+
354
382
  class CheckSample:
355
383
 
356
384
  def __init__(self, in_data_arr, in_pred_arr, idx_features, fig_cfg=None, ax_diag=None, ax_line=None, base=10000,
@@ -2,6 +2,7 @@ import logging
2
2
  import numpy as np
3
3
  from .io import Aspect_Error
4
4
  from lime.fitting.lines import gaussian_model
5
+ from matplotlib import pyplot as plt
5
6
 
6
7
  # Log variable
7
8
  _logger = logging.getLogger('aspect')
@@ -34,7 +35,7 @@ def scale_min_max_orig(data, axis=None):
34
35
 
35
36
  def scale_min_max(data, box_size, axis=None, scale_parameter='min-max'):
36
37
 
37
- # Norm the scale features
38
+ # Norm the scale features # TODO this gives error if the error is 0 and the data is 0
38
39
  data_min_array = data[:, -box_size:].min(axis=axis, keepdims=True)
39
40
  data_max_array = data[:, -box_size:].max(axis=axis, keepdims=True)
40
41
  data[:, -box_size:] = (data[:, -box_size:] - data_min_array) / (data_max_array - data_min_array)
@@ -46,15 +47,6 @@ def scale_min_max(data, box_size, axis=None, scale_parameter='min-max'):
46
47
  if scale_parameter == 'min-max-log':
47
48
  data[:, -box_size - 1] = (np.log10(data_max_array - data_min_array)/4)[:,0]
48
49
 
49
- # # Norm the scale features
50
- # data_min_array = data[:, -box_size:].min(axis=axis, keepdims=True)
51
- # data_max_array = data[:, -box_size:].max(axis=axis, keepdims=True)
52
- # data[:, -box_size:] = (data[:, -box_size:] - data_min_array) / (data_max_array - data_min_array)
53
- #
54
- # # Save the scaling parameters
55
- # data[:, -box_size - 1] = ((data_max_array - data_min_array)/10000)[:,0]
56
- # data[:, -box_size - 1] = ((data_max_array - data_min_array)/10000)[:,0]
57
-
58
50
  return
59
51
 
60
52
  def scale_log(data, log_base, axis=None):
@@ -111,7 +103,33 @@ def broad_component_function(intensity_ratio):
111
103
  return np.sqrt(1 + np.log(intensity_ratio)/np.log(2))
112
104
 
113
105
 
114
- def doublet_model(wave_arr, noise_arr, cont_arr, amp, mu_line, sigma, doublet_em_sep_min, doublet_em_sep_max,
106
+ def doublet_model(wave_arr, noise_arr, cont_arr, amp, mu_line, sigma, doublet_em_sep_max,
107
+ doublet_int_min, doublet_int_max, lower_limit, upper_limit, sign=1):
108
+
109
+ # Generate intensities
110
+ int_diff = np.random.uniform(doublet_int_min, doublet_int_max)
111
+ amp1, amp2 = amp, amp * int_diff
112
+
113
+ # Clip for intensity limits
114
+ amp2 = np.clip(np.abs(amp2), lower_limit, upper_limit)
115
+
116
+ r = max(amp1, amp2)/min(amp1, amp2)
117
+ # sep_min = 1.2 + 0.15*(r - 1)
118
+ # sep_min = 1.3 + 0.15*(r - 1)
119
+ sep_min = 1.5 + 0.15*(r - 1)
120
+ sep = np.random.uniform(sep_min, doublet_em_sep_max)
121
+
122
+ # Generate the profiles
123
+ mu1 = mu_line - sep
124
+ mu2 = mu_line + sep
125
+ sigma1, sigma2 = sigma, sigma * 1
126
+ gauss1 = gaussian_model(wave_arr, sign*amp1, mu1, sigma1)
127
+ gauss2 = gaussian_model(wave_arr, sign*amp2, mu2, sigma2)
128
+ flux_arr = gauss1 + gauss2 + noise_arr + cont_arr
129
+
130
+ return flux_arr
131
+
132
+ def doublet_model_orig(wave_arr, noise_arr, cont_arr, amp, mu_line, sigma, doublet_em_sep_min, doublet_em_sep_max,
115
133
  doublet_int_min, doublet_int_max, lower_limit, upper_limit):
116
134
 
117
135
  # Compute the doublet
@@ -0,0 +1,214 @@
1
+ import importlib
2
+ import numpy as np
3
+ import joblib
4
+ import toml
5
+ from matplotlib import pyplot as plt
6
+ from sklearn.model_selection import cross_val_score, cross_val_predict
7
+ from sklearn.metrics import confusion_matrix
8
+ from sklearn.ensemble import RandomForestClassifier
9
+ from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
10
+ from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
11
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, median_absolute_error
12
+ from time import time
13
+ from pathlib import Path
14
+ from .io import cfg as aspect_cfg
15
+
16
+
17
+ def get_training_test_sets(x_arr, y_arr, test_fraction, n_pixel_features=None, n_scale_features=None, random_state=None, classification=True):
18
+
19
+ # Split into training and testing:
20
+
21
+
22
+ if classification:
23
+
24
+ print(f'\nSplitting sample with categories:')
25
+ print(np.unique(y_arr))
26
+ sss = StratifiedShuffleSplit(n_splits=1, train_size=int(y_arr.size * (1 - test_fraction)),
27
+ test_size=int(y_arr.size * test_fraction), random_state=random_state)
28
+
29
+ # Equal splits
30
+ for train_index, test_index in sss.split(x_arr, y_arr):
31
+ X_train, X_test = x_arr[train_index, :], x_arr[test_index, :]
32
+ y_train, y_test = y_arr[train_index], y_arr[test_index]
33
+
34
+ # Convert strings to integers
35
+ y_train = np.vectorize(aspect_cfg['shape_number'].get)(y_train)
36
+ y_test = np.vectorize(aspect_cfg['shape_number'].get)(y_test)
37
+
38
+ else:
39
+ X_train, X_test, y_train, y_test = train_test_split(x_arr, y_arr, test_size=test_fraction,
40
+ random_state=random_state, shuffle=True)
41
+ y_train, y_test = np.log10(y_train), np.log10(y_test)
42
+
43
+ # Crop the database if requested
44
+ if n_pixel_features and n_scale_features:
45
+ X_train, X_test = X_train[:, -n_pixel_features - n_scale_features:], X_test[:, -n_pixel_features - n_scale_features:]
46
+
47
+ return X_train, y_train, X_test, y_test
48
+
49
+
50
+ def components_trainer(model_label, x_arr, y_arr, fit_cfg, list_labels, output_folder=None, test_fraction=0.1,
51
+ random_state=None, classification=True):
52
+
53
+ # Preparing the estimator:
54
+ print(f'\nLoading estimator: {fit_cfg["estimator"]["class"]}')
55
+ estimator = getattr(importlib.import_module(fit_cfg['estimator']["module"]), fit_cfg['estimator']["class"])
56
+ estimator_params = fit_cfg.get('estimator_params', {})
57
+
58
+ # Split into training and testing:
59
+ data_train, y_train, data_test, y_test = get_training_test_sets(x_arr, y_arr, test_fraction,
60
+ random_state=random_state, classification=classification)
61
+
62
+ # Select just the features
63
+ feature_slice = -fit_cfg['box_size'] - 1
64
+ X_train, X_test = data_train[:, feature_slice:], data_test[:, feature_slice:]
65
+
66
+ # Run the training
67
+ if classification:
68
+ print(f'\nClassification: {y_train.size/len(fit_cfg["categories"]):.0f} * {len(fit_cfg["categories"])} = {y_train.size} points ({model_label})')
69
+ print(f'- Settings: {fit_cfg["estimator_params"]}\n')
70
+ print(f'- Data set size: {X_train.shape}\n')
71
+ else:
72
+ print(f'Regression range: [{y_train.min():.3f}, {y_train.max():.3f}]')
73
+ print(f'- Settings: {fit_cfg["estimator_params"]}')
74
+ print(f'- Data set size: {X_train.shape}\n')
75
+
76
+ start_time = time()
77
+ ml_function = estimator(**estimator_params)
78
+ ml_function.fit(X_train, y_train)
79
+ end_time = np.round((time()-start_time)/60, 2)
80
+ print(f'- completed ({end_time} minutes)')
81
+
82
+ # Save the trained model and configuration
83
+ output_folder = Path(output_folder)/'results'
84
+ output_folder.mkdir(parents=True, exist_ok=True)
85
+
86
+ model_address = output_folder/f'{model_label}.joblib'
87
+ joblib.dump(ml_function, model_address)
88
+
89
+ if classification:
90
+
91
+ # Run initial diagnostics
92
+ print(f'\nReloading model from: {model_address}')
93
+ start_time = time()
94
+ ml_function = joblib.load(model_address)
95
+ fit_time = np.round((time()-start_time), 3)
96
+ print(f'- completed ({fit_time} seconds)')
97
+
98
+ print(f'\nRuning prediction on test set ({y_test.size} points)')
99
+ start_time = time()
100
+ y_pred = ml_function.predict(X_test)
101
+ print(f'- completed ({(time()-start_time):0.1f} seconds)')
102
+
103
+ # Testing confussion matrix
104
+ print(f'\nConfusion matrix in test set ({y_test.size} points)')
105
+ start_time = time()
106
+ conf_matrix_test = confusion_matrix(y_test, y_pred, normalize="all")
107
+ print(f'- completed ({(time()-start_time):0.1f} seconds)')
108
+
109
+ # Precision, recall and f1:
110
+ print(f'\nF1, Precision and recall diagnostics ({y_test.size} points)')
111
+ start_time = time()
112
+ pres = precision_score(y_test, y_pred, average='macro')
113
+ recall = recall_score(y_test, y_pred, average='macro')
114
+ f1 = f1_score(y_test, y_pred, average='macro')
115
+ print(f'- completed ({(time()-start_time):0.1f} seconds)')
116
+
117
+ print(f'\nModel outputs')
118
+ print(f'- F1: \n {f1}')
119
+ print(f'- Precision: \n {pres}')
120
+ print(f'- Recall: \n {recall}')
121
+ print(f'- Testing confusion matrix: \n {conf_matrix_test}')
122
+ print(f'- Fitting time (seconds): \n {float(fit_time)}')
123
+
124
+ # Save results into a TOML file
125
+ toml_path = output_folder/f'{model_label}.toml'
126
+ output_dict = {'resuts': {'f1':f1, 'precision':pres, 'Recall':recall, 'confusion_matrix':conf_matrix_test,
127
+ 'fit_time': end_time}, 'properties': fit_cfg,}
128
+ with open(toml_path, 'w') as f:
129
+ toml.dump(output_dict, f)
130
+
131
+ else:
132
+
133
+ # Reload model
134
+ print(f'\nReloading model from: {model_address}')
135
+ start_time = time()
136
+ ml_function = joblib.load(model_address)
137
+ fit_time = np.round((time() - start_time), 3)
138
+ print(f'- completed ({fit_time} seconds)')
139
+
140
+ # Prediction
141
+ print(f'\nRunning prediction on test set ({y_test.size} points)')
142
+ start_time = time()
143
+ y_pred = ml_function.predict(X_test)
144
+ pred_time = np.round((time() - start_time), 3)
145
+ print(f'- completed ({pred_time} seconds)')
146
+
147
+ # Core regression metrics
148
+ print(f'\nRegression diagnostics ({y_test.size} points)')
149
+ start_time = time()
150
+
151
+ mse = mean_squared_error(y_test, y_pred)
152
+ rmse = np.sqrt(mse)
153
+ mae = mean_absolute_error(y_test, y_pred)
154
+ medae = median_absolute_error(y_test, y_pred)
155
+ r2 = r2_score(y_test, y_pred)
156
+
157
+ # Normalized errors (scale-independent)
158
+ y_range = y_test.max() - y_test.min()
159
+ nrmse = rmse / y_range if y_range > 0 else np.nan
160
+ nmae = mae / y_range if y_range > 0 else np.nan
161
+
162
+ print(f'- completed ({(time() - start_time):0.1f} seconds)')
163
+
164
+ # Outputs
165
+ print(f'\nModel outputs')
166
+ print(f'- R²: \n {r2}')
167
+ print(f'- RMSE: \n {rmse}')
168
+ print(f'- MAE: \n {mae}')
169
+ print(f'- Median AE: \n {medae}')
170
+ print(f'- Normalized RMSE: \n {nrmse}')
171
+ print(f'- Normalized MAE: \n {nmae}')
172
+ print(f'- Fit time (seconds): \n {float(fit_time)}')
173
+
174
+ # Save results to TOML
175
+ toml_path = output_folder / f'{model_label}.toml'
176
+ output_dict = {
177
+ 'results': {
178
+ 'r2': float(r2),
179
+ 'rmse': float(rmse),
180
+ 'mae': float(mae),
181
+ 'median_ae': float(medae),
182
+ 'nrmse': float(nrmse),
183
+ 'nmae': float(nmae),
184
+ 'fit_time': float(end_time),
185
+ 'prediction_time': float(pred_time),
186
+ },
187
+ 'properties': fit_cfg,
188
+ }
189
+
190
+ # Scatter plot
191
+ fig, ax = plt.subplots()
192
+
193
+ idcs_limit = 5000
194
+ ycoords, xcoords = data_test[:, 0], data_test[:, 1]
195
+ error = y_test - y_pred # signed error
196
+ abs_error = np.abs(error)
197
+ rel_error = error / y_test
198
+ limit = np.percentile(rel_error, 95)
199
+
200
+ # Set the color limits
201
+
202
+ sc = ax.scatter(xcoords[:idcs_limit], ycoords[:idcs_limit], c=rel_error[:idcs_limit], s=8, cmap='viridis')
203
+ sc.set_clim(-limit, limit)
204
+
205
+ cbar = fig.colorbar(sc, ax=ax, label='|Prediction error|')
206
+ ax.set_yscale('log')
207
+ plt.tight_layout()
208
+ plt.show()
209
+
210
+ with open(toml_path, 'w') as f:
211
+ toml.dump(output_dict, f)
212
+
213
+
214
+ return
@@ -5,9 +5,11 @@ from aspect.plots import plot_comps_detect
5
5
  # from matplotlib import pyplot as plt
6
6
  from pathlib import Path
7
7
 
8
+
8
9
  CHOICE_DM = np.array(cfg['decision_matrices']['choice'])
9
10
  TIME_DM = np.array(cfg['decision_matrices']['time'])
10
11
 
12
+
11
13
  def flux_to_image(flux_array, approximation, model_2D):
12
14
 
13
15
  if model_2D is not None:
@@ -35,7 +37,10 @@ def flux_to_image(flux_array, approximation, model_2D):
35
37
  def unpack_spec_flux(spectrum, rest_wl_lim):
36
38
 
37
39
  # Extract the mask if masked array
38
- pixel_mask = ~spectrum.flux.mask
40
+ pixel_mask = (~spectrum.flux.mask) & (spectrum.flux.data != 0)
41
+
42
+ if spectrum.err_flux is not None:
43
+ pixel_mask = pixel_mask & (spectrum.err_flux.data != 0)
39
44
 
40
45
  # Limit to region if requested # TODO warning negative entries
41
46
  if rest_wl_lim is not None:
@@ -59,6 +64,7 @@ def enbox_spectrum(input_flux, box_size, range_box, n_scale_features):
59
64
  n_rows = input_flux.size - box_size
60
65
 
61
66
  # Container for the data
67
+ # box_containter = np.zeros((n_rows, n_columns))
62
68
  box_containter = np.empty((n_rows, n_columns))
63
69
 
64
70
  # Assign values
@@ -111,8 +117,6 @@ def detection_revision(seg_pred, box_size, new_type, new_confidence):
111
117
  return idcs_pred, new_pred, new_conf
112
118
 
113
119
 
114
-
115
-
116
120
  class DetectionModel:
117
121
 
118
122
  def __init__(self, model_address=None, n_jobs=None, verbose=0):
@@ -139,7 +143,7 @@ class DetectionModel:
139
143
 
140
144
  class ModelManager:
141
145
 
142
- def __init__(self, model_address=None,):
146
+ def __init__(self, model_address=None, n_jobs=4):
143
147
 
144
148
  # Global parameters
145
149
  self.n_mc = 100
@@ -148,10 +152,10 @@ class ModelManager:
148
152
  self.n_scale_features = 1
149
153
 
150
154
  # Default values
151
- model_address = DEFAULT_MODEL_ADDRESS if model_address is None else model_address
155
+ self.model_address = DEFAULT_MODEL_ADDRESS if model_address is None else Path(model_address)
152
156
 
153
157
  # Load the model
154
- self.medium = DetectionModel(model_address)
158
+ self.medium = DetectionModel(self.model_address, n_jobs)
155
159
  self.large = None
156
160
 
157
161
  # Largest reference model parameters
@@ -212,8 +216,8 @@ class ModelManager:
212
216
  out_confidence)
213
217
 
214
218
  # Only pass if more than half
215
- # half_check = idcs_pred[6:].sum() > 5
216
- half_check = idcs_pred[5:].sum() > 6
219
+ # half_check = idcs_pred[5:].sum() > 6
220
+ half_check = np.all(idcs_pred[3:9])
217
221
  if half_check:
218
222
  idcs_pred = np.flatnonzero(idcs_pred)
219
223
  self.seg_pred[idcs_pred] = new_pred[idcs_pred]
@@ -223,13 +227,20 @@ class ModelManager:
223
227
  self.seg_conf[:] = conf_arr[idx:idx + self.medium.b_pixels]
224
228
 
225
229
  if plot_steps:
226
- plot_comps_detect(x_arr[idx:idx + self.medium.b_pixels],
227
- y_arr[idx, -self.medium.b_pixels:, 0],
230
+ plot_comps_detect(x_arr, y_arr, self.medium.b_pixels,
228
231
  idx, counts, self.medium,
229
232
  new_pred[0],
230
233
  pred_arr[idx:idx + self.medium.b_pixels],
231
234
  self.seg_pred[:])
232
235
 
236
+ # plot_comps_detect(x_arr[idx:idx + self.medium.b_pixels],
237
+ # y_arr[idx, -self.medium.b_pixels:, 0],
238
+ # idx, counts, self.medium,
239
+ # new_pred[0],
240
+ # pred_arr[idx:idx + self.medium.b_pixels],
241
+ # self.seg_pred[:])
242
+
243
+
233
244
  # Assign new categories and confidence
234
245
  pred_arr[idx:idx + self.medium.b_pixels] = self.seg_pred[:]
235
246
  conf_arr[idx:idx + self.medium.b_pixels] = self.seg_conf[:]
@@ -241,8 +252,6 @@ class ModelManager:
241
252
  model_mgr = ModelManager()
242
253
 
243
254
 
244
-
245
-
246
255
  class ComponentsDetector:
247
256
 
248
257
  def __init__(self, spectrum, model_address=None):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aspect-stable
3
- Version: 0.5.0
3
+ Version: 0.7.dev1
4
4
  Summary: Automatic SPEctra Components Tagging
5
5
  Author-email: Vital Fernández <vgf@stsci.edu>
6
6
  License-Expression: GPL-3.0-or-later
@@ -1,4 +1,3 @@
1
- MANIFEST.in
2
1
  README.rst
3
2
  pyproject.toml
4
3
  src/aspect/__init__.py
@@ -9,6 +8,10 @@ src/aspect/plots.py
9
8
  src/aspect/tools.py
10
9
  src/aspect/trainer.py
11
10
  src/aspect/workflow.py
11
+ src/aspect/models/aspect_min-max-log_12_pixels_v10_model.joblib
12
+ src/aspect/models/aspect_min-max-log_12_pixels_v10_model.toml
13
+ src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.joblib
14
+ src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.toml
12
15
  src/aspect_stable.egg-info/PKG-INFO
13
16
  src/aspect_stable.egg-info/SOURCES.txt
14
17
  src/aspect_stable.egg-info/dependency_links.txt
@@ -1,6 +0,0 @@
1
- # MANIFEST.in
2
-
3
- include src/aspect/aspect.toml
4
- include src/aspect/changelog.txt
5
- include src/aspect/models/aspect_min-max_12_pixels_v10_model.toml
6
- include src/aspect/models/aspect_min-max_12_pixels_v10_model.joblib
@@ -1,104 +0,0 @@
1
- import importlib
2
- import numpy as np
3
- import joblib
4
- import toml
5
- from sklearn.model_selection import cross_val_score, cross_val_predict
6
- from sklearn.metrics import confusion_matrix
7
- from sklearn.ensemble import RandomForestClassifier
8
- from sklearn.model_selection import StratifiedShuffleSplit
9
- from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
10
- from time import time
11
- from pathlib import Path
12
- from .io import cfg as aspect_cfg
13
-
14
-
15
- def get_training_test_sets(x_arr, y_arr, test_fraction, n_pixel_features, n_scale_features, random_state=None):
16
-
17
- # Split into training and testing:
18
- print(f'\nSplitting sample with categories:')
19
- print(np.unique(y_arr))
20
- sss = StratifiedShuffleSplit(n_splits=1, train_size=int(y_arr.size * (1 - test_fraction)),
21
- test_size=int(y_arr.size * test_fraction), random_state=random_state)
22
-
23
- for train_index, test_index in sss.split(x_arr, y_arr):
24
- X_train, X_test = x_arr[train_index, -n_pixel_features-n_scale_features:], x_arr[test_index, -n_pixel_features-n_scale_features:]
25
- y_train, y_test = y_arr[train_index], y_arr[test_index]
26
-
27
- # Convert strings to integers
28
- y_train = np.vectorize(aspect_cfg['shape_number'].get)(y_train)
29
- y_test = np.vectorize(aspect_cfg['shape_number'].get)(y_test)
30
-
31
- return X_train, y_train, X_test, y_test
32
-
33
-
34
- def components_trainer(model_label, x_arr, y_arr, fit_cfg, list_labels, output_folder=None, test_fraction=0.1,
35
- random_state=None):
36
-
37
- # Preparing the estimator:
38
- print(f'\nLoading estimator: {fit_cfg["estimator"]["class"]}')
39
- estimator = getattr(importlib.import_module(fit_cfg['estimator']["module"]), fit_cfg['estimator']["class"])
40
- estimator_params = fit_cfg.get('estimator_params', {})
41
-
42
- # Split into training and testing:
43
- print(f'\nSplitting sample with categories:')
44
- X_train, y_train, X_test, y_test = get_training_test_sets(x_arr, y_arr, test_fraction,
45
- n_pixel_features=fit_cfg['box_size'], n_scale_features=1,
46
- random_state=random_state)
47
-
48
- # Run the training
49
- print(f'\nTraining: {y_train.size/len(fit_cfg["categories"]):.0f} * {len(fit_cfg["categories"])} = {y_train.size} points ({model_label})')
50
- print(f'- Settings: {fit_cfg["estimator_params"]}\n')
51
- start_time = time()
52
- ml_function = estimator(**estimator_params)
53
- ml_function.fit(X_train, y_train)
54
- end_time = np.round((time()-start_time)/60, 2)
55
- print(f'- completed ({end_time} minutes)')
56
-
57
- # Save the trained model and configuration
58
- output_folder = Path(output_folder)/'results'
59
- output_folder.mkdir(parents=True, exist_ok=True)
60
-
61
- model_address = output_folder/f'{model_label}.joblib'
62
- joblib.dump(ml_function, model_address)
63
-
64
- # Run initial diagnostics
65
- print(f'\nReloading model from: {model_address}')
66
- start_time = time()
67
- ml_function = joblib.load(model_address)
68
- fit_time = np.round((time()-start_time), 3)
69
- print(f'- completed ({fit_time} seconds)')
70
-
71
- print(f'\nRuning prediction on test set ({y_test.size} points)')
72
- start_time = time()
73
- y_pred = ml_function.predict(X_test)
74
- print(f'- completed ({(time()-start_time):0.1f} seconds)')
75
-
76
- # Testing confussion matrix
77
- print(f'\nConfusion matrix in test set ({y_test.size} points)')
78
- start_time = time()
79
- conf_matrix_test = confusion_matrix(y_test, y_pred, normalize="all")
80
- print(f'- completed ({(time()-start_time):0.1f} seconds)')
81
-
82
- # Precision, recall and f1:
83
- print(f'\nF1, Precision and recall diagnostics ({y_test.size} points)')
84
- start_time = time()
85
- pres = precision_score(y_test, y_pred, average='macro')
86
- recall = recall_score(y_test, y_pred, average='macro')
87
- f1 = f1_score(y_test, y_pred, average='macro')
88
- print(f'- completed ({(time()-start_time):0.1f} seconds)')
89
-
90
- print(f'\nModel outputs')
91
- print(f'- F1: \n {f1}')
92
- print(f'- Precision: \n {pres}')
93
- print(f'- Recall: \n {recall}')
94
- print(f'- Testing confusion matrix: \n {conf_matrix_test}')
95
- print(f'- Fitting time (seconds): \n {float(fit_time)}')
96
-
97
- # Save results into a TOML file
98
- toml_path = output_folder/f'{model_label}.toml'
99
- output_dict = {'resuts': {'f1':f1, 'precision':pres, 'Recall':recall, 'confusion_matrix':conf_matrix_test,
100
- 'fit_time': fit_time}, 'properties': fit_cfg,}
101
- with open(toml_path, 'w') as f:
102
- toml.dump(output_dict, f)
103
-
104
- return