PyPI - aspect-stable - Versions diffs - 0.5.0__tar.gz → 0.7.dev1__tar.gz - Mend

aspect-stable 0.5.0__tar.gz → 0.7.dev1__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (24) hide show

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aspect-stable
-Version: 0.5.0
+Version: 0.7.dev1
 Summary: Automatic SPEctra Components Tagging
 Author-email: Vital Fernández <vgf@stsci.edu>
 License-Expression: GPL-3.0-or-later

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "aspect-stable"
-version = "0.5.0"
+version = "0.7.dev1"
 readme = "README.rst"
 requires-python = ">=3.11"
 license = "GPL-3.0-or-later"
@@ -19,6 +19,22 @@ classifiers = ["Programming Language :: Python :: 3",
 requires = ["setuptools>=61.0.0", "wheel"]
 build-backend = "setuptools.build_meta"
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.setuptools.package-data]
+"aspect" = ["aspect.toml",
+            "changelog.txt",
+            "models/*.toml",
+            "models/*.joblib"]
+[tool.pytest.ini_options]
+pythonpath = ["src"]
+mpl-baseline-path = 'tests/baseline'
+mpl-results-path = 'tests/outputs'
+mpl-results-always = false
+addopts = "-p no:asdf_schema_tester"
 [project.optional-dependencies]
 docs = ["sphinx-rtd-theme~=3.0",
         "ipympl~=0.9",
@@ -28,9 +44,3 @@ tests = ["pytest~=8.4",
          "pytest-cov~=7.0",
          "pytest-mpl~=0.17"]
-[tool.pytest.ini_options]
-pythonpath = ["src"]
-mpl-baseline-path = 'tests/baseline'
-mpl-results-path = 'tests/outputs'
-mpl-results-always = false
-addopts = "-p no:asdf_schema_tester"

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/aspect.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = 'aspect-stable'
-version = '0.5.0'
+version = '0.7.dev1'
 category_order = ['undefined', 'white-noise', 'continuum', 'emission', 'cosmic-ray', 'broad', 'doublet-em', 'peak',
                   'absorption', 'dead-pixel', 'doublet-abs', 'trough']
@@ -24,7 +24,7 @@ white-noise = '#C41E3A'         # Red
 continuum = '#F48CBA'           # Pink
 emission = '#00FF98'            # Spring Green
 cosmic-ray= '#FFF468'           # Yellow
-broad = '#0070DD'         # Blue
+broad = '#0070DD'               # Blue
 doublet-em = '#3FC7EB'          # Light blue
 peak = '#C69B6D'                # Tan
 absorption = '#FF7C0A'          # Orange
@@ -68,10 +68,10 @@ time_labels = ['Current detection', 'Past detection']
 time = [[2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 1], #undefined
         [0,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1, 1], #white-noise
         [0,  0,  2,  1,  1,  1,  1,  1,  1,  1,  1, 1], #continuum
-        [0,  0,  0,  2,  0,  0,  1,  0,  0,  0,  0, 0], #emission
+        [0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0, 0], #emission
         [0,  0,  0,  1,  2,  1,  1,  1,  1,  0,  1, 1], #cosmic-ray
         [0,  0,  0,  1,  0,  2,  1,  0,  1,  0,  0, 0], #broad
-        [0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0, 0], #doublet_em
+        [0,  0,  0,  1,  0,  0,  2,  0,  0,  0,  0, 0], #doublet_em
         [0,  0,  0,  1,  0,  1,  1,  2,  1,  0,  0, 1], #peak
         [0,  0,  0,  1,  0,  0,  1,  0,  2,  0,  0, 0], #absorption
         [0,  0,  0,  1,  1,  1,  1,  1,  1,  2,  0, 1], #dead-pixel

aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v10_model.joblib ADDED Viewed

Binary file

aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v10_model.toml ADDED Viewed

@@ -0,0 +1,27 @@
+[resuts]
+f1 = 0.9640995326458678
+precision = 0.9657992561722317
+Recall = 0.9644108394108395
+confusion_matrix = [ [ "np.float64(0.12337337337337337)", "np.float64(4.369448813893258e-05)", "np.float64(0.00013306957751402197)", "np.float64(0.00013306957751402197)", "np.float64(0.00036345869679203015)", "np.float64(0.0003614725836948059)", "np.float64(0.00019662519662519662)", "np.float64(0.00039523650634761746)",], [ "np.float64(3.9722261944484165e-06)", "np.float64(0.12423931868376313)", "np.float64(0.00025422247644469866)", "np.float64(0.00017676406565295455)", "np.float64(0.0)", "np.float64(0.00025422247644469866)", "np.float64(7.15000715000715e-05)", "np.float64(0.0)",], [ "np.float64(0.00029394473838918284)", "np.float64(0.004957338290671624)", "np.float64(0.10877742822187267)", "np.float64(0.010516468849802183)", "np.float64(0.00045481989926434373)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(5.1638940527829415e-05)", "np.float64(0.0010248343581676915)", "np.float64(0.0006057644946533835)", "np.float64(0.1233177622066511)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.000562070006514451)", "np.float64(0.0)", "np.float64(6.156950601395046e-05)", "np.float64(0.0)", "np.float64(0.12437437437437437)", "np.float64(1.9861130972242082e-06)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.0003594864705975817)", "np.float64(0.0009414176080842747)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.11313098813098812)", "np.float64(0.010228482450704674)", "np.float64(0.0003396253396253396)",], [ "np.float64(3.9722261944484165e-06)", "np.float64(4.965282743060521e-05)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0018828352161685494)", "np.float64(0.1230635397302064)", "np.float64(0.0)",], [ "np.float64(0.0006276117387228499)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.000238333571666905)", "np.float64(0.0)", "np.float64(0.12413405468961025)",],]
+fit_time = "np.float64(0.009)"
+[properties]
+box_size = 12
+sample_size = 600000
+test_sample_size_fraction = 0.1
+categories = [ "white-noise", "continuum", "cosmic-ray", "emission", "doublet-em", "dead-pixel", "absorption", "doublet-abs",]
+scale = "min-max-log"
+[properties.estimator]
+module = "sklearn.ensemble"
+class = "RandomForestClassifier"
+[properties.estimator_params]
+random_state = 42
+n_estimators = 60
+max_depth = 8
+max_features = "sqrt"
+verbose = 0
+n_jobs = 10
+min_samples_split = 2000
+min_samples_leaf = 2000

aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.joblib ADDED Viewed

Binary file

aspect_stable-0.7.dev1/src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.toml ADDED Viewed

@@ -0,0 +1,27 @@
+[resuts]
+f1 = 0.9606625147234599
+precision = 0.9621003125623686
+Recall = 0.9607928952544678
+confusion_matrix = [ [ "np.float64(0.1212956695176403)", "np.float64(0.0027323889380246754)", "np.float64(7.246842270830965e-05)", "np.float64(1.9764115284084453e-05)", "np.float64(0.0004891618532810902)", "np.float64(9.717356681341523e-05)", "np.float64(2.3058134498098526e-05)", "np.float64(0.00027010957554915417)",], [ "np.float64(0.003073319926675132)", "np.float64(0.11908538262503685)", "np.float64(0.0006620978620168292)", "np.float64(0.000436457545856865)", "np.float64(0.0002898736908332386)", "np.float64(0.0007971526497914062)", "np.float64(0.00030799079651031605)", "np.float64(0.0003475190270784849)",], [ "np.float64(0.0)", "np.float64(0.0037535348943690387)", "np.float64(0.11137902467385093)", "np.float64(0.009842529411474057)", "np.float64(2.4705144105105566e-05)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.0)", "np.float64(0.0004677507283899987)", "np.float64(0.0005616302759893999)", "np.float64(0.12397041311941973)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(2.964617292612668e-05)", "np.float64(0.0007823295633283429)", "np.float64(1.8117105677077413e-05)", "np.float64(0.0)", "np.float64(0.12415817221461853)", "np.float64(1.1529067249049263e-05)", "np.float64(0.0)", "np.float64(0.0)",], [ "np.float64(0.0)", "np.float64(0.0018446507598478823)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.11339167041361352)", "np.float64(0.009694298546843423)", "np.float64(6.917440349429558e-05)",], [ "np.float64(0.0)", "np.float64(2.6352153712112603e-05)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0017590062602835163)", "np.float64(0.12321608271941051)", "np.float64(0.0)",], [ "np.float64(9.882057642042226e-06)", "np.float64(0.0006390397275187306)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(0.0)", "np.float64(5.435131703123224e-05)", "np.float64(0.0)", "np.float64(0.12429652102160713)",],]
+fit_time = "np.float64(3.37)"
+[properties]
+box_size = 12
+sample_size = 700000
+test_sample_size_fraction = 0.1
+categories = [ "white-noise", "continuum", "cosmic-ray", "emission", "doublet-em", "dead-pixel", "absorption", "doublet-abs",]
+scale = "min-max-log"
+[properties.estimator]
+module = "sklearn.ensemble"
+class = "RandomForestClassifier"
+[properties.estimator_params]
+random_state = 42
+n_estimators = 60
+max_depth = 8
+max_features = "sqrt"
+verbose = 0
+n_jobs = 10
+min_samples_split = 2000
+min_samples_leaf = 2000

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/plots.py RENAMED Viewed

@@ -45,7 +45,8 @@ def decision_matrix_plot(matrix_arr, output_address=None, categories=None, exclu
     axes_labels = None if matrix_name is None else cfg['decision_matrices'][f'{matrix_name}_labels']
     # Start the figure
-    with rc_context(cfg_fig):
+    theme.set_style('dark')
+    with rc_context(theme.fig_defaults(cfg_fig)):
         # Define colors for values
         cmap = colors.ListedColormap(['white', decision_colors[0], decision_colors[1]])
@@ -110,7 +111,7 @@ def decision_matrix_plot(matrix_arr, output_address=None, categories=None, exclu
     return
 def scatter_plot(fig, ax, x_arr, y_arr, labels_arr, feature_list, color_dict, alpha=0.5, idx_target=None,
-                 detection_range=None, ratio_color=None):
+                 detection_range=None, ratio_color=None, sn_limits=None):
     # Input user diagnostic coloring
     if ratio_color is not None:
@@ -138,6 +139,9 @@ def scatter_plot(fig, ax, x_arr, y_arr, labels_arr, feature_list, color_dict, al
     if detection_range is not None:
         ax.plot(detection_range, detection_function(detection_range))
+    if sn_limits is not None:
+        ax.set_ylim(sn_limits)
     return
 def parse_fig_cfg(fig_cfg=None, ax_diag=None, ax_line=None, dtype=None):
@@ -242,9 +246,29 @@ def ax_wording(ax, ax_cfg=None, legend_cfg=None, yscale=None):
     return
-def plot_comps_detect(x_sect, y_norm, idx, counts, model, out_type, seg_pred, old_pred):
+def plot_comps_detect(x_arr, y_arr, b_pixels, idx, counts, model, out_type, seg_pred, old_pred):
+    x_sect = x_arr[idx:idx + b_pixels]
+    y_norm = y_arr[idx, -b_pixels:, 0]
+    min_max_arr = np.power(10, y_arr[idx, 0, :] * 4)
+    std_arr = np.std(y_arr[idx, :, :] * min_max_arr, axis=0)
+    msg_scale = f'min_max = {min_max_arr.mean():.1f}±{min_max_arr.std():.1f}, std = {std_arr.mean():.1f}±{std_arr.std():.1f}'
-    print(f'Idx "{idx}"; counts: {counts}; Output: {model.number_feature_dict[out_type]} ({out_type})')
+    # x_arr[idx:idx + self.medium.b_pixels],
+    #                   y_arr[idx, -self.medium.b_pixels:, 0],
+    #                   idx, counts, self.medium,
+    #                   new_pred[0],
+    #                   pred_arr[idx:idx + self.medium.b_pixels],
+    #                   self.seg_pred[:]
+    # print(f'Idx "{idx}"; counts: {counts}; Output: {model.number_feature_dict[out_type]} ({out_type})')
+    msg = f'Idx "{idx}"; counts:'
+    for i, value in enumerate(counts):
+        if value > 0:
+            msg += f'{' ,' if msg[-1] != ':' else ' '} {model.number_feature_dict[i]} {value}'
+    msg += f' -> Output: {model.number_feature_dict[out_type]} ({out_type})'
+    print(msg)
     colors_old = [cfg['colors'][model.number_feature_dict[val]] for val in old_pred]
     colors_new = [cfg['colors'][model.number_feature_dict[val]] for val in seg_pred]
@@ -255,6 +279,7 @@ def plot_comps_detect(x_sect, y_norm, idx, counts, model, out_type, seg_pred, ol
     ax.scatter(x_sect, np.zeros(x_sect.size), color=colors_old, label='Old prediction')
     ax.scatter(x_sect, np.ones(x_sect.size), color=colors_new, label='New prediction')
     ax.set_xlabel(r'Wavelength $(\AA)$')
+    ax.set_title(msg_scale)
     ax_secondary = ax.twinx()  # Creates a twin y-axis on the right
     ax_secondary.set_ylim(ax.get_ylim())  # Match the primary y-axis limits
@@ -266,6 +291,7 @@ def plot_comps_detect(x_sect, y_norm, idx, counts, model, out_type, seg_pred, ol
     return
 def plot_steps_backUP(spec, y_norm, idx, counts, model_mgr, out_type, seg_pred, old_pred):
     print(idx)
@@ -294,6 +320,7 @@ def plot_steps_backUP(spec, y_norm, idx, counts, model_mgr, out_type, seg_pred,
     return
 def plot_comps_detect_new(spec, theme, idx, y_norm, counts, model_mgr, out_type, old_pred, seg_pred, **kwargs):
     # Clear previous figure
@@ -351,6 +378,7 @@ def plot_comps_detect_new(spec, theme, idx, y_norm, counts, model_mgr, out_type,
     return
 class CheckSample:
     def __init__(self, in_data_arr, in_pred_arr, idx_features, fig_cfg=None, ax_diag=None, ax_line=None, base=10000,

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/tools.py RENAMED Viewed

@@ -2,6 +2,7 @@ import logging
 import numpy as np
 from .io import Aspect_Error
 from lime.fitting.lines import gaussian_model
+from matplotlib import pyplot as plt
 # Log variable
 _logger = logging.getLogger('aspect')
@@ -34,7 +35,7 @@ def scale_min_max_orig(data, axis=None):
 def scale_min_max(data, box_size, axis=None, scale_parameter='min-max'):
-    # Norm the scale features
+    # Norm the scale features # TODO this gives error if the error is 0 and the data is 0
     data_min_array = data[:, -box_size:].min(axis=axis, keepdims=True)
     data_max_array = data[:, -box_size:].max(axis=axis, keepdims=True)
     data[:, -box_size:] = (data[:, -box_size:] - data_min_array) / (data_max_array - data_min_array)
@@ -46,15 +47,6 @@ def scale_min_max(data, box_size, axis=None, scale_parameter='min-max'):
     if scale_parameter == 'min-max-log':
         data[:, -box_size - 1] = (np.log10(data_max_array - data_min_array)/4)[:,0]
-    # # Norm the scale features
-    # data_min_array = data[:, -box_size:].min(axis=axis, keepdims=True)
-    # data_max_array = data[:, -box_size:].max(axis=axis, keepdims=True)
-    # data[:, -box_size:] = (data[:, -box_size:] - data_min_array) / (data_max_array - data_min_array)
-    #
-    # # Save the scaling parameters
-    # data[:, -box_size - 1] = ((data_max_array - data_min_array)/10000)[:,0]
-    # data[:, -box_size - 1] = ((data_max_array - data_min_array)/10000)[:,0]
     return
 def scale_log(data, log_base, axis=None):
@@ -111,7 +103,33 @@ def broad_component_function(intensity_ratio):
     return np.sqrt(1 + np.log(intensity_ratio)/np.log(2))
-def doublet_model(wave_arr, noise_arr, cont_arr, amp, mu_line, sigma, doublet_em_sep_min, doublet_em_sep_max,
+def doublet_model(wave_arr, noise_arr, cont_arr, amp, mu_line, sigma, doublet_em_sep_max,
+                  doublet_int_min, doublet_int_max, lower_limit, upper_limit, sign=1):
+    # Generate intensities
+    int_diff = np.random.uniform(doublet_int_min, doublet_int_max)
+    amp1, amp2 = amp, amp * int_diff
+    # Clip for intensity limits
+    amp2 = np.clip(np.abs(amp2), lower_limit, upper_limit)
+    r = max(amp1, amp2)/min(amp1, amp2)
+    # sep_min = 1.2 + 0.15*(r - 1)
+    # sep_min = 1.3 + 0.15*(r - 1)
+    sep_min = 1.5 + 0.15*(r - 1)
+    sep = np.random.uniform(sep_min, doublet_em_sep_max)
+    # Generate the profiles
+    mu1 = mu_line - sep
+    mu2 = mu_line + sep
+    sigma1, sigma2 = sigma, sigma * 1
+    gauss1 = gaussian_model(wave_arr, sign*amp1, mu1, sigma1)
+    gauss2 = gaussian_model(wave_arr, sign*amp2, mu2, sigma2)
+    flux_arr = gauss1 + gauss2 + noise_arr + cont_arr
+    return flux_arr
+def doublet_model_orig(wave_arr, noise_arr, cont_arr, amp, mu_line, sigma, doublet_em_sep_min, doublet_em_sep_max,
                   doublet_int_min, doublet_int_max, lower_limit, upper_limit):
     # Compute the doublet

aspect_stable-0.7.dev1/src/aspect/trainer.py ADDED Viewed

@@ -0,0 +1,214 @@
+import importlib
+import numpy as np
+import joblib
+import toml
+from matplotlib import pyplot as plt
+from sklearn.model_selection import cross_val_score, cross_val_predict
+from sklearn.metrics import confusion_matrix
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
+from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, median_absolute_error
+from time import time
+from pathlib import Path
+from .io import cfg as aspect_cfg
+def get_training_test_sets(x_arr, y_arr, test_fraction, n_pixel_features=None, n_scale_features=None, random_state=None, classification=True):
+    # Split into training and testing:
+    if classification:
+        print(f'\nSplitting sample with categories:')
+        print(np.unique(y_arr))
+        sss = StratifiedShuffleSplit(n_splits=1, train_size=int(y_arr.size * (1 - test_fraction)),
+                                     test_size=int(y_arr.size * test_fraction), random_state=random_state)
+        # Equal splits
+        for train_index, test_index in sss.split(x_arr, y_arr):
+            X_train, X_test = x_arr[train_index, :], x_arr[test_index, :]
+            y_train, y_test = y_arr[train_index], y_arr[test_index]
+        # Convert strings to integers
+        y_train = np.vectorize(aspect_cfg['shape_number'].get)(y_train)
+        y_test = np.vectorize(aspect_cfg['shape_number'].get)(y_test)
+    else:
+        X_train, X_test, y_train, y_test = train_test_split(x_arr, y_arr, test_size=test_fraction,
+                                                            random_state=random_state, shuffle=True)
+        y_train, y_test = np.log10(y_train), np.log10(y_test)
+    # Crop the database if requested
+    if n_pixel_features and n_scale_features:
+        X_train, X_test = X_train[:, -n_pixel_features - n_scale_features:], X_test[:, -n_pixel_features - n_scale_features:]
+    return X_train, y_train, X_test, y_test
+def components_trainer(model_label, x_arr, y_arr, fit_cfg, list_labels, output_folder=None, test_fraction=0.1,
+                       random_state=None, classification=True):
+    # Preparing the estimator:
+    print(f'\nLoading estimator: {fit_cfg["estimator"]["class"]}')
+    estimator = getattr(importlib.import_module(fit_cfg['estimator']["module"]), fit_cfg['estimator']["class"])
+    estimator_params = fit_cfg.get('estimator_params', {})
+    # Split into training and testing:
+    data_train, y_train, data_test, y_test = get_training_test_sets(x_arr, y_arr, test_fraction,
+                                                                    random_state=random_state, classification=classification)
+    # Select just the features
+    feature_slice = -fit_cfg['box_size'] - 1
+    X_train, X_test = data_train[:, feature_slice:], data_test[:, feature_slice:]
+    # Run the training
+    if classification:
+        print(f'\nClassification: {y_train.size/len(fit_cfg["categories"]):.0f} * {len(fit_cfg["categories"])} = {y_train.size}  points ({model_label})')
+        print(f'- Settings: {fit_cfg["estimator_params"]}\n')
+        print(f'- Data set size: {X_train.shape}\n')
+    else:
+        print(f'Regression range: [{y_train.min():.3f}, {y_train.max():.3f}]')
+        print(f'- Settings: {fit_cfg["estimator_params"]}')
+        print(f'- Data set size: {X_train.shape}\n')
+    start_time = time()
+    ml_function = estimator(**estimator_params)
+    ml_function.fit(X_train, y_train)
+    end_time = np.round((time()-start_time)/60, 2)
+    print(f'- completed ({end_time} minutes)')
+    # Save the trained model and configuration
+    output_folder = Path(output_folder)/'results'
+    output_folder.mkdir(parents=True, exist_ok=True)
+    model_address = output_folder/f'{model_label}.joblib'
+    joblib.dump(ml_function, model_address)
+    if classification:
+        # Run initial diagnostics
+        print(f'\nReloading model from: {model_address}')
+        start_time = time()
+        ml_function = joblib.load(model_address)
+        fit_time = np.round((time()-start_time), 3)
+        print(f'- completed ({fit_time} seconds)')
+        print(f'\nRuning prediction on test set ({y_test.size} points)')
+        start_time = time()
+        y_pred = ml_function.predict(X_test)
+        print(f'- completed ({(time()-start_time):0.1f} seconds)')
+        # Testing confussion matrix
+        print(f'\nConfusion matrix in test set ({y_test.size} points)')
+        start_time = time()
+        conf_matrix_test = confusion_matrix(y_test, y_pred, normalize="all")
+        print(f'- completed ({(time()-start_time):0.1f} seconds)')
+        # Precision, recall and f1:
+        print(f'\nF1, Precision and recall diagnostics ({y_test.size} points)')
+        start_time = time()
+        pres = precision_score(y_test, y_pred, average='macro')
+        recall = recall_score(y_test, y_pred, average='macro')
+        f1 = f1_score(y_test, y_pred, average='macro')
+        print(f'- completed ({(time()-start_time):0.1f} seconds)')
+        print(f'\nModel outputs')
+        print(f'- F1: \n {f1}')
+        print(f'- Precision: \n {pres}')
+        print(f'- Recall: \n {recall}')
+        print(f'- Testing confusion matrix: \n {conf_matrix_test}')
+        print(f'- Fitting time (seconds): \n {float(fit_time)}')
+        # Save results into a TOML file
+        toml_path = output_folder/f'{model_label}.toml'
+        output_dict = {'resuts': {'f1':f1, 'precision':pres, 'Recall':recall, 'confusion_matrix':conf_matrix_test,
+                                  'fit_time': end_time}, 'properties': fit_cfg,}
+        with open(toml_path, 'w') as f:
+            toml.dump(output_dict, f)
+    else:
+        # Reload model
+        print(f'\nReloading model from: {model_address}')
+        start_time = time()
+        ml_function = joblib.load(model_address)
+        fit_time = np.round((time() - start_time), 3)
+        print(f'- completed ({fit_time} seconds)')
+        # Prediction
+        print(f'\nRunning prediction on test set ({y_test.size} points)')
+        start_time = time()
+        y_pred = ml_function.predict(X_test)
+        pred_time = np.round((time() - start_time), 3)
+        print(f'- completed ({pred_time} seconds)')
+        # Core regression metrics
+        print(f'\nRegression diagnostics ({y_test.size} points)')
+        start_time = time()
+        mse = mean_squared_error(y_test, y_pred)
+        rmse = np.sqrt(mse)
+        mae = mean_absolute_error(y_test, y_pred)
+        medae = median_absolute_error(y_test, y_pred)
+        r2 = r2_score(y_test, y_pred)
+        # Normalized errors (scale-independent)
+        y_range = y_test.max() - y_test.min()
+        nrmse = rmse / y_range if y_range > 0 else np.nan
+        nmae = mae / y_range if y_range > 0 else np.nan
+        print(f'- completed ({(time() - start_time):0.1f} seconds)')
+        # Outputs
+        print(f'\nModel outputs')
+        print(f'- R²: \n {r2}')
+        print(f'- RMSE: \n {rmse}')
+        print(f'- MAE: \n {mae}')
+        print(f'- Median AE: \n {medae}')
+        print(f'- Normalized RMSE: \n {nrmse}')
+        print(f'- Normalized MAE: \n {nmae}')
+        print(f'- Fit time (seconds): \n {float(fit_time)}')
+        # Save results to TOML
+        toml_path = output_folder / f'{model_label}.toml'
+        output_dict = {
+            'results': {
+                'r2': float(r2),
+                'rmse': float(rmse),
+                'mae': float(mae),
+                'median_ae': float(medae),
+                'nrmse': float(nrmse),
+                'nmae': float(nmae),
+                'fit_time': float(end_time),
+                'prediction_time': float(pred_time),
+            },
+            'properties': fit_cfg,
+        }
+        # Scatter plot
+        fig, ax = plt.subplots()
+        idcs_limit = 5000
+        ycoords, xcoords = data_test[:, 0], data_test[:, 1]
+        error = y_test - y_pred  # signed error
+        abs_error = np.abs(error)
+        rel_error = error / y_test
+        limit = np.percentile(rel_error, 95)
+        # Set the color limits
+        sc = ax.scatter(xcoords[:idcs_limit], ycoords[:idcs_limit], c=rel_error[:idcs_limit], s=8, cmap='viridis')
+        sc.set_clim(-limit, limit)
+        cbar = fig.colorbar(sc, ax=ax, label='|Prediction error|')
+        ax.set_yscale('log')
+        plt.tight_layout()
+        plt.show()
+        with open(toml_path, 'w') as f:
+            toml.dump(output_dict, f)
+    return

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect/workflow.py RENAMED Viewed

@@ -5,9 +5,11 @@ from aspect.plots import plot_comps_detect
 # from matplotlib import pyplot as plt
 from pathlib import Path
 CHOICE_DM = np.array(cfg['decision_matrices']['choice'])
 TIME_DM = np.array(cfg['decision_matrices']['time'])
 def flux_to_image(flux_array, approximation, model_2D):
     if model_2D is not None:
@@ -35,7 +37,10 @@ def flux_to_image(flux_array, approximation, model_2D):
 def unpack_spec_flux(spectrum, rest_wl_lim):
     # Extract the mask if masked array
-    pixel_mask = ~spectrum.flux.mask
+    pixel_mask = (~spectrum.flux.mask) & (spectrum.flux.data != 0)
+    if spectrum.err_flux is not None:
+        pixel_mask = pixel_mask & (spectrum.err_flux.data != 0)
     # Limit to region if requested # TODO warning negative entries
     if rest_wl_lim is not None:
@@ -59,6 +64,7 @@ def enbox_spectrum(input_flux, box_size, range_box, n_scale_features):
     n_rows = input_flux.size - box_size
     # Container for the data
+    # box_containter = np.zeros((n_rows, n_columns))
     box_containter = np.empty((n_rows, n_columns))
     # Assign values
@@ -111,8 +117,6 @@ def detection_revision(seg_pred, box_size, new_type, new_confidence):
     return idcs_pred, new_pred, new_conf
 class DetectionModel:
     def __init__(self, model_address=None, n_jobs=None, verbose=0):
@@ -139,7 +143,7 @@ class DetectionModel:
 class ModelManager:
-    def __init__(self, model_address=None,):
+    def __init__(self, model_address=None, n_jobs=4):
         # Global parameters
         self.n_mc = 100
@@ -148,10 +152,10 @@ class ModelManager:
         self.n_scale_features = 1
         # Default values
-        model_address = DEFAULT_MODEL_ADDRESS if model_address is None else model_address
+        self.model_address = DEFAULT_MODEL_ADDRESS if model_address is None else Path(model_address)
         # Load the model
-        self.medium = DetectionModel(model_address)
+        self.medium = DetectionModel(self.model_address, n_jobs)
         self.large = None
         # Largest reference model parameters
@@ -212,8 +216,8 @@ class ModelManager:
                                                                out_confidence)
             # Only pass if more than half
-            # half_check = idcs_pred[6:].sum() > 5
-            half_check = idcs_pred[5:].sum() > 6
+            # half_check = idcs_pred[5:].sum() > 6
+            half_check = np.all(idcs_pred[3:9])
             if half_check:
                 idcs_pred = np.flatnonzero(idcs_pred)
                 self.seg_pred[idcs_pred] = new_pred[idcs_pred]
@@ -223,13 +227,20 @@ class ModelManager:
                 self.seg_conf[:] = conf_arr[idx:idx + self.medium.b_pixels]
             if plot_steps:
-                plot_comps_detect(x_arr[idx:idx + self.medium.b_pixels],
-                                  y_arr[idx, -self.medium.b_pixels:, 0],
+                plot_comps_detect(x_arr, y_arr, self.medium.b_pixels,
                                   idx, counts, self.medium,
                                   new_pred[0],
                                   pred_arr[idx:idx + self.medium.b_pixels],
                                   self.seg_pred[:])
+                # plot_comps_detect(x_arr[idx:idx + self.medium.b_pixels],
+                #                   y_arr[idx, -self.medium.b_pixels:, 0],
+                #                   idx, counts, self.medium,
+                #                   new_pred[0],
+                #                   pred_arr[idx:idx + self.medium.b_pixels],
+                #                   self.seg_pred[:])
             # Assign new categories and confidence
             pred_arr[idx:idx + self.medium.b_pixels] = self.seg_pred[:]
             conf_arr[idx:idx + self.medium.b_pixels] = self.seg_conf[:]
@@ -241,8 +252,6 @@ class ModelManager:
 model_mgr = ModelManager()
 class ComponentsDetector:
     def __init__(self, spectrum, model_address=None):

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aspect-stable
-Version: 0.5.0
+Version: 0.7.dev1
 Summary: Automatic SPEctra Components Tagging
 Author-email: Vital Fernández <vgf@stsci.edu>
 License-Expression: GPL-3.0-or-later

{aspect_stable-0.5.0 → aspect_stable-0.7.dev1}/src/aspect_stable.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,4 +1,3 @@
-MANIFEST.in
 README.rst
 pyproject.toml
 src/aspect/__init__.py
@@ -9,6 +8,10 @@ src/aspect/plots.py
 src/aspect/tools.py
 src/aspect/trainer.py
 src/aspect/workflow.py
+src/aspect/models/aspect_min-max-log_12_pixels_v10_model.joblib
+src/aspect/models/aspect_min-max-log_12_pixels_v10_model.toml
+src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.joblib
+src/aspect/models/aspect_min-max-log_12_pixels_v12_randomforest_model.toml
 src/aspect_stable.egg-info/PKG-INFO
 src/aspect_stable.egg-info/SOURCES.txt
 src/aspect_stable.egg-info/dependency_links.txt

aspect_stable-0.5.0/MANIFEST.in DELETED Viewed

@@ -1,6 +0,0 @@
-# MANIFEST.in
-include src/aspect/aspect.toml
-include src/aspect/changelog.txt
-include src/aspect/models/aspect_min-max_12_pixels_v10_model.toml
-include src/aspect/models/aspect_min-max_12_pixels_v10_model.joblib

aspect_stable-0.5.0/src/aspect/trainer.py DELETED Viewed

@@ -1,104 +0,0 @@
-import importlib
-import numpy as np
-import joblib
-import toml
-from sklearn.model_selection import cross_val_score, cross_val_predict
-from sklearn.metrics import confusion_matrix
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import StratifiedShuffleSplit
-from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
-from time import time
-from pathlib import Path
-from .io import cfg as aspect_cfg
-def get_training_test_sets(x_arr, y_arr, test_fraction, n_pixel_features, n_scale_features, random_state=None):
-    # Split into training and testing:
-    print(f'\nSplitting sample with categories:')
-    print(np.unique(y_arr))
-    sss = StratifiedShuffleSplit(n_splits=1, train_size=int(y_arr.size * (1 - test_fraction)),
-                                 test_size=int(y_arr.size * test_fraction), random_state=random_state)
-    for train_index, test_index in sss.split(x_arr, y_arr):
-        X_train, X_test = x_arr[train_index, -n_pixel_features-n_scale_features:], x_arr[test_index, -n_pixel_features-n_scale_features:]
-        y_train, y_test = y_arr[train_index], y_arr[test_index]
-    # Convert strings to integers
-    y_train = np.vectorize(aspect_cfg['shape_number'].get)(y_train)
-    y_test = np.vectorize(aspect_cfg['shape_number'].get)(y_test)
-    return X_train, y_train, X_test, y_test
-def components_trainer(model_label, x_arr, y_arr, fit_cfg, list_labels, output_folder=None, test_fraction=0.1,
-                       random_state=None):
-    # Preparing the estimator:
-    print(f'\nLoading estimator: {fit_cfg["estimator"]["class"]}')
-    estimator = getattr(importlib.import_module(fit_cfg['estimator']["module"]), fit_cfg['estimator']["class"])
-    estimator_params = fit_cfg.get('estimator_params', {})
-    # Split into training and testing:
-    print(f'\nSplitting sample with categories:')
-    X_train, y_train, X_test, y_test = get_training_test_sets(x_arr, y_arr, test_fraction,
-                                                              n_pixel_features=fit_cfg['box_size'], n_scale_features=1,
-                                                              random_state=random_state)
-    # Run the training
-    print(f'\nTraining: {y_train.size/len(fit_cfg["categories"]):.0f} * {len(fit_cfg["categories"])} = {y_train.size}  points ({model_label})')
-    print(f'- Settings: {fit_cfg["estimator_params"]}\n')
-    start_time = time()
-    ml_function = estimator(**estimator_params)
-    ml_function.fit(X_train, y_train)
-    end_time = np.round((time()-start_time)/60, 2)
-    print(f'- completed ({end_time} minutes)')
-    # Save the trained model and configuration
-    output_folder = Path(output_folder)/'results'
-    output_folder.mkdir(parents=True, exist_ok=True)
-    model_address = output_folder/f'{model_label}.joblib'
-    joblib.dump(ml_function, model_address)
-    # Run initial diagnostics
-    print(f'\nReloading model from: {model_address}')
-    start_time = time()
-    ml_function = joblib.load(model_address)
-    fit_time = np.round((time()-start_time), 3)
-    print(f'- completed ({fit_time} seconds)')
-    print(f'\nRuning prediction on test set ({y_test.size} points)')
-    start_time = time()
-    y_pred = ml_function.predict(X_test)
-    print(f'- completed ({(time()-start_time):0.1f} seconds)')
-    # Testing confussion matrix
-    print(f'\nConfusion matrix in test set ({y_test.size} points)')
-    start_time = time()
-    conf_matrix_test = confusion_matrix(y_test, y_pred, normalize="all")
-    print(f'- completed ({(time()-start_time):0.1f} seconds)')
-    # Precision, recall and f1:
-    print(f'\nF1, Precision and recall diagnostics ({y_test.size} points)')
-    start_time = time()
-    pres = precision_score(y_test, y_pred, average='macro')
-    recall = recall_score(y_test, y_pred, average='macro')
-    f1 = f1_score(y_test, y_pred, average='macro')
-    print(f'- completed ({(time()-start_time):0.1f} seconds)')
-    print(f'\nModel outputs')
-    print(f'- F1: \n {f1}')
-    print(f'- Precision: \n {pres}')
-    print(f'- Recall: \n {recall}')
-    print(f'- Testing confusion matrix: \n {conf_matrix_test}')
-    print(f'- Fitting time (seconds): \n {float(fit_time)}')
-    # Save results into a TOML file
-    toml_path = output_folder/f'{model_label}.toml'
-    output_dict = {'resuts': {'f1':f1, 'precision':pres, 'Recall':recall, 'confusion_matrix':conf_matrix_test,
-                              'fit_time': fit_time}, 'properties': fit_cfg,}
-    with open(toml_path, 'w') as f:
-        toml.dump(output_dict, f)
-    return