PyPI - smftools - Versions diffs - 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl - Mend

smftools 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

smftools/_version.py +1 -1
smftools/cli/chimeric_adata.py +1563 -0
smftools/cli/helpers.py +18 -2
smftools/cli/hmm_adata.py +18 -1
smftools/cli/latent_adata.py +522 -67
smftools/cli/load_adata.py +2 -2
smftools/cli/preprocess_adata.py +32 -93
smftools/cli/recipes.py +26 -0
smftools/cli/spatial_adata.py +23 -109
smftools/cli/variant_adata.py +423 -0
smftools/cli_entry.py +41 -5
smftools/config/conversion.yaml +0 -10
smftools/config/deaminase.yaml +3 -0
smftools/config/default.yaml +49 -13
smftools/config/experiment_config.py +96 -3
smftools/constants.py +4 -0
smftools/hmm/call_hmm_peaks.py +1 -1
smftools/informatics/binarize_converted_base_identities.py +2 -89
smftools/informatics/converted_BAM_to_adata.py +53 -13
smftools/informatics/h5ad_functions.py +83 -0
smftools/informatics/modkit_extract_to_adata.py +4 -0
smftools/plotting/__init__.py +26 -12
smftools/plotting/autocorrelation_plotting.py +22 -4
smftools/plotting/chimeric_plotting.py +1893 -0
smftools/plotting/classifiers.py +28 -14
smftools/plotting/general_plotting.py +58 -3362
smftools/plotting/hmm_plotting.py +1586 -2
smftools/plotting/latent_plotting.py +804 -0
smftools/plotting/plotting_utils.py +243 -0
smftools/plotting/position_stats.py +16 -8
smftools/plotting/preprocess_plotting.py +281 -0
smftools/plotting/qc_plotting.py +8 -3
smftools/plotting/spatial_plotting.py +1134 -0
smftools/plotting/variant_plotting.py +1231 -0
smftools/preprocessing/__init__.py +3 -0
smftools/preprocessing/append_base_context.py +1 -1
smftools/preprocessing/append_mismatch_frequency_sites.py +35 -6
smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
smftools/preprocessing/append_variant_call_layer.py +480 -0
smftools/preprocessing/flag_duplicate_reads.py +4 -4
smftools/preprocessing/invert_adata.py +1 -0
smftools/readwrite.py +109 -85
smftools/tools/__init__.py +6 -0
smftools/tools/calculate_knn.py +121 -0
smftools/tools/calculate_nmf.py +18 -7
smftools/tools/calculate_pca.py +180 -0
smftools/tools/calculate_umap.py +70 -154
smftools/tools/position_stats.py +4 -4
smftools/tools/rolling_nn_distance.py +640 -3
smftools/tools/sequence_alignment.py +140 -0
smftools/tools/tensor_factorization.py +52 -4
{smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/METADATA +3 -1
{smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/RECORD +56 -42
{smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/WHEEL +0 -0
{smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/entry_points.txt +0 -0
{smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/licenses/LICENSE +0 -0

smftools/plotting/classifiers.py CHANGED Viewed

@@ -4,11 +4,14 @@ import os
 import numpy as np
+from smftools.logging_utils import get_logger
 from smftools.optional_imports import require
 plt = require("matplotlib.pyplot", extra="plotting", purpose="model plots")
 torch = require("torch", extra="ml-base", purpose="model saliency plots")
+logger = get_logger(__name__)
 def plot_model_performance(metrics, save_path=None):
     """Plot ROC and precision-recall curves for model metrics.
@@ -19,6 +22,7 @@ def plot_model_performance(metrics, save_path=None):
     """
     import os
+    logger.info("Plotting model performance curves.")
     for ref in metrics.keys():
         plt.figure(figsize=(12, 5))
@@ -58,14 +62,17 @@ def plot_model_performance(metrics, save_path=None):
             safe_name = save_name.replace("=", "").replace("__", "_").replace(",", "_")
             out_file = os.path.join(save_path, f"{safe_name}.png")
             plt.savefig(out_file, dpi=300)
-            print(f"📁 Saved: {out_file}")
+            logger.info("Saved model performance plot to %s.", out_file)
         plt.show()
         # Confusion Matrices
         for model_name, vals in metrics[ref].items():
-            print(f"Confusion Matrix for {ref} - {model_name.upper()}:")
-            print(vals["confusion_matrix"])
-            print()
+            logger.info(
+                "Confusion Matrix for %s - %s:\n%s",
+                ref,
+                model_name.upper(),
+                vals["confusion_matrix"],
+            )
 def plot_feature_importances_or_saliency(
@@ -94,6 +101,7 @@ def plot_feature_importances_or_saliency(
     import numpy as np
+    logger.info("Plotting feature importances or saliency.")
     # Select device for NN models
     device = (
         torch.device("cuda")
@@ -110,7 +118,7 @@ def plot_feature_importances_or_saliency(
             suffix = "_".join(site_config[ref]) if ref in site_config else "full"
         if ref not in positions or suffix not in positions[ref]:
-            print(f"Positions not found for {ref} with suffix {suffix}. Skipping {ref}.")
+            logger.warning("Positions not found for %s with suffix %s. Skipping.", ref, suffix)
             continue
         coords_index = positions[ref][suffix]
@@ -122,8 +130,8 @@ def plot_feature_importances_or_saliency(
         other_sites = set()
         if adata is None:
-            print(
-                "⚠️ AnnData object is required to classify site types. Skipping site type markers."
+            logger.warning(
+                "AnnData object is required to classify site types. Skipping site type markers."
             )
         else:
             gpc_col = f"{ref}_GpC_site"
@@ -140,7 +148,7 @@ def plot_feature_importances_or_saliency(
                     else:
                         other_sites.add(coord_int)
                 except KeyError:
-                    print(f"⚠️ Index '{idx_str}' not found in adata.var. Skipping.")
+                    logger.warning("Index '%s' not found in adata.var. Skipping.", idx_str)
                     continue
         for model_key, model in model_dict.items():
@@ -151,13 +159,17 @@ def plot_feature_importances_or_saliency(
                 if hasattr(model, "feature_importances_"):
                     importances = model.feature_importances_
                 else:
-                    print(f"Random Forest model {model_key} has no feature_importances_. Skipping.")
+                    logger.warning(
+                        "Random Forest model %s has no feature_importances_. Skipping.", model_key
+                    )
                     continue
                 plot_title = f"RF Feature Importances for {ref} ({model_key})"
                 y_label = "Feature Importance"
             else:
                 if tensors is None or ref not in tensors or suffix not in tensors[ref]:
-                    print(f"No input data provided for NN saliency for {model_key}. Skipping.")
+                    logger.warning(
+                        "No input data provided for NN saliency for %s. Skipping.", model_key
+                    )
                     continue
                 input_tensor = tensors[ref][suffix]
                 model.eval()
@@ -238,7 +250,7 @@ def plot_feature_importances_or_saliency(
                 )
                 out_file = os.path.join(save_path, f"{safe_name}.png")
                 plt.savefig(out_file, dpi=300)
-                print(f"📁 Saved: {out_file}")
+                logger.info("Saved feature importance plot to %s.", out_file)
             plt.show()
@@ -265,6 +277,7 @@ def plot_model_curves_from_adata(
         ylim_roc: Y-axis limits for ROC curve.
         ylim_pr: Y-axis limits for PR curve.
     """
+    logger.info("Plotting model curves from AnnData.")
     sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
     auc = sklearn_metrics.auc
     precision_recall_curve = sklearn_metrics.precision_recall_curve
@@ -320,7 +333,7 @@ def plot_model_curves_from_adata(
         safe_name = save_name.replace("=", "").replace("__", "_").replace(",", "_")
         out_file = os.path.join(save_path, f"{safe_name}.png")
         plt.savefig(out_file, dpi=300)
-        print(f"📁 Saved: {out_file}")
+        logger.info("Saved model curves plot to %s.", out_file)
     plt.show()
@@ -358,6 +371,7 @@ def plot_model_curves_from_adata_with_frequency_grid(
     import numpy as np
+    logger.info("Plotting model curves with frequency grid from AnnData.")
     sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
     auc = sklearn_metrics.auc
     precision_recall_curve = sklearn_metrics.precision_recall_curve
@@ -387,7 +401,7 @@ def plot_model_curves_from_adata_with_frequency_grid(
         neg_sample_count = desired_total - pos_sample_count
         if pos_sample_count > len(pos_indices) or neg_sample_count > len(neg_indices):
-            print(f"⚠️ Skipping frequency {pos_freq:.3f}: not enough samples.")
+            logger.warning("Skipping frequency %.3f: not enough samples.", pos_freq)
             continue
         sampled_pos = np.random.choice(pos_indices, size=pos_sample_count, replace=False)
@@ -453,5 +467,5 @@ def plot_model_curves_from_adata_with_frequency_grid(
         os.makedirs(save_path, exist_ok=True)
         out_file = os.path.join(save_path, "ROC_PR_grid.png")
         plt.savefig(out_file, dpi=300)
-        print(f"📁 Saved: {out_file}")
+        logger.info("Saved model curves frequency grid to %s.", out_file)
     plt.show()

smftools 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

smftools 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl