smftools-0.2.5-py3-none-any.whl → smftools-0.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. smftools/__init__.py +39 -7
  2. smftools/_settings.py +2 -0
  3. smftools/_version.py +3 -1
  4. smftools/cli/__init__.py +1 -0
  5. smftools/cli/archived/cli_flows.py +2 -0
  6. smftools/cli/helpers.py +34 -6
  7. smftools/cli/hmm_adata.py +239 -33
  8. smftools/cli/latent_adata.py +318 -0
  9. smftools/cli/load_adata.py +167 -131
  10. smftools/cli/preprocess_adata.py +180 -53
  11. smftools/cli/spatial_adata.py +152 -100
  12. smftools/cli_entry.py +38 -1
  13. smftools/config/__init__.py +2 -0
  14. smftools/config/conversion.yaml +11 -1
  15. smftools/config/default.yaml +42 -2
  16. smftools/config/experiment_config.py +59 -1
  17. smftools/constants.py +65 -0
  18. smftools/datasets/__init__.py +2 -0
  19. smftools/hmm/HMM.py +97 -3
  20. smftools/hmm/__init__.py +24 -13
  21. smftools/hmm/archived/apply_hmm_batched.py +2 -0
  22. smftools/hmm/archived/calculate_distances.py +2 -0
  23. smftools/hmm/archived/call_hmm_peaks.py +2 -0
  24. smftools/hmm/archived/train_hmm.py +2 -0
  25. smftools/hmm/call_hmm_peaks.py +5 -2
  26. smftools/hmm/display_hmm.py +4 -1
  27. smftools/hmm/hmm_readwrite.py +7 -2
  28. smftools/hmm/nucleosome_hmm_refinement.py +2 -0
  29. smftools/informatics/__init__.py +59 -34
  30. smftools/informatics/archived/bam_conversion.py +2 -0
  31. smftools/informatics/archived/bam_direct.py +2 -0
  32. smftools/informatics/archived/basecall_pod5s.py +2 -0
  33. smftools/informatics/archived/basecalls_to_adata.py +2 -0
  34. smftools/informatics/archived/conversion_smf.py +2 -0
  35. smftools/informatics/archived/deaminase_smf.py +1 -0
  36. smftools/informatics/archived/direct_smf.py +2 -0
  37. smftools/informatics/archived/fast5_to_pod5.py +2 -0
  38. smftools/informatics/archived/helpers/archived/__init__.py +2 -0
  39. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +2 -0
  40. smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
  41. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
  42. smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
  43. smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
  44. smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
  45. smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
  46. smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
  47. smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
  48. smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
  49. smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
  50. smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
  51. smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
  52. smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
  53. smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
  54. smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
  55. smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
  56. smftools/informatics/archived/helpers/archived/informatics.py +2 -0
  57. smftools/informatics/archived/helpers/archived/load_adata.py +2 -0
  58. smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
  59. smftools/informatics/archived/helpers/archived/modQC.py +2 -0
  60. smftools/informatics/archived/helpers/archived/modcall.py +2 -0
  61. smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
  62. smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
  63. smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
  64. smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
  65. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +2 -0
  66. smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
  67. smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
  68. smftools/informatics/archived/print_bam_query_seq.py +2 -0
  69. smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
  70. smftools/informatics/archived/subsample_pod5.py +2 -0
  71. smftools/informatics/bam_functions.py +1093 -176
  72. smftools/informatics/basecalling.py +2 -0
  73. smftools/informatics/bed_functions.py +271 -61
  74. smftools/informatics/binarize_converted_base_identities.py +3 -0
  75. smftools/informatics/complement_base_list.py +2 -0
  76. smftools/informatics/converted_BAM_to_adata.py +641 -176
  77. smftools/informatics/fasta_functions.py +94 -10
  78. smftools/informatics/h5ad_functions.py +123 -4
  79. smftools/informatics/modkit_extract_to_adata.py +1019 -431
  80. smftools/informatics/modkit_functions.py +2 -0
  81. smftools/informatics/ohe.py +2 -0
  82. smftools/informatics/pod5_functions.py +3 -2
  83. smftools/informatics/sequence_encoding.py +72 -0
  84. smftools/logging_utils.py +21 -2
  85. smftools/machine_learning/__init__.py +22 -6
  86. smftools/machine_learning/data/__init__.py +2 -0
  87. smftools/machine_learning/data/anndata_data_module.py +18 -4
  88. smftools/machine_learning/data/preprocessing.py +2 -0
  89. smftools/machine_learning/evaluation/__init__.py +2 -0
  90. smftools/machine_learning/evaluation/eval_utils.py +2 -0
  91. smftools/machine_learning/evaluation/evaluators.py +14 -9
  92. smftools/machine_learning/inference/__init__.py +2 -0
  93. smftools/machine_learning/inference/inference_utils.py +2 -0
  94. smftools/machine_learning/inference/lightning_inference.py +6 -1
  95. smftools/machine_learning/inference/sklearn_inference.py +2 -0
  96. smftools/machine_learning/inference/sliding_window_inference.py +2 -0
  97. smftools/machine_learning/models/__init__.py +2 -0
  98. smftools/machine_learning/models/base.py +7 -2
  99. smftools/machine_learning/models/cnn.py +7 -2
  100. smftools/machine_learning/models/lightning_base.py +16 -11
  101. smftools/machine_learning/models/mlp.py +5 -1
  102. smftools/machine_learning/models/positional.py +7 -2
  103. smftools/machine_learning/models/rnn.py +5 -1
  104. smftools/machine_learning/models/sklearn_models.py +14 -9
  105. smftools/machine_learning/models/transformer.py +7 -2
  106. smftools/machine_learning/models/wrappers.py +6 -2
  107. smftools/machine_learning/training/__init__.py +2 -0
  108. smftools/machine_learning/training/train_lightning_model.py +13 -3
  109. smftools/machine_learning/training/train_sklearn_model.py +2 -0
  110. smftools/machine_learning/utils/__init__.py +2 -0
  111. smftools/machine_learning/utils/device.py +5 -1
  112. smftools/machine_learning/utils/grl.py +5 -1
  113. smftools/metadata.py +1 -1
  114. smftools/optional_imports.py +31 -0
  115. smftools/plotting/__init__.py +41 -31
  116. smftools/plotting/autocorrelation_plotting.py +9 -5
  117. smftools/plotting/classifiers.py +16 -4
  118. smftools/plotting/general_plotting.py +2415 -629
  119. smftools/plotting/hmm_plotting.py +97 -9
  120. smftools/plotting/position_stats.py +15 -7
  121. smftools/plotting/qc_plotting.py +6 -1
  122. smftools/preprocessing/__init__.py +36 -37
  123. smftools/preprocessing/append_base_context.py +17 -17
  124. smftools/preprocessing/append_mismatch_frequency_sites.py +158 -0
  125. smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +2 -0
  126. smftools/preprocessing/archived/calculate_complexity.py +2 -0
  127. smftools/preprocessing/archived/mark_duplicates.py +2 -0
  128. smftools/preprocessing/archived/preprocessing.py +2 -0
  129. smftools/preprocessing/archived/remove_duplicates.py +2 -0
  130. smftools/preprocessing/binary_layers_to_ohe.py +2 -1
  131. smftools/preprocessing/calculate_complexity_II.py +4 -1
  132. smftools/preprocessing/calculate_consensus.py +1 -1
  133. smftools/preprocessing/calculate_pairwise_differences.py +2 -0
  134. smftools/preprocessing/calculate_pairwise_hamming_distances.py +3 -0
  135. smftools/preprocessing/calculate_position_Youden.py +9 -2
  136. smftools/preprocessing/calculate_read_modification_stats.py +6 -1
  137. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +2 -0
  138. smftools/preprocessing/filter_reads_on_modification_thresholds.py +2 -0
  139. smftools/preprocessing/flag_duplicate_reads.py +42 -54
  140. smftools/preprocessing/make_dirs.py +2 -1
  141. smftools/preprocessing/min_non_diagonal.py +2 -0
  142. smftools/preprocessing/recipes.py +2 -0
  143. smftools/readwrite.py +53 -17
  144. smftools/schema/anndata_schema_v1.yaml +15 -1
  145. smftools/tools/__init__.py +30 -18
  146. smftools/tools/archived/apply_hmm.py +2 -0
  147. smftools/tools/archived/classifiers.py +2 -0
  148. smftools/tools/archived/classify_methylated_features.py +2 -0
  149. smftools/tools/archived/classify_non_methylated_features.py +2 -0
  150. smftools/tools/archived/subset_adata_v1.py +2 -0
  151. smftools/tools/archived/subset_adata_v2.py +2 -0
  152. smftools/tools/calculate_leiden.py +57 -0
  153. smftools/tools/calculate_nmf.py +119 -0
  154. smftools/tools/calculate_umap.py +93 -8
  155. smftools/tools/cluster_adata_on_methylation.py +7 -1
  156. smftools/tools/position_stats.py +17 -27
  157. smftools/tools/rolling_nn_distance.py +235 -0
  158. smftools/tools/tensor_factorization.py +169 -0
  159. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/METADATA +69 -33
  160. smftools-0.3.1.dist-info/RECORD +189 -0
  161. smftools-0.2.5.dist-info/RECORD +0 -181
  162. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/WHEEL +0 -0
  163. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/entry_points.txt +0 -0
  164. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,32 +1,42 @@
1
- from .autocorrelation_plotting import *
2
- from .classifiers import (
3
- plot_feature_importances_or_saliency,
4
- plot_model_curves_from_adata,
5
- plot_model_curves_from_adata_with_frequency_grid,
6
- plot_model_performance,
7
- )
8
- from .general_plotting import (
9
- combined_hmm_raw_clustermap,
10
- combined_raw_clustermap,
11
- plot_hmm_layers_rolling_by_sample_ref,
12
- )
13
- from .hmm_plotting import *
14
- from .position_stats import (
15
- plot_bar_relative_risk,
16
- plot_positionwise_matrix,
17
- plot_positionwise_matrix_grid,
18
- plot_volcano_relative_risk,
19
- )
20
- from .qc_plotting import *
1
+ from __future__ import annotations
21
2
 
22
- __all__ = [
23
- "combined_hmm_raw_clustermap",
24
- "plot_bar_relative_risk",
25
- "plot_positionwise_matrix",
26
- "plot_positionwise_matrix_grid",
27
- "plot_volcano_relative_risk",
28
- "plot_feature_importances_or_saliency",
29
- "plot_model_performance",
30
- "plot_model_curves_from_adata",
31
- "plot_model_curves_from_adata_with_frequency_grid",
32
- ]
3
+ from importlib import import_module
4
+
5
+ _LAZY_ATTRS = {
6
+ "combined_hmm_length_clustermap": "smftools.plotting.general_plotting",
7
+ "combined_hmm_raw_clustermap": "smftools.plotting.general_plotting",
8
+ "combined_raw_clustermap": "smftools.plotting.general_plotting",
9
+ "plot_rolling_nn_and_layer": "smftools.plotting.general_plotting",
10
+ "plot_hmm_layers_rolling_by_sample_ref": "smftools.plotting.general_plotting",
11
+ "plot_nmf_components": "smftools.plotting.general_plotting",
12
+ "plot_cp_sequence_components": "smftools.plotting.general_plotting",
13
+ "plot_embedding": "smftools.plotting.general_plotting",
14
+ "plot_read_span_quality_clustermaps": "smftools.plotting.general_plotting",
15
+ "plot_pca": "smftools.plotting.general_plotting",
16
+ "plot_sequence_integer_encoding_clustermaps": "smftools.plotting.general_plotting",
17
+ "plot_umap": "smftools.plotting.general_plotting",
18
+ "plot_bar_relative_risk": "smftools.plotting.position_stats",
19
+ "plot_positionwise_matrix": "smftools.plotting.position_stats",
20
+ "plot_positionwise_matrix_grid": "smftools.plotting.position_stats",
21
+ "plot_volcano_relative_risk": "smftools.plotting.position_stats",
22
+ "plot_feature_importances_or_saliency": "smftools.plotting.classifiers",
23
+ "plot_model_curves_from_adata": "smftools.plotting.classifiers",
24
+ "plot_model_curves_from_adata_with_frequency_grid": "smftools.plotting.classifiers",
25
+ "plot_model_performance": "smftools.plotting.classifiers",
26
+ "plot_read_qc_histograms": "smftools.plotting.qc_plotting",
27
+ "plot_rolling_grid": "smftools.plotting.autocorrelation_plotting",
28
+ "plot_spatial_autocorr_grid": "smftools.plotting.autocorrelation_plotting",
29
+ "plot_hmm_size_contours": "smftools.plotting.hmm_plotting",
30
+ }
31
+
32
+
33
+ def __getattr__(name: str):
34
+ if name in _LAZY_ATTRS:
35
+ module = import_module(_LAZY_ATTRS[name])
36
+ attr = getattr(module, name)
37
+ globals()[name] = attr
38
+ return attr
39
+ raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
40
+
41
+
42
+ __all__ = list(_LAZY_ATTRS.keys())
@@ -1,8 +1,12 @@
1
+ from __future__ import annotations
2
+
1
3
  from typing import Optional
2
4
 
3
5
  import numpy as np
4
6
  import pandas as pd
5
7
 
8
+ from smftools.optional_imports import require
9
+
6
10
 
7
11
  def plot_spatial_autocorr_grid(
8
12
  adata,
@@ -35,7 +39,7 @@ def plot_spatial_autocorr_grid(
35
39
  import os
36
40
  import warnings
37
41
 
38
- import matplotlib.pyplot as plt
42
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
39
43
 
40
44
  # Try importing analyzer (used only as fallback)
41
45
  try:
@@ -518,7 +522,7 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
518
522
  Plot NRL and SNR vs window center from the dataframe returned by rolling_autocorr_metrics.
519
523
  If out_png is None, returns the matplotlib Figure object; otherwise saves PNG and returns path.
520
524
  """
521
- import matplotlib.pyplot as plt
525
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
522
526
 
523
527
  # sort by center
524
528
  df2 = df.sort_values("center")
@@ -543,12 +547,12 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
543
547
  if out_png:
544
548
  fig.savefig(out_png, bbox_inches="tight")
545
549
  if not show:
546
- import matplotlib
550
+ matplotlib = require("matplotlib", extra="plotting", purpose="autocorrelation plots")
547
551
 
548
552
  matplotlib.pyplot.close(fig)
549
553
  return out_png
550
554
  if not show:
551
- import matplotlib
555
+ matplotlib = require("matplotlib", extra="plotting", purpose="autocorrelation plots")
552
556
 
553
557
  matplotlib.pyplot.close(fig)
554
558
  return fig
@@ -600,7 +604,7 @@ def plot_rolling_grid(
600
604
  """
601
605
  import os
602
606
 
603
- import matplotlib.pyplot as plt
607
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
604
608
 
605
609
  if per_metric_ylim is None:
606
610
  per_metric_ylim = {}
@@ -1,8 +1,13 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
2
4
 
3
- import matplotlib.pyplot as plt
4
5
  import numpy as np
5
- import torch
6
+
7
+ from smftools.optional_imports import require
8
+
9
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="model plots")
10
+ torch = require("torch", extra="ml-base", purpose="model saliency plots")
6
11
 
7
12
 
8
13
  def plot_model_performance(metrics, save_path=None):
@@ -260,7 +265,10 @@ def plot_model_curves_from_adata(
260
265
  ylim_roc: Y-axis limits for ROC curve.
261
266
  ylim_pr: Y-axis limits for PR curve.
262
267
  """
263
- from sklearn.metrics import auc, precision_recall_curve, roc_curve
268
+ sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
269
+ auc = sklearn_metrics.auc
270
+ precision_recall_curve = sklearn_metrics.precision_recall_curve
271
+ roc_curve = sklearn_metrics.roc_curve
264
272
 
265
273
  if omit_training:
266
274
  subset = adata[~adata.obs["used_for_training"].astype(bool)]
@@ -349,7 +357,11 @@ def plot_model_curves_from_adata_with_frequency_grid(
349
357
  import os
350
358
 
351
359
  import numpy as np
352
- from sklearn.metrics import auc, precision_recall_curve, roc_curve
360
+
361
+ sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
362
+ auc = sklearn_metrics.auc
363
+ precision_recall_curve = sklearn_metrics.precision_recall_curve
364
+ roc_curve = sklearn_metrics.roc_curve
353
365
 
354
366
  if f1_levels is None:
355
367
  f1_levels = np.linspace(0.2, 0.9, 8)