smftools 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148):
  1. smftools/__init__.py +39 -7
  2. smftools/_settings.py +2 -0
  3. smftools/_version.py +3 -1
  4. smftools/cli/__init__.py +1 -0
  5. smftools/cli/archived/cli_flows.py +2 -0
  6. smftools/cli/helpers.py +2 -0
  7. smftools/cli/hmm_adata.py +7 -2
  8. smftools/cli/load_adata.py +130 -98
  9. smftools/cli/preprocess_adata.py +2 -0
  10. smftools/cli/spatial_adata.py +5 -1
  11. smftools/cli_entry.py +26 -1
  12. smftools/config/__init__.py +2 -0
  13. smftools/config/default.yaml +4 -1
  14. smftools/config/experiment_config.py +6 -0
  15. smftools/datasets/__init__.py +2 -0
  16. smftools/hmm/HMM.py +9 -3
  17. smftools/hmm/__init__.py +24 -13
  18. smftools/hmm/archived/apply_hmm_batched.py +2 -0
  19. smftools/hmm/archived/calculate_distances.py +2 -0
  20. smftools/hmm/archived/call_hmm_peaks.py +2 -0
  21. smftools/hmm/archived/train_hmm.py +2 -0
  22. smftools/hmm/call_hmm_peaks.py +5 -2
  23. smftools/hmm/display_hmm.py +4 -1
  24. smftools/hmm/hmm_readwrite.py +7 -2
  25. smftools/hmm/nucleosome_hmm_refinement.py +2 -0
  26. smftools/informatics/__init__.py +53 -34
  27. smftools/informatics/archived/bam_conversion.py +2 -0
  28. smftools/informatics/archived/bam_direct.py +2 -0
  29. smftools/informatics/archived/basecall_pod5s.py +2 -0
  30. smftools/informatics/archived/basecalls_to_adata.py +2 -0
  31. smftools/informatics/archived/conversion_smf.py +2 -0
  32. smftools/informatics/archived/deaminase_smf.py +1 -0
  33. smftools/informatics/archived/direct_smf.py +2 -0
  34. smftools/informatics/archived/fast5_to_pod5.py +2 -0
  35. smftools/informatics/archived/helpers/archived/__init__.py +2 -0
  36. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +2 -0
  37. smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
  38. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
  39. smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
  40. smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
  41. smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
  42. smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
  43. smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
  44. smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
  45. smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
  46. smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
  47. smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
  48. smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
  49. smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
  50. smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
  51. smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
  52. smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
  53. smftools/informatics/archived/helpers/archived/informatics.py +2 -0
  54. smftools/informatics/archived/helpers/archived/load_adata.py +2 -0
  55. smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
  56. smftools/informatics/archived/helpers/archived/modQC.py +2 -0
  57. smftools/informatics/archived/helpers/archived/modcall.py +2 -0
  58. smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
  59. smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
  60. smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
  61. smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
  62. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +2 -0
  63. smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
  64. smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
  65. smftools/informatics/archived/print_bam_query_seq.py +2 -0
  66. smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
  67. smftools/informatics/archived/subsample_pod5.py +2 -0
  68. smftools/informatics/bam_functions.py +737 -170
  69. smftools/informatics/basecalling.py +2 -0
  70. smftools/informatics/bed_functions.py +271 -61
  71. smftools/informatics/binarize_converted_base_identities.py +3 -0
  72. smftools/informatics/complement_base_list.py +2 -0
  73. smftools/informatics/converted_BAM_to_adata.py +66 -22
  74. smftools/informatics/fasta_functions.py +94 -10
  75. smftools/informatics/h5ad_functions.py +8 -2
  76. smftools/informatics/modkit_extract_to_adata.py +16 -6
  77. smftools/informatics/modkit_functions.py +2 -0
  78. smftools/informatics/ohe.py +2 -0
  79. smftools/informatics/pod5_functions.py +3 -2
  80. smftools/machine_learning/__init__.py +22 -6
  81. smftools/machine_learning/data/__init__.py +2 -0
  82. smftools/machine_learning/data/anndata_data_module.py +18 -4
  83. smftools/machine_learning/data/preprocessing.py +2 -0
  84. smftools/machine_learning/evaluation/__init__.py +2 -0
  85. smftools/machine_learning/evaluation/eval_utils.py +2 -0
  86. smftools/machine_learning/evaluation/evaluators.py +14 -9
  87. smftools/machine_learning/inference/__init__.py +2 -0
  88. smftools/machine_learning/inference/inference_utils.py +2 -0
  89. smftools/machine_learning/inference/lightning_inference.py +6 -1
  90. smftools/machine_learning/inference/sklearn_inference.py +2 -0
  91. smftools/machine_learning/inference/sliding_window_inference.py +2 -0
  92. smftools/machine_learning/models/__init__.py +2 -0
  93. smftools/machine_learning/models/base.py +7 -2
  94. smftools/machine_learning/models/cnn.py +7 -2
  95. smftools/machine_learning/models/lightning_base.py +16 -11
  96. smftools/machine_learning/models/mlp.py +5 -1
  97. smftools/machine_learning/models/positional.py +7 -2
  98. smftools/machine_learning/models/rnn.py +5 -1
  99. smftools/machine_learning/models/sklearn_models.py +14 -9
  100. smftools/machine_learning/models/transformer.py +7 -2
  101. smftools/machine_learning/models/wrappers.py +6 -2
  102. smftools/machine_learning/training/__init__.py +2 -0
  103. smftools/machine_learning/training/train_lightning_model.py +13 -3
  104. smftools/machine_learning/training/train_sklearn_model.py +2 -0
  105. smftools/machine_learning/utils/__init__.py +2 -0
  106. smftools/machine_learning/utils/device.py +5 -1
  107. smftools/machine_learning/utils/grl.py +5 -1
  108. smftools/optional_imports.py +31 -0
  109. smftools/plotting/__init__.py +32 -31
  110. smftools/plotting/autocorrelation_plotting.py +9 -5
  111. smftools/plotting/classifiers.py +16 -4
  112. smftools/plotting/general_plotting.py +6 -3
  113. smftools/plotting/hmm_plotting.py +12 -2
  114. smftools/plotting/position_stats.py +15 -7
  115. smftools/plotting/qc_plotting.py +6 -1
  116. smftools/preprocessing/__init__.py +35 -37
  117. smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +2 -0
  118. smftools/preprocessing/archived/calculate_complexity.py +2 -0
  119. smftools/preprocessing/archived/mark_duplicates.py +2 -0
  120. smftools/preprocessing/archived/preprocessing.py +2 -0
  121. smftools/preprocessing/archived/remove_duplicates.py +2 -0
  122. smftools/preprocessing/binary_layers_to_ohe.py +2 -1
  123. smftools/preprocessing/calculate_complexity_II.py +4 -1
  124. smftools/preprocessing/calculate_pairwise_differences.py +2 -0
  125. smftools/preprocessing/calculate_pairwise_hamming_distances.py +3 -0
  126. smftools/preprocessing/calculate_position_Youden.py +9 -2
  127. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +2 -0
  128. smftools/preprocessing/filter_reads_on_modification_thresholds.py +2 -0
  129. smftools/preprocessing/flag_duplicate_reads.py +42 -54
  130. smftools/preprocessing/make_dirs.py +2 -1
  131. smftools/preprocessing/min_non_diagonal.py +2 -0
  132. smftools/preprocessing/recipes.py +2 -0
  133. smftools/tools/__init__.py +26 -18
  134. smftools/tools/archived/apply_hmm.py +2 -0
  135. smftools/tools/archived/classifiers.py +2 -0
  136. smftools/tools/archived/classify_methylated_features.py +2 -0
  137. smftools/tools/archived/classify_non_methylated_features.py +2 -0
  138. smftools/tools/archived/subset_adata_v1.py +2 -0
  139. smftools/tools/archived/subset_adata_v2.py +2 -0
  140. smftools/tools/calculate_umap.py +3 -1
  141. smftools/tools/cluster_adata_on_methylation.py +7 -1
  142. smftools/tools/position_stats.py +17 -27
  143. {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/METADATA +67 -33
  144. smftools-0.3.0.dist-info/RECORD +182 -0
  145. smftools-0.2.5.dist-info/RECORD +0 -181
  146. {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/WHEEL +0 -0
  147. {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/entry_points.txt +0 -0
  148. {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from ..data import AnnDataModule
2
4
  from ..models import SklearnModelWrapper
3
5
 
@@ -1,2 +1,4 @@
1
+ from __future__ import annotations
2
+
1
3
  from .device import detect_device
2
4
  from .grl import GradReverse
@@ -1,4 +1,8 @@
1
- import torch
1
+ from __future__ import annotations
2
+
3
+ from smftools.optional_imports import require
4
+
5
+ torch = require("torch", extra="ml-base", purpose="device selection")
2
6
 
3
7
 
4
8
  def detect_device():
@@ -1,4 +1,8 @@
1
- import torch
1
+ from __future__ import annotations
2
+
3
+ from smftools.optional_imports import require
4
+
5
+ torch = require("torch", extra="ml-base", purpose="gradient reversal layers")
2
6
 
3
7
 
4
8
  class GradReverse(torch.autograd.Function):
@@ -0,0 +1,31 @@
1
+ """Utilities for optional dependency handling."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from importlib import import_module
6
+ from typing import Any
7
+
8
+
9
def require(package: str, *, extra: str, purpose: str | None = None) -> Any:
    """Import an optional dependency with a helpful error message.

    Args:
        package: Importable module name (e.g., "torch", "scanpy").
        extra: Extra name users should install (e.g., "ml", "omics").
        purpose: Optional context describing the feature needing the dependency.

    Returns:
        The imported module.

    Raises:
        ModuleNotFoundError: If the package, or a module imported while
            loading it, is not installed. The ``name`` and ``path``
            attributes of the underlying error are carried over so callers
            can still inspect which module was missing.
    """
    try:
        return import_module(package)
    except ModuleNotFoundError as exc:  # pragma: no cover - depends on env
        reason = f" for {purpose}" if purpose else ""
        message = (
            f"Optional dependency '{package}' is required{reason}. "
            f"Install it with: pip install 'smftools[{extra}]'"
        )
        missing = exc.name
        # When the failure comes from a transitive import (not the requested
        # package or one of its parent packages), name the real culprit so the
        # hint does not point the user at the wrong module.
        if missing and missing != package and not package.startswith(f"{missing}."):
            message += f" (missing module: '{missing}')"
        raise ModuleNotFoundError(message, name=missing, path=exc.path) from exc
@@ -1,32 +1,33 @@
1
- from .autocorrelation_plotting import *
2
- from .classifiers import (
3
- plot_feature_importances_or_saliency,
4
- plot_model_curves_from_adata,
5
- plot_model_curves_from_adata_with_frequency_grid,
6
- plot_model_performance,
7
- )
8
- from .general_plotting import (
9
- combined_hmm_raw_clustermap,
10
- combined_raw_clustermap,
11
- plot_hmm_layers_rolling_by_sample_ref,
12
- )
13
- from .hmm_plotting import *
14
- from .position_stats import (
15
- plot_bar_relative_risk,
16
- plot_positionwise_matrix,
17
- plot_positionwise_matrix_grid,
18
- plot_volcano_relative_risk,
19
- )
20
- from .qc_plotting import *
1
+ from __future__ import annotations
21
2
 
22
- __all__ = [
23
- "combined_hmm_raw_clustermap",
24
- "plot_bar_relative_risk",
25
- "plot_positionwise_matrix",
26
- "plot_positionwise_matrix_grid",
27
- "plot_volcano_relative_risk",
28
- "plot_feature_importances_or_saliency",
29
- "plot_model_performance",
30
- "plot_model_curves_from_adata",
31
- "plot_model_curves_from_adata_with_frequency_grid",
32
- ]
3
+ from importlib import import_module
4
+
5
+ _LAZY_ATTRS = {
6
+ "combined_hmm_raw_clustermap": "smftools.plotting.general_plotting",
7
+ "combined_raw_clustermap": "smftools.plotting.general_plotting",
8
+ "plot_hmm_layers_rolling_by_sample_ref": "smftools.plotting.general_plotting",
9
+ "plot_bar_relative_risk": "smftools.plotting.position_stats",
10
+ "plot_positionwise_matrix": "smftools.plotting.position_stats",
11
+ "plot_positionwise_matrix_grid": "smftools.plotting.position_stats",
12
+ "plot_volcano_relative_risk": "smftools.plotting.position_stats",
13
+ "plot_feature_importances_or_saliency": "smftools.plotting.classifiers",
14
+ "plot_model_curves_from_adata": "smftools.plotting.classifiers",
15
+ "plot_model_curves_from_adata_with_frequency_grid": "smftools.plotting.classifiers",
16
+ "plot_model_performance": "smftools.plotting.classifiers",
17
+ "plot_read_qc_histograms": "smftools.plotting.qc_plotting",
18
+ "plot_rolling_grid": "smftools.plotting.autocorrelation_plotting",
19
+ "plot_spatial_autocorr_grid": "smftools.plotting.autocorrelation_plotting",
20
+ "plot_hmm_size_contours": "smftools.plotting.hmm_plotting",
21
+ }
22
+
23
+
24
def __getattr__(name: str):
    """Resolve a lazily exported plotting attribute on first access.

    Module-level ``__getattr__`` hook (PEP 562). ``name`` is looked up in
    ``_LAZY_ATTRS``; the module recorded there is imported, the attribute of
    the same name is fetched from it, and the result is stored in this
    module's globals so later lookups bypass this hook.

    Raises:
        AttributeError: If ``name`` is not a key of ``_LAZY_ATTRS``.
    """
    if name in _LAZY_ATTRS:
        module = import_module(_LAZY_ATTRS[name])
        attr = getattr(module, name)
        # Cache the resolved object so the import happens only once per name.
        globals()[name] = attr
        return attr
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")


def __dir__() -> list[str]:
    """List module globals plus lazy exports that are not yet resolved.

    Without this hook the lazy names stay hidden from ``dir()`` and
    tab-completion until each one has been accessed at least once.
    """
    return sorted(set(globals()) | set(_LAZY_ATTRS))


__all__ = list(_LAZY_ATTRS.keys())
@@ -1,8 +1,12 @@
1
+ from __future__ import annotations
2
+
1
3
  from typing import Optional
2
4
 
3
5
  import numpy as np
4
6
  import pandas as pd
5
7
 
8
+ from smftools.optional_imports import require
9
+
6
10
 
7
11
  def plot_spatial_autocorr_grid(
8
12
  adata,
@@ -35,7 +39,7 @@ def plot_spatial_autocorr_grid(
35
39
  import os
36
40
  import warnings
37
41
 
38
- import matplotlib.pyplot as plt
42
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
39
43
 
40
44
  # Try importing analyzer (used only as fallback)
41
45
  try:
@@ -518,7 +522,7 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
518
522
  Plot NRL and SNR vs window center from the dataframe returned by rolling_autocorr_metrics.
519
523
  If out_png is None, returns the matplotlib Figure object; otherwise saves PNG and returns path.
520
524
  """
521
- import matplotlib.pyplot as plt
525
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
522
526
 
523
527
  # sort by center
524
528
  df2 = df.sort_values("center")
@@ -543,12 +547,12 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
543
547
  if out_png:
544
548
  fig.savefig(out_png, bbox_inches="tight")
545
549
  if not show:
546
- import matplotlib
550
+ matplotlib = require("matplotlib", extra="plotting", purpose="autocorrelation plots")
547
551
 
548
552
  matplotlib.pyplot.close(fig)
549
553
  return out_png
550
554
  if not show:
551
- import matplotlib
555
+ matplotlib = require("matplotlib", extra="plotting", purpose="autocorrelation plots")
552
556
 
553
557
  matplotlib.pyplot.close(fig)
554
558
  return fig
@@ -600,7 +604,7 @@ def plot_rolling_grid(
600
604
  """
601
605
  import os
602
606
 
603
- import matplotlib.pyplot as plt
607
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
604
608
 
605
609
  if per_metric_ylim is None:
606
610
  per_metric_ylim = {}
@@ -1,8 +1,13 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
2
4
 
3
- import matplotlib.pyplot as plt
4
5
  import numpy as np
5
- import torch
6
+
7
+ from smftools.optional_imports import require
8
+
9
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="model plots")
10
+ torch = require("torch", extra="ml-base", purpose="model saliency plots")
6
11
 
7
12
 
8
13
  def plot_model_performance(metrics, save_path=None):
@@ -260,7 +265,10 @@ def plot_model_curves_from_adata(
260
265
  ylim_roc: Y-axis limits for ROC curve.
261
266
  ylim_pr: Y-axis limits for PR curve.
262
267
  """
263
- from sklearn.metrics import auc, precision_recall_curve, roc_curve
268
+ sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
269
+ auc = sklearn_metrics.auc
270
+ precision_recall_curve = sklearn_metrics.precision_recall_curve
271
+ roc_curve = sklearn_metrics.roc_curve
264
272
 
265
273
  if omit_training:
266
274
  subset = adata[~adata.obs["used_for_training"].astype(bool)]
@@ -349,7 +357,11 @@ def plot_model_curves_from_adata_with_frequency_grid(
349
357
  import os
350
358
 
351
359
  import numpy as np
352
- from sklearn.metrics import auc, precision_recall_curve, roc_curve
360
+
361
+ sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
362
+ auc = sklearn_metrics.auc
363
+ precision_recall_curve = sklearn_metrics.precision_recall_curve
364
+ roc_curve = sklearn_metrics.roc_curve
353
365
 
354
366
  if f1_levels is None:
355
367
  f1_levels = np.linspace(0.2, 0.9, 8)
@@ -5,12 +5,15 @@ import os
5
5
  from pathlib import Path
6
6
  from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
7
7
 
8
- import matplotlib.gridspec as gridspec
9
- import matplotlib.pyplot as plt
10
8
  import numpy as np
11
9
  import pandas as pd
12
10
  import scipy.cluster.hierarchy as sch
13
- import seaborn as sns
11
+
12
+ from smftools.optional_imports import require
13
+
14
+ gridspec = require("matplotlib.gridspec", extra="plotting", purpose="heatmap plotting")
15
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="plot rendering")
16
+ sns = require("seaborn", extra="plotting", purpose="plot styling")
14
17
 
15
18
 
16
19
  def _fixed_tick_positions(n_positions: int, n_ticks: int) -> np.ndarray:
@@ -1,9 +1,19 @@
1
+ from __future__ import annotations
2
+
1
3
  import math
2
4
  from typing import Optional, Tuple, Union
3
5
 
4
- import matplotlib.pyplot as plt
5
6
  import numpy as np
6
- from matplotlib.backends.backend_pdf import PdfPages
7
+
8
+ from smftools.optional_imports import require
9
+
10
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="HMM plots")
11
+ pdf_backend = require(
12
+ "matplotlib.backends.backend_pdf",
13
+ extra="plotting",
14
+ purpose="PDF output",
15
+ )
16
+ PdfPages = pdf_backend.PdfPages
7
17
 
8
18
 
9
19
  def plot_hmm_size_contours(
@@ -1,3 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+ from smftools.optional_imports import require
4
+
5
+
1
6
  def plot_volcano_relative_risk(
2
7
  results_dict,
3
8
  save_path=None,
@@ -22,7 +27,7 @@ def plot_volcano_relative_risk(
22
27
  """
23
28
  import os
24
29
 
25
- import matplotlib.pyplot as plt
30
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="relative risk plots")
26
31
 
27
32
  for ref, group_results in results_dict.items():
28
33
  for group_label, (results_df, _) in group_results.items():
@@ -124,7 +129,7 @@ def plot_bar_relative_risk(
124
129
  """
125
130
  import os
126
131
 
127
- import matplotlib.pyplot as plt
132
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="relative risk plots")
128
133
 
129
134
  for ref, group_data in results_dict.items():
130
135
  for group_label, (df, _) in group_data.items():
@@ -229,10 +234,11 @@ def plot_positionwise_matrix(
229
234
  """
230
235
  import os
231
236
 
232
- import matplotlib.pyplot as plt
233
237
  import numpy as np
234
238
  import pandas as pd
235
- import seaborn as sns
239
+
240
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="position stats plots")
241
+ sns = require("seaborn", extra="plotting", purpose="position stats plots")
236
242
 
237
243
  def find_closest_index(index, target):
238
244
  """Find the index value closest to a target value."""
@@ -408,12 +414,14 @@ def plot_positionwise_matrix_grid(
408
414
  """
409
415
  import os
410
416
 
411
- import matplotlib.pyplot as plt
412
417
  import numpy as np
413
418
  import pandas as pd
414
- import seaborn as sns
415
419
  from joblib import Parallel, delayed
416
- from matplotlib.gridspec import GridSpec
420
+
421
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="position stats plots")
422
+ sns = require("seaborn", extra="plotting", purpose="position stats plots")
423
+ grid_spec = require("matplotlib.gridspec", extra="plotting", purpose="position stats plots")
424
+ GridSpec = grid_spec.GridSpec
417
425
 
418
426
  matrices = adata.uns[key]
419
427
  group_labels = list(matrices.keys())
@@ -1,9 +1,14 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
2
4
 
3
- import matplotlib.pyplot as plt
4
5
  import numpy as np
5
6
  import pandas as pd
6
7
 
8
+ from smftools.optional_imports import require
9
+
10
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="QC plots")
11
+
7
12
 
8
13
  def plot_read_qc_histograms(
9
14
  adata,
@@ -1,38 +1,36 @@
1
- from .append_base_context import append_base_context
2
- from .append_binary_layer_by_base_context import append_binary_layer_by_base_context
3
- from .binarize import binarize_adata
4
- from .binarize_on_Youden import binarize_on_Youden
5
- from .calculate_complexity_II import calculate_complexity_II
6
- from .calculate_coverage import calculate_coverage
7
- from .calculate_position_Youden import calculate_position_Youden
8
- from .calculate_read_length_stats import calculate_read_length_stats
9
- from .calculate_read_modification_stats import calculate_read_modification_stats
10
- from .clean_NaN import clean_NaN
11
- from .filter_adata_by_nan_proportion import filter_adata_by_nan_proportion
12
- from .filter_reads_on_length_quality_mapping import filter_reads_on_length_quality_mapping
13
- from .filter_reads_on_modification_thresholds import filter_reads_on_modification_thresholds
14
- from .flag_duplicate_reads import flag_duplicate_reads
15
- from .invert_adata import invert_adata
16
- from .load_sample_sheet import load_sample_sheet
17
- from .reindex_references_adata import reindex_references_adata
18
- from .subsample_adata import subsample_adata
1
+ from __future__ import annotations
19
2
 
20
- __all__ = [
21
- "append_base_context",
22
- "append_binary_layer_by_base_context",
23
- "binarize_on_Youden",
24
- "binarize_adata",
25
- "calculate_complexity_II",
26
- "calculate_read_modification_stats",
27
- "calculate_coverage",
28
- "calculate_position_Youden",
29
- "calculate_read_length_stats",
30
- "clean_NaN",
31
- "filter_adata_by_nan_proportion",
32
- "filter_reads_on_modification_thresholds",
33
- "filter_reads_on_length_quality_mapping",
34
- "invert_adata",
35
- "load_sample_sheet",
36
- "flag_duplicate_reads",
37
- "subsample_adata",
38
- ]
3
+ from importlib import import_module
4
+
5
+ _LAZY_ATTRS = {
6
+ "append_base_context": "smftools.preprocessing.append_base_context",
7
+ "append_binary_layer_by_base_context": "smftools.preprocessing.append_binary_layer_by_base_context",
8
+ "binarize_adata": "smftools.preprocessing.binarize",
9
+ "binarize_on_Youden": "smftools.preprocessing.binarize_on_Youden",
10
+ "calculate_complexity_II": "smftools.preprocessing.calculate_complexity_II",
11
+ "calculate_coverage": "smftools.preprocessing.calculate_coverage",
12
+ "calculate_position_Youden": "smftools.preprocessing.calculate_position_Youden",
13
+ "calculate_read_length_stats": "smftools.preprocessing.calculate_read_length_stats",
14
+ "calculate_read_modification_stats": "smftools.preprocessing.calculate_read_modification_stats",
15
+ "clean_NaN": "smftools.preprocessing.clean_NaN",
16
+ "filter_adata_by_nan_proportion": "smftools.preprocessing.filter_adata_by_nan_proportion",
17
+ "filter_reads_on_length_quality_mapping": "smftools.preprocessing.filter_reads_on_length_quality_mapping",
18
+ "filter_reads_on_modification_thresholds": "smftools.preprocessing.filter_reads_on_modification_thresholds",
19
+ "flag_duplicate_reads": "smftools.preprocessing.flag_duplicate_reads",
20
+ "invert_adata": "smftools.preprocessing.invert_adata",
21
+ "load_sample_sheet": "smftools.preprocessing.load_sample_sheet",
22
+ "reindex_references_adata": "smftools.preprocessing.reindex_references_adata",
23
+ "subsample_adata": "smftools.preprocessing.subsample_adata",
24
+ }
25
+
26
+
27
def __getattr__(name: str):
    """Resolve a lazily exported preprocessing attribute on first access.

    Module-level ``__getattr__`` hook (PEP 562). ``name`` is looked up in
    ``_LAZY_ATTRS``; the module recorded there is imported, the attribute of
    the same name is fetched from it, and the result is stored in this
    module's globals so later lookups bypass this hook.

    Raises:
        AttributeError: If ``name`` is not a key of ``_LAZY_ATTRS``.
    """
    if name in _LAZY_ATTRS:
        module = import_module(_LAZY_ATTRS[name])
        attr = getattr(module, name)
        # Cache the resolved object so the import happens only once per name.
        globals()[name] = attr
        return attr
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")


def __dir__() -> list[str]:
    """List module globals plus lazy exports that are not yet resolved.

    Without this hook the lazy names stay hidden from ``dir()`` and
    tab-completion until each one has been accessed at least once.
    """
    return sorted(set(globals()) | set(_LAZY_ATTRS))


__all__ = list(_LAZY_ATTRS.keys())
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import numpy as np
2
4
  import pandas as pd
3
5
  import scipy.sparse as sp
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## calculate_complexity
2
4
 
3
5
  def calculate_complexity(adata, output_directory='', obs_column='Reference', sample_col='Sample_names', plot=True, save_plot=False):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## mark_duplicates
2
4
 
3
5
  def mark_duplicates(adata, layers, obs_column='Reference', sample_col='Sample_names', method='N_masked_distances', distance_thresholds={}):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## preprocessing
2
4
  from .. import readwrite
3
5
 
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # remove_duplicates
2
4
 
3
5
  def remove_duplicates(adata):
@@ -1,5 +1,6 @@
1
- ## binary_layers_to_ohe
1
+ from __future__ import annotations
2
2
 
3
+ ## binary_layers_to_ohe
3
4
  from smftools.logging_utils import get_logger
4
5
 
5
6
  logger = get_logger(__name__)
@@ -3,6 +3,8 @@ from __future__ import annotations
3
3
  from pathlib import Path
4
4
  from typing import TYPE_CHECKING, Optional
5
5
 
6
+ from smftools.optional_imports import require
7
+
6
8
  if TYPE_CHECKING:
7
9
  import anndata as ad
8
10
 
@@ -46,11 +48,12 @@ def calculate_complexity_II(
46
48
  """
47
49
  import os
48
50
 
49
- import matplotlib.pyplot as plt
50
51
  import numpy as np
51
52
  import pandas as pd
52
53
  from scipy.optimize import curve_fit
53
54
 
55
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="complexity plots")
56
+
54
57
  # early exits
55
58
  already = bool(adata.uns.get(uns_flag, False))
56
59
  if already and not force_redo:
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # calculate_pairwise_differences
2
4
 
3
5
 
@@ -1,5 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  ## calculate_pairwise_hamming_distances
2
4
 
5
+
3
6
  ## Conversion SMF Specific
4
7
  def calculate_pairwise_hamming_distances(arrays):
5
8
  """
@@ -6,6 +6,7 @@ from pathlib import Path
6
6
  from typing import TYPE_CHECKING
7
7
 
8
8
  from smftools.logging_utils import get_logger
9
+ from smftools.optional_imports import require
9
10
 
10
11
  if TYPE_CHECKING:
11
12
  import anndata as ad
@@ -40,9 +41,15 @@ def calculate_position_Youden(
40
41
  save: Whether to save ROC plots to disk.
41
42
  output_directory: Output directory for ROC plots.
42
43
  """
43
- import matplotlib.pyplot as plt
44
44
  import numpy as np
45
- from sklearn.metrics import roc_curve
45
+
46
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="Youden ROC plots")
47
+ sklearn_metrics = require(
48
+ "sklearn.metrics",
49
+ extra="ml-base",
50
+ purpose="Youden ROC curve calculation",
51
+ )
52
+ roc_curve = sklearn_metrics.roc_curve
46
53
 
47
54
  control_samples = [positive_control_sample, negative_control_sample]
48
55
  references = adata.obs[ref_column].cat.categories
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from typing import Optional, Sequence, Union
2
4
 
3
5
  import anndata as ad
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import gc
2
4
  from typing import List, Optional, Sequence
3
5