smftools 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. smftools/_version.py +1 -1
  2. smftools/cli/chimeric_adata.py +1563 -0
  3. smftools/cli/helpers.py +18 -2
  4. smftools/cli/hmm_adata.py +18 -1
  5. smftools/cli/latent_adata.py +522 -67
  6. smftools/cli/load_adata.py +2 -2
  7. smftools/cli/preprocess_adata.py +32 -93
  8. smftools/cli/recipes.py +26 -0
  9. smftools/cli/spatial_adata.py +23 -109
  10. smftools/cli/variant_adata.py +423 -0
  11. smftools/cli_entry.py +41 -5
  12. smftools/config/conversion.yaml +0 -10
  13. smftools/config/deaminase.yaml +3 -0
  14. smftools/config/default.yaml +49 -13
  15. smftools/config/experiment_config.py +96 -3
  16. smftools/constants.py +4 -0
  17. smftools/hmm/call_hmm_peaks.py +1 -1
  18. smftools/informatics/binarize_converted_base_identities.py +2 -89
  19. smftools/informatics/converted_BAM_to_adata.py +53 -13
  20. smftools/informatics/h5ad_functions.py +83 -0
  21. smftools/informatics/modkit_extract_to_adata.py +4 -0
  22. smftools/plotting/__init__.py +26 -12
  23. smftools/plotting/autocorrelation_plotting.py +22 -4
  24. smftools/plotting/chimeric_plotting.py +1893 -0
  25. smftools/plotting/classifiers.py +28 -14
  26. smftools/plotting/general_plotting.py +58 -3362
  27. smftools/plotting/hmm_plotting.py +1586 -2
  28. smftools/plotting/latent_plotting.py +804 -0
  29. smftools/plotting/plotting_utils.py +243 -0
  30. smftools/plotting/position_stats.py +16 -8
  31. smftools/plotting/preprocess_plotting.py +281 -0
  32. smftools/plotting/qc_plotting.py +8 -3
  33. smftools/plotting/spatial_plotting.py +1134 -0
  34. smftools/plotting/variant_plotting.py +1231 -0
  35. smftools/preprocessing/__init__.py +3 -0
  36. smftools/preprocessing/append_base_context.py +1 -1
  37. smftools/preprocessing/append_mismatch_frequency_sites.py +35 -6
  38. smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
  39. smftools/preprocessing/append_variant_call_layer.py +480 -0
  40. smftools/preprocessing/flag_duplicate_reads.py +4 -4
  41. smftools/preprocessing/invert_adata.py +1 -0
  42. smftools/readwrite.py +109 -85
  43. smftools/tools/__init__.py +6 -0
  44. smftools/tools/calculate_knn.py +121 -0
  45. smftools/tools/calculate_nmf.py +18 -7
  46. smftools/tools/calculate_pca.py +180 -0
  47. smftools/tools/calculate_umap.py +70 -154
  48. smftools/tools/position_stats.py +4 -4
  49. smftools/tools/rolling_nn_distance.py +640 -3
  50. smftools/tools/sequence_alignment.py +140 -0
  51. smftools/tools/tensor_factorization.py +52 -4
  52. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/METADATA +3 -1
  53. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/RECORD +56 -42
  54. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/WHEEL +0 -0
  55. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/entry_points.txt +0 -0
  56. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/licenses/LICENSE +0 -0
smftools/cli/helpers.py CHANGED
@@ -5,7 +5,16 @@ from pathlib import Path
5
5
 
6
6
  import anndata as ad
7
7
 
8
- from smftools.constants import H5_DIR, HMM_DIR, LATENT_DIR, LOAD_DIR, PREPROCESS_DIR, SPATIAL_DIR
8
+ from smftools.constants import (
9
+ CHIMERIC_DIR,
10
+ H5_DIR,
11
+ HMM_DIR,
12
+ LATENT_DIR,
13
+ LOAD_DIR,
14
+ PREPROCESS_DIR,
15
+ SPATIAL_DIR,
16
+ VARIANT_DIR,
17
+ )
9
18
 
10
19
  from ..metadata import write_runtime_schema_yaml
11
20
  from ..readwrite import safe_write_h5ad
@@ -19,6 +28,8 @@ class AdataPaths:
19
28
  spatial: Path
20
29
  hmm: Path
21
30
  latent: Path
31
+ variant: Path
32
+ chimeric: Path
22
33
 
23
34
 
24
35
  def get_adata_paths(cfg) -> AdataPaths:
@@ -27,8 +38,8 @@ def get_adata_paths(cfg) -> AdataPaths:
27
38
  """
28
39
  output_directory = Path(cfg.output_directory)
29
40
 
41
+ # Raw and Preprocessed adata file paths will have set names.
30
42
  raw = output_directory / LOAD_DIR / H5_DIR / f"{cfg.experiment_name}.h5ad.gz"
31
-
32
43
  pp = output_directory / PREPROCESS_DIR / H5_DIR / f"{cfg.experiment_name}_preprocessed.h5ad.gz"
33
44
 
34
45
  if cfg.smf_modality == "direct":
@@ -44,9 +55,12 @@ def get_adata_paths(cfg) -> AdataPaths:
44
55
 
45
56
  pp_dedup_base = pp_dedup.name.removesuffix(".h5ad.gz")
46
57
 
58
+ # All of the following just append a new suffix to the preprocessed_deduplicated base name
47
59
  spatial = output_directory / SPATIAL_DIR / H5_DIR / f"{pp_dedup_base}_spatial.h5ad.gz"
48
60
  hmm = output_directory / HMM_DIR / H5_DIR / f"{pp_dedup_base}_hmm.h5ad.gz"
49
61
  latent = output_directory / LATENT_DIR / H5_DIR / f"{pp_dedup_base}_latent.h5ad.gz"
62
+ variant = output_directory / VARIANT_DIR / H5_DIR / f"{pp_dedup_base}_variant.h5ad.gz"
63
+ chimeric = output_directory / CHIMERIC_DIR / H5_DIR / f"{pp_dedup_base}_chimeric.h5ad.gz"
50
64
 
51
65
  return AdataPaths(
52
66
  raw=raw,
@@ -55,6 +69,8 @@ def get_adata_paths(cfg) -> AdataPaths:
55
69
  spatial=spatial,
56
70
  hmm=hmm,
57
71
  latent=latent,
72
+ variant=variant,
73
+ chimeric=chimeric,
58
74
  )
59
75
 
60
76
 
smftools/cli/hmm_adata.py CHANGED
@@ -18,7 +18,7 @@ from ..hmm.HMM import _safe_int_coords, _to_dense_np, create_hmm, normalize_hmm_
18
18
  logger = get_logger(__name__)
19
19
 
20
20
  if TYPE_CHECKING:
21
- import torch as torch_types
21
+ import torch
22
22
 
23
23
  torch = require("torch", extra="torch", purpose="HMM CLI")
24
24
  mpl = require("matplotlib", extra="plotting", purpose="HMM plotting")
@@ -586,9 +586,18 @@ def hmm_adata(config_path: str):
586
586
  if paths.hmm.exists():
587
587
  adata, _ = safe_read_h5ad(paths.hmm)
588
588
  source_path = paths.hmm
589
+ elif paths.latent.exists():
590
+ adata, _ = safe_read_h5ad(paths.latent)
591
+ source_path = paths.latent
589
592
  elif paths.spatial.exists():
590
593
  adata, _ = safe_read_h5ad(paths.spatial)
591
594
  source_path = paths.spatial
595
+ elif paths.chimeric.exists():
596
+ adata, _ = safe_read_h5ad(paths.chimeric)
597
+ source_path = paths.chimeric
598
+ elif paths.variant.exists():
599
+ adata, _ = safe_read_h5ad(paths.variant)
600
+ source_path = paths.variant
592
601
  elif paths.pp_dedup.exists():
593
602
  adata, _ = safe_read_h5ad(paths.pp_dedup)
594
603
  source_path = paths.pp_dedup
@@ -1080,6 +1089,10 @@ def hmm_adata_core(
1080
1089
  deaminase=deaminase,
1081
1090
  min_signal=0,
1082
1091
  index_col_suffix=cfg.reindexed_var_suffix,
1092
+ overlay_variant_calls=getattr(cfg, "overlay_variant_calls", False),
1093
+ variant_overlay_seq1_color=getattr(cfg, "variant_overlay_seq1_color", "white"),
1094
+ variant_overlay_seq2_color=getattr(cfg, "variant_overlay_seq2_color", "black"),
1095
+ variant_overlay_marker_size=getattr(cfg, "variant_overlay_marker_size", 4.0),
1083
1096
  )
1084
1097
 
1085
1098
  hmm_length_dir = hmm_directory / "12b_hmm_length_clustermaps"
@@ -1136,6 +1149,10 @@ def hmm_adata_core(
1136
1149
  min_signal=0,
1137
1150
  index_col_suffix=cfg.reindexed_var_suffix,
1138
1151
  length_feature_ranges=length_feature_ranges,
1152
+ overlay_variant_calls=getattr(cfg, "overlay_variant_calls", False),
1153
+ variant_overlay_seq1_color=getattr(cfg, "variant_overlay_seq1_color", "white"),
1154
+ variant_overlay_seq2_color=getattr(cfg, "variant_overlay_seq2_color", "black"),
1155
+ variant_overlay_marker_size=getattr(cfg, "variant_overlay_marker_size", 4.0),
1139
1156
  )
1140
1157
 
1141
1158
  hmm_dir = hmm_directory / "13_hmm_bulk_traces"