smftools-0.2.4-py3-none-any.whl → smftools-0.2.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. smftools/__init__.py +6 -8
  2. smftools/_settings.py +4 -6
  3. smftools/_version.py +1 -1
  4. smftools/cli/helpers.py +7 -1
  5. smftools/cli/hmm_adata.py +902 -244
  6. smftools/cli/load_adata.py +318 -198
  7. smftools/cli/preprocess_adata.py +285 -171
  8. smftools/cli/spatial_adata.py +137 -53
  9. smftools/cli_entry.py +94 -178
  10. smftools/config/__init__.py +1 -1
  11. smftools/config/conversion.yaml +5 -1
  12. smftools/config/deaminase.yaml +1 -1
  13. smftools/config/default.yaml +22 -17
  14. smftools/config/direct.yaml +8 -3
  15. smftools/config/discover_input_files.py +19 -5
  16. smftools/config/experiment_config.py +505 -276
  17. smftools/constants.py +37 -0
  18. smftools/datasets/__init__.py +2 -8
  19. smftools/datasets/datasets.py +32 -18
  20. smftools/hmm/HMM.py +2125 -1426
  21. smftools/hmm/__init__.py +2 -3
  22. smftools/hmm/archived/call_hmm_peaks.py +16 -1
  23. smftools/hmm/call_hmm_peaks.py +173 -193
  24. smftools/hmm/display_hmm.py +19 -6
  25. smftools/hmm/hmm_readwrite.py +13 -4
  26. smftools/hmm/nucleosome_hmm_refinement.py +102 -14
  27. smftools/informatics/__init__.py +30 -7
  28. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +14 -1
  29. smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
  30. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
  31. smftools/informatics/archived/helpers/archived/load_adata.py +3 -3
  32. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +3 -1
  33. smftools/informatics/archived/print_bam_query_seq.py +7 -1
  34. smftools/informatics/bam_functions.py +379 -156
  35. smftools/informatics/basecalling.py +51 -9
  36. smftools/informatics/bed_functions.py +90 -57
  37. smftools/informatics/binarize_converted_base_identities.py +18 -7
  38. smftools/informatics/complement_base_list.py +7 -6
  39. smftools/informatics/converted_BAM_to_adata.py +265 -122
  40. smftools/informatics/fasta_functions.py +161 -83
  41. smftools/informatics/h5ad_functions.py +195 -29
  42. smftools/informatics/modkit_extract_to_adata.py +609 -270
  43. smftools/informatics/modkit_functions.py +85 -44
  44. smftools/informatics/ohe.py +44 -21
  45. smftools/informatics/pod5_functions.py +112 -73
  46. smftools/informatics/run_multiqc.py +20 -14
  47. smftools/logging_utils.py +51 -0
  48. smftools/machine_learning/__init__.py +2 -7
  49. smftools/machine_learning/data/anndata_data_module.py +143 -50
  50. smftools/machine_learning/data/preprocessing.py +2 -1
  51. smftools/machine_learning/evaluation/__init__.py +1 -1
  52. smftools/machine_learning/evaluation/eval_utils.py +11 -14
  53. smftools/machine_learning/evaluation/evaluators.py +46 -33
  54. smftools/machine_learning/inference/__init__.py +1 -1
  55. smftools/machine_learning/inference/inference_utils.py +7 -4
  56. smftools/machine_learning/inference/lightning_inference.py +9 -13
  57. smftools/machine_learning/inference/sklearn_inference.py +6 -8
  58. smftools/machine_learning/inference/sliding_window_inference.py +35 -25
  59. smftools/machine_learning/models/__init__.py +10 -5
  60. smftools/machine_learning/models/base.py +28 -42
  61. smftools/machine_learning/models/cnn.py +15 -11
  62. smftools/machine_learning/models/lightning_base.py +71 -40
  63. smftools/machine_learning/models/mlp.py +13 -4
  64. smftools/machine_learning/models/positional.py +3 -2
  65. smftools/machine_learning/models/rnn.py +3 -2
  66. smftools/machine_learning/models/sklearn_models.py +39 -22
  67. smftools/machine_learning/models/transformer.py +68 -53
  68. smftools/machine_learning/models/wrappers.py +2 -1
  69. smftools/machine_learning/training/__init__.py +2 -2
  70. smftools/machine_learning/training/train_lightning_model.py +29 -20
  71. smftools/machine_learning/training/train_sklearn_model.py +9 -15
  72. smftools/machine_learning/utils/__init__.py +1 -1
  73. smftools/machine_learning/utils/device.py +7 -4
  74. smftools/machine_learning/utils/grl.py +3 -1
  75. smftools/metadata.py +443 -0
  76. smftools/plotting/__init__.py +19 -5
  77. smftools/plotting/autocorrelation_plotting.py +145 -44
  78. smftools/plotting/classifiers.py +162 -72
  79. smftools/plotting/general_plotting.py +347 -168
  80. smftools/plotting/hmm_plotting.py +42 -13
  81. smftools/plotting/position_stats.py +145 -85
  82. smftools/plotting/qc_plotting.py +20 -12
  83. smftools/preprocessing/__init__.py +8 -8
  84. smftools/preprocessing/append_base_context.py +105 -79
  85. smftools/preprocessing/append_binary_layer_by_base_context.py +75 -37
  86. smftools/preprocessing/{archives → archived}/calculate_complexity.py +3 -1
  87. smftools/preprocessing/{archives → archived}/preprocessing.py +8 -6
  88. smftools/preprocessing/binarize.py +21 -4
  89. smftools/preprocessing/binarize_on_Youden.py +127 -31
  90. smftools/preprocessing/binary_layers_to_ohe.py +17 -11
  91. smftools/preprocessing/calculate_complexity_II.py +86 -59
  92. smftools/preprocessing/calculate_consensus.py +28 -19
  93. smftools/preprocessing/calculate_coverage.py +44 -22
  94. smftools/preprocessing/calculate_pairwise_differences.py +2 -1
  95. smftools/preprocessing/calculate_pairwise_hamming_distances.py +4 -3
  96. smftools/preprocessing/calculate_position_Youden.py +103 -55
  97. smftools/preprocessing/calculate_read_length_stats.py +52 -23
  98. smftools/preprocessing/calculate_read_modification_stats.py +91 -57
  99. smftools/preprocessing/clean_NaN.py +38 -28
  100. smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
  101. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +70 -37
  102. smftools/preprocessing/filter_reads_on_modification_thresholds.py +181 -73
  103. smftools/preprocessing/flag_duplicate_reads.py +688 -271
  104. smftools/preprocessing/invert_adata.py +26 -11
  105. smftools/preprocessing/load_sample_sheet.py +40 -22
  106. smftools/preprocessing/make_dirs.py +8 -3
  107. smftools/preprocessing/min_non_diagonal.py +2 -1
  108. smftools/preprocessing/recipes.py +56 -23
  109. smftools/preprocessing/reindex_references_adata.py +93 -27
  110. smftools/preprocessing/subsample_adata.py +33 -16
  111. smftools/readwrite.py +264 -109
  112. smftools/schema/__init__.py +11 -0
  113. smftools/schema/anndata_schema_v1.yaml +227 -0
  114. smftools/tools/__init__.py +3 -4
  115. smftools/tools/archived/classifiers.py +163 -0
  116. smftools/tools/archived/subset_adata_v1.py +10 -1
  117. smftools/tools/archived/subset_adata_v2.py +12 -1
  118. smftools/tools/calculate_umap.py +54 -15
  119. smftools/tools/cluster_adata_on_methylation.py +115 -46
  120. smftools/tools/general_tools.py +70 -25
  121. smftools/tools/position_stats.py +229 -98
  122. smftools/tools/read_stats.py +50 -29
  123. smftools/tools/spatial_autocorrelation.py +365 -192
  124. smftools/tools/subset_adata.py +23 -21
  125. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/METADATA +15 -43
  126. smftools-0.2.5.dist-info/RECORD +181 -0
  127. smftools-0.2.4.dist-info/RECORD +0 -176
  128. smftools/preprocessing/{archives → archived}/add_read_length_and_mapping_qc.py +0 -0
  129. smftools/preprocessing/{archives → archived}/mark_duplicates.py +0 -0
  130. smftools/preprocessing/{archives → archived}/remove_duplicates.py +0 -0
  131. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/WHEEL +0 -0
  132. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/entry_points.txt +0 -0
  133. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/licenses/LICENSE +0 -0
smftools/constants.py ADDED
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ from types import MappingProxyType
4
+ from typing import Any, Dict, Final, Mapping
5
+
6
+
7
+ ## Helpers ##
8
+ def _deep_freeze(obj: Any) -> Any:
9
+ """Recursively freeze common containers. Use for constant exports."""
10
+ if isinstance(obj, dict):
11
+ return MappingProxyType({k: _deep_freeze(v) for k, v in obj.items()})
12
+ if isinstance(obj, (list, tuple)):
13
+ return tuple(_deep_freeze(v) for v in obj)
14
+ if isinstance(obj, set):
15
+ return frozenset(_deep_freeze(v) for v in obj)
16
+ return obj # ints/strs/tuples (already immutable)
17
+
18
+
19
+ ## Constants ##
20
+ BAM_SUFFIX: Final[str] = ".bam"
21
+ BARCODE_BOTH_ENDS: Final[bool] = False
22
+ REF_COL: Final[str] = "Reference_strand"
23
+ SAMPLE_COL: Final[str] = "Experiment_name_and_barcode"
24
+ SPLIT_DIR: Final[str] = "demultiplexed_BAMs"
25
+ TRIM: Final[bool] = False
26
+
27
+ _private_conversions = ["unconverted"]
28
+ CONVERSIONS: Final[list[str]] = _deep_freeze(_private_conversions)
29
+
30
+ _private_mod_list = ("5mC_5hmC", "6mA")
31
+ MOD_LIST: Final[tuple[str, ...]] = _deep_freeze(_private_mod_list)
32
+
33
+ _private_mod_map: Dict[str, str] = {"6mA": "6mA", "5mC_5hmC": "5mC"}
34
+ MOD_MAP: Final[Mapping[str, str]] = _deep_freeze(_private_mod_map)
35
+
36
+ _private_strands = ("bottom", "top")
37
+ STRANDS: Final[tuple[str, ...]] = _deep_freeze(_private_strands)
smftools/datasets/__init__.py CHANGED
@@ -1,9 +1,3 @@
1
- from .datasets import (
2
- dCas9_kinetics,
3
- Kissiov_and_McKenna_2025
4
- )
1
+ from .datasets import Kissiov_and_McKenna_2025, dCas9_kinetics
5
2
 
6
- __all__ = [
7
- "dCas9_kinetics",
8
- "Kissiov_and_McKenna_2025"
9
- ]
3
+ __all__ = ["dCas9_kinetics", "Kissiov_and_McKenna_2025"]
smftools/datasets/datasets.py CHANGED
@@ -1,28 +1,42 @@
1
- ## datasets
1
+ """Dataset helpers for bundled SMF datasets."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ import anndata as ad
2
10
 
3
- def import_HERE():
4
- """
5
- Imports HERE for loading datasets
6
- """
7
- from pathlib import Path
8
- from .._settings import settings
9
- HERE = Path(__file__).parent
10
- return HERE
11
11
 
12
- def dCas9_kinetics():
12
+ def import_HERE() -> Path:
13
+ """Resolve the local dataset directory.
14
+
15
+ Returns:
16
+ Path: Path to the datasets directory.
13
17
  """
14
- in vitro Hia5 dCas9 kinetics SMF dataset. Nanopore HAC m6A modcalls.
18
+ return Path(__file__).parent
19
+
20
+
21
+ def dCas9_kinetics() -> "ad.AnnData":
22
+ """Load the in vitro Hia5 dCas9 kinetics SMF dataset.
23
+
24
+ Returns:
25
+ anndata.AnnData: Annotated dataset with Nanopore HAC m6A modcalls.
15
26
  """
16
27
  import anndata as ad
17
- HERE = import_HERE()
18
- filepath = HERE / "dCas9_m6A_invitro_kinetics.h5ad.gz"
28
+
29
+ filepath = import_HERE() / "dCas9_m6A_invitro_kinetics.h5ad.gz"
19
30
  return ad.read_h5ad(filepath)
20
31
 
21
- def Kissiov_and_McKenna_2025():
22
- """
23
- F1 Hybrid M.CviPI natural killer cell SMF. Nanopore canonical calls of NEB EMseq converted SMF gDNA.
32
+
33
+ def Kissiov_and_McKenna_2025() -> "ad.AnnData":
34
+ """Load the F1 Hybrid M.CviPI natural killer cell SMF dataset.
35
+
36
+ Returns:
37
+ anndata.AnnData: Annotated dataset with canonical calls of NEB EMseq converted SMF gDNA.
24
38
  """
25
39
  import anndata as ad
26
- HERE = import_HERE()
27
- filepath = HERE / "F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz"
40
+
41
+ filepath = import_HERE() / "F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz"
28
42
  return ad.read_h5ad(filepath)