smftools 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. smftools/_version.py +1 -1
  2. smftools/cli/chimeric_adata.py +1563 -0
  3. smftools/cli/helpers.py +18 -2
  4. smftools/cli/hmm_adata.py +18 -1
  5. smftools/cli/latent_adata.py +522 -67
  6. smftools/cli/load_adata.py +2 -2
  7. smftools/cli/preprocess_adata.py +32 -93
  8. smftools/cli/recipes.py +26 -0
  9. smftools/cli/spatial_adata.py +23 -109
  10. smftools/cli/variant_adata.py +423 -0
  11. smftools/cli_entry.py +41 -5
  12. smftools/config/conversion.yaml +0 -10
  13. smftools/config/deaminase.yaml +3 -0
  14. smftools/config/default.yaml +49 -13
  15. smftools/config/experiment_config.py +96 -3
  16. smftools/constants.py +4 -0
  17. smftools/hmm/call_hmm_peaks.py +1 -1
  18. smftools/informatics/binarize_converted_base_identities.py +2 -89
  19. smftools/informatics/converted_BAM_to_adata.py +53 -13
  20. smftools/informatics/h5ad_functions.py +83 -0
  21. smftools/informatics/modkit_extract_to_adata.py +4 -0
  22. smftools/plotting/__init__.py +26 -12
  23. smftools/plotting/autocorrelation_plotting.py +22 -4
  24. smftools/plotting/chimeric_plotting.py +1893 -0
  25. smftools/plotting/classifiers.py +28 -14
  26. smftools/plotting/general_plotting.py +58 -3362
  27. smftools/plotting/hmm_plotting.py +1586 -2
  28. smftools/plotting/latent_plotting.py +804 -0
  29. smftools/plotting/plotting_utils.py +243 -0
  30. smftools/plotting/position_stats.py +16 -8
  31. smftools/plotting/preprocess_plotting.py +281 -0
  32. smftools/plotting/qc_plotting.py +8 -3
  33. smftools/plotting/spatial_plotting.py +1134 -0
  34. smftools/plotting/variant_plotting.py +1231 -0
  35. smftools/preprocessing/__init__.py +3 -0
  36. smftools/preprocessing/append_base_context.py +1 -1
  37. smftools/preprocessing/append_mismatch_frequency_sites.py +35 -6
  38. smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
  39. smftools/preprocessing/append_variant_call_layer.py +480 -0
  40. smftools/preprocessing/flag_duplicate_reads.py +4 -4
  41. smftools/preprocessing/invert_adata.py +1 -0
  42. smftools/readwrite.py +109 -85
  43. smftools/tools/__init__.py +6 -0
  44. smftools/tools/calculate_knn.py +121 -0
  45. smftools/tools/calculate_nmf.py +18 -7
  46. smftools/tools/calculate_pca.py +180 -0
  47. smftools/tools/calculate_umap.py +70 -154
  48. smftools/tools/position_stats.py +4 -4
  49. smftools/tools/rolling_nn_distance.py +640 -3
  50. smftools/tools/sequence_alignment.py +140 -0
  51. smftools/tools/tensor_factorization.py +52 -4
  52. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/METADATA +3 -1
  53. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/RECORD +56 -42
  54. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/WHEEL +0 -0
  55. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/entry_points.txt +0 -0
  56. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,140 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Literal
5
+
6
+
7
@dataclass(frozen=True)
class AlignmentMismatch:
    """Immutable description of one difference between two aligned sequences.

    Attributes:
        event: Kind of difference, one of ``"substitution"``,
            ``"insertion"`` or ``"deletion"``.
        seq1_pos: Position of the event in the first sequence, or ``None``
            when the first sequence has no base at this column.
        seq1_base: Base of the first sequence at the event, or ``None``
            when the first sequence has no base at this column.
        seq2_pos: Position of the event in the second sequence, or ``None``
            when the second sequence has no base at this column.
        seq2_base: Base of the second sequence at the event, or ``None``
            when the second sequence has no base at this column.
    """

    # Category of the difference.
    event: Literal["substitution", "insertion", "deletion"]

    # Location and base in the first sequence (both None if it is gapped).
    seq1_pos: int | None
    seq1_base: str | None

    # Location and base in the second sequence (both None if it is gapped).
    seq2_pos: int | None
    seq2_base: str | None
16
+
17
+
18
def align_sequences_with_mismatches(
    seq1: str,
    seq2: str,
    match_score: int = 1,
    mismatch_score: int = -1,
    gap_score: int = -1,
    ignore_n: bool = True,
) -> tuple[str, str, list[AlignmentMismatch]]:
    """Align two sequences end-to-end and list where they differ.

    Both inputs are upper-cased, then aligned globally with a
    Needleman-Wunsch dynamic program using a linear gap penalty. When
    several moves tie for the best score, the diagonal move is preferred,
    then a gap in ``seq2``, then a gap in ``seq1``. The finished alignment
    is scanned column by column to build the list of differences, with
    0-based positions counted separately in each (ungapped) sequence.

    Args:
        seq1: First sequence (treated as reference for positions).
        seq2: Second sequence.
        match_score: Score added when two aligned bases are equal.
        mismatch_score: Score added when two aligned bases differ.
        gap_score: Score added for every gap column.
        ignore_n: If true, differences that involve the base ``N`` are
            left out of the returned list (the alignment scoring itself
            is not affected).

    Returns:
        Tuple of (aligned_seq1, aligned_seq2, mismatches). The aligned
        strings have equal length and use ``-`` for gaps. An
        ``"insertion"`` is a column where ``seq1`` is gapped (its
        position/base are ``None``); a ``"deletion"`` is a column where
        ``seq2`` is gapped (its position/base are ``None``).
    """
    seq1, seq2 = seq1.upper(), seq2.upper()
    n_rows, n_cols = len(seq1), len(seq2)

    score_grid = [[0] * (n_cols + 1) for _ in range(n_rows + 1)]
    move_grid = [[None] * (n_cols + 1) for _ in range(n_rows + 1)]

    # Borders: aligning a prefix against nothing is a run of gaps.
    for r in range(1, n_rows + 1):
        score_grid[r][0] = score_grid[r - 1][0] + gap_score
        move_grid[r][0] = "up"
    for c in range(1, n_cols + 1):
        score_grid[0][c] = score_grid[0][c - 1] + gap_score
        move_grid[0][c] = "left"

    # Interior: take the best of the three predecessors for every cell.
    for r, ref_base in enumerate(seq1, start=1):
        above_scores = score_grid[r - 1]
        row_scores = score_grid[r]
        row_moves = move_grid[r]
        for c, query_base in enumerate(seq2, start=1):
            pair_score = match_score if ref_base == query_base else mismatch_score
            via_diag = above_scores[c - 1] + pair_score
            via_up = above_scores[c] + gap_score
            via_left = row_scores[c - 1] + gap_score

            # Tie order matters for reproducibility: diag, then up, then left.
            if via_diag >= via_up and via_diag >= via_left:
                row_scores[c] = via_diag
                row_moves[c] = "diag"
            elif via_up >= via_left:
                row_scores[c] = via_up
                row_moves[c] = "up"
            else:
                row_scores[c] = via_left
                row_moves[c] = "left"

    # Walk back from the bottom-right corner, collecting alignment columns
    # in reverse order.
    columns: list[tuple[str, str]] = []
    r, c = n_rows, n_cols
    while r > 0 or c > 0:
        move = move_grid[r][c]
        if move == "diag":
            r -= 1
            c -= 1
            columns.append((seq1[r], seq2[c]))
        elif move == "up":
            r -= 1
            columns.append((seq1[r], "-"))
        else:
            c -= 1
            columns.append(("-", seq2[c]))
    columns.reverse()

    aligned_seq1 = "".join(top for top, _ in columns)
    aligned_seq2 = "".join(bottom for _, bottom in columns)

    # Scan the alignment, tracking the ungapped position in each sequence.
    mismatches: list[AlignmentMismatch] = []
    pos1 = 0
    pos2 = 0
    for base1, base2 in columns:
        if base1 == "-":
            # seq2 has a base that seq1 lacks.
            if not ignore_n or base2 != "N":
                mismatches.append(
                    AlignmentMismatch(
                        event="insertion",
                        seq1_pos=None,
                        seq1_base=None,
                        seq2_pos=pos2,
                        seq2_base=base2,
                    )
                )
            pos2 += 1
        elif base2 == "-":
            # seq1 has a base that seq2 lacks.
            if not ignore_n or base1 != "N":
                mismatches.append(
                    AlignmentMismatch(
                        event="deletion",
                        seq1_pos=pos1,
                        seq1_base=base1,
                        seq2_pos=None,
                        seq2_base=None,
                    )
                )
            pos1 += 1
        else:
            involves_n = "N" in (base1, base2)
            if base1 != base2 and not (ignore_n and involves_n):
                mismatches.append(
                    AlignmentMismatch(
                        event="substitution",
                        seq1_pos=pos1,
                        seq1_base=base1,
                        seq2_pos=pos2,
                        seq2_base=base2,
                    )
                )
            pos1 += 1
            pos2 += 1

    return aligned_seq1, aligned_seq2, mismatches
@@ -60,6 +60,8 @@ def calculate_sequence_cp_decomposition(
60
60
  adata: "ad.AnnData",
61
61
  *,
62
62
  layer: str,
63
+ var_mask: np.ndarray | None = None,
64
+ var_mask_name: str | None = None,
63
65
  rank: int = 5,
64
66
  n_iter_max: int = 100,
65
67
  random_state: int = 0,
@@ -71,12 +73,15 @@ def calculate_sequence_cp_decomposition(
71
73
  backend: str = "pytorch",
72
74
  show_progress: bool = False,
73
75
  init: str = "random",
76
+ non_negative: bool = False,
74
77
  ) -> "ad.AnnData":
75
78
  """Compute CP decomposition on one-hot encoded sequence data with masking.
76
79
 
77
80
  Args:
78
81
  adata: AnnData object to update.
79
82
  layer: Layer name containing integer-encoded sequences.
83
+ var_mask: Optional boolean mask over variables to include in the CP fit.
84
+ var_mask_name: Optional label describing the provided ``var_mask``.
80
85
  rank: CP rank.
81
86
  n_iter_max: Maximum number of iterations for the solver.
82
87
  random_state: Random seed for initialization.
@@ -87,6 +92,7 @@ def calculate_sequence_cp_decomposition(
87
92
  bases: Bases to one-hot encode (in order).
88
93
  backend: Tensorly backend to use (``numpy`` or ``pytorch``).
89
94
  show_progress: Whether to display progress during factorization if supported.
95
+ non_negative: Whether to request a non-negative CP decomposition.
90
96
 
91
97
  Returns:
92
98
  Updated AnnData object containing the CP decomposition outputs.
@@ -101,12 +107,31 @@ def calculate_sequence_cp_decomposition(
101
107
  tensorly = require("tensorly", extra="ml-base", purpose="CP decomposition")
102
108
  from tensorly.decomposition import parafac
103
109
 
110
+ try:
111
+ from tensorly.decomposition import non_negative_parafac
112
+ except ImportError:
113
+ non_negative_parafac = None
114
+
104
115
  tensorly.set_backend(backend)
105
116
 
106
117
  if layer not in adata.layers:
107
118
  raise KeyError(f"Layer '{layer}' not found in adata.layers.")
108
119
 
109
- one_hot, mask = build_sequence_one_hot_and_mask(adata.layers[layer], bases=tuple(bases))
120
+ layer_data = adata.layers[layer]
121
+ mask_indices = None
122
+ if var_mask is not None:
123
+ var_mask_array = np.asarray(var_mask, dtype=bool)
124
+ if var_mask_array.shape[0] != adata.n_vars:
125
+ raise ValueError(
126
+ "var_mask must match adata.n_vars; "
127
+ f"got {var_mask_array.shape[0]} vs {adata.n_vars}."
128
+ )
129
+ if not var_mask_array.any():
130
+ raise ValueError("var_mask must include at least one variable.")
131
+ mask_indices = var_mask_array
132
+ layer_data = layer_data[:, var_mask_array]
133
+
134
+ one_hot, mask = build_sequence_one_hot_and_mask(layer_data, bases=tuple(bases))
110
135
  mask_tensor = np.repeat(mask[:, :, None], one_hot.shape[2], axis=2)
111
136
 
112
137
  device = "numpy"
@@ -131,10 +156,21 @@ def calculate_sequence_cp_decomposition(
131
156
  }
132
157
  import inspect
133
158
 
134
- if "verbose" in inspect.signature(parafac).parameters:
159
+ decomposition_fn = parafac
160
+ if non_negative:
161
+ if non_negative_parafac is not None:
162
+ decomposition_fn = non_negative_parafac
163
+ elif "non_negative" in inspect.signature(parafac).parameters:
164
+ parafac_kwargs["non_negative"] = True
165
+ else:
166
+ raise ValueError(
167
+ "Non-negative CP decomposition requested but tensorly does not support it."
168
+ )
169
+
170
+ if "verbose" in inspect.signature(decomposition_fn).parameters:
135
171
  parafac_kwargs["verbose"] = show_progress
136
172
 
137
- cp = parafac(one_hot, **parafac_kwargs)
173
+ cp = decomposition_fn(one_hot, **parafac_kwargs)
138
174
 
139
175
  if backend == "pytorch":
140
176
  weights = cp.weights.detach().cpu().numpy()
@@ -146,7 +182,16 @@ def calculate_sequence_cp_decomposition(
146
182
  read_factors, position_factors, base_factors = [np.asarray(f) for f in cp.factors]
147
183
 
148
184
  adata.obsm[embedding_key] = read_factors
149
- adata.varm[components_key] = position_factors
185
+ if mask_indices is None:
186
+ adata.varm[components_key] = position_factors
187
+ else:
188
+ full_components = np.full(
189
+ (adata.n_vars, position_factors.shape[1]),
190
+ np.nan,
191
+ dtype=position_factors.dtype,
192
+ )
193
+ full_components[mask_indices] = position_factors
194
+ adata.varm[components_key] = full_components
150
195
  adata.uns[uns_key] = {
151
196
  "rank": rank,
152
197
  "n_iter_max": n_iter_max,
@@ -158,6 +203,9 @@ def calculate_sequence_cp_decomposition(
158
203
  "base_labels": list(bases),
159
204
  "backend": backend,
160
205
  "device": str(device),
206
+ "non_negative": non_negative,
207
+ "var_mask_name": var_mask_name,
208
+ "var_mask_count": int(np.sum(mask_indices)) if mask_indices is not None else None,
161
209
  }
162
210
 
163
211
  logger.info(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: smftools
3
- Version: 0.3.1
3
+ Version: 0.3.2
4
4
  Summary: Single Molecule Footprinting Analysis in Python.
5
5
  Project-URL: Source, https://github.com/jkmckenna/smftools
6
6
  Project-URL: Documentation, https://smftools.readthedocs.io/
@@ -55,6 +55,7 @@ Provides-Extra: all
55
55
  Requires-Dist: captum; extra == 'all'
56
56
  Requires-Dist: fastcluster; extra == 'all'
57
57
  Requires-Dist: hydra-core; extra == 'all'
58
+ Requires-Dist: igraph; extra == 'all'
58
59
  Requires-Dist: leidenalg; extra == 'all'
59
60
  Requires-Dist: lightning; extra == 'all'
60
61
  Requires-Dist: matplotlib>=3.6.2; extra == 'all'
@@ -69,6 +70,7 @@ Requires-Dist: pysam>=0.19.1; extra == 'all'
69
70
  Requires-Dist: scikit-learn>=1.0.2; extra == 'all'
70
71
  Requires-Dist: seaborn>=0.11; extra == 'all'
71
72
  Requires-Dist: shap; extra == 'all'
73
+ Requires-Dist: tensorly; extra == 'all'
72
74
  Requires-Dist: torch>=1.9.0; extra == 'all'
73
75
  Requires-Dist: umap-learn>=0.5.5; extra == 'all'
74
76
  Requires-Dist: upsetplot; extra == 'all'
@@ -1,27 +1,30 @@
1
1
  smftools/__init__.py,sha256=Wun5eO3FHy7sAelTLlLdFj3NurqZFQxfO3U5YHJ3KcY,1247
2
2
  smftools/_settings.py,sha256=QqZzjz6Y_-gZH4VJAPRCL68HRlGjzl3hXU7d-dMi8-M,418
3
- smftools/_version.py,sha256=e9cd8nKFi-8fyxRlTRJYWgSHyt1C_r_TE3kQS7NeJv0,58
4
- smftools/cli_entry.py,sha256=Mfhg8GWqER0AHYqKB2McocljX3OWcaYFOEfDrR3WGbQ,10808
5
- smftools/constants.py,sha256=atMoOE3wM803ixhE_PdklOYbNhJvh-37gNkOyg9Jtnw,4007
3
+ smftools/_version.py,sha256=xm1u7RHIf6fYv7ou5-vxQ9OiaAwZ0vJK7AngPQ7IBek,58
4
+ smftools/cli_entry.py,sha256=rFd_rSY8KA28LdHGS5wnSY1nCaiSZMSlhG4bm2Dn1w0,11675
5
+ smftools/constants.py,sha256=NZjUgTD57StBaktxPIpUGoAmNjNB6D68Yr6ueAQJ8KY,4111
6
6
  smftools/logging_utils.py,sha256=OpDul2P5FCWhjJqunYK2MrSaYtbKEPGFE0Z7vZhSX0Y,2193
7
7
  smftools/metadata.py,sha256=-U8jVzjBsXbZ2SKRcyPHMCDPnsQQRR4djzerjpeQvzU,14917
8
8
  smftools/optional_imports.py,sha256=PpjWa-H-rlxwXJjkPLSHkiuim922xwDVsqlgFY8wWqw,1039
9
- smftools/readwrite.py,sha256=ZOy54ABP4fcSe37-bGyhhd8kKOaVXPMCB-WFn_rrOwg,53665
9
+ smftools/readwrite.py,sha256=fvCJsIZ6Idjp0QIzbxXTPSXmQOb6ggx8idRaTbDck7w,54876
10
10
  smftools/cli/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
11
- smftools/cli/helpers.py,sha256=Oj5wrdjrZHNqyPrmwaaPoQ_QvuM-b7e_A4osyrYsf-A,2432
12
- smftools/cli/hmm_adata.py,sha256=068V4qBCodW6xCZEkTWOkz0QHZ3UuixhTKAipFLimxE,48284
13
- smftools/cli/latent_adata.py,sha256=O9CPbm0ru4pFOENQmvTDzUINKLiSYeTOR671vJnyxJU,10431
14
- smftools/cli/load_adata.py,sha256=7-wsYIRs-axh2U4LYm0JtgpCNpWQntxkU-9X4nuz9Fc,31606
15
- smftools/cli/preprocess_adata.py,sha256=A7P5JvBhlUtl2Pepu5yHigzgRDmhDp7uwJS8DWyBZEk,28294
16
- smftools/cli/spatial_adata.py,sha256=xRoYYGtGI4msiZVy68fISLjMlbWDhKQYFe6l1n0zKtM,34273
11
+ smftools/cli/chimeric_adata.py,sha256=roX2qpIg_84lpRFCHgD9MPLTh_Qv8XQU0250_RLyhtI,73758
12
+ smftools/cli/helpers.py,sha256=LGiQyJfqB8mTQ6BhJWG1vc4hkZv0BmxBCbCzY7vAE2A,2932
13
+ smftools/cli/hmm_adata.py,sha256=ZGzU9cfLaYUrUTtGFzUvmgFNQHHzBTkN1jXsGMEpZzE,49363
14
+ smftools/cli/latent_adata.py,sha256=q4PupWWPR8ZY-Qsmj6Ug2WoTRf8SUA4-caNq5Vie2-M,26523
15
+ smftools/cli/load_adata.py,sha256=RSgX1kvEcqSx3JRXL055m2u4hro8uM-sNvA5jVVxOr8,31539
16
+ smftools/cli/preprocess_adata.py,sha256=D2LGNto2aeAvPjMwSPKR9CzxTIewhTtBUL3n47gE5Pw,25161
17
+ smftools/cli/recipes.py,sha256=v3CnuNOyizD_tdNlHF2Qv9LsUkNMYDIVlqr_I3275QA,735
18
+ smftools/cli/spatial_adata.py,sha256=MLsCo8PTYw1jxxXh9bdUG1Ftjuk2bp36oSGmnw7GvF8,30149
19
+ smftools/cli/variant_adata.py,sha256=RNV0WyLVOdJhFAaM6XjUVh__kLMD7kv1BTSym7HIezY,16889
17
20
  smftools/cli/archived/cli_flows.py,sha256=w1rPHSH8WzZn7owz0ra7WOUGlJSEsaRw_W3x-9Ra42k,4977
18
21
  smftools/config/__init__.py,sha256=rcI3qG1DGfRcvQzh8bffiHuRPcmPsas5T8MN6uhScxA,106
19
- smftools/config/conversion.yaml,sha256=ih5qu1gMSOFmjHU1JLqgQ9CzekWD5vKQQREWnIm0HX0,1498
20
- smftools/config/deaminase.yaml,sha256=JCE1nF39AX436-3RyvuZN24gdJfriDijNv5XNHrMT8I,1357
21
- smftools/config/default.yaml,sha256=yyATffMZbyLuiTQ6dpq4pNFvAgYp1CTntAdGA30R4wM,14976
22
+ smftools/config/conversion.yaml,sha256=djZTJ1EwHc1Ax_fIsZo1UnERDpyER15rgo8APrOtAwE,1243
23
+ smftools/config/deaminase.yaml,sha256=uZZzWeNc3TcBGY7o58lqQb3XOMAi3kIpHsUBqWUuM9g,1388
24
+ smftools/config/default.yaml,sha256=gCWhoemO97zyaNTrt0A40RWnaHJ6R65mJJ4gEMUjgC4,16242
22
25
  smftools/config/direct.yaml,sha256=Jw8Nj3QKQrCfJpVl8sGgCKdRvjk5cV66dzRlNfoDOzI,2269
23
26
  smftools/config/discover_input_files.py,sha256=NcOqNYoXggLDieam8UMJAc2sWmoYOZ_Wqp2mApnlBfs,4015
24
- smftools/config/experiment_config.py,sha256=7KoibibpZxz_rRuOj8QiU9gvuMhwMtlF2sbw6EZeszE,70415
27
+ smftools/config/experiment_config.py,sha256=soaTSwVbZELCDS1D5EQa2MF5RvPFP3lNQAaDhYMIbjM,75729
25
28
  smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
26
29
  smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
27
30
  smftools/datasets/__init__.py,sha256=_G08ZAMlA9zpY5UjWTVlpAbW2YPS4PLCmz5g1pZdcCw,157
@@ -29,7 +32,7 @@ smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-
29
32
  smftools/datasets/datasets.py,sha256=-VOdONP-K_ftLDtLktRKbq3S6vSB4pzRQ6VYBxAy_4A,1074
30
33
  smftools/hmm/HMM.py,sha256=Yq7awVwCtTPb4HET1UIx2VTHVvgiwu_2pjEgZTTkTP4,85660
31
34
  smftools/hmm/__init__.py,sha256=vs-fjo-v9y8EwdoErK6KDBm-YzD_XmTtflsdTJlRgTg,746
32
- smftools/hmm/call_hmm_peaks.py,sha256=CzTEz3EP_LNLirIVYzRxbV7_5fCBpLa9azLPSEt6s7s,13529
35
+ smftools/hmm/call_hmm_peaks.py,sha256=jfmo-zzmA57d3pNG1RM1FqjsHue3IErgjOWIf3EuXtQ,13533
33
36
  smftools/hmm/display_hmm.py,sha256=N94hmEKZGehPdsB9yLyY7U1_tTWDk2NTMU-PuWnEQqA,1321
34
37
  smftools/hmm/hmm_readwrite.py,sha256=n-ok3wH1-anSn90vEA91jWKRbtq0bxM66hp2eYoWk34,687
35
38
  smftools/hmm/nucleosome_hmm_refinement.py,sha256=lHB6XVJWhwN-jR1TjBOMIOBQ5soncAJbv3djbkyH1NU,6882
@@ -41,12 +44,12 @@ smftools/informatics/__init__.py,sha256=Q2l7iGOEuZXtGmqWG3NwF-_vNwl4VmJPsElg7-eA
41
44
  smftools/informatics/bam_functions.py,sha256=b_Vb08HmOl8-hXLbkVNBw-zl-1loXqw-yQJkklJUvyY,73636
42
45
  smftools/informatics/basecalling.py,sha256=PgjWoOgfQaUOCoKpyaKO8m8sauMW3el6wdEtzGlvy4s,3699
43
46
  smftools/informatics/bed_functions.py,sha256=9y3XNNl6QivqkWEfoH5XszP3Qsj0P-rCopgfd0HpFgs,21268
44
- smftools/informatics/binarize_converted_base_identities.py,sha256=KRL-KT8MYADadHgRbTgoOFD30LlZazKHe10Hz9nO6Z4,7850
47
+ smftools/informatics/binarize_converted_base_identities.py,sha256=2vncAGQ84GD-yvZpvnbBLj_ex9ABNACOzYAeX9yOPB4,4023
45
48
  smftools/informatics/complement_base_list.py,sha256=6DInlD4cdjKJmmUv4Cp4UU9HHLe60Pm8RF9AoBfBfF0,571
46
- smftools/informatics/converted_BAM_to_adata.py,sha256=CIIrJVQHFpc2gg7K1W99kpmG5IUVBVqQIRBPKuR4EHk,42759
49
+ smftools/informatics/converted_BAM_to_adata.py,sha256=qDdY5S7oCkH9Ov6a0BADnt2ytZYtcXSU1K2RyoNZxoo,44986
47
50
  smftools/informatics/fasta_functions.py,sha256=MD-fL0BkExiDXMUSPkHaHrkOK7aqzOozWREGr5Gzw8w,14111
48
- smftools/informatics/h5ad_functions.py,sha256=wGsvk51FhrByNPI-TCgZEh0nKyW6jd1t_mHR6957IAo,17577
49
- smftools/informatics/modkit_extract_to_adata.py,sha256=oL1dxcNLCsGqFngZd3HYW63tBzkGKQx9kbCWOAzwwfc,84454
51
+ smftools/informatics/h5ad_functions.py,sha256=V-lLmrff4Yv02PiKp9ePjYK7dHZALJInHID3_bSabTY,21178
52
+ smftools/informatics/modkit_extract_to_adata.py,sha256=YF0zeg6k0FiPRCzLLrTcZQ5Q97fI8tZRnzNWco5Jwa0,84581
50
53
  smftools/informatics/modkit_functions.py,sha256=BvWd_qulVOQKZJFMd4pLTVD3ruo1dT48BmvQsdHB0_E,6103
51
54
  smftools/informatics/ohe.py,sha256=fdMEdXG45hiCsHtmYkPsXJKLJ-SBaZktdGx-bmfI3a0,5997
52
55
  smftools/informatics/pod5_functions.py,sha256=6_EA2StpslOe0phSwR9TDB_U-Tmx4ykuBcTAiOL0LPU,10327
@@ -123,17 +126,25 @@ smftools/machine_learning/training/train_sklearn_model.py,sha256=zQ5SQpu_sl7IVcZ
123
126
  smftools/machine_learning/utils/__init__.py,sha256=aiCNpHD08ENEBtz8jzDfVZ8cB7ef9uOE7YaOPDkQUYg,99
124
127
  smftools/machine_learning/utils/device.py,sha256=2D5TF6DQIZKTcyNqIcJ7UMFeiWS_kCAvZUBK6XngCXk,432
125
128
  smftools/machine_learning/utils/grl.py,sha256=ptr-08dRKAGBZ1cySy_B90GgMn5kXtNXlcUC4PJ5mNA,485
126
- smftools/plotting/__init__.py,sha256=GA-fyueJMOgQYe3EH6juWZ3wNYo0ugUqKyoBlubWOoc,2100
127
- smftools/plotting/autocorrelation_plotting.py,sha256=gc3iyGgFEvMcyHXE2K8S956Nx6IfduglOOeJj3te8nU,30590
128
- smftools/plotting/classifiers.py,sha256=9Qt0eixvgE3WXl6jcwPm7O3ATA7Q6mfqRh_B_9M2qTQ,16749
129
- smftools/plotting/general_plotting.py,sha256=TZeIJOCLuNzeUDktee2HP9LJO-o1clNPcPaZ64WHFLk,134703
130
- smftools/plotting/hmm_plotting.py,sha256=3UkhfbZYM0hXvSoQJ94F2GR70cTPjpfm9w2nOo8itL0,14964
131
- smftools/plotting/position_stats.py,sha256=yxZx0Hjx4MsKINe9kkq-R7TkjODBGaDrbH9y0mrrTWs,19351
132
- smftools/plotting/qc_plotting.py,sha256=ODK8UrjVrAJeG8Qfu09DmAJ_o9GJaXRxGwN0pEgbHAU,10358
133
- smftools/preprocessing/__init__.py,sha256=x-QTdVOYbVPpHqO2cBPhmsO-6AliemcBb2Qo78cQrqw,1952
134
- smftools/preprocessing/append_base_context.py,sha256=sZfgkhYzXYtsNO-XgMcQ4v2Hl1u6agaHCpmx-s3jCVY,7134
129
+ smftools/plotting/__init__.py,sha256=HfDYE0N7ZQSpA6gnFVd1XwEIIAq7Z2U8N4deWtCn1g4,3095
130
+ smftools/plotting/autocorrelation_plotting.py,sha256=fnnvzl_5cD09Jz2wH3iGIZE3UT5oyHQ1OtwPSVk2cbM,31314
131
+ smftools/plotting/chimeric_plotting.py,sha256=j-TMqKg0bzt2Fssi89FWR04WvLYFGkMOyksjN9T-qjo,70200
132
+ smftools/plotting/classifiers.py,sha256=EouBhnrHbBbeL8mJZEPU3GbFyIO1p6aERp0FIqp-3j4,17362
133
+ smftools/plotting/general_plotting.py,sha256=Tj9_V27K0f4mymsNWfwRbgQkPwvgiyoDw7yzM8JVSRo,1809
134
+ smftools/plotting/hmm_plotting.py,sha256=afEAZVknf8wP8yYFA7-tT8Hlwy6sLGKuZKHlu7a-tg8,79999
135
+ smftools/plotting/latent_plotting.py,sha256=vC12rJg-8_luYmX6XI3meMw-oyD3bYYIu0nFTvTiZfQ,28584
136
+ smftools/plotting/plotting_utils.py,sha256=HYNDbCJUMyo_JlIjZ5JtVmv02ncpFoiGzqc1vCRbJKA,7282
137
+ smftools/plotting/position_stats.py,sha256=ITG4tEuZgjn3mCoGCNs2SQfiWAzqfl_cki9VOGD_RAA,19862
138
+ smftools/plotting/preprocess_plotting.py,sha256=VbQWKgO-Tlu2hYT2IOupsATEb6jxgqAwinUbivnNYNw,11736
139
+ smftools/plotting/qc_plotting.py,sha256=h1JGCNzDOTNHq2Zr93mI_qqkdqaNxrragmJyZ9PDIyw,10556
140
+ smftools/plotting/spatial_plotting.py,sha256=uVzpdh7pYRqRoDRvQR_fQaOl44FIMkq8YhfwDRYSbTE,44372
141
+ smftools/plotting/variant_plotting.py,sha256=AGcPZ2WSO4VSWTIn9rbxhOzud7sH4CnmZkxcwPVIo5M,54080
142
+ smftools/preprocessing/__init__.py,sha256=Hcq89k8M2kbzOW7vl7X91PEn4npsOPFShmyrUXeBtmA,2232
143
+ smftools/preprocessing/append_base_context.py,sha256=1C8ZeULZNbAWmD5sZxNh14axI4OZMnsOAaZifjeOZHE,7134
135
144
  smftools/preprocessing/append_binary_layer_by_base_context.py,sha256=7mFG7xjAPgm_60AcdCnnjJZK5gJe4VuLohopkrAHmvQ,7640
136
- smftools/preprocessing/append_mismatch_frequency_sites.py,sha256=ZOE6rXAM0D4ElvTi-XJ6iriJielx7Afb4ewgHTyiWBA,6048
145
+ smftools/preprocessing/append_mismatch_frequency_sites.py,sha256=cB-lg8GEEoFu2ESSxCyVObIcuMZTFip4Es96a-EJQQ4,7446
146
+ smftools/preprocessing/append_sequence_mismatch_annotations.py,sha256=wa2hMtsAkk3IYz1dAUBfBUS-5fwHZWUH98stujW4E00,6429
147
+ smftools/preprocessing/append_variant_call_layer.py,sha256=dhvIPVWRSv-Tn9JoTKmeAz6kbHg-iocpRGq_YGA0hHM,19098
137
148
  smftools/preprocessing/binarize.py,sha256=eDFLybKKIF2wcrtN3JWVjeGXSUayezxLhX76UllAhVc,888
138
149
  smftools/preprocessing/binarize_on_Youden.py,sha256=JTHosTDy9-gJ0bPrHkGnz_Ao_AeE8IiutqFA6MksdM8,4887
139
150
  smftools/preprocessing/binary_layers_to_ohe.py,sha256=nYvAefdIKGj-JyNtBqHcekJKI4BI2CM9pN-Mq1BT-28,1931
@@ -149,8 +160,8 @@ smftools/preprocessing/clean_NaN.py,sha256=hrPhbKfqDpSiXLXXJxvcmtwqFhOecJVC29Z7P
149
160
  smftools/preprocessing/filter_adata_by_nan_proportion.py,sha256=5V1PNJISYm92QtEGmS9XSqx456Ult8RY8LMBclNylno,1490
150
161
  smftools/preprocessing/filter_reads_on_length_quality_mapping.py,sha256=83G8ovetiAmh8EbikqAUhNnzaX_KnWe0rZ7vfrgeye4,8018
151
162
  smftools/preprocessing/filter_reads_on_modification_thresholds.py,sha256=yxYZB9Ran1ZxuZm9iHi5bpLo6xcro5zKyv9rnYUtT6g,21688
152
- smftools/preprocessing/flag_duplicate_reads.py,sha256=7MooUKnUULJY6IiCNO7P0idE7_6X0usMtX7F9us8n4U,75420
153
- smftools/preprocessing/invert_adata.py,sha256=-aiidr_PXSf8IIpC4PMRm2RlLAWi-dtZDpr0iSOY-PY,1367
163
+ smftools/preprocessing/flag_duplicate_reads.py,sha256=Khauxw8_stBLmmkM1z4HVVuXpcWIhgTjJwoH-WfOoko,75436
164
+ smftools/preprocessing/invert_adata.py,sha256=QIbNDWra34hp_HHGvYfUYpqwtimIuKys-mVwrb1ChPI,1418
154
165
  smftools/preprocessing/load_sample_sheet.py,sha256=rzY76rCYVf3idu2ZRw4pEjVRBB5AyUkSYY-UzpmE_aw,2193
155
166
  smftools/preprocessing/make_dirs.py,sha256=SjeiXoWsy_SldM-RoQbpfo2mXdnP78xhHVHywTD7IZI,662
156
167
  smftools/preprocessing/min_non_diagonal.py,sha256=2sOWl7zKWltNmv-Jv-ZXb099dYBX28suAOE6DUlUO-U,749
@@ -164,26 +175,29 @@ smftools/preprocessing/archived/preprocessing.py,sha256=l0Im9O45kaMYxipyFiPFBA8M
164
175
  smftools/preprocessing/archived/remove_duplicates.py,sha256=W1Y2ufD2nE9Tnx3NXpEPxso08tiV50iRy77_X3RZkyQ,735
165
176
  smftools/schema/__init__.py,sha256=0chkz2Zc3UKSJO4m0MUemfs-WjGUSSghiuuFM28UvsY,293
166
177
  smftools/schema/anndata_schema_v1.yaml,sha256=FCMjYIqgt-YxQxehcgkdwWBzqk1k-aSZ3fUcksX1bH4,8310
167
- smftools/tools/__init__.py,sha256=M1FF2r0z-thHmU397w_VB2TiEJfmGQyLiYzf264xlB8,1242
178
+ smftools/tools/__init__.py,sha256=40LW8N6CiQMyRkujbm12cCJf8X2PdqrLWYwFiFUeNbg,1650
179
+ smftools/tools/calculate_knn.py,sha256=pPnkZpbO6qUD0Te62kB1D_6f7Y5UzUnrIu7b_XhtVNg,3921
168
180
  smftools/tools/calculate_leiden.py,sha256=48Y72NIzGb6yxtnUund7gS_2jIFro5lwJC0ycNOoAJI,1867
169
- smftools/tools/calculate_nmf.py,sha256=O5IC9G5CrWdhefHaOPOkyx0Jiro_DZ4mvF75psEP4Q8,4192
170
- smftools/tools/calculate_umap.py,sha256=N5cPHMosPPFkp9NInYVZiTGcbJtFsQhFccHDKQG7yhI,6258
181
+ smftools/tools/calculate_nmf.py,sha256=E1SCqOycGAdPm_-VLauLtyy4fd5UaFKNiNlVK3nr8Lo,4577
182
+ smftools/tools/calculate_pca.py,sha256=4dm2sEDkKZhW9_0q1e2lfpr6zVgmUNNReuEyCWbOa3g,6444
183
+ smftools/tools/calculate_umap.py,sha256=D5kTWRR6VIvSXWH_6uWC8q8PSnnK5f1g3yN4xxzQXVE,3370
171
184
  smftools/tools/cluster_adata_on_methylation.py,sha256=NsU11zFyBB_TZFdVZxjqeSZsiVZgb8iCXaOBY54FA9U,7684
172
185
  smftools/tools/general_tools.py,sha256=XO8em-clV4onfbYEH6JTfNj3svLQnwBZ1Tja7s8qsXg,3260
173
- smftools/tools/position_stats.py,sha256=FiFidt3b5cdEMylFoPPrCZbLAXQHsehxFFcTaHvTtt4,27425
186
+ smftools/tools/position_stats.py,sha256=6tDCgnLNZMU_bKs784ku-gdDZDW-2xbZSsQFdvhpLGQ,27441
174
187
  smftools/tools/read_stats.py,sha256=8rV2BXymdPuPihh0Ev-HqPT40lobyt5WExoYjbmrbcI,6534
175
- smftools/tools/rolling_nn_distance.py,sha256=mhoTWBwD1-HmktpQ5yOAz5Q8u2BMkp8RegTMKj5FFC4,8255
188
+ smftools/tools/rolling_nn_distance.py,sha256=k0Ut59wc2A0nTIOvocMM6zxHbQwswYE65ZKYXFqnCbA,32388
189
+ smftools/tools/sequence_alignment.py,sha256=KgnDC9Ve66v_xmb4rzGWHsGAlbrPi2K930sSx6NkGkA,4562
176
190
  smftools/tools/spatial_autocorrelation.py,sha256=euunec6Mmkm5iBDN7TM4q4NXLl9n8UP77-6GSGYCVOk,25473
177
191
  smftools/tools/subset_adata.py,sha256=6xPf6hyKcYwg4L2n0iCnz-Pl84fS4jLgxmD47J-OEco,1012
178
- smftools/tools/tensor_factorization.py,sha256=csGomD78xGmFwu_R_5PZhgcgqgFcWFsds4XkqtgsM7w,5740
192
+ smftools/tools/tensor_factorization.py,sha256=c5p2DtltUIUYRUuoPiNQnAj-ms-V2C-WEcwgF1p4-5k,7697
179
193
  smftools/tools/archived/apply_hmm.py,sha256=b1DKT_02weiPgkfQ0_Zfk7wN8FRZAMeYvBe74H3QuDU,9357
180
194
  smftools/tools/archived/classifiers.py,sha256=iKkK9UyEwEdNwLx-t_r52CURkP3iZ-pFwmQtXF_lnLY,42191
181
195
  smftools/tools/archived/classify_methylated_features.py,sha256=uXWXl4t9cP4inRiSvL4MxGbwC2MxT5uT-D1FFdex8oE,2933
182
196
  smftools/tools/archived/classify_non_methylated_features.py,sha256=vhfLbR5fqALps5HXxQ91x3lUTGLcfciaEsQkSFeLOgM,3292
183
197
  smftools/tools/archived/subset_adata_v1.py,sha256=CBTbHolOil7m4eR0bwIzxS7ZPvo3hmDsPVZGUBzWYrs,1361
184
198
  smftools/tools/archived/subset_adata_v2.py,sha256=npic7cuFIOeUiyRjATVrP4A0O7cV0EgHvRXi9aMMOcI,2311
185
- smftools-0.3.1.dist-info/METADATA,sha256=BN-jDQTEhEUhQ4JHTxlHIgWqHNyfxLKNFWupYqOOPVg,7958
186
- smftools-0.3.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
187
- smftools-0.3.1.dist-info/entry_points.txt,sha256=q4hg4w-mKkI2leekM_-YZc5XRJzp96Mh1FcU3hac82g,52
188
- smftools-0.3.1.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
189
- smftools-0.3.1.dist-info/RECORD,,
199
+ smftools-0.3.2.dist-info/METADATA,sha256=nOOfJCCWKHQMZU-BSGiipUtnuKEA70esQj-w2Nc_NHE,8036
200
+ smftools-0.3.2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
201
+ smftools-0.3.2.dist-info/entry_points.txt,sha256=q4hg4w-mKkI2leekM_-YZc5XRJzp96Mh1FcU3hac82g,52
202
+ smftools-0.3.2.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
203
+ smftools-0.3.2.dist-info/RECORD,,