smftools 0.1.3__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. smftools/__init__.py +5 -1
  2. smftools/_version.py +1 -1
  3. smftools/informatics/__init__.py +2 -0
  4. smftools/informatics/archived/print_bam_query_seq.py +29 -0
  5. smftools/informatics/basecall_pod5s.py +80 -0
  6. smftools/informatics/conversion_smf.py +63 -10
  7. smftools/informatics/direct_smf.py +66 -18
  8. smftools/informatics/helpers/LoadExperimentConfig.py +1 -0
  9. smftools/informatics/helpers/__init__.py +16 -2
  10. smftools/informatics/helpers/align_and_sort_BAM.py +27 -16
  11. smftools/informatics/helpers/aligned_BAM_to_bed.py +49 -48
  12. smftools/informatics/helpers/bam_qc.py +66 -0
  13. smftools/informatics/helpers/binarize_converted_base_identities.py +69 -21
  14. smftools/informatics/helpers/canoncall.py +12 -3
  15. smftools/informatics/helpers/concatenate_fastqs_to_bam.py +5 -4
  16. smftools/informatics/helpers/converted_BAM_to_adata.py +34 -22
  17. smftools/informatics/helpers/converted_BAM_to_adata_II.py +369 -0
  18. smftools/informatics/helpers/demux_and_index_BAM.py +52 -0
  19. smftools/informatics/helpers/extract_base_identities.py +33 -46
  20. smftools/informatics/helpers/extract_mods.py +55 -23
  21. smftools/informatics/helpers/extract_read_features_from_bam.py +31 -0
  22. smftools/informatics/helpers/extract_read_lengths_from_bed.py +25 -0
  23. smftools/informatics/helpers/find_conversion_sites.py +33 -44
  24. smftools/informatics/helpers/generate_converted_FASTA.py +87 -86
  25. smftools/informatics/helpers/modcall.py +13 -5
  26. smftools/informatics/helpers/modkit_extract_to_adata.py +762 -396
  27. smftools/informatics/helpers/ohe_batching.py +65 -41
  28. smftools/informatics/helpers/ohe_layers_decode.py +32 -0
  29. smftools/informatics/helpers/one_hot_decode.py +27 -0
  30. smftools/informatics/helpers/one_hot_encode.py +45 -9
  31. smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +1 -0
  32. smftools/informatics/helpers/run_multiqc.py +28 -0
  33. smftools/informatics/helpers/split_and_index_BAM.py +3 -8
  34. smftools/informatics/load_adata.py +58 -3
  35. smftools/plotting/__init__.py +15 -0
  36. smftools/plotting/classifiers.py +355 -0
  37. smftools/plotting/general_plotting.py +205 -0
  38. smftools/plotting/position_stats.py +462 -0
  39. smftools/preprocessing/__init__.py +6 -7
  40. smftools/preprocessing/append_C_context.py +22 -9
  41. smftools/preprocessing/{mark_duplicates.py → archives/mark_duplicates.py} +38 -26
  42. smftools/preprocessing/binarize_on_Youden.py +35 -32
  43. smftools/preprocessing/binary_layers_to_ohe.py +13 -3
  44. smftools/preprocessing/calculate_complexity.py +3 -2
  45. smftools/preprocessing/calculate_converted_read_methylation_stats.py +44 -46
  46. smftools/preprocessing/calculate_coverage.py +26 -25
  47. smftools/preprocessing/calculate_pairwise_differences.py +49 -0
  48. smftools/preprocessing/calculate_position_Youden.py +18 -7
  49. smftools/preprocessing/calculate_read_length_stats.py +39 -46
  50. smftools/preprocessing/clean_NaN.py +33 -25
  51. smftools/preprocessing/filter_adata_by_nan_proportion.py +31 -0
  52. smftools/preprocessing/filter_converted_reads_on_methylation.py +20 -5
  53. smftools/preprocessing/filter_reads_on_length.py +14 -4
  54. smftools/preprocessing/flag_duplicate_reads.py +149 -0
  55. smftools/preprocessing/invert_adata.py +18 -11
  56. smftools/preprocessing/load_sample_sheet.py +30 -16
  57. smftools/preprocessing/recipes.py +22 -20
  58. smftools/preprocessing/subsample_adata.py +58 -0
  59. smftools/readwrite.py +105 -13
  60. smftools/tools/__init__.py +49 -0
  61. smftools/tools/apply_hmm.py +202 -0
  62. smftools/tools/apply_hmm_batched.py +241 -0
  63. smftools/tools/archived/classify_methylated_features.py +66 -0
  64. smftools/tools/archived/classify_non_methylated_features.py +75 -0
  65. smftools/tools/archived/subset_adata_v1.py +32 -0
  66. smftools/tools/archived/subset_adata_v2.py +46 -0
  67. smftools/tools/calculate_distances.py +18 -0
  68. smftools/tools/calculate_umap.py +62 -0
  69. smftools/tools/call_hmm_peaks.py +105 -0
  70. smftools/tools/classifiers.py +787 -0
  71. smftools/tools/cluster_adata_on_methylation.py +105 -0
  72. smftools/tools/data/__init__.py +2 -0
  73. smftools/tools/data/anndata_data_module.py +90 -0
  74. smftools/tools/data/preprocessing.py +6 -0
  75. smftools/tools/display_hmm.py +18 -0
  76. smftools/tools/general_tools.py +69 -0
  77. smftools/tools/hmm_readwrite.py +16 -0
  78. smftools/tools/inference/__init__.py +1 -0
  79. smftools/tools/inference/lightning_inference.py +41 -0
  80. smftools/tools/models/__init__.py +9 -0
  81. smftools/tools/models/base.py +14 -0
  82. smftools/tools/models/cnn.py +34 -0
  83. smftools/tools/models/lightning_base.py +41 -0
  84. smftools/tools/models/mlp.py +17 -0
  85. smftools/tools/models/positional.py +17 -0
  86. smftools/tools/models/rnn.py +16 -0
  87. smftools/tools/models/sklearn_models.py +40 -0
  88. smftools/tools/models/transformer.py +133 -0
  89. smftools/tools/models/wrappers.py +20 -0
  90. smftools/tools/nucleosome_hmm_refinement.py +104 -0
  91. smftools/tools/position_stats.py +239 -0
  92. smftools/tools/read_stats.py +70 -0
  93. smftools/tools/subset_adata.py +19 -23
  94. smftools/tools/train_hmm.py +78 -0
  95. smftools/tools/training/__init__.py +1 -0
  96. smftools/tools/training/train_lightning_model.py +47 -0
  97. smftools/tools/utils/__init__.py +2 -0
  98. smftools/tools/utils/device.py +10 -0
  99. smftools/tools/utils/grl.py +14 -0
  100. {smftools-0.1.3.dist-info → smftools-0.1.7.dist-info}/METADATA +47 -11
  101. smftools-0.1.7.dist-info/RECORD +136 -0
  102. smftools/tools/apply_HMM.py +0 -1
  103. smftools/tools/read_HMM.py +0 -1
  104. smftools/tools/train_HMM.py +0 -43
  105. smftools-0.1.3.dist-info/RECORD +0 -84
  106. /smftools/preprocessing/{remove_duplicates.py → archives/remove_duplicates.py} +0 -0
  107. /smftools/tools/{cluster.py → evaluation/__init__.py} +0 -0
  108. {smftools-0.1.3.dist-info → smftools-0.1.7.dist-info}/WHEEL +0 -0
  109. {smftools-0.1.3.dist-info → smftools-0.1.7.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,78 @@
1
def train_hmm(
    data,
    emission_probs=[[0.8, 0.2], [0.2, 0.8]],
    transitions=[[0.9, 0.1], [0.1, 0.9]],
    start_probs=[0.5, 0.5],
    end_probs=[0.5, 0.5],
    device=None,
    max_iter=50,
    verbose=True,
    tol=50,
    pad_value=0,
):
    """
    Trains a 2-state DenseHMM model on binary methylation data.

    Every NaN observation is replaced by a random draw from {0, 1}, and
    sequences shorter than the longest one are right-padded with `pad_value`.
    The padded positions are passed to `fit` like any other observation.

    Parameters:
        data (list or np.ndarray): List of sequences (lists) with 0, 1, or NaN.
        emission_probs (list): List of emission probabilities for two states.
        transitions (list): Transition matrix between states.
        start_probs (list): Initial state probabilities.
        end_probs (list): End state probabilities.
        device (str or torch.device): "cpu", "mps", "cuda", or None (auto).
            Auto-detection prefers CUDA, then MPS, then CPU.
        max_iter (int): Maximum EM iterations.
        verbose (bool): Verbose output from pomegranate.
        tol (float): Convergence tolerance.
        pad_value (int): Value used to pad shorter sequences.

    Returns:
        hmm: Trained DenseHMM model

    Raises:
        ValueError: If `data` contains no sequences.
    """
    import numpy as np

    # Materialise the input once: it is traversed twice below (max length, then
    # cleaning), so a one-shot iterable would otherwise be exhausted.
    if isinstance(data, np.ndarray):
        data = data.tolist()
    else:
        data = list(data)

    # Fail fast, before the heavy imports, with a clearer message than the
    # "max() arg is an empty sequence" error this used to surface as.
    if len(data) == 0:
        raise ValueError("train_hmm requires at least one sequence; got empty data.")

    import torch
    from pomegranate.hmm import DenseHMM
    from pomegranate.distributions import Categorical
    from tqdm import tqdm

    # Auto device detection (CUDA > MPS > CPU).
    if device is None:
        # torch.backends.mps is absent on older torch builds, so look it up defensively.
        mps_backend = getattr(torch.backends, "mps", None)
        if torch.cuda.is_available():
            device = torch.device("cuda")
        elif mps_backend is not None and mps_backend.is_available():
            device = torch.device("mps")
        else:
            device = torch.device("cpu")
    elif isinstance(device, str):
        device = torch.device(device)
    print(f"Using device: {device}")

    # Ensure emission probs on correct device
    dists = [
        Categorical(torch.tensor([p], device=device))
        for p in emission_probs
    ]

    # Create DenseHMM
    hmm = DenseHMM(
        distributions=dists,
        edges=transitions,
        starts=start_probs,
        ends=end_probs,
        verbose=verbose,
        max_iter=max_iter,
        tol=tol,
    ).to(device)

    # Preprocess data (replace NaNs + pad)
    max_length = max(len(seq) for seq in data)
    processed_data = []
    for sequence in tqdm(data, desc="Preprocessing Sequences"):
        cleaned_seq = [
            int(x) if not np.isnan(x) else int(np.random.choice([0, 1]))
            for x in sequence
        ]
        cleaned_seq += [pad_value] * (max_length - len(cleaned_seq))
        processed_data.append(cleaned_seq)

    # Trailing unsqueeze adds a size-1 last dimension to the (n_sequences, max_length) tensor.
    tensor_data = torch.tensor(processed_data, dtype=torch.long, device=device).unsqueeze(-1)

    # Fit HMM
    hmm.fit(tensor_data)

    return hmm
@@ -0,0 +1 @@
1
+ from .train_lightning_model import train_lightning_model
@@ -0,0 +1,47 @@
1
+ import torch
2
+ from pytorch_lightning import Trainer
3
+ from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
4
+
5
def train_lightning_model(
    model,
    datamodule,
    max_epochs=20,
    patience=5,
    monitor_metric="val_loss",
    checkpoint_path=None,
    mode="min",
):
    """
    Fit a Lightning model with early stopping and optional checkpointing.

    Parameters:
        model: Module passed to `Trainer.fit`.
        datamodule: Data module passed to `Trainer.fit`.
        max_epochs (int): Maximum number of training epochs.
        patience (int): Early-stopping patience on `monitor_metric`.
        monitor_metric (str): Name of the logged metric to monitor.
        checkpoint_path (str or None): Directory for checkpoints. When falsy,
            no ModelCheckpoint callback is added.
        mode (str): "min" if a lower `monitor_metric` is better, "max" if a
            higher one is. Applied to both early stopping and checkpointing.

    Returns:
        Trainer: The trainer after `fit` has completed.
    """
    # Device logic: a single device, preferring CUDA, then MPS, then CPU.
    # torch.backends.mps is absent on older torch builds, so look it up defensively.
    mps_backend = getattr(torch.backends, "mps", None)
    if torch.cuda.is_available():
        accelerator = "gpu"
    elif mps_backend is not None and mps_backend.is_available():
        accelerator = "mps"
    else:
        accelerator = "cpu"
    devices = 1

    # Callbacks
    callbacks = [
        EarlyStopping(monitor=monitor_metric, patience=patience, mode=mode),
    ]
    if checkpoint_path:
        # Build the filename template from the monitored metric so the value in
        # the checkpoint name is the one used to select it. With the default
        # metric this is exactly "{epoch}-{val_loss:.4f}".
        checkpoint_filename = "{epoch}-{" + monitor_metric + ":.4f}"
        callbacks.append(ModelCheckpoint(
            dirpath=checkpoint_path,
            filename=checkpoint_filename,
            monitor=monitor_metric,
            save_top_k=1,
            mode=mode,
        ))

    # Trainer setup
    trainer = Trainer(
        max_epochs=max_epochs,
        callbacks=callbacks,
        accelerator=accelerator,
        devices=devices,
        log_every_n_steps=10,
    )
    trainer.fit(model, datamodule=datamodule)

    return trainer
@@ -0,0 +1,2 @@
1
+ from .device import detect_device
2
+ from .grl import GradReverse
@@ -0,0 +1,10 @@
1
+ import torch
2
+
3
def detect_device():
    """
    Pick the torch device to run on, preferring CUDA, then MPS, then CPU.

    Prints the chosen device.

    Returns:
        torch.device: The selected device.
    """
    # torch.backends.mps is absent on older torch builds; treat that as "no MPS"
    # instead of raising AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if torch.cuda.is_available():
        device = torch.device('cuda')
    elif mps_backend is not None and mps_backend.is_available():
        device = torch.device('mps')
    else:
        device = torch.device('cpu')
    print(f"Detected device: {device}")
    return device
@@ -0,0 +1,14 @@
1
+ import torch
2
+
3
class GradReverse(torch.autograd.Function):
    """Autograd function that is the identity going forward and flips the gradient going backward.

    The forward pass returns a view of the input. The backward pass multiplies
    the incoming gradient by ``-alpha`` and gives no gradient for ``alpha``.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # Stash the scale so backward can read it.
        ctx.alpha = alpha
        passthrough = x.view_as(x)
        return passthrough

    @staticmethod
    def backward(ctx, grad_output):
        negated_scale = -ctx.alpha
        reversed_grad = negated_scale * grad_output
        # Second slot is the gradient w.r.t. alpha, which is not propagated.
        return reversed_grad, None
12
+
13
def grad_reverse(x, alpha=1.0):
    """Functional wrapper: run `x` through `GradReverse` with scale `alpha` and return the result."""
    reversed_x = GradReverse.apply(x, alpha)
    return reversed_x
@@ -1,12 +1,32 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: smftools
3
- Version: 0.1.3
3
+ Version: 0.1.7
4
4
  Summary: Single Molecule Footprinting Analysis in Python.
5
5
  Project-URL: Source, https://github.com/jkmckenna/smftools
6
6
  Project-URL: Documentation, https://smftools.readthedocs.io/
7
7
  Author: Joseph McKenna
8
8
  Maintainer-email: Joseph McKenna <jkmckenna@berkeley.edu>
9
- License-Expression: MIT
9
+ License: MIT License
10
+
11
+ Copyright (c) 2024 jkmckenna
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
10
30
  License-File: LICENSE
11
31
  Keywords: anndata,chromatin-accessibility,machine-learning,nanopore,protein-dna-binding,single-locus,single-molecule-footprinting
12
32
  Classifier: Development Status :: 2 - Pre-Alpha
@@ -26,12 +46,18 @@ Classifier: Topic :: Scientific/Engineering :: Visualization
26
46
  Requires-Python: >=3.9
27
47
  Requires-Dist: anndata>=0.10.0
28
48
  Requires-Dist: biopython>=1.79
29
- Requires-Dist: cython>=0.29.28
49
+ Requires-Dist: fastcluster
50
+ Requires-Dist: hydra-core
51
+ Requires-Dist: igraph
52
+ Requires-Dist: leidenalg
53
+ Requires-Dist: lightning
54
+ Requires-Dist: multiqc
30
55
  Requires-Dist: networkx>=3.2
31
56
  Requires-Dist: numpy<2,>=1.22.0
57
+ Requires-Dist: omegaconf
32
58
  Requires-Dist: pandas>=1.4.2
33
59
  Requires-Dist: pod5>=0.1.21
34
- Requires-Dist: pomegranate>1.0.0
60
+ Requires-Dist: pomegranate>=1.0.0
35
61
  Requires-Dist: pyfaidx>=0.8.0
36
62
  Requires-Dist: pysam>=0.19.1
37
63
  Requires-Dist: scanpy>=1.9
@@ -40,6 +66,7 @@ Requires-Dist: scipy>=1.7.3
40
66
  Requires-Dist: seaborn>=0.11
41
67
  Requires-Dist: torch>=1.9.0
42
68
  Requires-Dist: tqdm
69
+ Requires-Dist: wandb
43
70
  Provides-Extra: docs
44
71
  Requires-Dist: ipython>=7.20; extra == 'docs'
45
72
  Requires-Dist: matplotlib!=3.6.1; extra == 'docs'
@@ -67,7 +94,7 @@ Description-Content-Type: text/markdown
67
94
  A Python tool for processing raw sequencing data derived from single molecule footprinting experiments into [anndata](https://anndata.readthedocs.io/en/latest/) objects. Additional functionality for preprocessing, analysis, and visualization.
68
95
 
69
96
  ## Philosophy
70
- While most genomic data structures handle low-coverage data (<100X) along large references, smftools prioritizes high-coverage data (scalable to at least 1 million X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.
97
+ While most genomic data structures handle low-coverage data (<100X) along large references, smftools prioritizes high-coverage data (scalable to >1,000,000X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.
71
98
 
72
99
  ## Dependencies
73
100
  The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
@@ -81,14 +108,23 @@ The following CLI tools need to be installed and configured before using the inf
81
108
  ## Modules
82
109
  ### Informatics: Processes raw Nanopore/Illumina data from SMF experiments into an AnnData object.
83
110
  ![](docs/source/_static/smftools_informatics_diagram.png)
84
- ### Preprocessing: Appends QC metrics to the AnnData object and perfroms filtering.
111
+ ### Preprocessing: Appends QC metrics to the AnnData object and performs filtering.
85
112
  ![](docs/source/_static/smftools_preprocessing_diagram.png)
86
- - Tools: Appends various analyses to the AnnData object.
87
- - Plotting: Visualization of analyses stored within the AnnData object.
113
+ ### Tools: Appends analyses to the AnnData object.
114
+ - Currently Includes: Position X Position correlation matrices, Hidden Markov Model feature detection, clustering, dimensionality reduction, peak calling, train/test workflows for various ML classifiers.
115
+ - To do: Additional ML methods for learning predictive single molecule features on condition labels: Autoencoders, Variational Autoencoders, Transformers.
116
+ ### Plotting: Visualization of analyses stored within the AnnData object.
117
+ - Most analyses appended to the adata object by a tools method have, or will have, an accompanying plotting method.
88
118
 
89
119
  ## Announcements
90
- ### 09/09/24 - The pre-alpha phase package ([smftools-0.1.1](https://pypi.org/project/smftools/))
120
+
121
+ ### 05/29/25 - Version 0.1.6 is available through PyPI.
122
+ The informatics, preprocessing, tools, and plotting modules have core functionality that is approaching stability on macOS (Intel/Apple Silicon) and Linux (Ubuntu). I will work on improving documentation/tutorials shortly. The base PyTorch/scikit-learn ML infrastructure is going through some organizational changes to work with PyTorch Lightning, Hydra, and Weights & Biases (wandb) to facilitate organizational scaling, multi-device usage, and logging.
123
+
124
+ ### 10/01/24 - More recent versions are being updated frequently. Installation from source over PyPI is recommended!
125
+
126
+ ### 09/09/24 - The version 0.1.1 package ([smftools-0.1.1](https://pypi.org/project/smftools/)) is installable through pypi!
91
127
  The informatics module has been bumped to alpha-phase status. This module can deal with POD5s and unaligned BAMs from nanopore conversion and direct SMF experiments, as well as FASTQs from Illumina conversion SMF experiments. Primary output from this module is an AnnData object containing all relevant SMF data, which is compatible with all downstream smftools modules. The other modules are still in pre-alpha phase. Preprocessing, Tools, and Plotting modules should be promoted to alpha-phase within the next month or so.
92
128
 
93
- ### 08/30/24 - The pre-alpha phase package ([smftools-0.1.0](https://pypi.org/project/smftools/)) is installable through pypi!
94
- Currently, this package (smftools-0.1.0) is going through rapid improvement (dependency handling accross Linux and Mac OS, testing, documentation, debugging) and is still too early in development for standard use. The underlying functionality was originally developed as a collection of scripts for single molecule footprinting (SMF) experiments in our lab, but is being packaged/developed to facilitate the expansion of SMF to any lab that is interested in performing these styles of experiments/analyses. The alpha-phase package is expected to be available within a couple months, so stay tuned!
129
+ ### 08/30/24 - The version 0.1.0 package ([smftools-0.1.0](https://pypi.org/project/smftools/)) is installable through pypi!
130
+ Currently, this package (smftools-0.1.0) is going through rapid improvement (dependency handling across Linux and macOS, testing, documentation, debugging) and is still too early in development for widespread use. The underlying functionality was originally developed as a collection of scripts for single molecule footprinting (SMF) experiments in our lab, but is being packaged/developed to facilitate the expansion of SMF to any lab that is interested in performing these styles of experiments/analyses. The alpha-phase package is expected to be available within a couple of months, so stay tuned!
@@ -0,0 +1,136 @@
1
+ smftools/__init__.py,sha256=0Llj2kZuzB0PvwcQV5RjvMC0KgFW6F__eceV2eYR4TU,551
2
+ smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
3
+ smftools/_version.py,sha256=GmypIHlw9-BaSEaoucCIwm0ut1DUut0hUvsyTCr17qk,21
4
+ smftools/readwrite.py,sha256=Y-6ehzoEMUIBWp3WQtyX2Vhe9aHwY1tsoNsVApRJRy4,7303
5
+ smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
6
+ smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
7
+ smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
8
+ smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
9
+ smftools/datasets/datasets.py,sha256=0y597Ntp707bOgDwN6O-JEt9yxgplj66p0aj6Zs_IB4,779
10
+ smftools/informatics/__init__.py,sha256=Iz5Jyzln5wKRJ2yu4AWbUx0sMavpMy8XZtGOaLECVmE,391
11
+ smftools/informatics/basecall_pod5s.py,sha256=Ynmxscsxj6qp-zVY0RWodq513oDuHDaHnpqoepB3RUU,3930
12
+ smftools/informatics/conversion_smf.py,sha256=QhlISVi3Z-XqFKyDG_CenLojovAt5-ZhuVe9hus36lg,7177
13
+ smftools/informatics/direct_smf.py,sha256=ylPGFBvRLdxLHeDJjAwq98j8Q8_lfGK3k5JJnQxrwJw,7485
14
+ smftools/informatics/fast5_to_pod5.py,sha256=xfdZU3QluaAcR-q2uBRz8hcBwYt73nCnrFeahvi0OKQ,704
15
+ smftools/informatics/load_adata.py,sha256=90eseT30qkKc9TCwBQ6UvoLbR7_oQ9foLniSxv-x8Q0,10563
16
+ smftools/informatics/readwrite.py,sha256=DgVisHYdkjzaO7suPbUvluImeTc3jqGDlioNveHUxPc,4158
17
+ smftools/informatics/subsample_fasta_from_bed.py,sha256=YqYV09rvEQdeiS5hTTrKa8xYmJfeM3Vk-UUqwpw0qBk,1983
18
+ smftools/informatics/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
19
+ smftools/informatics/archived/bam_conversion.py,sha256=I8EzXjQixMmqx2oWnoNSH5NURBhfT-krbWHkoi_M964,3330
20
+ smftools/informatics/archived/bam_direct.py,sha256=jbEFtUIiUR8Wlp3po_sWkr19AUNS9WZjglojb9j28vo,3606
21
+ smftools/informatics/archived/basecalls_to_adata.py,sha256=-Nag6lr_NAtU4t8jo0GSMdgIAIfmDge-5VEUPQbEatE,3692
22
+ smftools/informatics/archived/print_bam_query_seq.py,sha256=8Z2ZJEOOlfWYUXiZGjteLWU4yTgvV8KQzEIBHUmamGM,838
23
+ smftools/informatics/helpers/LoadExperimentConfig.py,sha256=6K8AmwWVapx5XbZdhIRLB7tNSr6szpPtzM78hbEts7k,2891
24
+ smftools/informatics/helpers/__init__.py,sha256=-PuxmsaS_IrFndAVNwyd13UqSZ4OawvxK87s2gbZIcU,2803
25
+ smftools/informatics/helpers/align_and_sort_BAM.py,sha256=Ce-_m9wQrLS7MPy-sA4yEHNjBPNmmzoLjLbjjJYkvwM,2470
26
+ smftools/informatics/helpers/aligned_BAM_to_bed.py,sha256=5-5fpE7ovDTwF7FZSwpfTNGcgxFKKE-ANxAxGuVH1ks,2887
27
+ smftools/informatics/helpers/bam_qc.py,sha256=IlrXXpCdTYIv_89SE8D5tJ1wtTzxWGjk9vc-rbC1UjU,2430
28
+ smftools/informatics/helpers/bed_to_bigwig.py,sha256=AazYEZzKgKgukSFwCpeiApzxh1kbt11X4RFqRIiBIaY,1466
29
+ smftools/informatics/helpers/binarize_converted_base_identities.py,sha256=VqXXm61KL2z2xK1AcohvezY69bYHI3uL8RTnDDjOOgI,3756
30
+ smftools/informatics/helpers/canoncall.py,sha256=5WS6lwukc_xYTdPQy0OSj-WLbx0Rg70Cun1lCucY7w8,1741
31
+ smftools/informatics/helpers/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
32
+ smftools/informatics/helpers/concatenate_fastqs_to_bam.py,sha256=uSWazdNRCa_Cc1SOMreJZBchPIcII4DNluB9PJF_rA8,2713
33
+ smftools/informatics/helpers/converted_BAM_to_adata.py,sha256=sRmOtn0kNosLYfogqslDHg1Azk51l6nfNOLgQOnQjlA,14591
34
+ smftools/informatics/helpers/converted_BAM_to_adata_II.py,sha256=yYjCc5tJ0_-HgcPziccjNXCe8A7kmD5mFfNWcEDcA3o,16482
35
+ smftools/informatics/helpers/count_aligned_reads.py,sha256=uYyUYglF1asiaoxr-LKxPMUEbfyD7FS-dumTg2hJHzQ,2170
36
+ smftools/informatics/helpers/demux_and_index_BAM.py,sha256=2B_UiU05ln3gYvcN9aC_w6qs8j_WAF4pHWZekAYsXm4,2114
37
+ smftools/informatics/helpers/extract_base_identities.py,sha256=cWcAcWK0vhHl-jRpMX2YMLtYezhSdhMfyj4E7rm2VEU,1833
38
+ smftools/informatics/helpers/extract_mods.py,sha256=MbSIiyj3zx7WlSSWMRPriLMkBtxYc1EWZiAAirMVgqA,3865
39
+ smftools/informatics/helpers/extract_read_features_from_bam.py,sha256=nJxGjVe7LtPi8Eu5HuFFQuDi5ZnvDxLMsPfFc5bLfx4,1275
40
+ smftools/informatics/helpers/extract_read_lengths_from_bed.py,sha256=Cw39wgp1eBTV45Wk1l0c9l-upBW5N2OcgyWXTAXln90,678
41
+ smftools/informatics/helpers/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
42
+ smftools/informatics/helpers/find_conversion_sites.py,sha256=e7gRmvZSakwhnFJkhfgg9i_85rYEXtbGv4_oS8RoNlE,2329
43
+ smftools/informatics/helpers/generate_converted_FASTA.py,sha256=UniQfERNt4FC5L8T1tzr4cLQOJc3wMBPhuWmC-lC8Fs,3747
44
+ smftools/informatics/helpers/get_chromosome_lengths.py,sha256=sLumLrGsU_Xg_oJcdOpQyjUGpJoT2HbcmxWwbwzXUlE,1036
45
+ smftools/informatics/helpers/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
46
+ smftools/informatics/helpers/index_fasta.py,sha256=N3IErfSiavYldeaat8xcQgA1MpykoQHcE0gHUeWuClE,267
47
+ smftools/informatics/helpers/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
48
+ smftools/informatics/helpers/make_modbed.py,sha256=cOQ97gPfRiCcw_fqboxousXIiOYjp78IFYLbu749U1Y,939
49
+ smftools/informatics/helpers/modQC.py,sha256=LeOBObG8gAVVdgESIMceYhd5AW1gfN7ABo91OQtOzTM,1041
50
+ smftools/informatics/helpers/modcall.py,sha256=LVPrdMNVp2gyQTJ4BNp8NJNm89AueDjsKaY7Gqkluho,1777
51
+ smftools/informatics/helpers/modkit_extract_to_adata.py,sha256=uU5p9A1C9ZSEqU5P9Dc_ssDTGrZlh5uXVYlS0RRKdj0,51833
52
+ smftools/informatics/helpers/ohe_batching.py,sha256=QVOiyl9fYHNIFWM23afYnQo0uaOjf1NR3ASKGVSrmuw,2975
53
+ smftools/informatics/helpers/ohe_layers_decode.py,sha256=gIgUC9L8TFLi-fTnjR4PRzXdUaH5D6WL2Hump6XOoy0,1042
54
+ smftools/informatics/helpers/one_hot_decode.py,sha256=3n4rzY8_aC9YKmgrftsguMsH7fUyQ-DbWmrOYF6la9s,906
55
+ smftools/informatics/helpers/one_hot_encode.py,sha256=5hHigA6-SZLK84WH_RHo06F_6aTg7S3TJgvSr8gxGX8,1968
56
+ smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py,sha256=3PmxZJGjIyPLkJvL7VIILrkE9JzB6Kdxh0IJfoJ0e5k,1942
57
+ smftools/informatics/helpers/run_multiqc.py,sha256=qkw48DeBdTEqzhKFGjMUlvNmTehp8wRPkcxdkwERkHc,980
58
+ smftools/informatics/helpers/separate_bam_by_bc.py,sha256=Fsi8OEmv5Ny13cWoHVV9JmEjVFEXT_ZxbBOlRdmyPbE,1742
59
+ smftools/informatics/helpers/split_and_index_BAM.py,sha256=tLAhLoPfiVJhLYGr3LVcjo0xQ_8-yb60hR46EQUpL-s,1570
60
+ smftools/informatics/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
61
+ smftools/informatics/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
62
+ smftools/plotting/__init__.py,sha256=MKT8y7Wq1X1CCDhLxyzYQfqXpEXQZFpZzhi25qGOul4,707
63
+ smftools/plotting/classifiers.py,sha256=8_zabh4NNB1_yVxLD22lfrfl5yfzbEoG3XWqlIqdtrQ,13786
64
+ smftools/plotting/general_plotting.py,sha256=LjN85KyXtuLbDpryMweSps2vFX9GEfc5flmLybLVdn8,9483
65
+ smftools/plotting/position_stats.py,sha256=4XukYIWeWZ_aGSZg1K0t37KA2aknjNNKT5kcKFfuz8Q,17428
66
+ smftools/preprocessing/__init__.py,sha256=2s_46L2qDmM_YoaG3j9YvZ6nTu1T_IF0czhlzC2emQQ,1349
67
+ smftools/preprocessing/append_C_context.py,sha256=mmXju79pKYwrz7WP6v8rztDis4KHHhllBqJCmKky7lk,4414
68
+ smftools/preprocessing/binarize_on_Youden.py,sha256=O5E3vFc2zXMfKW0p0JGDlmRKEx2_VP6dAqfvrumzz00,1797
69
+ smftools/preprocessing/binary_layers_to_ohe.py,sha256=Lxd8knelNTaUozfGMFNMlnrOb6uP28Laj3Ymw6cRHL0,1826
70
+ smftools/preprocessing/calculate_complexity.py,sha256=cXMpFrhkwkPipQo2GZGT5yFknMYUMt1t8gz0Cse1DrA,3288
71
+ smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7Pjn8sy8xJZTm3o0nU,2442
72
+ smftools/preprocessing/calculate_converted_read_methylation_stats.py,sha256=CWS3yoDTceZ8kDMWdy9eEo9Nd-yEbr2OehNovyoLR8w,5822
73
+ smftools/preprocessing/calculate_coverage.py,sha256=XhtOo73ZL1kOvpzEaZAkOCtSD870zBPm0H9D7Kpv190,1867
74
+ smftools/preprocessing/calculate_pairwise_differences.py,sha256=5zJbNNaFld5qgKRoPyplCmMHflbvAQ9eKWCXPXPpJ60,1774
75
+ smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
76
+ smftools/preprocessing/calculate_position_Youden.py,sha256=9GY_WWwaxpB2Xppck3WT1zHtFOhTXrpuDIgbxLC9A7E,7450
77
+ smftools/preprocessing/calculate_read_length_stats.py,sha256=gNNePwMqYZJidzGgT1ZkfSlvc5Y3I3bi5KNYpP6wQQc,4584
78
+ smftools/preprocessing/clean_NaN.py,sha256=6eWSFFLxipiejcR_BeYJ4sVaayMuiEIoscd0eLvJuL0,1557
79
+ smftools/preprocessing/filter_adata_by_nan_proportion.py,sha256=GZcvr2JCsthX8EMw34S9-W3fc6JElw6ka99Jy6f2JvA,1292
80
+ smftools/preprocessing/filter_converted_reads_on_methylation.py,sha256=LOyBho1ltD2HXWrpO2xEeHk4aiHJxxWuG7_lr2NHSJk,2042
81
+ smftools/preprocessing/filter_reads_on_length.py,sha256=Y4WQO5Mna4Xm9hFYKk66hEQb67GnzCTXPOokebRnV-g,2625
82
+ smftools/preprocessing/flag_duplicate_reads.py,sha256=8izI9ekC8oOY1gf500hg8lM0UQFs0_2j2PFUQwgXES8,6276
83
+ smftools/preprocessing/invert_adata.py,sha256=FS-Yo8o70pIT39CoxRtt73dFr1SGZiZqU1HcIj8zDQg,782
84
+ smftools/preprocessing/load_sample_sheet.py,sha256=cRrf-6FDWu6t0eqZufHaF5qn1DLww3DcN5IEncj5K6k,1497
85
+ smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
86
+ smftools/preprocessing/min_non_diagonal.py,sha256=hx1asW8CEmLaIroZISW8EcAf_RnBEC_nofGD8QG0b1E,711
87
+ smftools/preprocessing/recipes.py,sha256=cfKEpKW8TtQLe1CMdSHyPuIgKiWOPn7uP6uMIoRlnaQ,7063
88
+ smftools/preprocessing/subsample_adata.py,sha256=ivJvJIOvEtyvAjqZ7cwEeVedm4QgJxCJEI7sFaTuI3w,2360
89
+ smftools/preprocessing/archives/mark_duplicates.py,sha256=kwfstcWb7KkqeNB321dB-NLe8yd9_hZsSmpL8pCVBQg,8747
90
+ smftools/preprocessing/archives/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
91
+ smftools/preprocessing/archives/remove_duplicates.py,sha256=Erooi5_1VOUNfWpzddzmMNYMCl1U1jJryt7ZtMhabAs,699
92
+ smftools/tools/__init__.py,sha256=jnO_-xJyt9Q86QKCZa6GvFCtacx9G4rW_HJtSyeiook,1717
93
+ smftools/tools/apply_hmm.py,sha256=pJXCULay0zbmubrwql368y7yiHAZr2bJhuGx2QUuKnE,9321
94
+ smftools/tools/apply_hmm_batched.py,sha256=1_36BK3ie7lPU4pD93TOBYltmsbdE3VJPqZydiw8I5s,10410
95
+ smftools/tools/calculate_distances.py,sha256=KDWimQ6u-coyxCKrbTm42Fh_Alf_gURBZ0vfFaem848,644
96
+ smftools/tools/calculate_umap.py,sha256=SoZvvnCBjCdshgkW0ODH1lUY97AHkrPXTxWDdYZO8VY,2513
97
+ smftools/tools/call_hmm_peaks.py,sha256=ixnO-KwFYtLiXt8dSa1dEjNR0RO6b-Eswziz1AA647A,4933
98
+ smftools/tools/classifiers.py,sha256=mwSTpWUXBPjmUuV5i_SMG1lIPpHSMCzsKhl8wTbm-Og,36903
99
+ smftools/tools/cluster_adata_on_methylation.py,sha256=UDC5lpW8fZ6O-16ETu-mbflLkNBKuIg7RIzQ9r7knvA,5760
100
+ smftools/tools/display_hmm.py,sha256=4yM-4wpQnWQiim16cUV4ITI-USEEEtaLHqK3LPu5YCg,840
101
+ smftools/tools/general_tools.py,sha256=P9Ecq2SxzmhwLBvWaMpqGGzSPjOiMmG-AspCb0QaaPk,2573
102
+ smftools/tools/hmm_readwrite.py,sha256=DjJ3hunpBQ7N0GVvxL7-0QUas_SkA88LVgL72mVK2cI,359
103
+ smftools/tools/nucleosome_hmm_refinement.py,sha256=yzkx1i3ez678SS_mOMbg6sbI-yHXaUq5eJdaEF43S1w,4649
104
+ smftools/tools/position_stats.py,sha256=FNR5JaD4e3ykrieN922IIyM4GHvwybz--_bgNPhAkNs,9500
105
+ smftools/tools/read_stats.py,sha256=ze-1kuEnFR7UhNzuPB-UgJ_YIHCfDhOgs6oWAo90VaI,2546
106
+ smftools/tools/subset_adata.py,sha256=nBbtAxCNteZCUBmPnZ9swQNyU74XgWM8aJHHWg2AuL0,1025
107
+ smftools/tools/train_hmm.py,sha256=TCzvHlKLoQnKhflZOsXyXlgPhtk54k9EAoP89kUbQ1U,2465
108
+ smftools/tools/archived/classify_methylated_features.py,sha256=Z0N2UKw3luD3CTQ8wcUvdnMY7w-8574OJbEcwzNsy88,2897
109
+ smftools/tools/archived/classify_non_methylated_features.py,sha256=IJERTozEs7IPL7K-VIjq2q2K36wRCW9iiNSYLAXasrA,3256
110
+ smftools/tools/archived/subset_adata_v1.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
111
+ smftools/tools/archived/subset_adata_v2.py,sha256=OKZoUpvdURPtckIQxGTWmOI5jLa-_EU62Xs3LyyehnA,1880
112
+ smftools/tools/data/__init__.py,sha256=DEEeRUbOOaeqvy1swkUWPK_TPcjmkU6CcoZAqSa5cfs,91
113
+ smftools/tools/data/anndata_data_module.py,sha256=ReAdYxwPR446eeNRKYCrt9OEhlQ9woY_4qxncJbUh4c,4425
114
+ smftools/tools/data/preprocessing.py,sha256=dSs6Qs3wmlccFPZSpOc-uy1nlFSf68wWQKwF1iTqMok,137
115
+ smftools/tools/evaluation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
+ smftools/tools/inference/__init__.py,sha256=VJCamuKjcucG7RPFsjd8vECd_kwPlIz92u64gdPePDY,56
117
+ smftools/tools/inference/lightning_inference.py,sha256=8SXbJWLDgbWiZW0bpC3IMkU01S1d_2zyzB__KxIAuBk,1469
118
+ smftools/tools/models/__init__.py,sha256=bMfPbQ5bDmn_kWv82virLuUhjb12Yow7t_j96afNbyA,421
119
+ smftools/tools/models/base.py,sha256=TRJMyKHGLLUkhEbzRMKIaNf-6yFyCEf5s0Xs0QUeG-0,505
120
+ smftools/tools/models/cnn.py,sha256=M7SYPL-7f2Cyf0zQGmnV2vI6-KCz6rfikHV81XLy-lA,1169
121
+ smftools/tools/models/lightning_base.py,sha256=8Zoj-ij5fSsFAn30hJfdUv-pZGuJLGtwj8HoHK3Msws,1354
122
+ smftools/tools/models/mlp.py,sha256=YXVf1Pix-S2aqOMvmsVzbF_igsf9_MjKbuZw6FBo_nk,561
123
+ smftools/tools/models/positional.py,sha256=7g93nyxnvWTYrfrdvIMWa74DG0obn5FbC2-ngWmCBVo,631
124
+ smftools/tools/models/rnn.py,sha256=DVhG1mJ47ObqaHiLHEwHTpme1vR_uNeTnaaKzC6whgQ,656
125
+ smftools/tools/models/sklearn_models.py,sha256=rFDrq7nJXHd4yCrc5oAvB76m04vdHQRNqWh3RCCOBQ0,1289
126
+ smftools/tools/models/transformer.py,sha256=d0v7vtXNXOHDgOsXMyRkMLObpPpWjLvbFntGnh924g8,4896
127
+ smftools/tools/models/wrappers.py,sha256=HEY2A6-Bk6MtVZ9jOaPT8S1Qi0L98SyEg1nbKqYZoag,697
128
+ smftools/tools/training/__init__.py,sha256=PxAsc6UhXYyZkmwewhYK3OPVZG_se_YfSq75fqc0EBM,56
129
+ smftools/tools/training/train_lightning_model.py,sha256=dtx1lJzgP8eMR1VyXsQo8KnzB6bhF2VY3kHhkEqVV58,1157
130
+ smftools/tools/utils/__init__.py,sha256=yOpzBc9AXbarSRfN8Ixh2Z1uWLGpgpjRR46h6E46_2w,62
131
+ smftools/tools/utils/device.py,sha256=GITrULOty2Fr96Bqt1wi1PaYl_oVgB5Z99Gfn5vQy4o,274
132
+ smftools/tools/utils/grl.py,sha256=BWBDp_kQBigrUzQpRbZzgpfr_WOcd2K2V3MQL-aAIc4,334
133
+ smftools-0.1.7.dist-info/METADATA,sha256=xb2e-EdaGquNERQ-0Pcs3SR4yTi5V_ua10ITchFjTkI,8870
134
+ smftools-0.1.7.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
135
+ smftools-0.1.7.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
136
+ smftools-0.1.7.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- # apply_HMM
@@ -1 +0,0 @@
1
- # read_HMM
@@ -1,43 +0,0 @@
1
- # train_HMM
2
-
3
- def train_HMM(adata, model_name='trained_HMM', save_hmm=False):
4
- """
5
-
6
- Parameters:
7
- adata (AnnData): Input AnnData object
8
- model_name (str): Name of the model
9
- save_hmm (bool): Whether to save the model
10
-
11
- """
12
- import numpy as np
13
- import anndata as ad
14
- from pomegranate.distributions import Categorical
15
- from pomegranate.hmm import DenseHMM
16
-
17
- bound = Categorical([[0.95, 0.05]])
18
- unbound = Categorical([[0.05, 0.95]])
19
-
20
- edges = [[0.9, 0.1], [0.1, 0.9]]
21
- starts = [0.5, 0.5]
22
- ends = [0.5, 0.5]
23
-
24
- model = DenseHMM([bound, unbound], edges=edges, starts=starts, ends=ends, max_iter=5, verbose=True)
25
-
26
- # define training sets and labels
27
- # Determine the number of reads to sample
28
- n_sample = round(0.7 * adata.X.shape[0])
29
- # Generate random indices
30
- np.random.seed(0)
31
- random_indices = np.random.choice(adata.shape[0], size=n_sample, replace=False)
32
- # Subset the AnnData object using the random indices
33
- training_adata_subsampled = adata[random_indices, :]
34
- training_sequences = training_adata_subsampled.X
35
-
36
- # Train the HMM without labeled data
37
- model.fit(training_sequences, algorithm='baum-welch')
38
-
39
- if save_hmm:
40
- # Save the model to a file
41
- model_json = model.to_json()
42
- with open(f'{model_name}.json', 'w') as f:
43
- f.write(model_json)
@@ -1,84 +0,0 @@
1
- smftools/__init__.py,sha256=zy4ckT7hKrLrlm6NiZQoupvc6oSN7wJsyOBCYdzukcQ,401
2
- smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
3
- smftools/_version.py,sha256=R5TtpJu7Qu6sOarfDpp-5Oyy8Pi2Ir3VewCvsCQiAgo,21
4
- smftools/readwrite.py,sha256=DgVisHYdkjzaO7suPbUvluImeTc3jqGDlioNveHUxPc,4158
5
- smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
6
- smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
7
- smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
8
- smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
9
- smftools/datasets/datasets.py,sha256=0y597Ntp707bOgDwN6O-JEt9yxgplj66p0aj6Zs_IB4,779
10
- smftools/informatics/__init__.py,sha256=WQiMBr1yjDrlmHg8UNgW2MJsq4fPrVfh-UBr5tYI9x4,326
11
- smftools/informatics/conversion_smf.py,sha256=PS-TjgMttr3VRrT0zg5L_L01xMOewB_OXSsQyoM7DWI,4333
12
- smftools/informatics/direct_smf.py,sha256=ue7p7deuRwaZtEh9EFV1YTE8HKRAmOsx9oaRJdjCrbY,4697
13
- smftools/informatics/fast5_to_pod5.py,sha256=xfdZU3QluaAcR-q2uBRz8hcBwYt73nCnrFeahvi0OKQ,704
14
- smftools/informatics/load_adata.py,sha256=i-2YCSaeLzbPfNtKPrLwfkv-9u_TrTAZrbtNAj3FRWY,7271
15
- smftools/informatics/readwrite.py,sha256=DgVisHYdkjzaO7suPbUvluImeTc3jqGDlioNveHUxPc,4158
16
- smftools/informatics/subsample_fasta_from_bed.py,sha256=YqYV09rvEQdeiS5hTTrKa8xYmJfeM3Vk-UUqwpw0qBk,1983
17
- smftools/informatics/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
18
- smftools/informatics/archived/bam_conversion.py,sha256=I8EzXjQixMmqx2oWnoNSH5NURBhfT-krbWHkoi_M964,3330
19
- smftools/informatics/archived/bam_direct.py,sha256=jbEFtUIiUR8Wlp3po_sWkr19AUNS9WZjglojb9j28vo,3606
20
- smftools/informatics/archived/basecalls_to_adata.py,sha256=-Nag6lr_NAtU4t8jo0GSMdgIAIfmDge-5VEUPQbEatE,3692
21
- smftools/informatics/helpers/LoadExperimentConfig.py,sha256=gsWGoa9cydwY4Kd-hTXF2gtmxc8glRRD2V1JB88e9js,2822
22
- smftools/informatics/helpers/__init__.py,sha256=KrfyM08_RgDf3Ajvb4KNTvcOqZiWYSIVhEznCr01Gcc,2255
23
- smftools/informatics/helpers/align_and_sort_BAM.py,sha256=DouG6nGWXtz2ulZD5p0sEShE-4dbPudHaWcHFm4-oJA,2184
24
- smftools/informatics/helpers/aligned_BAM_to_bed.py,sha256=eYkGQFSM2gPEauASkY_-9Yvy6727vP8Q4wx_st85Dpc,2638
25
- smftools/informatics/helpers/bed_to_bigwig.py,sha256=AazYEZzKgKgukSFwCpeiApzxh1kbt11X4RFqRIiBIaY,1466
26
- smftools/informatics/helpers/binarize_converted_base_identities.py,sha256=iJlDah-YJ0zx0UrlHdtgvrALVNSA0TTTdDoKmNCVg0Q,1846
27
- smftools/informatics/helpers/canoncall.py,sha256=M7HEqhYsWMUB0tLP3hzMM0L7PhcOTXgetl5lV3GgIaw,1062
28
- smftools/informatics/helpers/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
29
- smftools/informatics/helpers/concatenate_fastqs_to_bam.py,sha256=RXPn7e6Dcwol9tnUsfXJu3EuZcMSOJJo5LNWouovvZs,2715
30
- smftools/informatics/helpers/converted_BAM_to_adata.py,sha256=Rsnydzpf9lMS3TQjXpbXJSSfCzhVTPn3rBDLiK-8utA,13991
31
- smftools/informatics/helpers/count_aligned_reads.py,sha256=uYyUYglF1asiaoxr-LKxPMUEbfyD7FS-dumTg2hJHzQ,2170
32
- smftools/informatics/helpers/extract_base_identities.py,sha256=E-_m9W82N52NjX5kz9Af5YH0S2k58hnq9KTrm4S5vgM,4370
33
- smftools/informatics/helpers/extract_mods.py,sha256=UBFjXDKz_A6ivjcocYT1_pKjvygY2Fdg0RjQmMS8UuA,2269
34
- smftools/informatics/helpers/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
35
- smftools/informatics/helpers/find_conversion_sites.py,sha256=5AghDQzEoSvE2Og98VsKoeWUFSLnIGY1LnRu1BtQavM,3700
36
- smftools/informatics/helpers/generate_converted_FASTA.py,sha256=ueaAsFnBuc7zKwkBivBR3DJg4DtkxkHHIQcVVSWzv-w,5161
37
- smftools/informatics/helpers/get_chromosome_lengths.py,sha256=sLumLrGsU_Xg_oJcdOpQyjUGpJoT2HbcmxWwbwzXUlE,1036
38
- smftools/informatics/helpers/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
39
- smftools/informatics/helpers/index_fasta.py,sha256=N3IErfSiavYldeaat8xcQgA1MpykoQHcE0gHUeWuClE,267
40
- smftools/informatics/helpers/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
41
- smftools/informatics/helpers/make_modbed.py,sha256=cOQ97gPfRiCcw_fqboxousXIiOYjp78IFYLbu749U1Y,939
42
- smftools/informatics/helpers/modQC.py,sha256=LeOBObG8gAVVdgESIMceYhd5AW1gfN7ABo91OQtOzTM,1041
43
- smftools/informatics/helpers/modcall.py,sha256=9PH7Peq4y-VBqQcMkbv0TwgePBlD5aM4_FmI7H4hbQQ,1142
44
- smftools/informatics/helpers/modkit_extract_to_adata.py,sha256=duPlRAIz4VWM-jm9iaLY7N6JHQcun_L0nhr2VyUjNTI,38184
45
- smftools/informatics/helpers/ohe_batching.py,sha256=_Mz2p1We5PVIb8S6Hbq_hREKJ9mGQiADwfFK_NgMGhA,1909
46
- smftools/informatics/helpers/one_hot_encode.py,sha256=hpZAuwa9ndkhyCm9sO65KVHE0lbFDKqRylfliEKyD4o,632
47
- smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py,sha256=tAnXFleGzXJNjHRAgZ0NUJuZ0P3aKmUYIrK-V9VoJKY,1860
48
- smftools/informatics/helpers/separate_bam_by_bc.py,sha256=Fsi8OEmv5Ny13cWoHVV9JmEjVFEXT_ZxbBOlRdmyPbE,1742
49
- smftools/informatics/helpers/split_and_index_BAM.py,sha256=_TFJ8fcLbIf37JG83hSc1zgs1yxX70-NhA8y-PbhTpo,1966
50
- smftools/informatics/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
51
- smftools/informatics/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
52
- smftools/plotting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
- smftools/preprocessing/__init__.py,sha256=5FQNrj51KmaDLeAGGBA8iWMkYiSOe7O91ES8mT4aVtE,1399
54
- smftools/preprocessing/append_C_context.py,sha256=pP5u9o5U4JmHras0PK6yas65u4-U5KlX3sKLb-duo80,3728
55
- smftools/preprocessing/binarize_on_Youden.py,sha256=slkkt56DZ1FZWy8Un5mNJEZ49JlPnPKow2zU4GoHEr8,2303
56
- smftools/preprocessing/binary_layers_to_ohe.py,sha256=931eHuVda6pMZTvC7jVTKkY2a_KQWpSfgi-nkA5NmaI,1238
57
- smftools/preprocessing/calculate_complexity.py,sha256=ut60et8bmIswtiLhctJWHNseIV4ZRQultYdtJPHcRPs,3224
58
- smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7Pjn8sy8xJZTm3o0nU,2442
59
- smftools/preprocessing/calculate_converted_read_methylation_stats.py,sha256=Si0DcES0lLMvg3XgdKpedxfPnXQ14tEFKrOAFRn3fHs,6059
60
- smftools/preprocessing/calculate_coverage.py,sha256=ZgRxQGpydxQg1exkvSiy8nHmzDIPGGqL5vL9XQ2PZQ4,2068
61
- smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
62
- smftools/preprocessing/calculate_position_Youden.py,sha256=mfQ6nFfUaEaKg_icyHA1zZlhh0wHjpLE56BZDXOdP_4,6364
63
- smftools/preprocessing/calculate_read_length_stats.py,sha256=6m362JaCKlD0QoBUMnM2qsB6Jo_4shl7xFzqU1uZccU,4945
64
- smftools/preprocessing/clean_NaN.py,sha256=1vieT026p0gDJCbqB_CiLvAGGxlc-5xufoKJgZuBFFk,1150
65
- smftools/preprocessing/filter_converted_reads_on_methylation.py,sha256=SN5q0rqYtYW9j3i0sVSyTv9EmR_uLKI7GkjmJixeOU0,1307
66
- smftools/preprocessing/filter_reads_on_length.py,sha256=sAT66bjuI8ZtXyQc9SuPzq1dPIB1CNVx6VfWqVng4Dg,2191
67
- smftools/preprocessing/invert_adata.py,sha256=u6Y70EH0B5mXb9-HuukIlzpMgZ6rhzcJuy3YZZTx3SA,684
68
- smftools/preprocessing/load_sample_sheet.py,sha256=uGjzG9x-1t_1lCooH85P8Tfg80GdvVx8Jv1LPl9XNFM,915
69
- smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
70
- smftools/preprocessing/mark_duplicates.py,sha256=sQuPcTw8JsQoONOk-kMlAF965sIk2Pu-M7rIyfbyGGs,8145
71
- smftools/preprocessing/min_non_diagonal.py,sha256=hx1asW8CEmLaIroZISW8EcAf_RnBEC_nofGD8QG0b1E,711
72
- smftools/preprocessing/recipes.py,sha256=KzSw5JW0WJGzSis5Fm7moQY5PxOYl6-uYYf1NDj6nOE,7117
73
- smftools/preprocessing/remove_duplicates.py,sha256=Erooi5_1VOUNfWpzddzmMNYMCl1U1jJryt7ZtMhabAs,699
74
- smftools/preprocessing/archives/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
75
- smftools/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
- smftools/tools/apply_HMM.py,sha256=AuVtOki69-Xs4mhjhTXJzd49KCVXwixFyWSUgDjtR6s,11
77
- smftools/tools/cluster.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
- smftools/tools/read_HMM.py,sha256=N0MGG494VjlxYJcCVz1jN4OasGtRITZS98SJ2xB_j8k,10
79
- smftools/tools/subset_adata.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
80
- smftools/tools/train_HMM.py,sha256=x5ZcXj-heWQqDOX86nuuDoj1tPkYKl04fYA1fCKNQ0c,1380
81
- smftools-0.1.3.dist-info/METADATA,sha256=u26Og8tpAF2TgXZztotk3Q4EuP7Fvf73s1tlIjBDD-A,6410
82
- smftools-0.1.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
83
- smftools-0.1.3.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
84
- smftools-0.1.3.dist-info/RECORD,,
File without changes