smftools 0.1.7__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +9 -4
- smftools/_version.py +1 -1
- smftools/cli.py +184 -0
- smftools/config/__init__.py +1 -0
- smftools/config/conversion.yaml +33 -0
- smftools/config/deaminase.yaml +56 -0
- smftools/config/default.yaml +253 -0
- smftools/config/direct.yaml +17 -0
- smftools/config/experiment_config.py +1191 -0
- smftools/hmm/HMM.py +1576 -0
- smftools/hmm/__init__.py +20 -0
- smftools/{tools → hmm}/apply_hmm_batched.py +8 -7
- smftools/hmm/call_hmm_peaks.py +106 -0
- smftools/{tools → hmm}/display_hmm.py +3 -3
- smftools/{tools → hmm}/nucleosome_hmm_refinement.py +2 -2
- smftools/{tools → hmm}/train_hmm.py +1 -1
- smftools/informatics/__init__.py +0 -2
- smftools/informatics/archived/deaminase_smf.py +132 -0
- smftools/informatics/fast5_to_pod5.py +4 -1
- smftools/informatics/helpers/__init__.py +3 -4
- smftools/informatics/helpers/align_and_sort_BAM.py +34 -7
- smftools/informatics/helpers/aligned_BAM_to_bed.py +35 -24
- smftools/informatics/helpers/binarize_converted_base_identities.py +116 -23
- smftools/informatics/helpers/concatenate_fastqs_to_bam.py +365 -42
- smftools/informatics/helpers/converted_BAM_to_adata_II.py +165 -29
- smftools/informatics/helpers/discover_input_files.py +100 -0
- smftools/informatics/helpers/extract_base_identities.py +29 -3
- smftools/informatics/helpers/extract_read_features_from_bam.py +4 -2
- smftools/informatics/helpers/find_conversion_sites.py +5 -4
- smftools/informatics/helpers/modkit_extract_to_adata.py +6 -3
- smftools/informatics/helpers/plot_bed_histograms.py +269 -0
- smftools/informatics/helpers/separate_bam_by_bc.py +2 -2
- smftools/informatics/helpers/split_and_index_BAM.py +1 -5
- smftools/load_adata.py +1346 -0
- smftools/machine_learning/__init__.py +12 -0
- smftools/machine_learning/data/__init__.py +2 -0
- smftools/machine_learning/data/anndata_data_module.py +234 -0
- smftools/machine_learning/evaluation/__init__.py +2 -0
- smftools/machine_learning/evaluation/eval_utils.py +31 -0
- smftools/machine_learning/evaluation/evaluators.py +223 -0
- smftools/machine_learning/inference/__init__.py +3 -0
- smftools/machine_learning/inference/inference_utils.py +27 -0
- smftools/machine_learning/inference/lightning_inference.py +68 -0
- smftools/machine_learning/inference/sklearn_inference.py +55 -0
- smftools/machine_learning/inference/sliding_window_inference.py +114 -0
- smftools/machine_learning/models/base.py +295 -0
- smftools/machine_learning/models/cnn.py +138 -0
- smftools/machine_learning/models/lightning_base.py +345 -0
- smftools/machine_learning/models/mlp.py +26 -0
- smftools/{tools → machine_learning}/models/positional.py +3 -2
- smftools/{tools → machine_learning}/models/rnn.py +2 -1
- smftools/machine_learning/models/sklearn_models.py +273 -0
- smftools/machine_learning/models/transformer.py +303 -0
- smftools/machine_learning/training/__init__.py +2 -0
- smftools/machine_learning/training/train_lightning_model.py +135 -0
- smftools/machine_learning/training/train_sklearn_model.py +114 -0
- smftools/plotting/__init__.py +4 -1
- smftools/plotting/autocorrelation_plotting.py +611 -0
- smftools/plotting/general_plotting.py +566 -89
- smftools/plotting/hmm_plotting.py +260 -0
- smftools/plotting/qc_plotting.py +270 -0
- smftools/preprocessing/__init__.py +13 -8
- smftools/preprocessing/add_read_length_and_mapping_qc.py +129 -0
- smftools/preprocessing/append_base_context.py +122 -0
- smftools/preprocessing/append_binary_layer_by_base_context.py +143 -0
- smftools/preprocessing/calculate_complexity_II.py +248 -0
- smftools/preprocessing/calculate_coverage.py +10 -1
- smftools/preprocessing/calculate_read_modification_stats.py +101 -0
- smftools/preprocessing/clean_NaN.py +17 -1
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +158 -0
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +352 -0
- smftools/preprocessing/flag_duplicate_reads.py +1326 -124
- smftools/preprocessing/invert_adata.py +12 -5
- smftools/preprocessing/load_sample_sheet.py +19 -4
- smftools/readwrite.py +849 -43
- smftools/tools/__init__.py +3 -32
- smftools/tools/calculate_umap.py +5 -5
- smftools/tools/general_tools.py +3 -3
- smftools/tools/position_stats.py +468 -106
- smftools/tools/read_stats.py +115 -1
- smftools/tools/spatial_autocorrelation.py +562 -0
- {smftools-0.1.7.dist-info → smftools-0.2.1.dist-info}/METADATA +5 -1
- smftools-0.2.1.dist-info/RECORD +161 -0
- smftools-0.2.1.dist-info/entry_points.txt +2 -0
- smftools/informatics/helpers/LoadExperimentConfig.py +0 -75
- smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +0 -53
- smftools/informatics/load_adata.py +0 -182
- smftools/preprocessing/append_C_context.py +0 -82
- smftools/preprocessing/calculate_converted_read_methylation_stats.py +0 -94
- smftools/preprocessing/filter_converted_reads_on_methylation.py +0 -44
- smftools/preprocessing/filter_reads_on_length.py +0 -51
- smftools/tools/call_hmm_peaks.py +0 -105
- smftools/tools/data/__init__.py +0 -2
- smftools/tools/data/anndata_data_module.py +0 -90
- smftools/tools/evaluation/__init__.py +0 -0
- smftools/tools/inference/__init__.py +0 -1
- smftools/tools/inference/lightning_inference.py +0 -41
- smftools/tools/models/base.py +0 -14
- smftools/tools/models/cnn.py +0 -34
- smftools/tools/models/lightning_base.py +0 -41
- smftools/tools/models/mlp.py +0 -17
- smftools/tools/models/sklearn_models.py +0 -40
- smftools/tools/models/transformer.py +0 -133
- smftools/tools/training/__init__.py +0 -1
- smftools/tools/training/train_lightning_model.py +0 -47
- smftools-0.1.7.dist-info/RECORD +0 -136
- /smftools/{tools → hmm}/calculate_distances.py +0 -0
- /smftools/{tools → hmm}/hmm_readwrite.py +0 -0
- /smftools/informatics/{conversion_smf.py → archived/conversion_smf.py} +0 -0
- /smftools/informatics/{direct_smf.py → archived/direct_smf.py} +0 -0
- /smftools/{tools → machine_learning}/data/preprocessing.py +0 -0
- /smftools/{tools → machine_learning}/models/__init__.py +0 -0
- /smftools/{tools → machine_learning}/models/wrappers.py +0 -0
- /smftools/{tools → machine_learning}/utils/__init__.py +0 -0
- /smftools/{tools → machine_learning}/utils/device.py +0 -0
- /smftools/{tools → machine_learning}/utils/grl.py +0 -0
- /smftools/tools/{apply_hmm.py → archived/apply_hmm.py} +0 -0
- /smftools/tools/{classifiers.py → archived/classifiers.py} +0 -0
- {smftools-0.1.7.dist-info → smftools-0.2.1.dist-info}/WHEEL +0 -0
- {smftools-0.1.7.dist-info → smftools-0.2.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
## invert_adata
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def invert_adata(adata):
|
|
3
|
+
def invert_adata(adata, uns_flag='adata_positions_inverted', force_redo=False):
|
|
6
4
|
"""
|
|
7
5
|
Inverts the AnnData object along the column (variable) axis.
|
|
8
6
|
|
|
@@ -15,7 +13,13 @@ def invert_adata(adata):
|
|
|
15
13
|
import numpy as np
|
|
16
14
|
import anndata as ad
|
|
17
15
|
|
|
18
|
-
|
|
16
|
+
# Only run if not already performed
|
|
17
|
+
already = bool(adata.uns.get(uns_flag, False))
|
|
18
|
+
if (already and not force_redo):
|
|
19
|
+
# Inversion already performed (uns_flag is set); nothing to do
|
|
20
|
+
return adata
|
|
21
|
+
|
|
22
|
+
print("Inverting AnnData along the column axis...")
|
|
19
23
|
|
|
20
24
|
# Reverse the order of columns (variables)
|
|
21
25
|
inverted_adata = adata[:, ::-1].copy()
|
|
@@ -26,5 +30,8 @@ def invert_adata(adata):
|
|
|
26
30
|
# Optional: Store original coordinates for reference
|
|
27
31
|
inverted_adata.var["Original_var_names"] = adata.var_names[::-1]
|
|
28
32
|
|
|
29
|
-
|
|
33
|
+
# mark as done
|
|
34
|
+
inverted_adata.uns[uns_flag] = True
|
|
35
|
+
|
|
36
|
+
print("Inversion complete!")
|
|
30
37
|
return inverted_adata
|
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
def load_sample_sheet(adata,
|
|
1
|
+
def load_sample_sheet(adata,
|
|
2
|
+
sample_sheet_path,
|
|
3
|
+
mapping_key_column='obs_names',
|
|
4
|
+
as_category=True,
|
|
5
|
+
uns_flag='sample_sheet_loaded',
|
|
6
|
+
force_reload=True
|
|
7
|
+
):
|
|
2
8
|
"""
|
|
3
9
|
Loads a sample sheet CSV and maps metadata into the AnnData object as categorical columns.
|
|
4
10
|
|
|
@@ -13,7 +19,13 @@ def load_sample_sheet(adata, sample_sheet_path, mapping_key_column='obs_names',
|
|
|
13
19
|
"""
|
|
14
20
|
import pandas as pd
|
|
15
21
|
|
|
16
|
-
|
|
22
|
+
# Only run if not already performed
|
|
23
|
+
already = bool(adata.uns.get(uns_flag, False))
|
|
24
|
+
if already and not force_reload:
|
|
25
|
+
# Sample sheet already loaded (uns_flag is set); nothing to do
|
|
26
|
+
return
|
|
27
|
+
|
|
28
|
+
print('Loading sample sheet...')
|
|
17
29
|
df = pd.read_csv(sample_sheet_path)
|
|
18
30
|
df[mapping_key_column] = df[mapping_key_column].astype(str)
|
|
19
31
|
|
|
@@ -25,7 +37,7 @@ def load_sample_sheet(adata, sample_sheet_path, mapping_key_column='obs_names',
|
|
|
25
37
|
|
|
26
38
|
value_columns = [col for col in df.columns if col != mapping_key_column]
|
|
27
39
|
|
|
28
|
-
print(f'
|
|
40
|
+
print(f'Appending metadata columns: {value_columns}')
|
|
29
41
|
df = df.set_index(mapping_key_column)
|
|
30
42
|
|
|
31
43
|
for col in value_columns:
|
|
@@ -34,5 +46,8 @@ def load_sample_sheet(adata, sample_sheet_path, mapping_key_column='obs_names',
|
|
|
34
46
|
mapped = mapped.astype('category')
|
|
35
47
|
adata.obs[col] = mapped
|
|
36
48
|
|
|
37
|
-
|
|
49
|
+
# mark as done
|
|
50
|
+
adata.uns[uns_flag] = True
|
|
51
|
+
|
|
52
|
+
print('Sample sheet metadata successfully added as categories.' if as_category else 'Metadata added.')
|
|
38
53
|
return adata
|