smftools 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +6 -8
- smftools/_settings.py +4 -6
- smftools/_version.py +1 -1
- smftools/cli/helpers.py +54 -0
- smftools/cli/hmm_adata.py +937 -256
- smftools/cli/load_adata.py +448 -268
- smftools/cli/preprocess_adata.py +469 -263
- smftools/cli/spatial_adata.py +536 -319
- smftools/cli_entry.py +97 -182
- smftools/config/__init__.py +1 -1
- smftools/config/conversion.yaml +17 -6
- smftools/config/deaminase.yaml +12 -10
- smftools/config/default.yaml +142 -33
- smftools/config/direct.yaml +11 -3
- smftools/config/discover_input_files.py +19 -5
- smftools/config/experiment_config.py +594 -264
- smftools/constants.py +37 -0
- smftools/datasets/__init__.py +2 -8
- smftools/datasets/datasets.py +32 -18
- smftools/hmm/HMM.py +2128 -1418
- smftools/hmm/__init__.py +2 -9
- smftools/hmm/archived/call_hmm_peaks.py +121 -0
- smftools/hmm/call_hmm_peaks.py +299 -91
- smftools/hmm/display_hmm.py +19 -6
- smftools/hmm/hmm_readwrite.py +13 -4
- smftools/hmm/nucleosome_hmm_refinement.py +102 -14
- smftools/informatics/__init__.py +30 -7
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +14 -1
- smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
- smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
- smftools/informatics/archived/helpers/archived/load_adata.py +3 -3
- smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +3 -1
- smftools/informatics/archived/print_bam_query_seq.py +7 -1
- smftools/informatics/bam_functions.py +397 -175
- smftools/informatics/basecalling.py +51 -9
- smftools/informatics/bed_functions.py +90 -57
- smftools/informatics/binarize_converted_base_identities.py +18 -7
- smftools/informatics/complement_base_list.py +7 -6
- smftools/informatics/converted_BAM_to_adata.py +265 -122
- smftools/informatics/fasta_functions.py +161 -83
- smftools/informatics/h5ad_functions.py +196 -30
- smftools/informatics/modkit_extract_to_adata.py +609 -270
- smftools/informatics/modkit_functions.py +85 -44
- smftools/informatics/ohe.py +44 -21
- smftools/informatics/pod5_functions.py +112 -73
- smftools/informatics/run_multiqc.py +20 -14
- smftools/logging_utils.py +51 -0
- smftools/machine_learning/__init__.py +2 -7
- smftools/machine_learning/data/anndata_data_module.py +143 -50
- smftools/machine_learning/data/preprocessing.py +2 -1
- smftools/machine_learning/evaluation/__init__.py +1 -1
- smftools/machine_learning/evaluation/eval_utils.py +11 -14
- smftools/machine_learning/evaluation/evaluators.py +46 -33
- smftools/machine_learning/inference/__init__.py +1 -1
- smftools/machine_learning/inference/inference_utils.py +7 -4
- smftools/machine_learning/inference/lightning_inference.py +9 -13
- smftools/machine_learning/inference/sklearn_inference.py +6 -8
- smftools/machine_learning/inference/sliding_window_inference.py +35 -25
- smftools/machine_learning/models/__init__.py +10 -5
- smftools/machine_learning/models/base.py +28 -42
- smftools/machine_learning/models/cnn.py +15 -11
- smftools/machine_learning/models/lightning_base.py +71 -40
- smftools/machine_learning/models/mlp.py +13 -4
- smftools/machine_learning/models/positional.py +3 -2
- smftools/machine_learning/models/rnn.py +3 -2
- smftools/machine_learning/models/sklearn_models.py +39 -22
- smftools/machine_learning/models/transformer.py +68 -53
- smftools/machine_learning/models/wrappers.py +2 -1
- smftools/machine_learning/training/__init__.py +2 -2
- smftools/machine_learning/training/train_lightning_model.py +29 -20
- smftools/machine_learning/training/train_sklearn_model.py +9 -15
- smftools/machine_learning/utils/__init__.py +1 -1
- smftools/machine_learning/utils/device.py +7 -4
- smftools/machine_learning/utils/grl.py +3 -1
- smftools/metadata.py +443 -0
- smftools/plotting/__init__.py +19 -5
- smftools/plotting/autocorrelation_plotting.py +145 -44
- smftools/plotting/classifiers.py +162 -72
- smftools/plotting/general_plotting.py +422 -197
- smftools/plotting/hmm_plotting.py +42 -13
- smftools/plotting/position_stats.py +147 -87
- smftools/plotting/qc_plotting.py +20 -12
- smftools/preprocessing/__init__.py +10 -12
- smftools/preprocessing/append_base_context.py +115 -80
- smftools/preprocessing/append_binary_layer_by_base_context.py +77 -39
- smftools/preprocessing/{calculate_complexity.py → archived/calculate_complexity.py} +3 -1
- smftools/preprocessing/{archives → archived}/preprocessing.py +8 -6
- smftools/preprocessing/binarize.py +21 -4
- smftools/preprocessing/binarize_on_Youden.py +129 -31
- smftools/preprocessing/binary_layers_to_ohe.py +17 -11
- smftools/preprocessing/calculate_complexity_II.py +86 -59
- smftools/preprocessing/calculate_consensus.py +28 -19
- smftools/preprocessing/calculate_coverage.py +50 -25
- smftools/preprocessing/calculate_pairwise_differences.py +2 -1
- smftools/preprocessing/calculate_pairwise_hamming_distances.py +4 -3
- smftools/preprocessing/calculate_position_Youden.py +118 -54
- smftools/preprocessing/calculate_read_length_stats.py +52 -23
- smftools/preprocessing/calculate_read_modification_stats.py +91 -57
- smftools/preprocessing/clean_NaN.py +38 -28
- smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +71 -38
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +181 -73
- smftools/preprocessing/flag_duplicate_reads.py +689 -272
- smftools/preprocessing/invert_adata.py +26 -11
- smftools/preprocessing/load_sample_sheet.py +40 -22
- smftools/preprocessing/make_dirs.py +8 -3
- smftools/preprocessing/min_non_diagonal.py +2 -1
- smftools/preprocessing/recipes.py +56 -23
- smftools/preprocessing/reindex_references_adata.py +103 -0
- smftools/preprocessing/subsample_adata.py +33 -16
- smftools/readwrite.py +331 -82
- smftools/schema/__init__.py +11 -0
- smftools/schema/anndata_schema_v1.yaml +227 -0
- smftools/tools/__init__.py +3 -4
- smftools/tools/archived/classifiers.py +163 -0
- smftools/tools/archived/subset_adata_v1.py +10 -1
- smftools/tools/archived/subset_adata_v2.py +12 -1
- smftools/tools/calculate_umap.py +54 -15
- smftools/tools/cluster_adata_on_methylation.py +115 -46
- smftools/tools/general_tools.py +70 -25
- smftools/tools/position_stats.py +229 -98
- smftools/tools/read_stats.py +50 -29
- smftools/tools/spatial_autocorrelation.py +365 -192
- smftools/tools/subset_adata.py +23 -21
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/METADATA +17 -39
- smftools-0.2.5.dist-info/RECORD +181 -0
- smftools-0.2.3.dist-info/RECORD +0 -173
- /smftools/cli/{cli_flows.py → archived/cli_flows.py} +0 -0
- /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
- /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
- /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
- /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archived/add_read_length_and_mapping_qc.py} +0 -0
- /smftools/preprocessing/{archives → archived}/mark_duplicates.py +0 -0
- /smftools/preprocessing/{archives → archived}/remove_duplicates.py +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/WHEEL +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/entry_points.txt +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/licenses/LICENSE +0 -0
smftools/__init__.py
CHANGED
|
@@ -2,17 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import warnings
|
|
5
|
+
from importlib.metadata import version
|
|
5
6
|
|
|
7
|
+
from . import cli, config, datasets, hmm
|
|
6
8
|
from . import informatics as inform
|
|
7
9
|
from . import machine_learning as ml
|
|
8
10
|
from . import plotting as pl
|
|
9
11
|
from . import preprocessing as pp
|
|
10
12
|
from . import tools as tl
|
|
11
|
-
|
|
12
|
-
from . import cli, config, datasets, hmm
|
|
13
|
-
from .readwrite import adata_to_df, safe_write_h5ad, safe_read_h5ad, merge_barcoded_anndatas_core
|
|
14
|
-
|
|
15
|
-
from importlib.metadata import version
|
|
13
|
+
from .readwrite import adata_to_df, merge_barcoded_anndatas_core, safe_read_h5ad, safe_write_h5ad
|
|
16
14
|
|
|
17
15
|
package_name = "smftools"
|
|
18
16
|
__version__ = version(package_name)
|
|
@@ -24,7 +22,7 @@ __all__ = [
|
|
|
24
22
|
"pp",
|
|
25
23
|
"tl",
|
|
26
24
|
"pl",
|
|
27
|
-
"datasets"
|
|
25
|
+
"datasets",
|
|
28
26
|
"safe_write_h5ad",
|
|
29
|
-
"safe_read_h5ad"
|
|
30
|
-
]
|
|
27
|
+
"safe_read_h5ad",
|
|
28
|
+
]
|
smftools/_settings.py
CHANGED
|
@@ -1,20 +1,18 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from typing import Union
|
|
3
3
|
|
|
4
|
+
|
|
4
5
|
class SMFConfig:
|
|
5
6
|
"""\
|
|
6
7
|
Config for smftools.
|
|
7
8
|
"""
|
|
8
9
|
|
|
9
|
-
def __init__(
|
|
10
|
-
self,
|
|
11
|
-
*,
|
|
12
|
-
datasetdir: Union[Path, str] = "./datasets/"
|
|
13
|
-
):
|
|
10
|
+
def __init__(self, *, datasetdir: Union[Path, str] = "./datasets/"):
|
|
14
11
|
self._datasetdir = Path(datasetdir) if isinstance(datasetdir, str) else datasetdir
|
|
15
12
|
|
|
16
13
|
@property
|
|
17
14
|
def datasetdir(self) -> Path:
|
|
18
15
|
return self._datasetdir
|
|
19
16
|
|
|
20
|
-
|
|
17
|
+
|
|
18
|
+
settings = SMFConfig()
|
smftools/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.2.3"
|
|
1
|
+
__version__ = "0.2.5"
|
smftools/cli/helpers.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import anndata as ad
|
|
5
|
+
|
|
6
|
+
from ..metadata import write_runtime_schema_yaml
|
|
7
|
+
from ..readwrite import safe_write_h5ad
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class AdataPaths:
|
|
12
|
+
raw: Path
|
|
13
|
+
pp: Path
|
|
14
|
+
pp_dedup: Path
|
|
15
|
+
spatial: Path
|
|
16
|
+
hmm: Path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_adata_paths(cfg) -> AdataPaths:
|
|
20
|
+
"""
|
|
21
|
+
Central helper: given cfg, compute all standard AnnData paths.
|
|
22
|
+
"""
|
|
23
|
+
h5_dir = Path(cfg.output_directory) / "h5ads"
|
|
24
|
+
|
|
25
|
+
raw = h5_dir / f"{cfg.experiment_name}.h5ad.gz"
|
|
26
|
+
|
|
27
|
+
pp = h5_dir / f"{cfg.experiment_name}_preprocessed.h5ad.gz"
|
|
28
|
+
|
|
29
|
+
if cfg.smf_modality == "direct":
|
|
30
|
+
# direct SMF: duplicate-removed path is just preprocessed path
|
|
31
|
+
pp_dedup = pp
|
|
32
|
+
else:
|
|
33
|
+
pp_dedup = h5_dir / f"{cfg.experiment_name}_preprocessed_duplicates_removed.h5ad.gz"
|
|
34
|
+
|
|
35
|
+
pp_dedup_base = pp_dedup.name.removesuffix(".h5ad.gz")
|
|
36
|
+
|
|
37
|
+
spatial = h5_dir / f"{pp_dedup_base}_spatial.h5ad.gz"
|
|
38
|
+
hmm = h5_dir / f"{pp_dedup_base}_spatial_hmm.h5ad.gz"
|
|
39
|
+
|
|
40
|
+
return AdataPaths(
|
|
41
|
+
raw=raw,
|
|
42
|
+
pp=pp,
|
|
43
|
+
pp_dedup=pp_dedup,
|
|
44
|
+
spatial=spatial,
|
|
45
|
+
hmm=hmm,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def write_gz_h5ad(adata: ad.AnnData, path: Path) -> Path:
|
|
50
|
+
if path.suffix != ".gz":
|
|
51
|
+
path = path.with_name(path.name + ".gz")
|
|
52
|
+
safe_write_h5ad(adata, path, compression="gzip", backup=True)
|
|
53
|
+
write_runtime_schema_yaml(adata, path, step_name="runtime")
|
|
54
|
+
return path
|