smftools 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +39 -7
- smftools/_settings.py +2 -0
- smftools/_version.py +3 -1
- smftools/cli/__init__.py +1 -0
- smftools/cli/archived/cli_flows.py +2 -0
- smftools/cli/helpers.py +2 -0
- smftools/cli/hmm_adata.py +7 -2
- smftools/cli/load_adata.py +130 -98
- smftools/cli/preprocess_adata.py +2 -0
- smftools/cli/spatial_adata.py +5 -1
- smftools/cli_entry.py +26 -1
- smftools/config/__init__.py +2 -0
- smftools/config/default.yaml +4 -1
- smftools/config/experiment_config.py +6 -0
- smftools/datasets/__init__.py +2 -0
- smftools/hmm/HMM.py +9 -3
- smftools/hmm/__init__.py +24 -13
- smftools/hmm/archived/apply_hmm_batched.py +2 -0
- smftools/hmm/archived/calculate_distances.py +2 -0
- smftools/hmm/archived/call_hmm_peaks.py +2 -0
- smftools/hmm/archived/train_hmm.py +2 -0
- smftools/hmm/call_hmm_peaks.py +5 -2
- smftools/hmm/display_hmm.py +4 -1
- smftools/hmm/hmm_readwrite.py +7 -2
- smftools/hmm/nucleosome_hmm_refinement.py +2 -0
- smftools/informatics/__init__.py +53 -34
- smftools/informatics/archived/bam_conversion.py +2 -0
- smftools/informatics/archived/bam_direct.py +2 -0
- smftools/informatics/archived/basecall_pod5s.py +2 -0
- smftools/informatics/archived/basecalls_to_adata.py +2 -0
- smftools/informatics/archived/conversion_smf.py +2 -0
- smftools/informatics/archived/deaminase_smf.py +1 -0
- smftools/informatics/archived/direct_smf.py +2 -0
- smftools/informatics/archived/fast5_to_pod5.py +2 -0
- smftools/informatics/archived/helpers/archived/__init__.py +2 -0
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
- smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
- smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
- smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
- smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
- smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
- smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
- smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
- smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
- smftools/informatics/archived/helpers/archived/informatics.py +2 -0
- smftools/informatics/archived/helpers/archived/load_adata.py +2 -0
- smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
- smftools/informatics/archived/helpers/archived/modQC.py +2 -0
- smftools/informatics/archived/helpers/archived/modcall.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
- smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +2 -0
- smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
- smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
- smftools/informatics/archived/print_bam_query_seq.py +2 -0
- smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
- smftools/informatics/archived/subsample_pod5.py +2 -0
- smftools/informatics/bam_functions.py +737 -170
- smftools/informatics/basecalling.py +2 -0
- smftools/informatics/bed_functions.py +271 -61
- smftools/informatics/binarize_converted_base_identities.py +3 -0
- smftools/informatics/complement_base_list.py +2 -0
- smftools/informatics/converted_BAM_to_adata.py +66 -22
- smftools/informatics/fasta_functions.py +94 -10
- smftools/informatics/h5ad_functions.py +8 -2
- smftools/informatics/modkit_extract_to_adata.py +16 -6
- smftools/informatics/modkit_functions.py +2 -0
- smftools/informatics/ohe.py +2 -0
- smftools/informatics/pod5_functions.py +3 -2
- smftools/machine_learning/__init__.py +22 -6
- smftools/machine_learning/data/__init__.py +2 -0
- smftools/machine_learning/data/anndata_data_module.py +18 -4
- smftools/machine_learning/data/preprocessing.py +2 -0
- smftools/machine_learning/evaluation/__init__.py +2 -0
- smftools/machine_learning/evaluation/eval_utils.py +2 -0
- smftools/machine_learning/evaluation/evaluators.py +14 -9
- smftools/machine_learning/inference/__init__.py +2 -0
- smftools/machine_learning/inference/inference_utils.py +2 -0
- smftools/machine_learning/inference/lightning_inference.py +6 -1
- smftools/machine_learning/inference/sklearn_inference.py +2 -0
- smftools/machine_learning/inference/sliding_window_inference.py +2 -0
- smftools/machine_learning/models/__init__.py +2 -0
- smftools/machine_learning/models/base.py +7 -2
- smftools/machine_learning/models/cnn.py +7 -2
- smftools/machine_learning/models/lightning_base.py +16 -11
- smftools/machine_learning/models/mlp.py +5 -1
- smftools/machine_learning/models/positional.py +7 -2
- smftools/machine_learning/models/rnn.py +5 -1
- smftools/machine_learning/models/sklearn_models.py +14 -9
- smftools/machine_learning/models/transformer.py +7 -2
- smftools/machine_learning/models/wrappers.py +6 -2
- smftools/machine_learning/training/__init__.py +2 -0
- smftools/machine_learning/training/train_lightning_model.py +13 -3
- smftools/machine_learning/training/train_sklearn_model.py +2 -0
- smftools/machine_learning/utils/__init__.py +2 -0
- smftools/machine_learning/utils/device.py +5 -1
- smftools/machine_learning/utils/grl.py +5 -1
- smftools/optional_imports.py +31 -0
- smftools/plotting/__init__.py +32 -31
- smftools/plotting/autocorrelation_plotting.py +9 -5
- smftools/plotting/classifiers.py +16 -4
- smftools/plotting/general_plotting.py +6 -3
- smftools/plotting/hmm_plotting.py +12 -2
- smftools/plotting/position_stats.py +15 -7
- smftools/plotting/qc_plotting.py +6 -1
- smftools/preprocessing/__init__.py +35 -37
- smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +2 -0
- smftools/preprocessing/archived/calculate_complexity.py +2 -0
- smftools/preprocessing/archived/mark_duplicates.py +2 -0
- smftools/preprocessing/archived/preprocessing.py +2 -0
- smftools/preprocessing/archived/remove_duplicates.py +2 -0
- smftools/preprocessing/binary_layers_to_ohe.py +2 -1
- smftools/preprocessing/calculate_complexity_II.py +4 -1
- smftools/preprocessing/calculate_pairwise_differences.py +2 -0
- smftools/preprocessing/calculate_pairwise_hamming_distances.py +3 -0
- smftools/preprocessing/calculate_position_Youden.py +9 -2
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +2 -0
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +2 -0
- smftools/preprocessing/flag_duplicate_reads.py +42 -54
- smftools/preprocessing/make_dirs.py +2 -1
- smftools/preprocessing/min_non_diagonal.py +2 -0
- smftools/preprocessing/recipes.py +2 -0
- smftools/tools/__init__.py +26 -18
- smftools/tools/archived/apply_hmm.py +2 -0
- smftools/tools/archived/classifiers.py +2 -0
- smftools/tools/archived/classify_methylated_features.py +2 -0
- smftools/tools/archived/classify_non_methylated_features.py +2 -0
- smftools/tools/archived/subset_adata_v1.py +2 -0
- smftools/tools/archived/subset_adata_v2.py +2 -0
- smftools/tools/calculate_umap.py +3 -1
- smftools/tools/cluster_adata_on_methylation.py +7 -1
- smftools/tools/position_stats.py +17 -27
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/METADATA +67 -33
- smftools-0.3.0.dist-info/RECORD +182 -0
- smftools-0.2.5.dist-info/RECORD +0 -181
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/WHEEL +0 -0
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/entry_points.txt +0 -0
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,51 +1,64 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
# duplicate_detection_with_hier_and_plots.py
|
|
2
4
|
import copy
|
|
3
5
|
import math
|
|
4
6
|
import os
|
|
5
7
|
import warnings
|
|
6
8
|
from collections import defaultdict
|
|
7
|
-
from
|
|
9
|
+
from importlib.util import find_spec
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, Union
|
|
8
11
|
|
|
9
|
-
import anndata as ad
|
|
10
|
-
import matplotlib.pyplot as plt
|
|
11
12
|
import numpy as np
|
|
12
13
|
import pandas as pd
|
|
13
|
-
import
|
|
14
|
+
from scipy.cluster import hierarchy as sch
|
|
15
|
+
from scipy.spatial.distance import pdist, squareform
|
|
16
|
+
from scipy.stats import gaussian_kde
|
|
14
17
|
|
|
15
18
|
from smftools.logging_utils import get_logger
|
|
19
|
+
from smftools.optional_imports import require
|
|
16
20
|
|
|
17
21
|
from ..readwrite import make_dirs
|
|
18
22
|
|
|
19
23
|
logger = get_logger(__name__)
|
|
20
24
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
from scipy.cluster import hierarchy as sch
|
|
24
|
-
from scipy.spatial.distance import pdist, squareform
|
|
25
|
-
|
|
26
|
-
SCIPY_AVAILABLE = True
|
|
27
|
-
except Exception:
|
|
28
|
-
sch = None
|
|
29
|
-
pdist = None
|
|
30
|
-
squareform = None
|
|
31
|
-
SCIPY_AVAILABLE = False
|
|
25
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="duplicate read plots")
|
|
26
|
+
torch = require("torch", extra="torch", purpose="duplicate read detection")
|
|
32
27
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
from sklearn.decomposition import PCA
|
|
36
|
-
from sklearn.metrics import silhouette_score
|
|
37
|
-
from sklearn.mixture import GaussianMixture
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
import anndata as ad
|
|
38
30
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
PCA = None
|
|
42
|
-
KMeans = DBSCAN = GaussianMixture = silhouette_score = None
|
|
43
|
-
SKLEARN_AVAILABLE = False
|
|
31
|
+
SCIPY_AVAILABLE = True
|
|
32
|
+
SKLEARN_AVAILABLE = find_spec("sklearn") is not None
|
|
44
33
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
34
|
+
PCA = None
|
|
35
|
+
KMeans = DBSCAN = GaussianMixture = silhouette_score = None
|
|
36
|
+
if SKLEARN_AVAILABLE:
|
|
37
|
+
sklearn_cluster = require(
|
|
38
|
+
"sklearn.cluster",
|
|
39
|
+
extra="ml-base",
|
|
40
|
+
purpose="duplicate read clustering",
|
|
41
|
+
)
|
|
42
|
+
sklearn_decomp = require(
|
|
43
|
+
"sklearn.decomposition",
|
|
44
|
+
extra="ml-base",
|
|
45
|
+
purpose="duplicate read PCA",
|
|
46
|
+
)
|
|
47
|
+
sklearn_metrics = require(
|
|
48
|
+
"sklearn.metrics",
|
|
49
|
+
extra="ml-base",
|
|
50
|
+
purpose="duplicate read clustering diagnostics",
|
|
51
|
+
)
|
|
52
|
+
sklearn_mixture = require(
|
|
53
|
+
"sklearn.mixture",
|
|
54
|
+
extra="ml-base",
|
|
55
|
+
purpose="duplicate read clustering",
|
|
56
|
+
)
|
|
57
|
+
DBSCAN = sklearn_cluster.DBSCAN
|
|
58
|
+
KMeans = sklearn_cluster.KMeans
|
|
59
|
+
PCA = sklearn_decomp.PCA
|
|
60
|
+
silhouette_score = sklearn_metrics.silhouette_score
|
|
61
|
+
GaussianMixture = sklearn_mixture.GaussianMixture
|
|
49
62
|
|
|
50
63
|
|
|
51
64
|
def merge_uns_preserve(orig_uns: dict, new_uns: dict, prefer: str = "orig") -> dict:
|
|
@@ -153,24 +166,6 @@ def flag_duplicate_reads(
|
|
|
153
166
|
import numpy as np
|
|
154
167
|
import pandas as pd
|
|
155
168
|
|
|
156
|
-
# optional imports already guarded at module import time, but re-check
|
|
157
|
-
try:
|
|
158
|
-
from scipy.cluster import hierarchy as sch
|
|
159
|
-
from scipy.spatial.distance import pdist
|
|
160
|
-
|
|
161
|
-
SCIPY_AVAILABLE = True
|
|
162
|
-
except Exception:
|
|
163
|
-
sch = None
|
|
164
|
-
pdist = None
|
|
165
|
-
SCIPY_AVAILABLE = False
|
|
166
|
-
try:
|
|
167
|
-
from sklearn.decomposition import PCA
|
|
168
|
-
|
|
169
|
-
SKLEARN_AVAILABLE = True
|
|
170
|
-
except Exception:
|
|
171
|
-
PCA = None
|
|
172
|
-
SKLEARN_AVAILABLE = False
|
|
173
|
-
|
|
174
169
|
# -------- helper: demux-aware keeper selection --------
|
|
175
170
|
def _choose_keeper_with_demux_preference(
|
|
176
171
|
members_idx: List[int],
|
|
@@ -1577,13 +1572,6 @@ def _run_clustering(
|
|
|
1577
1572
|
Run clustering on 2D points (x,y). Returns labels (len = npoints) and diagnostics dict.
|
|
1578
1573
|
Labels follow sklearn conventions (noise -> -1 for DBSCAN/HDBSCAN).
|
|
1579
1574
|
"""
|
|
1580
|
-
try:
|
|
1581
|
-
from sklearn.cluster import DBSCAN, KMeans
|
|
1582
|
-
from sklearn.metrics import silhouette_score
|
|
1583
|
-
from sklearn.mixture import GaussianMixture
|
|
1584
|
-
except Exception:
|
|
1585
|
-
KMeans = DBSCAN = GaussianMixture = silhouette_score = None
|
|
1586
|
-
|
|
1587
1575
|
pts = np.column_stack([x, y])
|
|
1588
1576
|
diagnostics: Dict[str, Any] = {"method": method, "n_input": len(x)}
|
|
1589
1577
|
if len(x) < min_points:
|
smftools/tools/__init__.py
CHANGED
|
@@ -1,19 +1,27 @@
|
|
|
1
|
-
from
|
|
2
|
-
from .cluster_adata_on_methylation import cluster_adata_on_methylation
|
|
3
|
-
from .general_tools import combine_layers, create_nan_mask_from_X, create_nan_or_non_gpc_mask
|
|
4
|
-
from .position_stats import calculate_relative_risk_on_activity, compute_positionwise_statistics
|
|
5
|
-
from .read_stats import calculate_row_entropy
|
|
6
|
-
from .spatial_autocorrelation import *
|
|
7
|
-
from .subset_adata import subset_adata
|
|
1
|
+
from __future__ import annotations
|
|
8
2
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
"calculate_umap",
|
|
13
|
-
"
|
|
14
|
-
"
|
|
15
|
-
"create_nan_mask_from_X",
|
|
16
|
-
"create_nan_or_non_gpc_mask",
|
|
17
|
-
"
|
|
18
|
-
"
|
|
19
|
-
|
|
3
|
+
from importlib import import_module
|
|
4
|
+
|
|
5
|
+
_LAZY_ATTRS = {
|
|
6
|
+
"calculate_umap": "smftools.tools.calculate_umap",
|
|
7
|
+
"cluster_adata_on_methylation": "smftools.tools.cluster_adata_on_methylation",
|
|
8
|
+
"combine_layers": "smftools.tools.general_tools",
|
|
9
|
+
"create_nan_mask_from_X": "smftools.tools.general_tools",
|
|
10
|
+
"create_nan_or_non_gpc_mask": "smftools.tools.general_tools",
|
|
11
|
+
"calculate_relative_risk_on_activity": "smftools.tools.position_stats",
|
|
12
|
+
"compute_positionwise_statistics": "smftools.tools.position_stats",
|
|
13
|
+
"calculate_row_entropy": "smftools.tools.read_stats",
|
|
14
|
+
"subset_adata": "smftools.tools.subset_adata",
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def __getattr__(name: str):
|
|
19
|
+
if name in _LAZY_ATTRS:
|
|
20
|
+
module = import_module(_LAZY_ATTRS[name])
|
|
21
|
+
attr = getattr(module, name)
|
|
22
|
+
globals()[name] = attr
|
|
23
|
+
return attr
|
|
24
|
+
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
__all__ = list(_LAZY_ATTRS.keys())
|
smftools/tools/calculate_umap.py
CHANGED
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from typing import TYPE_CHECKING, Sequence
|
|
4
4
|
|
|
5
5
|
from smftools.logging_utils import get_logger
|
|
6
|
+
from smftools.optional_imports import require
|
|
6
7
|
|
|
7
8
|
if TYPE_CHECKING:
|
|
8
9
|
import anndata as ad
|
|
@@ -36,7 +37,8 @@ def calculate_umap(
|
|
|
36
37
|
import os
|
|
37
38
|
|
|
38
39
|
import numpy as np
|
|
39
|
-
|
|
40
|
+
|
|
41
|
+
sc = require("scanpy", extra="scanpy", purpose="UMAP calculation")
|
|
40
42
|
from scipy.sparse import issparse
|
|
41
43
|
|
|
42
44
|
os.environ["OMP_NUM_THREADS"] = str(threads)
|
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
from typing import TYPE_CHECKING, Sequence
|
|
5
5
|
|
|
6
6
|
from smftools.logging_utils import get_logger
|
|
7
|
+
from smftools.optional_imports import require
|
|
7
8
|
|
|
8
9
|
if TYPE_CHECKING:
|
|
9
10
|
import anndata as ad
|
|
@@ -109,7 +110,12 @@ def cluster_adata_on_methylation(
|
|
|
109
110
|
)
|
|
110
111
|
elif method == "kmeans":
|
|
111
112
|
try:
|
|
112
|
-
|
|
113
|
+
sklearn_cluster = require(
|
|
114
|
+
"sklearn.cluster",
|
|
115
|
+
extra="ml-base",
|
|
116
|
+
purpose="k-means clustering",
|
|
117
|
+
)
|
|
118
|
+
KMeans = sklearn_cluster.KMeans
|
|
113
119
|
|
|
114
120
|
kmeans = KMeans(n_clusters=n_clusters)
|
|
115
121
|
kmeans.fit(site_subset.layers[layer])
|
smftools/tools/position_stats.py
CHANGED
|
@@ -1,41 +1,26 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import os
|
|
3
4
|
import warnings
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from itertools import cycle
|
|
4
7
|
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple
|
|
5
8
|
|
|
6
|
-
if TYPE_CHECKING:
|
|
7
|
-
import anndata as ad
|
|
8
|
-
|
|
9
|
-
import matplotlib.pyplot as plt
|
|
10
9
|
import numpy as np
|
|
11
10
|
import pandas as pd
|
|
11
|
+
from scipy.stats import chi2_contingency
|
|
12
|
+
from tqdm import tqdm
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
try:
|
|
15
|
-
from joblib import Parallel, delayed
|
|
16
|
-
|
|
17
|
-
JOBLIB_AVAILABLE = True
|
|
18
|
-
except Exception:
|
|
19
|
-
JOBLIB_AVAILABLE = False
|
|
14
|
+
from smftools.optional_imports import require
|
|
20
15
|
|
|
21
|
-
|
|
22
|
-
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
import anndata as ad
|
|
23
18
|
|
|
24
|
-
|
|
25
|
-
except Exception:
|
|
26
|
-
SCIPY_STATS_AVAILABLE = False
|
|
19
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="position stats plots")
|
|
27
20
|
|
|
28
21
|
# -----------------------------
|
|
29
22
|
# Compute positionwise statistic (multi-method + simple site_types)
|
|
30
23
|
# -----------------------------
|
|
31
|
-
import os
|
|
32
|
-
from contextlib import contextmanager
|
|
33
|
-
from itertools import cycle
|
|
34
|
-
|
|
35
|
-
import joblib
|
|
36
|
-
from joblib import Parallel, cpu_count, delayed
|
|
37
|
-
from scipy.stats import chi2_contingency
|
|
38
|
-
from tqdm import tqdm
|
|
39
24
|
|
|
40
25
|
|
|
41
26
|
# ------------------------- Utilities -------------------------
|
|
@@ -197,6 +182,8 @@ def calculate_relative_risk_on_activity(
|
|
|
197
182
|
@contextmanager
|
|
198
183
|
def tqdm_joblib(tqdm_object: tqdm):
|
|
199
184
|
"""Context manager to patch joblib to update a tqdm progress bar."""
|
|
185
|
+
joblib = require("joblib", extra="ml-base", purpose="parallel position statistics")
|
|
186
|
+
|
|
200
187
|
old = joblib.parallel.BatchCompletionCallBack
|
|
201
188
|
|
|
202
189
|
class TqdmBatchCompletionCallback(old): # type: ignore
|
|
@@ -315,6 +302,8 @@ def compute_positionwise_statistics(
|
|
|
315
302
|
max_threads: Maximum number of threads.
|
|
316
303
|
reverse_indices_on_store: Whether to reverse indices on output storage.
|
|
317
304
|
"""
|
|
305
|
+
joblib = require("joblib", extra="ml-base", purpose="parallel position statistics")
|
|
306
|
+
|
|
318
307
|
if isinstance(methods, str):
|
|
319
308
|
methods = [methods]
|
|
320
309
|
methods = [m.lower() for m in methods]
|
|
@@ -325,7 +314,7 @@ def compute_positionwise_statistics(
|
|
|
325
314
|
|
|
326
315
|
# workers
|
|
327
316
|
if max_threads is None or max_threads <= 0:
|
|
328
|
-
n_jobs = max(1, cpu_count() or 1)
|
|
317
|
+
n_jobs = max(1, joblib.cpu_count() or 1)
|
|
329
318
|
else:
|
|
330
319
|
n_jobs = max(1, int(max_threads))
|
|
331
320
|
|
|
@@ -439,13 +428,14 @@ def compute_positionwise_statistics(
|
|
|
439
428
|
worker = _relative_risk_row_job
|
|
440
429
|
out = np.full((n_pos, n_pos), np.nan, dtype=float)
|
|
441
430
|
tasks = (
|
|
442
|
-
delayed(worker)(i, X_bin, min_count_for_pairwise)
|
|
431
|
+
joblib.delayed(worker)(i, X_bin, min_count_for_pairwise)
|
|
432
|
+
for i in range(n_pos)
|
|
443
433
|
)
|
|
444
434
|
pbar_rows = tqdm(
|
|
445
435
|
total=n_pos, desc=f"{m}: rows ({sample}__{ref})", leave=False
|
|
446
436
|
)
|
|
447
437
|
with tqdm_joblib(pbar_rows):
|
|
448
|
-
results = Parallel(n_jobs=n_jobs, prefer="processes")(tasks)
|
|
438
|
+
results = joblib.Parallel(n_jobs=n_jobs, prefer="processes")(tasks)
|
|
449
439
|
pbar_rows.close()
|
|
450
440
|
for i, row in results:
|
|
451
441
|
out[int(i), :] = row
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: smftools
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Single Molecule Footprinting Analysis in Python.
|
|
5
5
|
Project-URL: Source, https://github.com/jkmckenna/smftools
|
|
6
6
|
Project-URL: Documentation, https://smftools.readthedocs.io/
|
|
@@ -29,7 +29,7 @@ License: MIT License
|
|
|
29
29
|
SOFTWARE.
|
|
30
30
|
License-File: LICENSE
|
|
31
31
|
Keywords: anndata,chromatin-accessibility,machine-learning,nanopore,protein-dna-binding,single-locus,single-molecule-footprinting
|
|
32
|
-
Classifier: Development Status ::
|
|
32
|
+
Classifier: Development Status :: 3 - Alpha
|
|
33
33
|
Classifier: Environment :: Console
|
|
34
34
|
Classifier: Intended Audience :: Developers
|
|
35
35
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -41,38 +41,41 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
41
41
|
Classifier: Programming Language :: Python :: 3.11
|
|
42
42
|
Classifier: Programming Language :: Python :: 3.12
|
|
43
43
|
Classifier: Programming Language :: Python :: 3.13
|
|
44
|
-
Classifier: Programming Language :: Python :: 3.14
|
|
45
44
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
46
45
|
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
47
46
|
Requires-Python: >=3.10
|
|
48
47
|
Requires-Dist: anndata>=0.10.0
|
|
49
48
|
Requires-Dist: biopython>=1.79
|
|
50
|
-
Requires-Dist: captum
|
|
51
49
|
Requires-Dist: click
|
|
52
|
-
Requires-Dist: fastcluster
|
|
53
|
-
Requires-Dist: hydra-core
|
|
54
|
-
Requires-Dist: igraph
|
|
55
|
-
Requires-Dist: leidenalg
|
|
56
|
-
Requires-Dist: lightning
|
|
57
|
-
Requires-Dist: multiqc
|
|
58
|
-
Requires-Dist: networkx>=3.2
|
|
59
50
|
Requires-Dist: numpy<2,>=1.22.0
|
|
60
|
-
Requires-Dist: omegaconf
|
|
61
51
|
Requires-Dist: pandas>=1.4.2
|
|
62
|
-
Requires-Dist: pod5>=0.1.21
|
|
63
|
-
Requires-Dist: pybedtools>=0.12.0
|
|
64
|
-
Requires-Dist: pybigwig>=0.3.24
|
|
65
|
-
Requires-Dist: pyfaidx>=0.8.0
|
|
66
|
-
Requires-Dist: pysam>=0.19.1
|
|
67
|
-
Requires-Dist: scanpy>=1.9
|
|
68
|
-
Requires-Dist: scikit-learn>=1.0.2
|
|
69
52
|
Requires-Dist: scipy>=1.7.3
|
|
70
|
-
Requires-Dist: seaborn>=0.11
|
|
71
|
-
Requires-Dist: shap
|
|
72
|
-
Requires-Dist: torch>=1.9.0
|
|
73
53
|
Requires-Dist: tqdm
|
|
74
|
-
|
|
75
|
-
Requires-Dist:
|
|
54
|
+
Provides-Extra: all
|
|
55
|
+
Requires-Dist: captum; extra == 'all'
|
|
56
|
+
Requires-Dist: fastcluster; extra == 'all'
|
|
57
|
+
Requires-Dist: hydra-core; extra == 'all'
|
|
58
|
+
Requires-Dist: igraph; extra == 'all'
|
|
59
|
+
Requires-Dist: leidenalg; extra == 'all'
|
|
60
|
+
Requires-Dist: lightning; extra == 'all'
|
|
61
|
+
Requires-Dist: matplotlib>=3.6.2; extra == 'all'
|
|
62
|
+
Requires-Dist: multiqc; extra == 'all'
|
|
63
|
+
Requires-Dist: networkx>=3.2; extra == 'all'
|
|
64
|
+
Requires-Dist: omegaconf; extra == 'all'
|
|
65
|
+
Requires-Dist: pod5>=0.1.21; extra == 'all'
|
|
66
|
+
Requires-Dist: pybedtools>=0.12.0; extra == 'all'
|
|
67
|
+
Requires-Dist: pybigwig>=0.3.24; extra == 'all'
|
|
68
|
+
Requires-Dist: pysam>=0.19.1; extra == 'all'
|
|
69
|
+
Requires-Dist: scanpy>=1.9; extra == 'all'
|
|
70
|
+
Requires-Dist: scikit-learn>=1.0.2; extra == 'all'
|
|
71
|
+
Requires-Dist: seaborn>=0.11; extra == 'all'
|
|
72
|
+
Requires-Dist: shap; extra == 'all'
|
|
73
|
+
Requires-Dist: torch>=1.9.0; extra == 'all'
|
|
74
|
+
Requires-Dist: upsetplot; extra == 'all'
|
|
75
|
+
Requires-Dist: wandb; extra == 'all'
|
|
76
|
+
Provides-Extra: cluster
|
|
77
|
+
Requires-Dist: fastcluster; extra == 'cluster'
|
|
78
|
+
Requires-Dist: leidenalg; extra == 'cluster'
|
|
76
79
|
Provides-Extra: dev
|
|
77
80
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
78
81
|
Requires-Dist: pytest; extra == 'dev'
|
|
@@ -81,20 +84,51 @@ Requires-Dist: ruff; extra == 'dev'
|
|
|
81
84
|
Provides-Extra: docs
|
|
82
85
|
Requires-Dist: ipython>=7.20; extra == 'docs'
|
|
83
86
|
Requires-Dist: matplotlib!=3.6.1; extra == 'docs'
|
|
84
|
-
Requires-Dist: myst-nb
|
|
85
|
-
Requires-Dist: myst-parser
|
|
87
|
+
Requires-Dist: myst-nb<2,>=1; extra == 'docs'
|
|
88
|
+
Requires-Dist: myst-parser<3,>=2; extra == 'docs'
|
|
86
89
|
Requires-Dist: nbsphinx>=0.9; extra == 'docs'
|
|
87
90
|
Requires-Dist: pyyaml; extra == 'docs'
|
|
88
91
|
Requires-Dist: readthedocs-sphinx-search; extra == 'docs'
|
|
89
92
|
Requires-Dist: setuptools; extra == 'docs'
|
|
90
|
-
Requires-Dist: sphinx-autodoc-typehints
|
|
91
|
-
Requires-Dist: sphinx-book-theme
|
|
92
|
-
Requires-Dist: sphinx-click; extra == 'docs'
|
|
93
|
-
Requires-Dist: sphinx-copybutton; extra == 'docs'
|
|
93
|
+
Requires-Dist: sphinx-autodoc-typehints<4,>=1.25.2; extra == 'docs'
|
|
94
|
+
Requires-Dist: sphinx-book-theme<2,>=1.1; extra == 'docs'
|
|
95
|
+
Requires-Dist: sphinx-click<7,>=5; extra == 'docs'
|
|
96
|
+
Requires-Dist: sphinx-copybutton<0.6,>=0.5; extra == 'docs'
|
|
94
97
|
Requires-Dist: sphinx-design; extra == 'docs'
|
|
95
|
-
Requires-Dist: sphinx
|
|
96
|
-
Requires-Dist: sphinxcontrib-bibtex; extra == 'docs'
|
|
97
|
-
Requires-Dist: sphinxext-opengraph; extra == 'docs'
|
|
98
|
+
Requires-Dist: sphinx<8,>=7; extra == 'docs'
|
|
99
|
+
Requires-Dist: sphinxcontrib-bibtex<3,>=2; extra == 'docs'
|
|
100
|
+
Requires-Dist: sphinxext-opengraph<0.10,>=0.9; extra == 'docs'
|
|
101
|
+
Provides-Extra: misc
|
|
102
|
+
Requires-Dist: networkx>=3.2; extra == 'misc'
|
|
103
|
+
Requires-Dist: upsetplot; extra == 'misc'
|
|
104
|
+
Provides-Extra: ml-base
|
|
105
|
+
Requires-Dist: scikit-learn>=1.0.2; extra == 'ml-base'
|
|
106
|
+
Requires-Dist: torch>=1.9.0; extra == 'ml-base'
|
|
107
|
+
Provides-Extra: ml-extended
|
|
108
|
+
Requires-Dist: captum; extra == 'ml-extended'
|
|
109
|
+
Requires-Dist: hydra-core; extra == 'ml-extended'
|
|
110
|
+
Requires-Dist: lightning; extra == 'ml-extended'
|
|
111
|
+
Requires-Dist: omegaconf; extra == 'ml-extended'
|
|
112
|
+
Requires-Dist: shap; extra == 'ml-extended'
|
|
113
|
+
Requires-Dist: wandb; extra == 'ml-extended'
|
|
114
|
+
Provides-Extra: ont
|
|
115
|
+
Requires-Dist: pod5>=0.1.21; extra == 'ont'
|
|
116
|
+
Provides-Extra: plotting
|
|
117
|
+
Requires-Dist: matplotlib>=3.6.2; extra == 'plotting'
|
|
118
|
+
Requires-Dist: seaborn>=0.11; extra == 'plotting'
|
|
119
|
+
Provides-Extra: pybedtools
|
|
120
|
+
Requires-Dist: pybedtools>=0.12.0; extra == 'pybedtools'
|
|
121
|
+
Provides-Extra: pybigwig
|
|
122
|
+
Requires-Dist: pybigwig>=0.3.24; extra == 'pybigwig'
|
|
123
|
+
Provides-Extra: pysam
|
|
124
|
+
Requires-Dist: pysam>=0.19.1; extra == 'pysam'
|
|
125
|
+
Provides-Extra: qc
|
|
126
|
+
Requires-Dist: multiqc; extra == 'qc'
|
|
127
|
+
Provides-Extra: scanpy
|
|
128
|
+
Requires-Dist: igraph; extra == 'scanpy'
|
|
129
|
+
Requires-Dist: scanpy>=1.9; extra == 'scanpy'
|
|
130
|
+
Provides-Extra: torch
|
|
131
|
+
Requires-Dist: torch>=1.9.0; extra == 'torch'
|
|
98
132
|
Description-Content-Type: text/markdown
|
|
99
133
|
|
|
100
134
|
[](https://pypi.org/project/smftools)
|