oscb 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscb/__init__.py +14 -0
- oscb/data.py +135 -0
- oscb/evaluation/__init__.py +10 -0
- oscb/evaluation/annotation.py +39 -0
- oscb/evaluation/ccc.py +74 -0
- oscb/evaluation/clustering.py +20 -0
- oscb/evaluation/imputation.py +161 -0
- oscb/evaluation/integration.py +60 -0
- oscb/evaluation/multimodal.py +49 -0
- oscb/evaluation/trajectory.py +196 -0
- oscb/evaluator.py +219 -0
- oscb/utilization.py +133 -0
- oscb/utils.py +107 -0
- oscb-0.1.0.dist-info/METADATA +73 -0
- oscb-0.1.0.dist-info/RECORD +17 -0
- oscb-0.1.0.dist-info/WHEEL +5 -0
- oscb-0.1.0.dist-info/top_level.txt +1 -0
oscb/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import absolute_import
|
|
2
|
+
|
|
3
|
+
from .evaluation import annotation, ccc, clustering, imputation, integration, multimodal, trajectory, annotation
|
|
4
|
+
from .data import *
|
|
5
|
+
from .evaluator import *
|
|
6
|
+
from .utilization import *
|
|
7
|
+
from .utils import *
|
|
8
|
+
# from .evaluation import ccc
|
|
9
|
+
# from .evaluation import clustering
|
|
10
|
+
# from .evaluation import imputation
|
|
11
|
+
# from .evaluation import integration
|
|
12
|
+
# from .evaluation import multimodal
|
|
13
|
+
# from .evaluation import trajectory
|
|
14
|
+
# from .evaluation import annotation
|
oscb/data.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from tqdm import tqdm
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import hashlib
|
|
6
|
+
import re
|
|
7
|
+
from muon import MuData
|
|
8
|
+
import muon as mu
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import scanpy as sc
|
|
12
|
+
import anndata as ad
|
|
13
|
+
import mudata as md
|
|
14
|
+
from .utils import *
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FileDownloader:
    """Stream files from an HTTP endpoint to disk with a progress bar.

    One ``requests.Session`` is created per instance and reused for every
    request issued through it.
    """

    def __init__(self, chunk_size=8192):
        # Number of bytes requested per streamed chunk while downloading.
        self.chunk_size = chunk_size
        self.session = requests.Session()

    def get_filename_from_response(self, headers):
        """Return the file name found in a Content-Disposition header.

        Both the plain ``filename=...`` form and the extended
        ``filename*=utf-8''...`` form are recognised; the latter is
        percent-decoded. Returns ``None`` when the header is missing or
        carries no file name.
        """
        from urllib.parse import unquote

        if "content-disposition" in headers:
            cd = headers["content-disposition"]
            match = re.search(r"filename\*?=['\"]?(.*?)['\"]?(?:;|$)", cd)
            if match:
                filename = match.group(1)
                # The charset prefix of ``filename*`` is usually written
                # ``UTF-8''``, so compare case-insensitively.
                if filename.lower().startswith("utf-8''"):
                    filename = unquote(filename.split("''", 1)[1])
                return filename
        return None

    def get_file_size(self, response):
        """Return the response's Content-Length as an int (0 when absent)."""
        return int(response.headers.get('content-length', 0))

    def get_file_hash(self, file_path):
        """Return the hexadecimal SHA-256 digest of the file at *file_path*."""
        sha256_hash = hashlib.sha256()
        with open(file_path, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()

    def download(self, url, data_dict, data_folder='downloads/', verify_hash=None):
        """POST *data_dict* as JSON to *url* and stream the reply into *data_folder*.

        The file name comes from the Content-Disposition header and falls
        back to the last path segment of *url*. When *verify_hash* is given,
        it is compared with the SHA-256 hex digest of the downloaded file.

        Returns the ``Path`` of the downloaded file, or ``None`` on any
        failure (the error is printed and a partially written file removed).
        """
        from urllib.parse import urlparse

        # Bound before the try block so the error handler can always
        # inspect it, even when the request itself fails.
        local_file_path = None
        try:
            with self.session.post(url, json=data_dict, stream=True) as response:
                response.raise_for_status()  # Raise for 4xx / 5xx replies.
                total_size = self.get_file_size(response)
                file_name = self.get_filename_from_response(response.headers)
                if not file_name:
                    file_name = urlparse(url).path
                # The name is server-controlled: keep only its final
                # component so it cannot point outside data_folder.
                file_name = os.path.basename(file_name.replace("\\", "/").rstrip("/"))
                if file_name in ("", ".", ".."):
                    raise ValueError("Could not determine a file name for the download.")

                local_file_path = Path(data_folder) / file_name
                local_file_path.parent.mkdir(parents=True, exist_ok=True)

                # An unknown size (0) is passed as None so tqdm shows a
                # plain byte counter instead of a 0-byte bar.
                progress = tqdm(total=total_size or None,
                                unit='B',
                                unit_scale=True,
                                desc=local_file_path.name)
                try:
                    with local_file_path.open('wb') as f:
                        for chunk in response.iter_content(chunk_size=self.chunk_size):
                            if chunk:
                                f.write(chunk)
                                progress.update(len(chunk))
                finally:
                    progress.close()

            # File validation
            if verify_hash:
                downloaded_hash = self.get_file_hash(local_file_path)
                if downloaded_hash != verify_hash:
                    raise ValueError("File hash verification failed.")

            print(f"File downloaded successfully to: {local_file_path}")

            return local_file_path

        except Exception as e:
            # Best-effort API: report the problem and signal failure with
            # None instead of propagating the exception.
            print(f"Download failed: {str(e)}")
            if local_file_path is not None and local_file_path.exists():
                local_file_path.unlink()
            return None

    def download_multiple(self, url_list, data_folder):
        """Download every URL in *url_list* into *data_folder*.

        Returns one dict per URL with the keys ``'url'``, ``'success'``
        (bool) and ``'local_file_path'`` (the downloaded file's path, or the
        path expected from the URL when the download failed).
        """
        # NOTE(review): download() always issues a POST; confirm that the
        # endpoints passed here accept a POST without a JSON body.
        results = []
        for url in url_list:
            expected_path = Path(data_folder) / url.split('/')[-1]
            downloaded = self.download(url, None, data_folder=data_folder)
            results.append({
                'url': url,
                'success': downloaded is not None,
                'local_file_path': str(expected_path if downloaded is None else downloaded),
            })
        return results
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def DataLoader(benchmarks_id, data_folder='downloads/', server_endpoint=server_endpoint+'download'):
    """Download the dataset behind *benchmarks_id* and load it into memory.

    Parameters
    ----------
    benchmarks_id
        Identifier handed to ``get_dataset_id`` to obtain the dataset id and
        task name.
    data_folder
        Directory the downloaded file is written to.
    server_endpoint
        URL the download request is posted to.

    Returns
    -------
    The object returned by ``mu.read_h5mu`` for ``.h5mu`` files, the object
    returned by ``sc.read_h5ad`` for any other file, or ``None`` when the
    download failed.
    """
    dataset_id, task = get_dataset_id(benchmarks_id)
    if task is not None:
        print(f"Downloading dataset for {task} Benchmarks.")
    else:
        print("Downloading dataset.")
    data_dict = {
        "dataset_id": dataset_id
    }

    downloader = FileDownloader()
    # Honour the caller's data_folder instead of a hard-coded directory.
    adata_path = downloader.download(server_endpoint, data_dict, data_folder=data_folder)

    # download() returns None on failure; os.path.isfile(None) would raise.
    if adata_path is None or not os.path.isfile(adata_path):
        return None

    if str(adata_path).endswith(".h5mu"):
        # muon is imported under the alias ``mu`` in this module.
        return mu.read_h5mu(adata_path)
    return sc.read_h5ad(adata_path)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def split_data(adata):
    """Split *adata* into train and test copies using ``adata.obs.split_idx``.

    Rows whose ``split_idx`` string contains ``'train'`` form the first
    returned object, rows containing ``'test'`` form the second. Both are
    independent copies.
    """
    split_labels = adata.obs.split_idx.str
    is_train = split_labels.contains('train')
    is_test = split_labels.contains('test')
    return adata[is_train, :].copy(), adata[is_test, :].copy()
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import sklearn.preprocessing
|
|
3
|
+
from sklearn.metrics import f1_score, accuracy_score
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def annotation_metrics(labels, labels_pred):
    """Score predicted labels against reference labels.

    Both inputs are converted to categoricals, missing values are replaced
    by an extra category, and the labels are integer-encoded with a shared
    ``LabelEncoder`` before scoring.

    Returns
    -------
    tuple of float
        ``(accuracy, f1_macro, f1_micro, f1_weighted)``, each rounded to
        four decimals.
    """
    def _as_filled_category(values):
        # Categorical view with NaNs mapped to a dedicated category
        # (the category name's spelling is kept as-is).
        values = values.astype('category')
        if values.isna().any():
            values = values.cat.add_categories(['Unkown'])
            values = values.fillna('Unkown')
        return values

    def _rounded(value):
        return float('{:.4f}'.format(value))

    print("Encode labels", flush=True)
    labels = _as_filled_category(labels)
    labels_pred = _as_filled_category(labels_pred)

    # One encoder fitted on the categories of both inputs so that the
    # integer codes agree between truth and prediction.
    all_categories = list(labels.dtype.categories) + list(labels_pred.dtype.categories)
    encoder = sklearn.preprocessing.LabelEncoder().fit(all_categories)
    labels = encoder.transform(labels)
    labels_pred = encoder.transform(labels_pred)

    print("Compute prediction accuracy", flush=True)
    accuracy = _rounded(accuracy_score(labels, labels_pred))

    print("Compute F1 score", flush=True)
    f1_macro = _rounded(f1_score(labels, labels_pred, average='macro'))
    f1_micro = _rounded(f1_score(labels, labels_pred, average='micro'))
    f1_weighted = _rounded(f1_score(labels, labels_pred, average='weighted'))

    return accuracy, f1_macro, f1_micro, f1_weighted
|
oscb/evaluation/ccc.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
import anndata
|
|
4
|
+
import collections
|
|
5
|
+
import numpy as np
|
|
6
|
+
from sklearn.metrics import auc
|
|
7
|
+
from sklearn.metrics import precision_recall_curve
|
|
8
|
+
|
|
9
|
+
# Cell Cell Communication
|
|
10
|
+
def ccc_metrics(adata, ccc_pred="ccc_pred", ccc_target="ccc_target", score="score", top_prop=0.05):
    """Compute precision-recall AUC and a transformed odds ratio.

    Predictions in ``adata.uns[ccc_pred]`` are joined onto the targets in
    ``adata.uns[ccc_target]``. The odds ratio treats the ``top_prop``
    highest-scoring interactions as predicted positives and is passed
    through ``_sigmoid_transform``.

    Returns
    -------
    tuple of float
        ``(auc_score, oddsratio_score)``, each rounded to four decimals.
    """
    def _rounded(value):
        return float('{:.4f}'.format(value))

    joined = join_truth_and_pred(adata, ccc_pred, ccc_target, score)

    # Precision-recall AUC
    precision, recall, _ = precision_recall_curve(
        joined["response"], joined[score], pos_label=1
    )
    auc_score = auc(recall, precision)

    # Odds ratio: flag the highest-ranked interactions with 1.
    ranked = joined.sort_values(score, ascending=False)
    n_top = int(adata.uns[ccc_target].shape[0] * top_prop)
    top_flags = np.zeros(len(ranked[score]))
    top_flags[0:n_top] = 1
    ranked.loc[:, ["top_n"]] = top_flags

    top_rows = ranked[ranked["top_n"] == 1]
    bottom_rows = ranked[ranked["top_n"] == 0]
    tp = np.sum(top_rows.response == 1)
    fp = np.sum(top_rows.response == 0)
    fn = np.sum(bottom_rows.response == 1)
    tn = np.sum(bottom_rows.response == 0)

    numerator = tp * tn
    denominator = fp * fn
    if denominator != 0:
        oddsratio_score = numerator / denominator
    elif numerator == 0:
        # 0 / 0: undefined
        oddsratio_score = np.nan
    else:
        # no false positives or false negatives: perfect score
        oddsratio_score = np.inf

    return _rounded(auc_score), _rounded(_sigmoid_transform(oddsratio_score))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Join predictions to target
|
|
53
|
+
def join_truth_and_pred(adata, ccc_pred="ccc_pred", ccc_target="ccc_target", score="lrscore"):
    """Left-join predictions onto targets using ``adata.uns["merge_keys"]``.

    Missing ``response`` values become 0. Missing *score* values are set
    just below the smallest observed score (minimum minus machine epsilon).
    Returns the joined DataFrame.
    """
    keys = list(adata.uns["merge_keys"])
    truth = adata.uns[ccc_target]
    predictions = adata.uns[ccc_pred]
    joined = truth.merge(predictions, on=keys, how="left")

    missing_response = joined["response"].isna()
    joined.loc[missing_response, "response"] = 0

    # Rows without a prediction rank below every predicted interaction.
    score_floor = np.nanmin(joined[score]) - np.finfo(float).eps
    missing_score = joined[score].isna()
    joined.loc[missing_score, score] = score_floor

    return joined
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _sigmoid_transform(x):
|
|
64
|
+
return 1 - 1 / (1 + x / 2)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def aggregate_method_scores(adata, how, ccc_pred="LIANA", score="score"):
    """Aggregate the *score* column of ``adata.uns[ccc_pred]`` per merge key.

    Rows are grouped by ``adata.uns["merge_keys"]`` and *score* is reduced
    with *how* (anything accepted by pandas named aggregation). The result
    has the merge keys as columns plus one column named ``score``.
    """
    group_keys = list(adata.uns["merge_keys"])
    grouped = adata.uns[ccc_pred].groupby(group_keys)
    aggregated = grouped.agg(score=(score, how))
    return aggregated.reset_index()
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from sklearn.metrics import adjusted_rand_score as ARI
|
|
2
|
+
from sklearn.metrics import normalized_mutual_info_score as NMI
|
|
3
|
+
from sklearn.metrics import silhouette_score
|
|
4
|
+
from sklearn.metrics import fowlkes_mallows_score as FM
|
|
5
|
+
|
|
6
|
+
def clustering_metrics(labels, labels_pred, embedding):
    """Compute and print four clustering scores.

    The silhouette score is computed from *embedding* and *labels*; NMI,
    ARI and Fowlkes-Mallows compare *labels* with *labels_pred*.

    Returns
    -------
    tuple of float
        ``(asw_score, nmi_score, ari_score, fm_score)``, each rounded to
        four decimals.
    """
    raw_scores = (
        silhouette_score(embedding, labels),
        NMI(labels, labels_pred),
        ARI(labels, labels_pred),
        FM(labels, labels_pred),
    )
    asw_score, nmi_score, ari_score, fm_score = [
        float('{:.4f}'.format(value)) for value in raw_scores
    ]

    print(
        "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nFowlkes Mallows: %.4f"
        % (asw_score, nmi_score, ari_score, fm_score)
    )
    return asw_score, nmi_score, ari_score, fm_score
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
from scipy import sparse
|
|
2
|
+
import importlib
|
|
3
|
+
import numbers
|
|
4
|
+
import anndata
|
|
5
|
+
import scanpy as sc
|
|
6
|
+
import sklearn.metrics
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import re
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# test_data = adata.obsm["test"]
|
|
14
|
+
# denoised_data = adata.obsm["denoised"]
|
|
15
|
+
# train_data = adata.obsm["train"]
|
|
16
|
+
def imputation_metrics(adata, denoised_layer, train='train', test='test'):
    """Compute the MSE and Poisson loss of a denoised layer against test counts.

    Parameters
    ----------
    adata
        Object providing ``obsm[train]``, ``obsm[test]`` and
        ``layers[denoised_layer]``.
    denoised_layer
        Name of the layer holding the denoised values.
    train, test
        Keys of the train and test matrices in ``adata.obsm``.

    Returns
    -------
    tuple of float
        ``(mse, poisson)``, each rounded to four decimals.
    """
    obs, var = adata.obs, adata.var

    # Mean-squared error on total-count-normalised, log1p-transformed data.
    held_out = anndata.AnnData(X=adata.obsm[test], obs=obs, var=var)
    denoised = anndata.AnnData(X=adata.layers[denoised_layer], obs=obs, var=var)
    # NOTE(review): if AnnData does not copy X, the in-place normalisation
    # below also alters adata.obsm[test] / adata.layers[denoised_layer]
    # before the Poisson step reads them - confirm.
    for view in (held_out, denoised):
        sc.pp.normalize_total(view, 10000)
        sc.pp.log1p(view)
    mse = sklearn.metrics.mean_squared_error(toarray(held_out.X), toarray(denoised.X))

    # Poisson loss: rescale the denoised values by the ratio of the test
    # total to the train total.
    test_counts = adata.obsm[test]
    denoised_counts = adata.layers[denoised_layer]
    train_total = adata.obsm[train].sum()
    test_total = test_counts.sum()
    denoised_counts = denoised_counts * test_total / train_total

    # NOTE(review): the test counts are passed in the ``y_pred`` slot and
    # the denoised values in the ``y_true`` slot - confirm this is intended.
    poisson = poisson_nll_loss(toarray(test_counts), toarray(denoised_counts))

    return float('{:.4f}'.format(mse)), float('{:.4f}'.format(poisson))
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def poisson_nll_loss(y_pred: np.ndarray, y_true: np.ndarray) -> float:
    """Return the mean of ``y_pred - y_true * log(y_pred + 1e-6)``."""
    log_pred = np.log(y_pred + 1e-6)  # small offset keeps log(0) finite
    per_entry = y_pred - y_true * log_pred
    return per_entry.mean()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _check_numpy_dtype(x):
    """Choose the dtype used to turn the list *x* into an array.

    Returns ``None`` (let NumPy infer) when every element has the same
    length or none of them has a length, and ``object`` when lengths differ
    or sized and unsized elements are mixed.
    """
    sizes = set()
    for xi in x:
        try:
            sizes.add(len(xi))
        except TypeError:
            # Scalars and other unsized objects.
            sizes.add(None)
    return None if len(sizes) <= 1 else object


def toarray(x):
    """Convert an array-like to a np.ndarray.

    Parameters
    ----------
    x : array-like
        Array-like to be converted. Pandas objects, SciPy sparse containers,
        ``np.matrix`` and (nested) lists are converted; ``np.ndarray`` and
        numbers are returned unchanged.

    Returns
    -------
    x : np.ndarray

    Raises
    ------
    TypeError
        If *x* is none of the supported types.
    """
    if is_SparseDataFrame(x):
        x = x.to_coo().toarray()
    elif is_SparseSeries(x):
        x = x.to_dense().to_numpy()
    elif isinstance(x, (pd.DataFrame, pd.Series, pd.Index)):
        x = x.to_numpy()
    elif sparse.issparse(x):
        # issparse also accepts SciPy sparse arrays, not only spmatrix.
        x = x.toarray()
    elif isinstance(x, np.matrix):
        x = x.A
    elif isinstance(x, list):
        x_out = []
        for xi in x:
            try:
                xi = toarray(xi)
            except TypeError:
                # recursed too far: keep the element as it is
                pass
            x_out.append(xi)
        # convert x_out from list to array
        x = np.array(x_out, dtype=_check_numpy_dtype(x_out))
    elif isinstance(x, (np.ndarray, numbers.Number)):
        pass
    else:
        raise TypeError("Expected array-like. Got {}".format(type(x)))
    return x
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def is_SparseSeries(X):
    """Return True if *X* is an instance of the legacy ``pd.SparseSeries``.

    Returns False when the installed pandas has no ``SparseSeries``
    attribute. The related FutureWarning is suppressed during the check.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            "The SparseSeries class is removed from pandas. Accessing it from the "
            "top-level namespace will also be removed in the next version",
            FutureWarning,
        )
        try:
            legacy_cls = pd.SparseSeries
        except AttributeError:
            return False
        return isinstance(X, legacy_cls)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def is_SparseDataFrame(X):
    """Return True if *X* is an instance of the legacy ``pd.SparseDataFrame``.

    Returns False when the installed pandas has no ``SparseDataFrame``
    attribute. The related FutureWarning is suppressed during the check.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            "The SparseDataFrame class is removed from pandas. Accessing it from the "
            "top-level namespace will also be removed in the next version",
            FutureWarning,
        )
        try:
            legacy_cls = pd.SparseDataFrame
        except AttributeError:
            return False
        return isinstance(X, legacy_cls)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def is_sparse_dataframe(x):
    """Return True if *x* is a DataFrame whose ``.sparse`` accessor is usable.

    Legacy ``SparseDataFrame`` instances and non-DataFrames yield False.
    """
    if not isinstance(x, pd.DataFrame) or is_SparseDataFrame(x):
        return False
    try:
        # Accessing the accessor raises AttributeError for dense frames.
        x.sparse
    except AttributeError:
        return False
    return True
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def is_sparse_series(x):
    """Return True if *x* is a Series whose ``.sparse`` accessor is usable.

    Legacy ``SparseSeries`` instances and non-Series yield False.
    """
    if not isinstance(x, pd.Series) or is_SparseSeries(x):
        return False
    try:
        # Accessing the accessor raises AttributeError for dense series.
        x.sparse
    except AttributeError:
        return False
    return True
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def dataframe_to_sparse(x, fill_value=0.0):
    """Return a sparse-backed DataFrame holding the values of *x*.

    The values go through a COO matrix; the index and columns of *x* are
    carried over, and *fill_value* is assigned on the result's ``.sparse``
    accessor.
    """
    coo_values = sparse.coo_matrix(x.values)
    sparse_frame = pd.DataFrame.sparse.from_spmatrix(
        coo_values, index=x.index, columns=x.columns
    )
    sparse_frame.sparse.fill_value = fill_value
    return sparse_frame
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def SparseDataFrame(X, columns=None, index=None, default_fill_value=0.0):
    """Build a sparse-backed DataFrame from *X*.

    SciPy sparse input is wrapped directly; anything else is first turned
    into a DataFrame (if it is not one already) and then converted with
    ``dataframe_to_sparse``. *columns* and *index*, when given, replace the
    labels of the result.
    """
    if not sparse.issparse(X):
        if is_SparseDataFrame(X) or not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X)
        X = dataframe_to_sparse(X, fill_value=default_fill_value)
    else:
        X = pd.DataFrame.sparse.from_spmatrix(X)
        X.sparse.fill_value = default_fill_value

    if columns is not None:
        X.columns = columns
    if index is not None:
        X.index = index
    return X
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from scib.metrics import metrics
|
|
2
|
+
|
|
3
|
+
# https://github.com/theislab/scib/blob/main/scib/metrics/metrics.py
|
|
4
|
+
# https://scib.readthedocs.io/en/latest/api/scib.metrics.metrics_all.html
|
|
5
|
+
def integration_metrics(adata, adata_int, batch_key='batch', label_key='cell_type', species="mouse"):
    """Run the scib metric suite and add a biological-conservation summary.

    The call to :func:`scib.metrics.metrics` enables ARI, NMI, silhouette,
    PCR, cell cycle, HVG overlap, isolated labels (F1 and ASW), graph
    connectivity and kBET, and disables trajectory conservation.

    :param adata: unintegrated, preprocessed anndata object
    :param adata_int: integrated anndata object
    :param batch_key: name of batch column in adata.obs and adata_int.obs
    :param label_key: name of biological label (cell type) column in adata.obs and adata_int.obs
    :param species: forwarded to scib as ``organism``
    :return: dict mapping metric name to value rounded to four decimals
        (missing values become 0), plus the key ``'Biological Conservation'``
        holding the mean of the biological-conservation metrics
    """
    # https://github.com/theislab/scib/blob/main/scib/metrics/metrics.py
    # https://scib.readthedocs.io/en/latest/api/scib.metrics.metrics_all.html
    all_scores = metrics(
        adata,
        adata_int,
        batch_key=batch_key,
        label_key=label_key,
        cluster_nmi=None,
        ari_=True,
        nmi_=True,
        nmi_method='arithmetic',
        nmi_dir=None,
        silhouette_=True,
        si_metric='euclidean',
        pcr_=True,
        cell_cycle_=True,
        organism=species,
        hvg_score_=True,
        isolated_labels_=True,
        isolated_labels_f1_=True,
        isolated_labels_asw_=True,
        n_isolated=True,
        graph_conn_=True,
        trajectory_=False,
        kBET_=True,
    )
    bio_keys = [
        'NMI_cluster/label',
        'ARI_cluster/label',
        'ASW_label',
        'cell_cycle_conservation',
        'isolated_label_F1',
        'isolated_label_silhouette',
        'hvg_overlap',
    ]

    # Metrics that were not computed come back as NaN and count as 0.
    filled = all_scores.fillna(0).to_dict()[0]
    metrics_dict = {
        name: float('{:.4f}'.format(value)) for name, value in filled.items()
    }

    bc_total = 0
    for name in bio_keys:
        bc_total += metrics_dict[name]
    metrics_dict['Biological Conservation'] = float('{:.4f}'.format(bc_total / len(bio_keys)))

    return metrics_dict
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
import scanpy as sc
|
|
6
|
+
import scipy.io
|
|
7
|
+
import scib
|
|
8
|
+
import muon as mu
|
|
9
|
+
from muon import MuData
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def multimodal_metrics(mdata, embed, mod1='rna', batch='group', label_key='cell_type'):
    """Score a joint embedding stored in ``mdata.obsm[embed]`` with scib.

    An AnnData object is built from the embedding, ``mdata.obs`` and
    ``mdata.obsp["connectivities"]``. Rows with a missing ``"{mod1}:{batch}"``
    or ``"{mod1}:{label_key}"`` value are dropped before scoring.

    Returns
    -------
    dict
        Metric name to value rounded to four decimals (missing values become
        0), plus ``'Biological Conservation'``: the mean of the
        biological-conservation metrics.
    """
    batch_col = f"{mod1}:{batch}"
    label_col = f"{mod1}:{label_key}"

    scib_anndata = sc.AnnData(mdata.obsm[embed]).copy()
    scib_anndata.obs = mdata.obs.copy()
    scib_anndata.obsp["connectivities"] = mdata.obsp["connectivities"].copy()
    scib_anndata.obsm[embed] = mdata.obsm[embed].copy()

    # Remove rows with NaN in the batch column, then in the label column.
    for column in (batch_col, label_col):
        scib_anndata = scib_anndata[~scib_anndata.obs[column].isna()]
    for column in (batch_col, label_col):
        scib_anndata.obs[column] = scib_anndata.obs[column].astype("category")

    scores = scib.metrics.metrics(
        scib_anndata,
        scib_anndata,
        batch_key=batch_col,
        label_key=label_col,
        embed=embed,
        ari_=True,
        nmi_=True,
        silhouette_=True,
        graph_conn_=True,
        isolated_labels_asw_=True,
    )

    bio_keys = [
        'NMI_cluster/label',
        'ARI_cluster/label',
        'ASW_label',
        'cell_cycle_conservation',
        'isolated_label_F1',
        'isolated_label_silhouette',
        'hvg_overlap',
    ]
    # Metrics that were not computed come back as NaN and count as 0.
    filled = scores.fillna(0).to_dict()[0]
    result = {
        name: float('{:.4f}'.format(value)) for name, value in filled.items()
    }

    bc_total = 0
    for name in bio_keys:
        bc_total += result[name]
    result['Biological Conservation'] = float('{:.4f}'.format(bc_total / len(bio_keys)))

    return result
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import numpy as np
|
|
3
|
+
import networkx as nx
|
|
4
|
+
import zss
|
|
5
|
+
from math import inf
|
|
6
|
+
from grakel import GraphKernel, Graph
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def trajectory_metrics(traj, bm_traj, root_node):
    """Compare a predicted trajectory with a benchmark trajectory.

    Both inputs are edge tables with ``'from'`` and ``'to'`` columns.

    Returns
    -------
    tuple of float
        ``(ged_score, gks_score, jsc_score, ted_score, mean)``: graph edit
        distance score, graph kernel score, Jaccard similarity, tree edit
        distance score and their mean, each rounded to four decimals.
    """
    def _rounded(value):
        return float('{:.4f}'.format(value))

    # child -> parent mappings, then sorted (parent, child) edge lists
    pred_parent_of = traj_to_dict(traj)
    bm_parent_of = traj_to_dict(bm_traj)
    pred_edges = sorted((parent, child) for child, parent in pred_parent_of.items())
    bm_edges = sorted((parent, child) for child, parent in bm_parent_of.items())
    n_bm = len(bm_parent_of)

    # Graph edit distance
    bm_graph = nx.DiGraph()
    bm_graph.add_edges_from(bm_edges)
    pred_graph = nx.DiGraph()
    pred_graph.add_edges_from(pred_edges)
    ged = graph_edit_distance(bm_graph, pred_graph)
    ged_max = 4 * n_bm + 2
    ged_score = (ged_max - ged) / ged_max

    # Jaccard similarity coefficient over (child, parent) assignments
    matching = 0
    for child, parent in bm_parent_of.items():
        if child in pred_parent_of and parent == pred_parent_of[child]:
            matching += 1
    union_size = n_bm + len(pred_parent_of) - matching
    jsc_score = matching / union_size

    # Graph kernel score. The edge lists are handed to find_root_node inside
    # graph_kernel_score, which may reorder them in place; the trees below
    # are built from the lists as they are after that call.
    gks_score = graph_kernel_score(bm_edges, pred_edges, root_node=root_node)

    # Tree edit distance
    bm_tree = build_tree_from_edges(bm_edges, root_node)
    pred_tree = build_tree_from_edges(pred_edges, root_node)
    ted = zss.distance(
        pred_tree, bm_tree,
        get_children=lambda tree_node: tree_node.children,
        insert_cost=insert_cost,
        remove_cost=remove_cost,
        update_cost=update_cost
    )
    # NOTE(review): ted_max is 0 when the benchmark has a single edge and
    # union_size is 0 when both trajectories are empty; both divisions
    # would then fail - confirm such inputs cannot occur.
    ted_max = 2 * n_bm - 2
    ted_score = (ted_max - ted) / ted_max

    mean = (ged_score + gks_score + jsc_score + ted_score) / 4

    return _rounded(ged_score), _rounded(gks_score), _rounded(jsc_score), _rounded(ted_score), _rounded(mean)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def traj_to_dict(df):
    """Map each ``'to'`` value of *df* to the ``'from'`` value of the same row.

    When a ``'to'`` value occurs in several rows, the last row wins.
    """
    return {row['to']: row['from'] for _, row in df.iterrows()}
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class TreeNode:
    """Node of an ordered tree: a label plus a list of child nodes."""

    def __init__(self, label):
        # Each node owns its own, initially empty, list of children.
        self.label, self.children = label, []
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def build_tree_from_edges(edges, root_node):
    """Build ``TreeNode`` objects from ``(parent, child)`` pairs.

    Children are attached in the order their edges appear in *edges*.
    Returns the node labelled *root_node*; a ``KeyError`` is raised if no
    edge mentions that label.
    """
    by_label = {}
    for parent_label, child_label in edges:
        for label in (parent_label, child_label):
            if label not in by_label:
                by_label[label] = TreeNode(label)
        by_label[parent_label].children.append(by_label[child_label])
    return by_label[root_node]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def insert_cost(node):
    """Return the cost of inserting *node* during tree edit distance: always 1."""
    return 1
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def remove_cost(node):
    """Return the cost of removing *node* during tree edit distance: always 1."""
    return 1
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def update_cost(node1, node2):
    """Return 0 when both nodes carry the same label, otherwise infinity."""
    if node1.label == node2.label:
        return 0
    return inf
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def graph_edit_distance(G1, G2):
    """Count the nodes and edges present in exactly one of the two graphs.

    Every node or edge found in only one graph costs 1 (a deletion or an
    insertion); elements shared by both graphs cost nothing.
    """
    unmatched_nodes = set(G1.nodes) ^ set(G2.nodes)
    unmatched_edges = set(G1.edges) ^ set(G2.edges)
    return len(unmatched_nodes) + len(unmatched_edges)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def graph_kernel_score(edges1, edges2, root_node):
    """Return the normalised shortest-path kernel value between two edge lists.

    Each list is first passed through ``find_root_node`` (which may reorder
    it in place), turned into an undirected graph, and converted to the
    grakel input format.
    """
    grakel_graphs = []
    for edge_list in (edges1, edges2):
        rooted_edges = find_root_node(edge_list, root_node)
        graph = nx.Graph()
        graph.add_edges_from(rooted_edges)
        grakel_graphs.append(nx_to_grakel(graph))

    # Shortest-path graph kernel, normalised
    kernel = GraphKernel(kernel={"name": "shortest_path"}, normalize=True)

    # Off-diagonal entry of the 2x2 kernel matrix: similarity of the graphs.
    kernel_matrix = kernel.fit_transform(grakel_graphs)
    return kernel_matrix[0, 1]
|
|
172
|
+
|
|
173
|
+
# Convert NetworkX graphs to Grakel format
|
|
174
|
+
def nx_to_grakel(G):
    """Convert a graph exposing ``.nodes`` and ``.edges`` to grakel input.

    Returns ``(edges, labels)``: the edges rewritten with integer node
    indices, and a dict mapping each index back to the original node.
    """
    node_list = list(G.nodes)
    edge_list = list(G.edges)

    index_of = {}
    for position, node in enumerate(node_list):
        index_of[node] = position

    indexed_edges = []
    for edge in edge_list:
        indexed_edges.append((index_of[edge[0]], index_of[edge[1]]))

    labels = {position: node for node, position in index_of.items()}
    return (indexed_edges, labels)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def find_root_node(edges1, root_node):
    """Move the first edge that starts at *root_node* to the front of *edges1*.

    The list is modified in place by swapping that edge with the element at
    index 0, and the same list object is returned. Nothing changes when the
    list is empty, already starts with a root edge, or has no root edge.
    """
    if not edges1 or edges1[0][0] == root_node:
        return edges1

    for i in range(1, len(edges1)):
        if edges1[i][0] == root_node:
            edges1[0], edges1[i] = edges1[i], edges1[0]
            # Stop after the first swap. Reassigning the loop variable does
            # not end a ``for`` loop, which let later root edges be swapped
            # to the front as well.
            break

    return edges1
|
oscb/evaluator.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
from .evaluation.annotation import *
|
|
2
|
+
from .evaluation.ccc import *
|
|
3
|
+
from .evaluation.clustering import *
|
|
4
|
+
from .evaluation.imputation import *
|
|
5
|
+
from .evaluation.integration import *
|
|
6
|
+
from .evaluation.multimodal import *
|
|
7
|
+
from .evaluation.trajectory import *
|
|
8
|
+
from .evaluation.annotation import *
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from .utils import *
|
|
11
|
+
import requests
|
|
12
|
+
import json
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def eval(adata, adata_int=None, benchmarks_id=None, task=None, cluster_key=None, label_key=None, label_pred_key=None, embedding_key=None, ccc_pred="ccc_pred", ccc_target="ccc_target", score="score", denoised_layer=None, train='train', test='test', mod1_key='rna', mod2_key='atac', traj_key=None, bm_traj_key=None, root_node=None, species=None, server_endpoint=server_endpoint+'benchmarks/', method="Your method"):
|
|
16
|
+
if adata is None:
|
|
17
|
+
raise ValueError("adata is required.")
|
|
18
|
+
|
|
19
|
+
benchmarks = None
|
|
20
|
+
current_date_and_time = datetime.now()
|
|
21
|
+
benchmarks_data = None
|
|
22
|
+
|
|
23
|
+
if benchmarks_id is not None:
|
|
24
|
+
dataset_id, task = get_dataset_id(benchmarks_id)
|
|
25
|
+
url = server_endpoint + benchmarks_id
|
|
26
|
+
response = requests.get(url)
|
|
27
|
+
if response.status_code == 200:
|
|
28
|
+
try:
|
|
29
|
+
benchmarks = response.json()
|
|
30
|
+
benchmarks_data = benchmarks['benchmarks_plot']['data']
|
|
31
|
+
match task:
|
|
32
|
+
case "Clustering" | "CL":
|
|
33
|
+
label_key = benchmarks['label']
|
|
34
|
+
|
|
35
|
+
case "Imputation" | "IM":
|
|
36
|
+
species = benchmarks['species']
|
|
37
|
+
|
|
38
|
+
case "Batch Integration" | "BI":
|
|
39
|
+
label_key = benchmarks['label']
|
|
40
|
+
batch_key = benchmarks['batch_key']
|
|
41
|
+
species = benchmarks['species']
|
|
42
|
+
|
|
43
|
+
case "Trajectory" | "TJ":
|
|
44
|
+
label_key = benchmarks['label']
|
|
45
|
+
root_node = benchmarks['origin_group']
|
|
46
|
+
bm_traj_key = benchmarks['bm_traj']
|
|
47
|
+
|
|
48
|
+
case "Cell-Cell Communication" | "CCC":
|
|
49
|
+
label_key = benchmarks['label']
|
|
50
|
+
ccc_target = benchmarks['ccc_target']
|
|
51
|
+
species = benchmarks['species']
|
|
52
|
+
|
|
53
|
+
case "Multimodal Data Integration" | "MI":
|
|
54
|
+
mod1_key = benchmarks['mod1']
|
|
55
|
+
mod2_key = benchmarks['mod2']
|
|
56
|
+
label_key = benchmarks['label']
|
|
57
|
+
batch_key = benchmarks['batch_key']
|
|
58
|
+
|
|
59
|
+
case "Cell Type Annotation" | "CT":
|
|
60
|
+
label_key = benchmarks['label']
|
|
61
|
+
# species = benchmarks['species']
|
|
62
|
+
|
|
63
|
+
except Exception as e:
|
|
64
|
+
print(f"Failed to get Benchmarks: {str(e)}")
|
|
65
|
+
else:
|
|
66
|
+
print(f"Failed to get Benchmarks: {benchmarks_id}.")
|
|
67
|
+
|
|
68
|
+
if task is not None:
|
|
69
|
+
task_info = {
|
|
70
|
+
"benchmarksId": benchmarks_id,
|
|
71
|
+
"datasetId": dataset_id,
|
|
72
|
+
"task_type": task,
|
|
73
|
+
"tool": method,
|
|
74
|
+
"created_on": current_date_and_time
|
|
75
|
+
}
|
|
76
|
+
match task:
|
|
77
|
+
case "Clustering" | "CL":
|
|
78
|
+
if cluster_key is not None and label_key is not None and embedding_key is not None:
|
|
79
|
+
asw_score, nmi_score, ari_score, fm_score = clustering_metrics(adata.obs[label_key], adata.obs[cluster_key], adata.obsm[embedding_key])
|
|
80
|
+
results = {
|
|
81
|
+
"benchmarksId": benchmarks_id,
|
|
82
|
+
"datasetId": dataset_id,
|
|
83
|
+
"task_type": task,
|
|
84
|
+
"tool": method,
|
|
85
|
+
"Silhouette": asw_score,
|
|
86
|
+
"NMI": nmi_score,
|
|
87
|
+
"ARI": ari_score,
|
|
88
|
+
"Fowlkes Mallows": fm_score,
|
|
89
|
+
"created_on": current_date_and_time
|
|
90
|
+
}
|
|
91
|
+
if benchmarks_data is not None:
|
|
92
|
+
labels, y_labels, data = get_bar_plot_data(benchmarks_data, user_results=results)
|
|
93
|
+
plot_bars(task, labels, y_labels, data)
|
|
94
|
+
|
|
95
|
+
return results
|
|
96
|
+
else:
|
|
97
|
+
raise ValueError(f"cluster_key, label_key and embedding_key are required for {task}.")
|
|
98
|
+
|
|
99
|
+
case "Imputation" | "IM":
|
|
100
|
+
if denoised_layer is not None:
|
|
101
|
+
mse, possion = imputation_metrics(adata, denoised_layer=denoised_layer)
|
|
102
|
+
results = {
|
|
103
|
+
"benchmarksId": benchmarks_id,
|
|
104
|
+
"datasetId": dataset_id,
|
|
105
|
+
"task_type": task,
|
|
106
|
+
"tool": method,
|
|
107
|
+
"MSE": mse,
|
|
108
|
+
"Possion": possion,
|
|
109
|
+
"created_on": current_date_and_time
|
|
110
|
+
}
|
|
111
|
+
if benchmarks_data is not None:
|
|
112
|
+
labels, y_labels, data = get_bar_plot_data(benchmarks_data, user_results=results)
|
|
113
|
+
plot_bars(task, labels, y_labels, data)
|
|
114
|
+
return results
|
|
115
|
+
else:
|
|
116
|
+
raise ValueError(f"denoised_layer is required for {task}.")
|
|
117
|
+
|
|
118
|
+
case "Batch Integration" | "BI":
|
|
119
|
+
if adata_int is not None and label_key is not None and batch_key is not None:
|
|
120
|
+
metrics_dict = integration_metrics(adata, adata_int, batch_key=batch_key, label_key=label_key, species=species)
|
|
121
|
+
results = {**task_info, **metrics_dict}
|
|
122
|
+
if benchmarks_data is not None:
|
|
123
|
+
labels, y_labels, data = get_bar_plot_data(benchmarks_data, user_results=results)
|
|
124
|
+
plot_bars(task, labels, y_labels, data)
|
|
125
|
+
return results
|
|
126
|
+
else:
|
|
127
|
+
raise ValueError(f"adata_int, label_key and batch_key are required for {task}.")
|
|
128
|
+
|
|
129
|
+
case "Trajectory" | "TJ":
|
|
130
|
+
if traj_key is not None and bm_traj_key is not None and root_node is not None:
|
|
131
|
+
ged_score, gks_score, jsc_score, ted_score, mean = trajectory_metrics(adata.uns[traj_key], adata.uns[bm_traj_key], adata.uns[root_node])
|
|
132
|
+
results = {
|
|
133
|
+
"benchmarksId": benchmarks_id,
|
|
134
|
+
"datasetId": dataset_id,
|
|
135
|
+
"task_type": task,
|
|
136
|
+
"tool": method,
|
|
137
|
+
"Graph Edit Distance": ged_score,
|
|
138
|
+
"Graph Kernel Score": gks_score,
|
|
139
|
+
"Jaccard Similarity Coefficient": jsc_score,
|
|
140
|
+
"Tree Edit Distance": ted_score,
|
|
141
|
+
"Mean": mean,
|
|
142
|
+
"created_on": current_date_and_time
|
|
143
|
+
}
|
|
144
|
+
if benchmarks_data is not None:
|
|
145
|
+
labels, y_labels, data = get_bar_plot_data(benchmarks_data, user_results=results)
|
|
146
|
+
plot_bars(task, labels, y_labels, data)
|
|
147
|
+
return results
|
|
148
|
+
else:
|
|
149
|
+
raise ValueError(f"adata_int, label_key and batch_key are required for {task}.")
|
|
150
|
+
|
|
151
|
+
case "Cell-Cell Communication" | "CCC":
|
|
152
|
+
if ccc_pred is not None and ccc_target is not None and score is not None:
|
|
153
|
+
auc_score, oddsratio_score = ccc_metrics(adata, ccc_pred=ccc_pred, ccc_target=ccc_target, score='score')
|
|
154
|
+
results = {
|
|
155
|
+
"benchmarksId": benchmarks_id,
|
|
156
|
+
"datasetId": dataset_id,
|
|
157
|
+
"task_type": task,
|
|
158
|
+
"tool": method,
|
|
159
|
+
"Precision-recall AUC": auc_score,
|
|
160
|
+
"Odds Ratio": oddsratio_score,
|
|
161
|
+
"created_on": current_date_and_time
|
|
162
|
+
}
|
|
163
|
+
if benchmarks_data is not None:
|
|
164
|
+
labels, y_labels, data = get_bar_plot_data(benchmarks_data, user_results=results)
|
|
165
|
+
plot_bars(task, labels, y_labels, data)
|
|
166
|
+
return results
|
|
167
|
+
else:
|
|
168
|
+
raise ValueError(f"ccc_pred, ccc_target and score are required for {task}.")
|
|
169
|
+
|
|
170
|
+
case "Multimodal Data Integration" | "MI":
|
|
171
|
+
if embedding_key is not None and mod1_key is not None and batch_key is not None and label_key is not None:
|
|
172
|
+
metrics_dict = multimodal_metrics(mdata, embed=embedding_key, mod1=mod1_key, batch=batch_key, label_key=label_key)
|
|
173
|
+
results = {**task_info, **metrics_dict}
|
|
174
|
+
if benchmarks_data is not None:
|
|
175
|
+
labels, y_labels, data = get_bar_plot_data(benchmarks_data, user_results=results)
|
|
176
|
+
plot_bars(task, labels, y_labels, data)
|
|
177
|
+
return results
|
|
178
|
+
else:
|
|
179
|
+
raise ValueError(f"embedding_key, mod1_key, label_key and batch_key are required for {task}.")
|
|
180
|
+
|
|
181
|
+
case "Cell Type Annotation" | "CT":
|
|
182
|
+
if label_pred_key is not None and label_key is not None:
|
|
183
|
+
accuracy, f1_macro, f1_micro, f1_weighted = annotation_metrics(adata.obs[label_key], adata.obs[label_pred_key])
|
|
184
|
+
results = {
|
|
185
|
+
"benchmarksId": benchmarks_id,
|
|
186
|
+
"datasetId": dataset_id,
|
|
187
|
+
"task_type": task,
|
|
188
|
+
"tool": method,
|
|
189
|
+
"Accuracy": accuracy,
|
|
190
|
+
"F1_macro": f1_macro,
|
|
191
|
+
"F1_micro": f1_micro,
|
|
192
|
+
"F1_weighted": f1_weighted,
|
|
193
|
+
"created_on": current_date_and_time
|
|
194
|
+
}
|
|
195
|
+
if benchmarks_data is not None:
|
|
196
|
+
labels, y_labels, data = get_bar_plot_data(benchmarks_data, user_results=results)
|
|
197
|
+
plot_bars(task, labels, y_labels, data)
|
|
198
|
+
return results
|
|
199
|
+
else:
|
|
200
|
+
raise ValueError(f"label_pred_key, and label_key are required for {task}.")
|
|
201
|
+
|
|
202
|
+
case _: # Default case, equivalent to 'default' in other languages
|
|
203
|
+
raise ValueError(f"{task} is not supported. Please input the task name from the following list [Clustering, Imputation, Batch Integration, Trajectory, Cell-Cell Communication, Multimodal Data Integration, Cell Type Annotation].")
|
|
204
|
+
else:
|
|
205
|
+
raise ValueError("benchmarks_id or task is required.")
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def write_json(data, file_path="./output.json"):
    """Dump ``data`` to ``file_path`` as JSON indented by four spaces.

    ``datetime`` values are converted through ``serialize_datetime``. A
    confirmation line is printed once the file has been written.
    """
    with open(file_path, mode='w') as handle:
        json.dump(obj=data, fp=handle, default=serialize_datetime, indent=4)

    print(f"Dictionary successfully written to {file_path}")
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def serialize_datetime(obj):
    """``json`` ``default`` hook: return the ISO-8601 string of a ``datetime``.

    Raises:
        TypeError: For any object that is not a ``datetime``.
    """
    if not isinstance(obj, datetime):
        raise TypeError("Type not serializable")
    return obj.isoformat()
|
oscb/utilization.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
|
|
2
|
+
# pip install nvidia-ml-py
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
import psutil
|
|
6
|
+
# import GPUtil
|
|
7
|
+
from threading import Thread
|
|
8
|
+
from pynvml import *
|
|
9
|
+
from .utils import *
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Monitor(Thread):
    """Background thread that samples CPU, RAM and NVIDIA GPU utilization.

    Sampling starts as soon as the object is constructed and repeats every
    ``delay`` seconds until :meth:`stop` is called. On machines where no GPU
    can be queried, the GPU series are filled with zeros so that all series
    keep the same length as ``time_points``.
    """

    def __init__(self, delay=1):
        super(Monitor, self).__init__()
        self.stopped = False
        self.delay = delay  # Seconds to sleep between two samples
        self.time_points = []
        self.cpu_usage = []
        self.mem_usage = []
        self.gpu_usage = []
        self.gpu_mem_usage = []
        self.start()

    def run(self):
        """Collect one sample per iteration until ``stopped`` is set."""
        while not self.stopped:
            self.time_points.append(time.time())
            self.cpu_usage.append(psutil.cpu_percent())
            self.mem_usage.append(psutil.virtual_memory().percent)
            gpu_util, gpu_mem_util = self._sample_gpu()
            self.gpu_usage.append(gpu_util)
            self.gpu_mem_usage.append(gpu_mem_util)
            time.sleep(self.delay)

    def _sample_gpu(self):
        """Return ``(gpu_utilization, memory_utilization)`` of the first GPU, or ``(0, 0)`` without one."""
        # Query NVML once per sample and tolerate hosts without a usable GPU;
        # indexing ['gpus'][0] unconditionally would kill the thread there.
        gpus = self.get_nvidia_info()['gpus']
        if not gpus:
            return 0, 0
        return gpus[0]['gpu_utilization'], gpus[0]['memory_utilization']

    def stop(self):
        """Stop sampling, plot the collected series and return them.

        Returns:
            dict with ``sys_info``, the ``CPU``, ``Memory``, ``GPU`` and
            ``GPU Memory`` series, and ``time_points``.
        """
        from threading import current_thread

        self.stopped = True
        # Give the in-flight sample a bounded amount of time to finish so the
        # series are not read while one of them is an element short.
        if self.is_alive() and current_thread() is not self:
            self.join(timeout=self.delay + 1)

        sys_info = self.get_sys_info()
        results = {
            "sys_info": sys_info,
            "CPU": self.cpu_usage,
            "Memory": self.mem_usage,
            "GPU": self.gpu_usage,
            "GPU Memory": self.gpu_mem_usage,
            'time_points': self.time_points
        }
        plot_lines(results)

        return results

    def get_sys_info(self) -> dict:
        """Describe the host: ``CPU``, ``RAM`` and, if any GPU was found, a ``GPU`` list."""
        sys_info: dict = {}
        cpu, ram = self.get_cpu_mem_info()
        gpus = self.get_nvidia_info()['gpus']
        sys_info['CPU'] = cpu
        sys_info['RAM'] = ram
        if gpus:
            sys_info['GPU'] = [f"{gpu['gpu_model']} @ {gpu['total']} GB" for gpu in gpus]

        return sys_info

    def get_cpu_mem_info(self):
        """Return ``(cpu, ram)`` description strings for the host."""
        import platform

        n_cores = psutil.cpu_count(logical=False)
        cpu_model = platform.processor()
        mem_total = round(psutil.virtual_memory().total / 1024 / 1024 / 1024, 2)  # GB

        # psutil.cpu_freq() returns None when the frequency cannot be
        # determined; omit the clock speed in that case instead of crashing.
        freq_info = psutil.cpu_freq()
        if freq_info is None:
            cpu = f"{cpu_model} {n_cores}-core"
        else:
            freq = float('{:.2f}'.format(freq_info.current / 1000))  # GHz
            cpu = f"{cpu_model} {n_cores}-core @ {freq} GHz"
        ram = f"{mem_total} GB"

        return cpu, ram

    def get_nvidia_info(self):
        """Query NVML for driver and per-GPU information.

        Returns:
            dict with ``state`` (False if the query failed),
            ``nvidia_version``, ``nvidia_count`` and ``gpus`` (one dict per
            GPU that was read before any failure).
        """
        nvidia_dict = {
            "state": True,
            "nvidia_version": "",
            "nvidia_count": 0,
            "gpus": []
        }
        try:
            nvmlInit()
            nvidia_dict["nvidia_version"] = nvmlSystemGetDriverVersion()
            nvidia_dict["nvidia_count"] = nvmlDeviceGetCount()
            for i in range(nvidia_dict["nvidia_count"]):
                handle = nvmlDeviceGetHandleByIndex(i)
                memory_info = nvmlDeviceGetMemoryInfo(handle)
                utilization = nvmlDeviceGetUtilizationRates(handle)
                gpu_model = nvmlDeviceGetName(handle)
                # NOTE(review): some binding versions appear to return bytes
                # here; decode so the name formats cleanly -- confirm.
                if isinstance(gpu_model, bytes):
                    gpu_model = gpu_model.decode("utf-8", errors="replace")
                gpu = {
                    "gpu_model": gpu_model,
                    "total": round(memory_info.total / 1024 / 1024 / 1024, 2),  # GB
                    "free": round(memory_info.free / 1024 / 1024 / 1024, 2),  # GB
                    "used": round(memory_info.used / 1024 / 1024 / 1024, 2),  # GB
                    "gpu_utilization": utilization.gpu,
                    "memory_utilization": round(memory_info.used * 100 / memory_info.total, 2),
                    # Sensor index 0 is NVML_TEMPERATURE_GPU.
                    "temperature": f"{nvmlDeviceGetTemperature(handle, 0)}℃",
                    "powerStatus": nvmlDeviceGetPowerState(handle)
                }
                nvidia_dict['gpus'].append(gpu)
        except Exception:
            # Any failure (no driver, no device, NVML error) means
            # "GPU information unavailable", not a crash.
            nvidia_dict["state"] = False
        finally:
            try:
                nvmlShutdown()
            except Exception:
                # Shutdown fails when init never succeeded; nothing to release.
                pass
        return nvidia_dict

    def gpu_mem_percent(self):
        """Return used/total memory of the first GPU as a fraction, or 0.0 without a GPU."""
        mem_rate = 0.0
        info = self.get_nvidia_info()
        if len(info['gpus']) > 0:
            used = info['gpus'][0]['used']
            tot = info['gpus'][0]['total']
            mem_rate = used/tot

        return mem_rate
|
oscb/utils.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import matplotlib
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
server_endpoint = "http://c4130-110233.wisc.cloudlab.us:5005/api/"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_dataset_id(benchmarks_id):
    """Split a benchmarks id into ``(dataset_id, task)`` using its task prefix.

    The id is matched against the known prefixes (``CL-``, ``IM-``, ``BI-``,
    ``TJ-``, ``CCC-``, ``MI-``, ``CT-``). The text after the prefix is the
    dataset id and the prefix selects the task name. An id with none of these
    prefixes yields ``(None, None)``.
    """
    prefix_to_task = (
        ("CL-", "Clustering"),
        ("IM-", "Imputation"),
        ("BI-", "Batch Integration"),
        ("TJ-", "Trajectory"),
        ("CCC-", "Cell-Cell Communication"),
        ("MI-", "Multimodal Data Integration"),
        ("CT-", "Cell Type Annotation"),
    )
    for prefix, task_name in prefix_to_task:
        if benchmarks_id.startswith(prefix):
            return benchmarks_id[len(prefix):], task_name

    return None, None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_bar_plot_data(benchmark_data, user_results=None):
    """Arrange benchmark traces (and optionally the user's scores) for ``plot_bars``.

    Args:
        benchmark_data: Sequence of dicts with ``'x'``, ``'y'`` and ``'name'``
            entries; the ``'x'`` of the first one supplies the metric labels.
        user_results: Optional dict holding ``'tool'`` plus one value per
            metric label; appended as the last series when given.

    Returns:
        ``(labels, y_labels, data)``: metric labels, one name per series and
        the matching list of value series.
    """
    labels = benchmark_data[0]['x']

    # Read 'y' before 'name' for each trace, one trace at a time.
    traces = [(entry['y'], entry['name']) for entry in benchmark_data]
    data = [values for values, _ in traces]
    y_labels = [name for _, name in traces]

    if user_results is None:
        return labels, y_labels, data

    y_labels.append(user_results['tool'])
    data.append([user_results[label] for label in labels])
    return labels, y_labels, data
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def plot_bars(task, labels, y_labels, data, tick_step=1, group_gap=0.2, bar_gap=0):
    """Show a grouped bar chart with one group per label and one bar per series.

    Args:
        task: Task name used in the chart title.
        labels: Tick labels, one per group.
        y_labels: Legend names, one per series in ``data``.
        data: Sequence of value series, each with one value per label.
        tick_step: Distance between the start of neighbouring groups.
        group_gap: Empty space left between neighbouring groups.
        bar_gap: Empty space left between bars inside a group.
    """
    group_starts = np.arange(len(labels)) * tick_step
    group_width = tick_step - group_gap
    bar_span = group_width / len(data)
    bar_width = bar_span - bar_gap

    for series_idx, heights in enumerate(data):
        plt.bar(group_starts + series_idx*bar_span, heights, bar_width, label=y_labels[series_idx])

    plt.ylabel('Scores')
    plt.title(f'Benchmarks for {task}')
    # Shift each tick by half the group width minus half a bar span.
    tick_positions = group_starts + (group_width - bar_span) / 2
    plt.xticks(tick_positions, labels)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
    plt.show()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def plot_lines(results):
    """Show the recorded utilization series as line plots over the sample index.

    ``results`` must contain ``'time_points'`` and the ``"CPU"``,
    ``"Memory"``, ``"GPU"`` and ``"GPU Memory"`` series. A series is skipped
    when it sums to zero or when its length differs from the number of time
    points. Series whose name contains ``'GPU'`` are drawn dashed.
    """
    sample_idx = list(range(len(results['time_points'])))
    tracked = ["CPU", "Memory", "GPU", "GPU Memory"]

    # Select the non-zero series first, before anything is drawn.
    active = [(name, results[name]) for name in tracked if sum(results[name]) != 0]

    for name, series in active:
        if len(series) != len(sample_idx):
            continue
        if 'GPU' in name:
            plt.plot(sample_idx, series, label=name, marker='o', linestyle='--')
        else:
            plt.plot(sample_idx, series, label=name, marker='o')

    plt.xlabel('Time Points (s)')
    plt.ylabel('Utilization (%)')
    plt.title('Computing Assessments')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

    plt.show()
|
|
107
|
+
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: oscb
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: OSCB aims to provide automated end-to-end single-cell analyses ML pipelines to simplify and standardize the process of single-cell data formatting, quality control, loading, model development, and model evaluation.
|
|
5
|
+
Home-page: https://github.com/cirisjl/Machine-learning-development-environment-for-single-cell-sequencing-data-analyses
|
|
6
|
+
Author: Lei Jiang
|
|
7
|
+
Author-email: leijiang@missouri.edu
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: single-cell,benchmarks
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Requires-Python: >=3.6, <=3.12
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: leidenalg>=0.8.10
|
|
16
|
+
Requires-Dist: matplotlib>=3.5.1
|
|
17
|
+
Requires-Dist: networkx>=2.6.3
|
|
18
|
+
Requires-Dist: numpy>=1.26.4
|
|
19
|
+
Requires-Dist: pandas>=1.3.5
|
|
20
|
+
Requires-Dist: python_igraph>=0.9.9
|
|
21
|
+
Requires-Dist: python_louvain>=0.16
|
|
22
|
+
Requires-Dist: scanpy
|
|
23
|
+
Requires-Dist: muon
|
|
24
|
+
Requires-Dist: mudata
|
|
25
|
+
Requires-Dist: tqdm
|
|
26
|
+
Requires-Dist: requests
|
|
27
|
+
Requires-Dist: scib
|
|
28
|
+
Requires-Dist: zss
|
|
29
|
+
Requires-Dist: grakel
|
|
30
|
+
Requires-Dist: scikit_learn>=1.0.2
|
|
31
|
+
Requires-Dist: scipy>=1.7.3
|
|
32
|
+
Requires-Dist: umap_learn>=0.5.2
|
|
33
|
+
Dynamic: author
|
|
34
|
+
Dynamic: author-email
|
|
35
|
+
Dynamic: classifier
|
|
36
|
+
Dynamic: description
|
|
37
|
+
Dynamic: description-content-type
|
|
38
|
+
Dynamic: home-page
|
|
39
|
+
Dynamic: keywords
|
|
40
|
+
Dynamic: license
|
|
41
|
+
Dynamic: requires-dist
|
|
42
|
+
Dynamic: requires-python
|
|
43
|
+
Dynamic: summary
|
|
44
|
+
|
|
45
|
+
# Overview
|
|
46
|
+
|
|
47
|
+
--------------------------------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
Machine learning (ML) is transforming single-cell sequencing data analysis; however, the barriers of technology complexity and biology knowledge remain challenging for the involvement of the ML community in single-cell data analysis. We present an ML development environment for single-cell sequencing data analyses with a diverse set of AI-Ready benchmark datasets. A cloud-based platform is built to dynamically scale workflows for collecting, processing, and managing various single-cell sequencing data to make them ML-ready. In addition, benchmarks for each problem formulation and a code-level and web-interface IDE for single-cell analysis method development are provided.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+

|
|
54
|
+
|
|
55
|
+
OSCB aims to provide automated end-to-end single-cell analyses ML pipelines to simplify and standardize the process of single-cell data formatting, quality control, loading, model development, and model evaluation.
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
**Workflows** are developed for collecting, processing, and managing diverse single-cell sequencing data to make them ML-ready and build benchmarks.
|
|
59
|
+
|
|
60
|
+
**IDE** is provided for supporting partial method development.
|
|
61
|
+
|
|
62
|
+
**Assessment utilities** are provided for evaluating results and report generation.
|
|
63
|
+
|
|
64
|
+
This **end-to-end pipeline** transforms traditional “static” machine learning into **continuous learning** on extensive new data.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
By **in-depth fusing models with data**, this platform could ultimately help many single-cell sequencing researchers substantially.
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+

|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
OSCB is an ongoing effort, and we plan to increase our coverage in the future.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
oscb/__init__.py,sha256=4-lyUl644DHa1-kCvev0FnHX5YX7WH-xQ8-aMsEgy_8,498
|
|
2
|
+
oscb/data.py,sha256=AxV-umgeh_MdpOsR99V9Y7SXvEL6IqHby1RsNcp4kc0,4676
|
|
3
|
+
oscb/evaluator.py,sha256=KoU4-l0EFsd4fUrL871MWJ9-cPVjmdgpRg5PApHKs3s,11029
|
|
4
|
+
oscb/utilization.py,sha256=yOiUMnYw1PRnywCo_4Eeyl25Er6GJYZrvaSP4evQZ_4,4513
|
|
5
|
+
oscb/utils.py,sha256=Ezm6cWWfldSO3lDNMOBzDFXWVC2pTXNxTzgMxA8k43g,3361
|
|
6
|
+
oscb/evaluation/__init__.py,sha256=yV92jToZZNb4sJE2GsmXm6YCk1VQZv7nbrivrI8b-LQ,241
|
|
7
|
+
oscb/evaluation/annotation.py,sha256=DY1-GasL5PPHLhsC6lIxE5kkVFQqshkt5yxWgxhAk9Q,1394
|
|
8
|
+
oscb/evaluation/ccc.py,sha256=jKo7A1Vat1XIKYoZZlUDUrGBgTfLJmSl5JCjT1hWFNU,2151
|
|
9
|
+
oscb/evaluation/clustering.py,sha256=xpYvumLnQjc147G3zyzqgEQNeUT9g45d65H6WVTTSNI,864
|
|
10
|
+
oscb/evaluation/imputation.py,sha256=5eSWB2zkJLuqqg3mUET1UxdwHP2ytQsVV6H20HO5eyM,4499
|
|
11
|
+
oscb/evaluation/integration.py,sha256=dD1uX5n11qcKJjAb_MRAZq5owszxoWgwfwa8EElKI1U,3106
|
|
12
|
+
oscb/evaluation/multimodal.py,sha256=vFWVZI04ghaBvmyc6KfNpFwlDIuIfVkOYZLHd_geU2s,1851
|
|
13
|
+
oscb/evaluation/trajectory.py,sha256=qyvsgFAzRs4ydQkkYU80OLIZOOhL-4jggid4maajaik,5231
|
|
14
|
+
oscb-0.1.0.dist-info/METADATA,sha256=Yt6_SVTENmQeu2Ab6qHclON53eLtzYATFGNqDmT5l8g,3259
|
|
15
|
+
oscb-0.1.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
16
|
+
oscb-0.1.0.dist-info/top_level.txt,sha256=-kscy76s5yJOs8EyWQof-Ico6tACMlsgQ7tHMT4sd2Q,5
|
|
17
|
+
oscb-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
oscb
|