SURE-tools 2.1.6__tar.gz → 2.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of SURE-tools might be problematic. Click here for more details.
- {sure_tools-2.1.6 → sure_tools-2.1.8}/PKG-INFO +1 -1
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/__init__.py +2 -1
- sure_tools-2.1.8/SURE/perturb/__init__.py +1 -0
- sure_tools-2.1.8/SURE/perturb/perturb.py +85 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE_tools.egg-info/PKG-INFO +1 -1
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE_tools.egg-info/SOURCES.txt +2 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/setup.py +1 -1
- {sure_tools-2.1.6 → sure_tools-2.1.8}/LICENSE +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/README.md +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/PerturbFlow.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/SURE.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/assembly/__init__.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/assembly/assembly.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/assembly/atlas.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/atac/__init__.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/atac/utils.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/codebook/__init__.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/codebook/codebook.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/flow/__init__.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/flow/flow_stats.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/flow/plot_quiver.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/utils/__init__.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/utils/custom_mlp.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/utils/queue.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE/utils/utils.py +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE_tools.egg-info/dependency_links.txt +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE_tools.egg-info/entry_points.txt +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE_tools.egg-info/requires.txt +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/SURE_tools.egg-info/top_level.txt +0 -0
- {sure_tools-2.1.6 → sure_tools-2.1.8}/setup.cfg +0 -0
|
@@ -7,5 +7,6 @@ from . import SURE
|
|
|
7
7
|
from . import PerturbFlow
|
|
8
8
|
from . import atac
|
|
9
9
|
from . import flow
|
|
10
|
+
from . import perturb
|
|
10
11
|
|
|
11
|
-
__all__ = ['SURE', 'PerturbFlow', 'flow', 'atac', 'utils', 'codebook']
|
|
12
|
+
__all__ = ['SURE', 'PerturbFlow', 'flow', 'perturb', 'atac', 'utils', 'codebook']
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .perturb import LabelMatrix
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import numpy as np
|
|
3
|
+
from itertools import chain
|
|
4
|
+
from joblib import Parallel, delayed
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
class LabelMatrix:
    """Encode separator-delimited multi-label strings as a 0-1 matrix.

    ``fit_transform`` stores the column labels returned by the encoder in
    ``labels_`` so that ``inverse_transform`` can map matrix rows back to
    label strings.
    """

    def __init__(self):
        # Column labels learned by fit_transform; None until it has been called.
        self.labels_ = None

    def fit_transform(self, labels, sep_pattern=r'[;_\-\s]', speedup: Literal['none', 'vectorize', 'parallel'] = 'none'):
        """Encode ``labels`` and remember the resulting column labels.

        Args:
            labels: raw label strings; each may contain several sub-labels.
            sep_pattern: regex matching the separators between sub-labels.
            speedup: which encoder to use: ``'none'`` (plain loop),
                ``'vectorize'`` or ``'parallel'`` (joblib, using that
                function's default ``n_jobs``).

        Returns:
            The 0-1 matrix produced by the selected encoder.

        Raises:
            ValueError: if ``speedup`` is not one of the supported values.
        """
        if speedup == 'none':
            mat, unique = label_to_matrix(labels=labels, sep_pattern=sep_pattern)
        elif speedup == 'vectorize':
            mat, unique = vectorized_label_to_matrix(labels=labels, sep_pattern=sep_pattern)
        elif speedup == 'parallel':
            mat, unique = parallel_label_to_matrix(labels=labels, sep_pattern=sep_pattern)
        else:
            # Previously fell through to `return mat` and raised UnboundLocalError.
            raise ValueError(
                f"speedup must be 'none', 'vectorize' or 'parallel', got {speedup!r}"
            )
        self.labels_ = unique
        return mat

    def inverse_transform(self, matrix):
        """Convert a 0-1 matrix back to ';'-joined label strings.

        Args:
            matrix: iterable of rows whose non-zero positions select entries
                of ``labels_``.

        Returns:
            A list with one label string per row.

        Raises:
            RuntimeError: if ``fit_transform`` has not been called yet.
        """
        if self.labels_ is None:
            raise RuntimeError(
                "LabelMatrix is not fitted; call fit_transform before inverse_transform"
            )
        # The result used to be computed and then discarded (missing return).
        return matrix_to_labels(matrix=matrix, unique_labels=self.labels_)
|
|
22
|
+
|
|
23
|
+
def label_to_matrix(labels, sep_pattern=r'[;_\-\s]'):
    """Convert multi-label strings with mixed separators into a 0-1 matrix.

    Args:
        labels: raw label strings, e.g. ["cat", "dog", "cat;dog", "bird_dog"].
        sep_pattern: regex matching the separators between sub-labels (the
            default matches ``;``, ``_``, ``-`` and whitespace). ``;`` always
            acts as a separator because every match is normalised to ``;``
            before splitting.

    Returns:
        one_hot_matrix: int array of shape (len(labels), len(unique_labels));
            entry [i, j] is 1 when label i contains unique_labels[j].
        unique_labels: sorted list of the unique sub-labels.
    """
    separator = re.compile(sep_pattern)

    # Unify the separators, then split. Empty tokens (from consecutive,
    # leading or trailing separators such as "cat; dog") are dropped so they
    # do not become a spurious '' label column.
    token_rows = [
        [token for token in separator.sub(';', label).split(';') if token]
        for label in labels
    ]

    # Collect all unique sub-labels in sorted order.
    all_unique_labels = sorted(set(chain.from_iterable(token_rows)))
    label_to_idx = {label: i for i, label in enumerate(all_unique_labels)}

    # Fill the 0-1 matrix; every token is guaranteed to be in label_to_idx.
    matrix = np.zeros((len(token_rows), len(all_unique_labels)), dtype=int)
    for i, tokens in enumerate(token_rows):
        for token in tokens:
            matrix[i, label_to_idx[token]] = 1

    return matrix, all_unique_labels
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def vectorized_label_to_matrix(labels, sep_pattern=r'[;_\-\s]'):
    """Build the 0-1 label matrix with a single NumPy index assignment.

    Args:
        labels: raw label strings, each possibly holding several sub-labels.
        sep_pattern: regex matching the separators between sub-labels. Every
            match is normalised to ``;`` before splitting, so ``;`` always
            separates.

    Returns:
        matrix: int array of shape (len(labels), len(unique_labels)).
        unique_labels: sorted list of the unique sub-labels.
    """
    separator = re.compile(sep_pattern)

    # Drop empty tokens so consecutive/trailing separators do not create a
    # spurious '' label.
    token_rows = [
        [token for token in separator.sub(';', label).split(';') if token]
        for label in labels
    ]
    unique_labels = sorted(set(chain.from_iterable(token_rows)))
    column_of = {label: j for j, label in enumerate(unique_labels)}

    # Flatten every (row, column) hit into two index arrays and set them all
    # at once. This avoids the object array whose dimensionality depended on
    # whether rows were ragged, and the per-label Python scan over all rows.
    row_idx = np.array(
        [i for i, tokens in enumerate(token_rows) for _ in tokens], dtype=np.intp
    )
    col_idx = np.array(
        [column_of[token] for tokens in token_rows for token in tokens], dtype=np.intp
    )

    matrix = np.zeros((len(token_rows), len(unique_labels)), dtype=int)
    matrix[row_idx, col_idx] = 1

    return matrix, unique_labels
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def parallel_label_to_matrix(labels, sep_pattern=r'[;_\-\s]', n_jobs=4):
    """Build the 0-1 label matrix, encoding rows through joblib.

    Args:
        labels: raw label strings, each possibly holding several sub-labels.
        sep_pattern: regex matching the separators between sub-labels. Every
            match is normalised to ``;`` before splitting, so ``;`` always
            separates.
        n_jobs: passed to ``joblib.Parallel``.

    Returns:
        matrix: int array of shape (len(labels), len(unique_labels)).
        unique_labels: sorted list of the unique sub-labels.
    """
    separator = re.compile(sep_pattern)

    # Drop empty tokens so consecutive/trailing separators do not create a
    # spurious '' label.
    token_rows = [
        [token for token in separator.sub(';', label).split(';') if token]
        for label in labels
    ]
    unique_labels = sorted(set(chain.from_iterable(token_rows)))

    def process_row(row_labels, unique_labels):
        # Set membership keeps each row linear in the number of unique labels.
        present = set(row_labels)
        return [1 if label in present else 0 for label in unique_labels]

    encoded_rows = Parallel(n_jobs=n_jobs)(
        delayed(process_row)(row, unique_labels) for row in token_rows
    )

    # Reshape so empty input gives (0, 0) like the other encoders, not (0,).
    matrix = np.array(encoded_rows, dtype=int).reshape(
        len(token_rows), len(unique_labels)
    )
    return matrix, unique_labels
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def matrix_to_labels(matrix, unique_labels, sep=';'):
    """Convert a 0-1 matrix back into joined label strings.

    Args:
        matrix: iterable of 1-D rows; the non-zero positions of each row
            select entries of ``unique_labels``.
        unique_labels: sequence of labels, one per matrix column.
        sep: string placed between the labels of one row (default ``';'``).

    Returns:
        A list with one string per row; a row with no non-zero entry gives
        an empty string.
    """
    return [
        sep.join(unique_labels[i] for i in np.nonzero(row)[0])
        for row in matrix
    ]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|