maradoner 0.9__tar.gz → 0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maradoner might be problematic. Click here for more details.
- {maradoner-0.9 → maradoner-0.11}/PKG-INFO +1 -1
- {maradoner-0.9 → maradoner-0.11}/maradoner/__init__.py +1 -1
- {maradoner-0.9 → maradoner-0.11}/maradoner/create.py +2 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner/dataset_filter.py +1 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner/export.py +121 -106
- {maradoner-0.9 → maradoner-0.11}/maradoner/fit.py +277 -74
- {maradoner-0.9 → maradoner-0.11}/maradoner/main.py +6 -3
- maradoner-0.11/maradoner/mara/__init__.py +2 -0
- maradoner-0.11/maradoner/mara/export.py +93 -0
- maradoner-0.11/maradoner/mara/fit.py +336 -0
- maradoner-0.11/maradoner/mara/main.py +115 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner/meta_optimizer.py +17 -15
- {maradoner-0.9 → maradoner-0.11}/maradoner/utils.py +2 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner.egg-info/PKG-INFO +1 -1
- {maradoner-0.9 → maradoner-0.11}/maradoner.egg-info/SOURCES.txt +5 -1
- {maradoner-0.9 → maradoner-0.11}/README.md +0 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner/mara.py +0 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner/select.py +0 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner/synthetic_data.py +0 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner.egg-info/dependency_links.txt +0 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner.egg-info/entry_points.txt +0 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner.egg-info/requires.txt +0 -0
- {maradoner-0.9 → maradoner-0.11}/maradoner.egg-info/top_level.txt +0 -0
- {maradoner-0.9 → maradoner-0.11}/setup.cfg +0 -0
- {maradoner-0.9 → maradoner-0.11}/setup.py +0 -0
|
@@ -88,6 +88,7 @@ def create_project(project_name: str, promoter_expression_filename: str, loading
|
|
|
88
88
|
f'{len(loading_matrix_transformations)}.')
|
|
89
89
|
|
|
90
90
|
logger_print('Filtering promoters of low expression...', verbose)
|
|
91
|
+
print('aaaaa', len(promoter_expression))
|
|
91
92
|
inds, weights = filter_lowexp(promoter_expression, cutoff=promoter_filter_lowexp_cutoff, fit_plot_filename=promoter_filter_plot_filename)
|
|
92
93
|
promoter_expression = promoter_expression.loc[inds]
|
|
93
94
|
proms = promoter_expression.index
|
|
@@ -115,6 +116,7 @@ def create_project(project_name: str, promoter_expression_filename: str, loading
|
|
|
115
116
|
motif_expression = None
|
|
116
117
|
loading_matrices = pd.concat(loading_matrices, axis=1)
|
|
117
118
|
if motif_names is not None:
|
|
119
|
+
motif_names = list(set(motif_names) & set(loading_matrices.columns))
|
|
118
120
|
loading_matrices = loading_matrices[motif_names]
|
|
119
121
|
proms = list(promoter_expression.index)
|
|
120
122
|
sample_names = list(promoter_expression.columns)
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
from pandas import DataFrame as DF
|
|
4
4
|
# add dot
|
|
5
5
|
from .utils import read_init, openers
|
|
6
|
-
from .fit import FOVResult
|
|
6
|
+
from .fit import FOVResult, ActivitiesPrediction, FitResult
|
|
7
7
|
from scipy.stats import norm, chi2, multivariate_normal, Covariance
|
|
8
8
|
from scipy.linalg import eigh, lapack, cholesky, solve
|
|
9
9
|
from statsmodels.stats import multitest
|
|
@@ -13,6 +13,8 @@ from tqdm import tqdm
|
|
|
13
13
|
import multiprocessing as mp
|
|
14
14
|
from functools import partial
|
|
15
15
|
from scipy.integrate import quad
|
|
16
|
+
import math
|
|
17
|
+
import time
|
|
16
18
|
import dill
|
|
17
19
|
import os
|
|
18
20
|
|
|
@@ -58,7 +60,11 @@ def chol_inv(x: np.array):
|
|
|
58
60
|
class Information():
|
|
59
61
|
eps = 1e-10
|
|
60
62
|
|
|
61
|
-
def __init__(self, fim: np.ndarray, slc=None, use_preconditioner=False):
|
|
63
|
+
def __init__(self, fim: np.ndarray, slc=None, use_preconditioner=False, filter_items=None):
|
|
64
|
+
self.filter_items = filter_items
|
|
65
|
+
if filter_items is not None:
|
|
66
|
+
fim = np.delete(fim, filter_items, axis=0)
|
|
67
|
+
fim = np.delete(fim, filter_items, axis=1)
|
|
62
68
|
self.square_root_inv = self._square_root_inv(fim, slc, corr=True)
|
|
63
69
|
precond = 1 / fim.diagonal() ** 0.5
|
|
64
70
|
if not use_preconditioner:
|
|
@@ -69,11 +75,17 @@ class Information():
|
|
|
69
75
|
self.slice = slice(None, None) if slc is None else slc
|
|
70
76
|
|
|
71
77
|
def _inv(self, x: np.ndarray):
|
|
78
|
+
x = np.array(x)
|
|
79
|
+
# t = np.linalg.eigh(x)
|
|
72
80
|
try:
|
|
73
81
|
x = chol_inv(x)
|
|
74
82
|
except:
|
|
75
83
|
print('alarm')
|
|
84
|
+
# print(x.diagonal().min())
|
|
85
|
+
assert np.allclose(x, x.T), x - x.T
|
|
76
86
|
x = np.linalg.eigh(x)
|
|
87
|
+
print(x[0].min(), x[0].max())
|
|
88
|
+
# x = np.linalg.pinv(x, hermitian=True)
|
|
77
89
|
x = x[1] * (1/np.clip(x[0], self.eps, float('inf'))) @ x[1].T
|
|
78
90
|
return x
|
|
79
91
|
|
|
@@ -94,6 +106,8 @@ class Information():
|
|
|
94
106
|
def standardize(self, x: np.ndarray,
|
|
95
107
|
mode: Standardization=Standardization.std,
|
|
96
108
|
return_std=True):
|
|
109
|
+
if self.filter_items is not None:
|
|
110
|
+
x = np.delete(x, self.filter_items)
|
|
97
111
|
x = x / self.precond[self.slice]
|
|
98
112
|
cov = self._inv(self.fim)
|
|
99
113
|
cov = cov[self.slice, self.slice]
|
|
@@ -127,59 +141,6 @@ class Information():
|
|
|
127
141
|
|
|
128
142
|
|
|
129
143
|
|
|
130
|
-
def _corrected_numerical(x, mvn, n: int):
|
|
131
|
-
x = np.abs(x)
|
|
132
|
-
return 1.0 - mvn.cdf(np.repeat(x, n), lower_limit=-x)
|
|
133
|
-
|
|
134
|
-
def _corrected_sampled(x, information: Information, num_samples: int, m: int,
|
|
135
|
-
num_repeats=1):
|
|
136
|
-
x = np.abs(x)
|
|
137
|
-
c = 0
|
|
138
|
-
n = 0
|
|
139
|
-
for _ in range(num_repeats):
|
|
140
|
-
t = np.abs(information.cholesky_transform(norm.rvs(size=(m, num_samples))))
|
|
141
|
-
c += np.any(t > x, axis=0).sum()
|
|
142
|
-
n += num_samples
|
|
143
|
-
return c / n
|
|
144
|
-
|
|
145
|
-
def corrected_z_test(stat: np.ndarray, information: Information,
|
|
146
|
-
numerical: bool, num_samples: int,
|
|
147
|
-
n_jobs: int) -> np.ndarray:
|
|
148
|
-
if numerical:
|
|
149
|
-
raise NotImplementedError
|
|
150
|
-
|
|
151
|
-
num_samples = int(num_samples)
|
|
152
|
-
f = partial(_corrected_sampled, information=information, num_samples=num_samples,
|
|
153
|
-
m=len(stat), num_repeats=1)
|
|
154
|
-
|
|
155
|
-
if n_jobs > 1:
|
|
156
|
-
with mp.Pool(n_jobs) as p:
|
|
157
|
-
corrected = np.array(list(p.map(f , stat)))
|
|
158
|
-
else:
|
|
159
|
-
corrected = np.array(list(map(f, stat)))
|
|
160
|
-
return corrected
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
def weird_test(mu, shift=0, eps=1e-12, std=None):
|
|
164
|
-
if std is None:
|
|
165
|
-
std = np.ones_like(mu)
|
|
166
|
-
|
|
167
|
-
def log_integrand(u, mu, mu_k, std, std_k):
|
|
168
|
-
return norm.logpdf(u, loc=mu_k, scale=std_k) + norm.logcdf((u - mu) / std_k).sum()
|
|
169
|
-
|
|
170
|
-
def integrand(u, mu, mu_k, std, std_k):
|
|
171
|
-
return np.exp(log_integrand(u, mu, mu_k, std, std_k) + shift)
|
|
172
|
-
|
|
173
|
-
argmax = np.zeros_like(mu, dtype=float)
|
|
174
|
-
for k in tqdm(list(range(len(mu)))):
|
|
175
|
-
argmax[k] = quad(lambda x: integrand(x, np.delete(mu, k), mu[k], np.delete(std, k), std[k]),
|
|
176
|
-
-np.inf, np.inf, epsabs=eps, epsrel=eps)[0]
|
|
177
|
-
result = np.zeros_like(argmax)
|
|
178
|
-
inds = np.arange(len(result), dtype=int)
|
|
179
|
-
return argmax
|
|
180
|
-
for k in range(len(mu)):
|
|
181
|
-
result[k] = argmax[np.delete(inds, k)].sum()
|
|
182
|
-
return result * np.exp(-shift)
|
|
183
144
|
|
|
184
145
|
def export_fov(fovs: tuple[FOVResult], folder: str,
|
|
185
146
|
promoter_names: list[str], sample_names: list[str]):
|
|
@@ -195,14 +156,69 @@ def export_fov(fovs: tuple[FOVResult], folder: str,
|
|
|
195
156
|
samples = np.concatenate(samples, axis=-1)
|
|
196
157
|
DF(samples, index=sample_names, columns=cols).to_csv(os.path.join(folder, 'samples.tsv'), sep='\t')
|
|
197
158
|
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
|
|
162
|
+
B: np.ndarray, corr_stat=False, map_cov=False):
|
|
163
|
+
precs = list()
|
|
164
|
+
istds = list()
|
|
165
|
+
covs = list()
|
|
166
|
+
mean = 0.0
|
|
167
|
+
bad_inds = np.zeros(activities.U.shape[0], dtype=bool)
|
|
168
|
+
# for cov, U, nu in zip(activities.cov(), activities.U.T, fit.motif_variance.group):
|
|
169
|
+
# mot = fit.motif_variance.motif
|
|
170
|
+
# mot = np.delete(mot, activities.filtered_motifs)
|
|
171
|
+
# ind = mot * nu < cov.diagonal() + 1e-9
|
|
172
|
+
# bad_inds[ind] = True
|
|
173
|
+
# mot = fit.motif_variance.motif
|
|
174
|
+
# mot = np.delete(mot, activities.filtered_motifs)[~bad_inds]
|
|
175
|
+
if map_cov:
|
|
176
|
+
# fit.motif_variance.m
|
|
177
|
+
BTB = B.T @ B
|
|
178
|
+
BTB_s = BTB * fit.motif_variance.motif ** 0.5
|
|
179
|
+
BTB_s = BTB_s @ BTB_s.T
|
|
180
|
+
for cov, U, sigma, n, nu in zip(activities.cov(), activities.U.T,
|
|
181
|
+
activities._cov[-2],
|
|
182
|
+
fit.error_variance.variance, fit.motif_variance.group):
|
|
183
|
+
# cov = cov[~bad_inds, ~bad_inds]
|
|
184
|
+
# cov = cov[..., ~bad_inds]
|
|
185
|
+
# cov = cov[~bad_inds]
|
|
186
|
+
if map_cov:
|
|
187
|
+
D = BTB_s * nu + np.identity(len(BTB)) * sigma
|
|
188
|
+
cov = cov @ D @ cov.T * n / sigma ** 2
|
|
189
|
+
covs.append(cov)
|
|
190
|
+
# U = U[~bad_inds]
|
|
191
|
+
# prec = np.linalg.inv(np.diag(mot * nu) - cov)
|
|
192
|
+
prec = np.linalg.inv(cov)
|
|
193
|
+
mean += prec @ U
|
|
194
|
+
precs.append(prec)
|
|
195
|
+
total_prec = sum(precs)
|
|
196
|
+
total_cov = np.linalg.inv(total_prec)
|
|
197
|
+
mean = total_cov @ mean
|
|
198
|
+
stats = activities.U[~bad_inds] - mean.reshape(-1, 1)
|
|
199
|
+
# if corr_stat:
|
|
200
|
+
# istd = 1 / total_cov.diagonal() ** 0.5
|
|
201
|
+
# total_cor = istd.reshape(-1, 1) * total_cov * istd
|
|
202
|
+
# stats = total_cor @ stats
|
|
203
|
+
# total_cov = total_cor @ total_cov @ total_cor
|
|
204
|
+
# stats = (1 / total_cov.diagonal().reshape(-1, 1)) ** 0.5 * stats
|
|
205
|
+
istds = [1 / c.diagonal() ** 0.5 for c in covs]
|
|
206
|
+
istds = np.array(istds).T
|
|
207
|
+
stats = stats * istds
|
|
208
|
+
stats = stats ** 2
|
|
209
|
+
stats = stats.sum(axis=-1)
|
|
210
|
+
pvalues = chi2.sf(stats, len(precs) - 1)
|
|
211
|
+
fdr = multitest.multipletests(pvalues, alpha=0.05, method='fdr_by')[1]
|
|
212
|
+
return stats, pvalues, fdr, bad_inds
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
|
|
198
216
|
|
|
199
217
|
|
|
200
218
|
def export_results(project_name: str, output_folder: str,
|
|
201
219
|
std_mode: Standardization,
|
|
202
220
|
anova_mode: ANOVAType=ANOVAType.positive,
|
|
203
|
-
|
|
204
|
-
corrected_numerical=False,
|
|
205
|
-
corrected_num_samples=1e5,
|
|
221
|
+
weighted_zscore=False,
|
|
206
222
|
alpha=0.05,
|
|
207
223
|
n_jobs=6):
|
|
208
224
|
|
|
@@ -221,12 +237,12 @@ def export_results(project_name: str, output_folder: str,
|
|
|
221
237
|
prom_names = data.promoter_names
|
|
222
238
|
# del data
|
|
223
239
|
with openers[fmt](f'{project_name}.fit.{fmt}', 'rb') as f:
|
|
224
|
-
fit = dill.load(f)
|
|
240
|
+
fit: FitResult = dill.load(f)
|
|
225
241
|
if fit.promoter_inds_to_drop:
|
|
226
242
|
prom_names = np.delete(prom_names, fit.promoter_inds_to_drop)
|
|
227
243
|
group_names = fit.group_names
|
|
228
244
|
with openers[fmt](f'{project_name}.predict.{fmt}', 'rb') as f:
|
|
229
|
-
act = dill.load(f)
|
|
245
|
+
act: ActivitiesPrediction = dill.load(f)
|
|
230
246
|
if act.filtered_motifs is not None:
|
|
231
247
|
motif_names_filtered = np.delete(motif_names, act.filtered_motifs)
|
|
232
248
|
else:
|
|
@@ -240,13 +256,13 @@ def export_results(project_name: str, output_folder: str,
|
|
|
240
256
|
mode=Standardization.std)
|
|
241
257
|
|
|
242
258
|
motif_variance = fit.motif_variance.motif
|
|
243
|
-
motif_variance_fim = Information(fit.motif_variance.fim, slice(None, len(
|
|
259
|
+
motif_variance_fim = Information(fit.motif_variance.fim, slice(None, len(motif_names_filtered)),
|
|
260
|
+
filter_items=act.filtered_motifs)
|
|
244
261
|
motif_variance_stat, motif_variance_std = motif_variance_fim.standardize(motif_variance,
|
|
245
262
|
mode=Standardization.std)
|
|
246
263
|
|
|
247
264
|
motif_group_variance = fit.motif_variance.group
|
|
248
265
|
excluded_motif_group = fit.motif_variance.fixed_group
|
|
249
|
-
|
|
250
266
|
motif_group_variance_fim = Information(fit.motif_variance.fim, slice(len(motif_names), None))
|
|
251
267
|
motif_group_variance_std = motif_group_variance_fim.covariance().diagonal() ** 0.5
|
|
252
268
|
|
|
@@ -254,7 +270,7 @@ def export_results(project_name: str, output_folder: str,
|
|
|
254
270
|
motif_mean = fit.motif_mean.mean.flatten()
|
|
255
271
|
motif_mean_fim = Information(fit.motif_mean.fim)
|
|
256
272
|
motif_mean_stat, motif_mean_std = motif_mean_fim.standardize(motif_mean,
|
|
257
|
-
|
|
273
|
+
mode=Standardization.std)
|
|
258
274
|
|
|
259
275
|
promoter_mean = fit.promoter_mean.mean.flatten()
|
|
260
276
|
# del fit
|
|
@@ -264,6 +280,7 @@ def export_results(project_name: str, output_folder: str,
|
|
|
264
280
|
os.makedirs(folder, exist_ok=True)
|
|
265
281
|
if excluded_motif_group is not None:
|
|
266
282
|
motif_group_variance_std = np.insert(motif_group_variance_std, excluded_motif_group, np.nan)
|
|
283
|
+
print(error_variance.shape, error_variance_std.shape, motif_group_variance.shape, motif_group_variance_std.shape)
|
|
267
284
|
DF(np.array([error_variance, error_variance_std, motif_group_variance, motif_group_variance_std]).T,
|
|
268
285
|
index=group_names,
|
|
269
286
|
columns=['sigma', 'sigma_std', 'nu', 'nu_std']).to_csv(os.path.join(folder, 'group_variances.tsv'),
|
|
@@ -284,67 +301,40 @@ def export_results(project_name: str, output_folder: str,
|
|
|
284
301
|
sep='\t')
|
|
285
302
|
DF(motif_mean_fim.correlation(), index=motif_names, columns=motif_names).to_csv(os.path.join(folder, 'motif_means.tsv'),
|
|
286
303
|
sep='\t')
|
|
287
|
-
DF(motif_variance_fim.correlation(), index=
|
|
304
|
+
DF(motif_variance_fim.correlation(), index=motif_names_filtered, columns=motif_names_filtered).to_csv(os.path.join(folder, 'motif_variances.tsv'),
|
|
288
305
|
sep='\t')
|
|
289
306
|
_group_names = group_names
|
|
290
307
|
if excluded_motif_group is not None:
|
|
291
308
|
_group_names = np.delete(_group_names, excluded_motif_group)
|
|
292
309
|
DF(motif_group_variance_fim.correlation(), index=_group_names, columns=_group_names).to_csv(os.path.join(folder, 'motif_group_variances.tsv'),
|
|
293
310
|
sep='\t')
|
|
294
|
-
|
|
295
|
-
# sep='\t')
|
|
311
|
+
|
|
296
312
|
DF(error_variance_fim.correlation(), index=group_names, columns=group_names).to_csv(os.path.join(folder, 'error_variances.tsv'),
|
|
297
313
|
sep='\t')
|
|
298
314
|
|
|
299
315
|
|
|
300
316
|
folder = output_folder
|
|
301
|
-
U_raw, U_decor, stds = act.U, act.U_decor, act.stds
|
|
302
317
|
|
|
303
|
-
if std_mode == Standardization.full:
|
|
304
|
-
U = U_decor
|
|
305
|
-
else:
|
|
306
|
-
U = U_raw / stds
|
|
307
|
-
folder = os.path.join(output_folder, 'activities')
|
|
308
|
-
os.makedirs(folder, exist_ok=True)
|
|
309
|
-
DF(U_raw, index=motif_names_filtered, columns=group_names).to_csv(os.path.join(folder, 'activity_raw.tsv'), sep='\t')
|
|
310
|
-
DF(U, index=motif_names_filtered, columns=group_names).to_csv(os.path.join(folder, 'activity.tsv'), sep='\t')
|
|
311
|
-
DF(stds, index=motif_names_filtered, columns=group_names).to_csv(os.path.join(folder, 'activity_stds.tsv'), sep='\t')
|
|
312
|
-
|
|
313
318
|
folder = os.path.join(output_folder, 'tests', 'prediction_based')
|
|
314
319
|
os.makedirs(folder, exist_ok=True)
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
z_test.to_csv(os.path.join(folder, 'z_test.tsv'), sep='\t')
|
|
320
|
-
z_test = DF(z_test_fdr, index=motif_names_filtered, columns=group_names)
|
|
321
|
-
z_test.to_csv(os.path.join(folder, 'z_test_fdr.tsv'), sep='\t')
|
|
322
|
-
stat = (U ** 2).sum(axis=1)
|
|
323
|
-
anova = chi2.sf(stat, df=U.shape[1])
|
|
324
|
-
fdrs = multitest.multipletests(anova, alpha=0.05, method='fdr_bh')[1]
|
|
325
|
-
anova = DF([stat, anova, fdrs], columns=motif_names_filtered, index=['stat', 'p-value', 'FDR']).T
|
|
320
|
+
|
|
321
|
+
stat, pvalue, fdr, bad_inds = posterior_anova(act, fit, B=data.B)
|
|
322
|
+
motif_names_filtered = np.array(motif_names_filtered)[~bad_inds]
|
|
323
|
+
anova = DF([stat, pvalue, fdr], columns=motif_names_filtered, index=['stat', 'p-value', 'FDR']).T
|
|
326
324
|
anova.to_csv(os.path.join(folder, 'anova.tsv'), sep='\t')
|
|
327
|
-
|
|
328
|
-
off_test = -np.expm1(U.shape[1]*chi2.logsf(stat, df=1))
|
|
329
|
-
fdrs = multitest.multipletests(off_test, alpha=0.05, method='fdr_bh')[1]
|
|
330
|
-
off_test = DF([stat, off_test, fdrs], columns=motif_names_filtered, index=['stat', 'p-value', 'FDR']).T
|
|
331
|
-
off_test.to_csv(os.path.join(folder, 'off_test.tsv'), sep='\t')
|
|
332
|
-
|
|
325
|
+
|
|
333
326
|
folder = os.path.join(output_folder, 'tests', 'asymptotics_based')
|
|
334
327
|
os.makedirs(folder, exist_ok=True)
|
|
335
328
|
|
|
336
329
|
anova_ass = motif_variance_stat
|
|
337
330
|
pval = calc_z_test(anova_ass)
|
|
338
|
-
|
|
339
|
-
# pval = weird_test(anova_ass, std=motif_variance_std)
|
|
331
|
+
|
|
340
332
|
fdrs = multitest.multipletests(pval, alpha=0.05, method='fdr_bh')[1]
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
else:
|
|
347
|
-
anova_ass = DF(np.array([anova_ass, pval, fdrs]).T, index=motif_names, columns=['stat', 'p-value', 'FDR'])
|
|
333
|
+
# lrt = 2 * fit.motif_variance.logratios
|
|
334
|
+
# lrt_pvalues = chi2.sf(lrt, 1)
|
|
335
|
+
# lrt_fdr = multitest.multipletests(lrt_pvalues, alpha=0.05, method='fdr_bh')[1]
|
|
336
|
+
anova_ass = DF(np.array([anova_ass, pval, fdrs]).T, index=motif_names_filtered,
|
|
337
|
+
columns=['stat', 'p-value', 'FDR'])
|
|
348
338
|
anova_ass.to_csv(os.path.join(folder, 'anova.tsv'), sep='\t')
|
|
349
339
|
|
|
350
340
|
sign = motif_mean.flatten() / motif_mean_std
|
|
@@ -362,6 +352,28 @@ def export_results(project_name: str, output_folder: str,
|
|
|
362
352
|
index=motif_names)
|
|
363
353
|
sign_ass.to_csv(os.path.join(folder, 'sign.tsv'), sep='\t')
|
|
364
354
|
|
|
355
|
+
folder = os.path.join(output_folder, 'activities')
|
|
356
|
+
os.makedirs(folder, exist_ok=True)
|
|
357
|
+
U = list()
|
|
358
|
+
stds = list()
|
|
359
|
+
for u, cov in zip(act.U.T, act.cov()):
|
|
360
|
+
std = cov.diagonal() ** 0.5
|
|
361
|
+
u = u / std
|
|
362
|
+
U.append(u)
|
|
363
|
+
stds.append(std)
|
|
364
|
+
U = np.array(U).T
|
|
365
|
+
DF(U, index=motif_names_filtered, columns=group_names).to_csv(os.path.join(folder, 'activity.tsv'), sep='\t')
|
|
366
|
+
U = U ** 2
|
|
367
|
+
if weighted_zscore:
|
|
368
|
+
U_total = U.sum(axis=1, keepdims=True) / (1 / np.array(stds).T ** 2).sum(axis=1, keepdims=True)
|
|
369
|
+
else:
|
|
370
|
+
U_total = U.mean(axis=1, keepdims=True)
|
|
371
|
+
|
|
372
|
+
U = np.hstack((U_total, U)) ** 0.5
|
|
373
|
+
DF(U, index=motif_names_filtered,
|
|
374
|
+
columns=['overall'] + list(group_names)).to_csv(os.path.join(folder, 'z_score.tsv'), sep='\t')
|
|
375
|
+
DF(act.U_raw, index=motif_names_filtered, columns=data.sample_names).to_csv(os.path.join(folder, 'activity_raw.tsv'), sep='\t')
|
|
376
|
+
|
|
365
377
|
if os.path.isfile(f'{project_name}.fov.{fmt}'):
|
|
366
378
|
with open(f'{project_name}.fov.{fmt}', 'rb') as f:
|
|
367
379
|
fov = dill.load(f)
|
|
@@ -376,7 +388,10 @@ def export_results(project_name: str, output_folder: str,
|
|
|
376
388
|
name = data.group_names[i]
|
|
377
389
|
for k, j in enumerate(inds):
|
|
378
390
|
sample_names[j] = f'{name}_{k+1}'
|
|
379
|
-
|
|
391
|
+
if fit.promoter_inds_to_drop:
|
|
392
|
+
promoter_names_train = np.delete(data.promoter_names, fit.promoter_inds_to_drop)
|
|
393
|
+
else:
|
|
394
|
+
promoter_names_train = data.promoter_names
|
|
380
395
|
export_fov(train, os.path.join(folder, 'train'), promoter_names=promoter_names_train,
|
|
381
396
|
sample_names=sample_names)
|
|
382
397
|
if test is not None:
|
|
@@ -386,5 +401,5 @@ def export_results(project_name: str, output_folder: str,
|
|
|
386
401
|
|
|
387
402
|
|
|
388
403
|
|
|
389
|
-
return {'z-test': z_test, 'anova': anova, 'off_test': off_test,
|
|
390
|
-
|
|
404
|
+
# return {'z-test': z_test, 'anova': anova, 'off_test': off_test,
|
|
405
|
+
# 'anova_ass': anova_ass, 'sign_ass': sign_ass}
|