maradoner 0.9__tar.gz → 0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maradoner might be problematic. Click here for more details.
- {maradoner-0.9 → maradoner-0.10}/PKG-INFO +1 -1
- {maradoner-0.9 → maradoner-0.10}/maradoner/__init__.py +1 -1
- {maradoner-0.9 → maradoner-0.10}/maradoner/export.py +91 -103
- {maradoner-0.9 → maradoner-0.10}/maradoner/fit.py +150 -59
- {maradoner-0.9 → maradoner-0.10}/maradoner/main.py +4 -1
- maradoner-0.10/maradoner/mara/__init__.py +2 -0
- maradoner-0.10/maradoner/mara/export.py +94 -0
- maradoner-0.10/maradoner/mara/fit.py +319 -0
- maradoner-0.10/maradoner/mara/main.py +110 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner/meta_optimizer.py +17 -15
- {maradoner-0.9 → maradoner-0.10}/maradoner/utils.py +2 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner.egg-info/PKG-INFO +1 -1
- {maradoner-0.9 → maradoner-0.10}/maradoner.egg-info/SOURCES.txt +5 -1
- {maradoner-0.9 → maradoner-0.10}/README.md +0 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner/create.py +0 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner/dataset_filter.py +0 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner/mara.py +0 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner/select.py +0 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner/synthetic_data.py +0 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner.egg-info/dependency_links.txt +0 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner.egg-info/entry_points.txt +0 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner.egg-info/requires.txt +0 -0
- {maradoner-0.9 → maradoner-0.10}/maradoner.egg-info/top_level.txt +0 -0
- {maradoner-0.9 → maradoner-0.10}/setup.cfg +0 -0
- {maradoner-0.9 → maradoner-0.10}/setup.py +0 -0
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
from pandas import DataFrame as DF
|
|
4
4
|
# add dot
|
|
5
5
|
from .utils import read_init, openers
|
|
6
|
-
from .fit import FOVResult
|
|
6
|
+
from .fit import FOVResult, ActivitiesPrediction, FitResult
|
|
7
7
|
from scipy.stats import norm, chi2, multivariate_normal, Covariance
|
|
8
8
|
from scipy.linalg import eigh, lapack, cholesky, solve
|
|
9
9
|
from statsmodels.stats import multitest
|
|
@@ -13,6 +13,8 @@ from tqdm import tqdm
|
|
|
13
13
|
import multiprocessing as mp
|
|
14
14
|
from functools import partial
|
|
15
15
|
from scipy.integrate import quad
|
|
16
|
+
import math
|
|
17
|
+
import time
|
|
16
18
|
import dill
|
|
17
19
|
import os
|
|
18
20
|
|
|
@@ -58,7 +60,11 @@ def chol_inv(x: np.array):
|
|
|
58
60
|
class Information():
|
|
59
61
|
eps = 1e-10
|
|
60
62
|
|
|
61
|
-
def __init__(self, fim: np.ndarray, slc=None, use_preconditioner=False):
|
|
63
|
+
def __init__(self, fim: np.ndarray, slc=None, use_preconditioner=False, filter_items=None):
|
|
64
|
+
self.filter_items = filter_items
|
|
65
|
+
if filter_items is not None:
|
|
66
|
+
fim = np.delete(fim, filter_items, axis=0)
|
|
67
|
+
fim = np.delete(fim, filter_items, axis=1)
|
|
62
68
|
self.square_root_inv = self._square_root_inv(fim, slc, corr=True)
|
|
63
69
|
precond = 1 / fim.diagonal() ** 0.5
|
|
64
70
|
if not use_preconditioner:
|
|
@@ -69,11 +75,17 @@ class Information():
|
|
|
69
75
|
self.slice = slice(None, None) if slc is None else slc
|
|
70
76
|
|
|
71
77
|
def _inv(self, x: np.ndarray):
    """Invert a symmetric matrix, with an eigenvalue-clipped fallback.

    The fast path delegates to ``chol_inv``. If that call raises, the
    matrix is inverted through ``np.linalg.eigh`` instead, with every
    eigenvalue clipped from below at ``self.eps`` before it is
    reciprocated, so the fallback returns a regularised inverse rather
    than failing on a non-positive-definite input.

    Parameters
    ----------
    x : np.ndarray
        Square matrix to invert. It is copied with ``np.array`` first.

    Returns
    -------
    np.ndarray
        Result of ``chol_inv`` or, on the fallback path,
        ``V @ diag(1 / clip(w, eps, inf)) @ V.T`` for the
        eigendecomposition ``(w, V)`` of ``x``.

    Raises
    ------
    AssertionError
        If the fallback path is reached and ``x`` is not symmetric
        (checked with ``np.allclose``).

    Warns
    -----
    RuntimeWarning
        Whenever the fallback path is taken; the message carries the
        original failure and the eigenvalue range.
    """
    import warnings

    x = np.array(x)
    try:
        return chol_inv(x)
    except Exception as exc:
        # `Exception` rather than a bare `except`, so that
        # KeyboardInterrupt / SystemExit still propagate.
        failure = exc

    # eigh silently assumes a symmetric input, so a violation would
    # produce a meaningless result instead of an error.
    assert np.allclose(x, x.T), x - x.T
    eigvals, eigvecs = np.linalg.eigh(x)
    warnings.warn(
        f'chol_inv failed ({failure!r}); falling back to an eigendecomposition '
        f'with eigenvalues in [{eigvals.min()}, {eigvals.max()}] '
        f'clipped from below at {self.eps}',
        RuntimeWarning,
        stacklevel=2,
    )
    inv_eigvals = 1 / np.clip(eigvals, self.eps, float('inf'))
    # Scaling the columns of V by 1/w and multiplying by V.T is V diag(1/w) V.T.
    return (eigvecs * inv_eigvals) @ eigvecs.T
|
|
79
91
|
|
|
@@ -94,6 +106,8 @@ class Information():
|
|
|
94
106
|
def standardize(self, x: np.ndarray,
|
|
95
107
|
mode: Standardization=Standardization.std,
|
|
96
108
|
return_std=True):
|
|
109
|
+
if self.filter_items is not None:
|
|
110
|
+
x = np.delete(x, self.filter_items)
|
|
97
111
|
x = x / self.precond[self.slice]
|
|
98
112
|
cov = self._inv(self.fim)
|
|
99
113
|
cov = cov[self.slice, self.slice]
|
|
@@ -127,59 +141,6 @@ class Information():
|
|
|
127
141
|
|
|
128
142
|
|
|
129
143
|
|
|
130
|
-
def _corrected_numerical(x, mvn, n: int):
|
|
131
|
-
x = np.abs(x)
|
|
132
|
-
return 1.0 - mvn.cdf(np.repeat(x, n), lower_limit=-x)
|
|
133
|
-
|
|
134
|
-
def _corrected_sampled(x, information: Information, num_samples: int, m: int,
|
|
135
|
-
num_repeats=1):
|
|
136
|
-
x = np.abs(x)
|
|
137
|
-
c = 0
|
|
138
|
-
n = 0
|
|
139
|
-
for _ in range(num_repeats):
|
|
140
|
-
t = np.abs(information.cholesky_transform(norm.rvs(size=(m, num_samples))))
|
|
141
|
-
c += np.any(t > x, axis=0).sum()
|
|
142
|
-
n += num_samples
|
|
143
|
-
return c / n
|
|
144
|
-
|
|
145
|
-
def corrected_z_test(stat: np.ndarray, information: Information,
                     numerical: bool, num_samples: int,
                     n_jobs: int) -> np.ndarray:
    """Apply ``_corrected_sampled`` to every entry of ``stat``.

    Parameters
    ----------
    stat : np.ndarray
        Statistics to correct; its length is passed on as ``m``.
    information : Information
        Forwarded unchanged to ``_corrected_sampled``.
    numerical : bool
        Requests the numerical variant, which is not implemented.
    num_samples : int
        Sample count per statistic; coerced with ``int``.
    n_jobs : int
        With a value above 1 the work is spread over a
        ``multiprocessing.Pool`` of that size; otherwise it runs serially.

    Returns
    -------
    np.ndarray
        One corrected value per entry of ``stat``.

    Raises
    ------
    NotImplementedError
        If ``numerical`` is truthy.
    """
    if numerical:
        raise NotImplementedError

    # A partial of a module-level function stays picklable for the pool.
    sampler = partial(_corrected_sampled, information=information,
                      num_samples=int(num_samples), m=len(stat), num_repeats=1)

    if not n_jobs > 1:
        return np.array([sampler(value) for value in stat])
    with mp.Pool(n_jobs) as pool:
        return np.array(list(pool.map(sampler, stat)))
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
def weird_test(mu, shift=0, eps=1e-12, std=None):
    """Integrate each component's normal density against the others' normal CDFs.

    For every index ``k`` the value

        exp(shift) * integral of  pdf(u; mu[k], std[k])
                                  * prod_{j != k} cdf((u - mu[j]) / std[j])  du

    is computed with ``scipy.integrate.quad`` over the whole real line. For
    independent normals with these means and standard deviations, the
    integral is the probability that component ``k`` is the largest.

    Parameters
    ----------
    mu : np.ndarray
        Component means.
    shift : float, optional
        Added to the log-integrand before exponentiation. The factor
        ``exp(shift)`` is NOT removed from the returned values.
    eps : float, optional
        Passed to ``quad`` as both ``epsabs`` and ``epsrel``.
    std : np.ndarray, optional
        Component standard deviations; defaults to ``np.ones_like(mu)``.

    Returns
    -------
    np.ndarray
        Float array with one integral per component of ``mu``.
    """
    if std is None:
        std = np.ones_like(mu)

    def log_integrand(u, mu_rest, mu_k, std_rest, std_k):
        # Each remaining component is standardised by its OWN std; dividing
        # by std_k would ignore `std_rest` and is only right for equal stds.
        return (norm.logpdf(u, loc=mu_k, scale=std_k)
                + norm.logcdf((u - mu_rest) / std_rest).sum())

    argmax = np.zeros_like(mu, dtype=float)
    for k in tqdm(range(len(mu))):
        # Hoisted out of the integrand: these do not depend on `u`, and quad
        # evaluates the integrand many times.
        mu_rest = np.delete(mu, k)
        std_rest = np.delete(std, k)
        mu_k, std_k = mu[k], std[k]
        argmax[k] = quad(
            lambda u: np.exp(log_integrand(u, mu_rest, mu_k, std_rest, std_k) + shift),
            -np.inf, np.inf, epsabs=eps, epsrel=eps)[0]
    return argmax
|
|
183
144
|
|
|
184
145
|
def export_fov(fovs: tuple[FOVResult], folder: str,
|
|
185
146
|
promoter_names: list[str], sample_names: list[str]):
|
|
@@ -195,6 +156,55 @@ def export_fov(fovs: tuple[FOVResult], folder: str,
|
|
|
195
156
|
samples = np.concatenate(samples, axis=-1)
|
|
196
157
|
DF(samples, index=sample_names, columns=cols).to_csv(os.path.join(folder, 'samples.tsv'), sep='\t')
|
|
197
158
|
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
                    B: np.ndarray, corr_stat=False):
    """Test, per motif, whether activities differ across groups.

    Every covariance matrix yielded by ``activities.cov()`` is inverted to a
    precision matrix. The precision-weighted mean of the columns of
    ``activities.U`` serves as the common activity; each column's deviation
    from it is divided by the square root of the diagonal of that column's
    own covariance, squared, and summed over columns. The sums are compared
    with a chi-square distribution with ``(number of columns used) - 1``
    degrees of freedom.

    Parameters
    ----------
    activities : ActivitiesPrediction
        Provides ``U`` (rows are indexed like the covariance matrices,
        columns are iterated alongside them) and ``cov()``.
    fit : FitResult
        Only ``fit.motif_variance.group`` is read, and only its length
        matters: it bounds the number of columns iterated.
    B : np.ndarray
        Unused; accepted to keep the call signature stable.
    corr_stat : bool, optional
        Unused; accepted to keep the call signature stable.

    Returns
    -------
    stats : np.ndarray
        Test statistic per row of ``activities.U``.
    pvalues : np.ndarray
        ``chi2.sf`` of ``stats``.
    fdr : np.ndarray
        Corrected p-values from ``multipletests`` with ``method='fdr_by'``.
    bad_inds : np.ndarray
        Boolean mask of excluded rows. It is created all-False and never
        set here, so no row is currently excluded.
    """
    bad_inds = np.zeros(activities.U.shape[0], dtype=bool)
    keep = ~bad_inds

    precs = []
    covs = []
    weighted_sum = 0.0
    # `fit.motif_variance.group` contributes nothing but its length to the zip.
    for cov, U, _nu in zip(activities.cov(), activities.U.T, fit.motif_variance.group):
        cov = cov[..., keep]
        cov = cov[keep]
        covs.append(cov)
        prec = np.linalg.inv(cov)
        weighted_sum += prec @ U[keep]
        precs.append(prec)

    # Precision-weighted mean: (sum of P_g)^-1 @ (sum of P_g @ U_g).
    total_cov = np.linalg.inv(sum(precs))
    mean = total_cov @ weighted_sum

    deviations = activities.U[keep] - mean.reshape(-1, 1)
    # Column g is scaled by the marginal std taken from the diagonal of cov_g.
    istds = np.array([1 / c.diagonal() ** 0.5 for c in covs]).T
    stats = ((deviations * istds) ** 2).sum(axis=-1)

    pvalues = chi2.sf(stats, len(precs) - 1)
    fdr = multitest.multipletests(pvalues, alpha=0.05, method='fdr_by')[1]
    return stats, pvalues, fdr, bad_inds
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
|
|
198
208
|
|
|
199
209
|
|
|
200
210
|
def export_results(project_name: str, output_folder: str,
|
|
@@ -221,12 +231,12 @@ def export_results(project_name: str, output_folder: str,
|
|
|
221
231
|
prom_names = data.promoter_names
|
|
222
232
|
# del data
|
|
223
233
|
with openers[fmt](f'{project_name}.fit.{fmt}', 'rb') as f:
|
|
224
|
-
fit = dill.load(f)
|
|
234
|
+
fit: FitResult = dill.load(f)
|
|
225
235
|
if fit.promoter_inds_to_drop:
|
|
226
236
|
prom_names = np.delete(prom_names, fit.promoter_inds_to_drop)
|
|
227
237
|
group_names = fit.group_names
|
|
228
238
|
with openers[fmt](f'{project_name}.predict.{fmt}', 'rb') as f:
|
|
229
|
-
act = dill.load(f)
|
|
239
|
+
act: ActivitiesPrediction = dill.load(f)
|
|
230
240
|
if act.filtered_motifs is not None:
|
|
231
241
|
motif_names_filtered = np.delete(motif_names, act.filtered_motifs)
|
|
232
242
|
else:
|
|
@@ -240,13 +250,13 @@ def export_results(project_name: str, output_folder: str,
|
|
|
240
250
|
mode=Standardization.std)
|
|
241
251
|
|
|
242
252
|
motif_variance = fit.motif_variance.motif
|
|
243
|
-
motif_variance_fim = Information(fit.motif_variance.fim, slice(None, len(
|
|
253
|
+
motif_variance_fim = Information(fit.motif_variance.fim, slice(None, len(motif_names_filtered)),
|
|
254
|
+
filter_items=act.filtered_motifs)
|
|
244
255
|
motif_variance_stat, motif_variance_std = motif_variance_fim.standardize(motif_variance,
|
|
245
256
|
mode=Standardization.std)
|
|
246
257
|
|
|
247
258
|
motif_group_variance = fit.motif_variance.group
|
|
248
259
|
excluded_motif_group = fit.motif_variance.fixed_group
|
|
249
|
-
|
|
250
260
|
motif_group_variance_fim = Information(fit.motif_variance.fim, slice(len(motif_names), None))
|
|
251
261
|
motif_group_variance_std = motif_group_variance_fim.covariance().diagonal() ** 0.5
|
|
252
262
|
|
|
@@ -254,7 +264,7 @@ def export_results(project_name: str, output_folder: str,
|
|
|
254
264
|
motif_mean = fit.motif_mean.mean.flatten()
|
|
255
265
|
motif_mean_fim = Information(fit.motif_mean.fim)
|
|
256
266
|
motif_mean_stat, motif_mean_std = motif_mean_fim.standardize(motif_mean,
|
|
257
|
-
|
|
267
|
+
mode=Standardization.std)
|
|
258
268
|
|
|
259
269
|
promoter_mean = fit.promoter_mean.mean.flatten()
|
|
260
270
|
# del fit
|
|
@@ -264,6 +274,7 @@ def export_results(project_name: str, output_folder: str,
|
|
|
264
274
|
os.makedirs(folder, exist_ok=True)
|
|
265
275
|
if excluded_motif_group is not None:
|
|
266
276
|
motif_group_variance_std = np.insert(motif_group_variance_std, excluded_motif_group, np.nan)
|
|
277
|
+
print(error_variance.shape, error_variance_std.shape, motif_group_variance.shape, motif_group_variance_std.shape)
|
|
267
278
|
DF(np.array([error_variance, error_variance_std, motif_group_variance, motif_group_variance_std]).T,
|
|
268
279
|
index=group_names,
|
|
269
280
|
columns=['sigma', 'sigma_std', 'nu', 'nu_std']).to_csv(os.path.join(folder, 'group_variances.tsv'),
|
|
@@ -284,67 +295,41 @@ def export_results(project_name: str, output_folder: str,
|
|
|
284
295
|
sep='\t')
|
|
285
296
|
DF(motif_mean_fim.correlation(), index=motif_names, columns=motif_names).to_csv(os.path.join(folder, 'motif_means.tsv'),
|
|
286
297
|
sep='\t')
|
|
287
|
-
DF(motif_variance_fim.correlation(), index=
|
|
298
|
+
DF(motif_variance_fim.correlation(), index=motif_names_filtered, columns=motif_names_filtered).to_csv(os.path.join(folder, 'motif_variances.tsv'),
|
|
288
299
|
sep='\t')
|
|
289
300
|
_group_names = group_names
|
|
290
301
|
if excluded_motif_group is not None:
|
|
291
302
|
_group_names = np.delete(_group_names, excluded_motif_group)
|
|
292
303
|
DF(motif_group_variance_fim.correlation(), index=_group_names, columns=_group_names).to_csv(os.path.join(folder, 'motif_group_variances.tsv'),
|
|
293
304
|
sep='\t')
|
|
294
|
-
|
|
295
|
-
# sep='\t')
|
|
305
|
+
|
|
296
306
|
DF(error_variance_fim.correlation(), index=group_names, columns=group_names).to_csv(os.path.join(folder, 'error_variances.tsv'),
|
|
297
307
|
sep='\t')
|
|
298
308
|
|
|
299
309
|
|
|
300
310
|
folder = output_folder
|
|
301
|
-
U_raw, U_decor, stds = act.U, act.U_decor, act.stds
|
|
302
311
|
|
|
303
|
-
if std_mode == Standardization.full:
|
|
304
|
-
U = U_decor
|
|
305
|
-
else:
|
|
306
|
-
U = U_raw / stds
|
|
307
|
-
folder = os.path.join(output_folder, 'activities')
|
|
308
|
-
os.makedirs(folder, exist_ok=True)
|
|
309
|
-
DF(U_raw, index=motif_names_filtered, columns=group_names).to_csv(os.path.join(folder, 'activity_raw.tsv'), sep='\t')
|
|
310
|
-
DF(U, index=motif_names_filtered, columns=group_names).to_csv(os.path.join(folder, 'activity.tsv'), sep='\t')
|
|
311
|
-
DF(stds, index=motif_names_filtered, columns=group_names).to_csv(os.path.join(folder, 'activity_stds.tsv'), sep='\t')
|
|
312
|
-
|
|
313
312
|
folder = os.path.join(output_folder, 'tests', 'prediction_based')
|
|
314
313
|
os.makedirs(folder, exist_ok=True)
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
z_test.to_csv(os.path.join(folder, 'z_test.tsv'), sep='\t')
|
|
320
|
-
z_test = DF(z_test_fdr, index=motif_names_filtered, columns=group_names)
|
|
321
|
-
z_test.to_csv(os.path.join(folder, 'z_test_fdr.tsv'), sep='\t')
|
|
322
|
-
stat = (U ** 2).sum(axis=1)
|
|
323
|
-
anova = chi2.sf(stat, df=U.shape[1])
|
|
324
|
-
fdrs = multitest.multipletests(anova, alpha=0.05, method='fdr_bh')[1]
|
|
325
|
-
anova = DF([stat, anova, fdrs], columns=motif_names_filtered, index=['stat', 'p-value', 'FDR']).T
|
|
314
|
+
|
|
315
|
+
stat, pvalue, fdr, bad_inds = posterior_anova(act, fit, B=data.B)
|
|
316
|
+
motif_names_filtered = np.array(motif_names_filtered)[~bad_inds]
|
|
317
|
+
anova = DF([stat, pvalue, fdr], columns=motif_names_filtered, index=['stat', 'p-value', 'FDR']).T
|
|
326
318
|
anova.to_csv(os.path.join(folder, 'anova.tsv'), sep='\t')
|
|
327
|
-
|
|
328
|
-
off_test = -np.expm1(U.shape[1]*chi2.logsf(stat, df=1))
|
|
329
|
-
fdrs = multitest.multipletests(off_test, alpha=0.05, method='fdr_bh')[1]
|
|
330
|
-
off_test = DF([stat, off_test, fdrs], columns=motif_names_filtered, index=['stat', 'p-value', 'FDR']).T
|
|
331
|
-
off_test.to_csv(os.path.join(folder, 'off_test.tsv'), sep='\t')
|
|
332
|
-
|
|
319
|
+
|
|
333
320
|
folder = os.path.join(output_folder, 'tests', 'asymptotics_based')
|
|
334
321
|
os.makedirs(folder, exist_ok=True)
|
|
335
322
|
|
|
336
323
|
anova_ass = motif_variance_stat
|
|
337
324
|
pval = calc_z_test(anova_ass)
|
|
338
|
-
|
|
339
|
-
# pval = weird_test(anova_ass, std=motif_variance_std)
|
|
325
|
+
|
|
340
326
|
fdrs = multitest.multipletests(pval, alpha=0.05, method='fdr_bh')[1]
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
anova_ass = DF(np.array([anova_ass, pval, fdrs]).T, index=motif_names, columns=['stat', 'p-value', 'FDR'])
|
|
327
|
+
lrt = 2 * fit.motif_variance.logratios
|
|
328
|
+
lrt_pvalues = chi2.sf(lrt, 1)
|
|
329
|
+
lrt_fdr = multitest.multipletests(lrt_pvalues, alpha=0.05, method='fdr_bh')[1]
|
|
330
|
+
anova_ass = DF(np.array([anova_ass, pval, fdrs, lrt, lrt_pvalues, lrt_fdr]).T, index=motif_names_filtered,
|
|
331
|
+
columns=['stat', 'p-value', 'FDR',
|
|
332
|
+
'logratio', 'lrt_p-value', 'lrt_FDR'])
|
|
348
333
|
anova_ass.to_csv(os.path.join(folder, 'anova.tsv'), sep='\t')
|
|
349
334
|
|
|
350
335
|
sign = motif_mean.flatten() / motif_mean_std
|
|
@@ -376,7 +361,10 @@ def export_results(project_name: str, output_folder: str,
|
|
|
376
361
|
name = data.group_names[i]
|
|
377
362
|
for k, j in enumerate(inds):
|
|
378
363
|
sample_names[j] = f'{name}_{k+1}'
|
|
379
|
-
|
|
364
|
+
if fit.promoter_inds_to_drop:
|
|
365
|
+
promoter_names_train = np.delete(data.promoter_names, fit.promoter_inds_to_drop)
|
|
366
|
+
else:
|
|
367
|
+
promoter_names_train = data.promoter_names
|
|
380
368
|
export_fov(train, os.path.join(folder, 'train'), promoter_names=promoter_names_train,
|
|
381
369
|
sample_names=sample_names)
|
|
382
370
|
if test is not None:
|
|
@@ -386,5 +374,5 @@ def export_results(project_name: str, output_folder: str,
|
|
|
386
374
|
|
|
387
375
|
|
|
388
376
|
|
|
389
|
-
return {'z-test': z_test, 'anova': anova, 'off_test': off_test,
|
|
390
|
-
|
|
377
|
+
# return {'z-test': z_test, 'anova': anova, 'off_test': off_test,
|
|
378
|
+
# 'anova_ass': anova_ass, 'sign_ass': sign_ass}
|