maradoner 0.10.tar.gz → 0.11.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {maradoner-0.10 → maradoner-0.11}/PKG-INFO +1 -1
- {maradoner-0.10 → maradoner-0.11}/maradoner/__init__.py +1 -1
- {maradoner-0.10 → maradoner-0.11}/maradoner/create.py +2 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner/dataset_filter.py +1 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner/export.py +45 -18
- {maradoner-0.10 → maradoner-0.11}/maradoner/fit.py +153 -41
- {maradoner-0.10 → maradoner-0.11}/maradoner/main.py +2 -2
- {maradoner-0.10 → maradoner-0.11}/maradoner/mara/export.py +5 -6
- {maradoner-0.10 → maradoner-0.11}/maradoner/mara/fit.py +50 -33
- {maradoner-0.10 → maradoner-0.11}/maradoner/mara/main.py +18 -13
- {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/PKG-INFO +1 -1
- {maradoner-0.10 → maradoner-0.11}/README.md +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner/mara/__init__.py +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner/mara.py +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner/meta_optimizer.py +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner/select.py +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner/synthetic_data.py +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner/utils.py +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/SOURCES.txt +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/dependency_links.txt +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/entry_points.txt +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/requires.txt +0 -0
- {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/top_level.txt +0 -0
- {maradoner-0.10 → maradoner-0.11}/setup.cfg +0 -0
- {maradoner-0.10 → maradoner-0.11}/setup.py +0 -0
maradoner/create.py

@@ -88,6 +88,7 @@ def create_project(project_name: str, promoter_expression_filename: str, loading
                          f'{len(loading_matrix_transformations)}.')

     logger_print('Filtering promoters of low expression...', verbose)
+    print('aaaaa', len(promoter_expression))
     inds, weights = filter_lowexp(promoter_expression, cutoff=promoter_filter_lowexp_cutoff, fit_plot_filename=promoter_filter_plot_filename)
     promoter_expression = promoter_expression.loc[inds]
     proms = promoter_expression.index
@@ -115,6 +116,7 @@ def create_project(project_name: str, promoter_expression_filename: str, loading
     motif_expression = None
     loading_matrices = pd.concat(loading_matrices, axis=1)
     if motif_names is not None:
+        motif_names = list(set(motif_names) & set(loading_matrices.columns))
         loading_matrices = loading_matrices[motif_names]
     proms = list(promoter_expression.index)
     sample_names = list(promoter_expression.columns)
maradoner/export.py

@@ -159,7 +159,7 @@ def export_fov(fovs: tuple[FOVResult], folder: str,


 def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
-                    B: np.ndarray, corr_stat=False):
+                    B: np.ndarray, corr_stat=False, map_cov=False):
     precs = list()
     istds = list()
     covs = list()
@@ -170,20 +170,28 @@ def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
     # mot = np.delete(mot, activities.filtered_motifs)
     # ind = mot * nu < cov.diagonal() + 1e-9
     # bad_inds[ind] = True
-
-
-
-
+    # mot = fit.motif_variance.motif
+    # mot = np.delete(mot, activities.filtered_motifs)[~bad_inds]
+    if map_cov:
+        # fit.motif_variance.m
+        BTB = B.T @ B
+        BTB_s = BTB * fit.motif_variance.motif ** 0.5
+        BTB_s = BTB_s @ BTB_s.T
+    for cov, U, sigma, n, nu in zip(activities.cov(), activities.U.T,
+                                    activities._cov[-2],
+                                    fit.error_variance.variance, fit.motif_variance.group):
         # cov = cov[~bad_inds, ~bad_inds]
-        cov = cov[..., ~bad_inds]
-        cov = cov[~bad_inds]
+        # cov = cov[..., ~bad_inds]
+        # cov = cov[~bad_inds]
+        if map_cov:
+            D = BTB_s * nu + np.identity(len(BTB)) * sigma
+            cov = cov @ D @ cov.T * n / sigma ** 2
         covs.append(cov)
-        U = U[~bad_inds]
+        # U = U[~bad_inds]
         # prec = np.linalg.inv(np.diag(mot * nu) - cov)
         prec = np.linalg.inv(cov)
         mean += prec @ U
         precs.append(prec)
-    print(bad_inds.sum())
     total_prec = sum(precs)
     total_cov = np.linalg.inv(total_prec)
     mean = total_cov @ mean
@@ -210,9 +218,7 @@ def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
 def export_results(project_name: str, output_folder: str,
                    std_mode: Standardization,
                    anova_mode: ANOVAType=ANOVAType.positive,
-
-                   corrected_numerical=False,
-                   corrected_num_samples=1e5,
+                   weighted_zscore=False,
                    alpha=0.05,
                    n_jobs=6):

@@ -324,12 +330,11 @@ def export_results(project_name: str, output_folder: str,
     pval = calc_z_test(anova_ass)

     fdrs = multitest.multipletests(pval, alpha=0.05, method='fdr_bh')[1]
-    lrt = 2 * fit.motif_variance.logratios
-    lrt_pvalues = chi2.sf(lrt, 1)
-    lrt_fdr = multitest.multipletests(lrt_pvalues, alpha=0.05, method='fdr_bh')[1]
-    anova_ass = DF(np.array([anova_ass, pval, fdrs
-                   columns=['stat', 'p-value', 'FDR'
-                   'logratio', 'lrt_p-value', 'lrt_FDR'])
+    # lrt = 2 * fit.motif_variance.logratios
+    # lrt_pvalues = chi2.sf(lrt, 1)
+    # lrt_fdr = multitest.multipletests(lrt_pvalues, alpha=0.05, method='fdr_bh')[1]
+    anova_ass = DF(np.array([anova_ass, pval, fdrs]).T, index=motif_names_filtered,
+                   columns=['stat', 'p-value', 'FDR'])
     anova_ass.to_csv(os.path.join(folder, 'anova.tsv'), sep='\t')

     sign = motif_mean.flatten() / motif_mean_std
@@ -347,6 +352,28 @@ def export_results(project_name: str, output_folder: str,
                    index=motif_names)
     sign_ass.to_csv(os.path.join(folder, 'sign.tsv'), sep='\t')

+    folder = os.path.join(output_folder, 'activities')
+    os.makedirs(folder, exist_ok=True)
+    U = list()
+    stds = list()
+    for u, cov in zip(act.U.T, act.cov()):
+        std = cov.diagonal() ** 0.5
+        u = u / std
+        U.append(u)
+        stds.append(std)
+    U = np.array(U).T
+    DF(U, index=motif_names_filtered, columns=group_names).to_csv(os.path.join(folder, 'activity.tsv'), sep='\t')
+    U = U ** 2
+    if weighted_zscore:
+        U_total = U.sum(axis=1, keepdims=True) / (1 / np.array(stds).T ** 2).sum(axis=1, keepdims=True)
+    else:
+        U_total = U.mean(axis=1, keepdims=True)
+
+    U = np.hstack((U_total, U)) ** 0.5
+    DF(U, index=motif_names_filtered,
+       columns=['overall'] + list(group_names)).to_csv(os.path.join(folder, 'z_score.tsv'), sep='\t')
+    DF(act.U_raw, index=motif_names_filtered, columns=data.sample_names).to_csv(os.path.join(folder, 'activity_raw.tsv'), sep='\t')
+
     if os.path.isfile(f'{project_name}.fov.{fmt}'):
         with open(f'{project_name}.fov.{fmt}', 'rb') as f:
             fov = dill.load(f)
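The weighted_zscore branch added above aggregates per-group z-scores into the 'overall' column. Because each entry is u_g / std_g before squaring, the weighted variant sum(z_g^2) / sum(1 / std_g^2) equals a precision-weighted mean of the raw squared activities, while the default is a plain mean of squared z-scores. A minimal numeric sketch (all values made up, not from the package):

import numpy as np

u_raw = np.array([1.2, -0.4, 2.0])   # one motif's raw activity in three groups (made up)
stds = np.array([0.5, 1.0, 2.0])     # posterior stds per group (made up)

z = u_raw / stds                               # per-group z-scores, as in the loop above
plain = np.sqrt(np.mean(z ** 2))               # weighted_zscore=False
weighted = np.sqrt(np.sum(z ** 2) / np.sum(1 / stds ** 2))  # weighted_zscore=True
print(plain, weighted)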
maradoner/fit.py

@@ -1,6 +1,7 @@
 import numpy as np
 import jax.numpy as jnp
 import jax
+import scipy.linalg.lapack as lapack
 from sklearn.cluster import KMeans
 from sklearn.decomposition import NMF
 from dataclasses import dataclass
@@ -27,7 +28,67 @@ class LowrankDecomposition:
     Q: np.ndarray
     S: np.ndarray
     V: np.ndarray
-
+
+    def null_space_transform(self, Y: np.ndarray) -> np.ndarray:
+        """
+        Compute V^T Y where V is the orthogonal complement to Q, using Householder
+        transformations via LAPACK's dormqr. Ensures inputs are compatible.
+
+        Parameters:
+            Q (ndarray): p x r semi-orthogonal matrix where Q^T Q = I_r, r <= p.
+                Should be a standard float array (e.g., float64).
+            Y (ndarray): p x n matrix. Will be converted to float64 if necessary.
+
+        Returns:
+            VT_Y (ndarray): (p - r) x n matrix representing V^T Y (float64).
+        """
+        Y = np.array(Y, order='F', copy=True)
+        Q = np.array(self.Q).astype(np.float64, copy=False)
+
+        p, r = Q.shape
+
+        if r > p:
+            raise ValueError(f"Number of columns r ({r}) cannot exceed number of rows p ({p}) in Q.")
+
+        # 1. Compute QR factorization of Q
+        # Need a copy of Q because 'raw' QR might modify it slightly in some versions/backends,
+        # even though documentation often says it doesn't. Using overwrite_a=True below is safer.
+        Q_copy = np.array(Q, order='F', dtype=np.float64)  # Fortran order often preferred by LAPACK
+        qr_a, tau, work_qr, info_qr = lapack.dgeqrf(Q_copy, overwrite_a=True)
+        if info_qr != 0:
+            raise RuntimeError(f"LAPACK dgeqrf failed with info = {info_qr}")
+        # qr_a now contains R in upper triangle and reflectors below diagonal (overwritten Q_copy)
+
+        # 2. Prepare matrix Z (to be modified by dormqr)
+
+        # 3. Apply Q_full^T to Z using dormqr
+        # Workspace query
+        # try:
+        lwork = -1
+        # Use Z's shape here for the query, pass dummy Z
+        _, work_query, _ = lapack.dormqr('L', 'T', qr_a, tau, np.empty_like(Y), lwork=lwork, overwrite_c=True)
+        optimal_lwork = int(work_query[0].real)
+        lwork = max(1, optimal_lwork)
+
+
+        # Actual application
+        q_mult_y, work_actual, info_ormqr = lapack.dormqr('L', 'T', qr_a, tau, Y,
+                                                          lwork=lwork, overwrite_c=True)
+
+        if info_ormqr != 0:
+            # Add more debug info if it fails
+            print("--- Debug Info Before dormqr Failure ---")
+            print(f"Q shape: {Q.shape}, dtype: {Q.dtype}")
+            print(f"qr_a shape: {qr_a.shape}, dtype: {qr_a.dtype}, order: {'F' if qr_a.flags.f_contiguous else 'C'}")
+            print(f"tau shape: {tau.shape}, dtype: {tau.dtype}")
+            print(f"Y shape: {Y.shape}, dtype: {Y.dtype}, order: {'F' if Y.flags.f_contiguous else 'C'}")
+            print(f"lwork: {lwork}")
+            print("--- End Debug Info ---")
+            raise RuntimeError(f"LAPACK dormqr failed with info = {info_ormqr}")
+
+        VT_Y = q_mult_y[r:, :]
+        return VT_Y
+    #null_Q: np.ndarray

 @dataclass
 class TransformedData:
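For context on null_space_transform: the QR factorization writes Q = H_1 ... H_r [R; 0], so applying the transposed reflector product to Y stacks range coordinates on top of null-space coordinates, and rows r onward are exactly V^T Y. A standalone sketch (not part of the package; sizes arbitrary) checking this against a dense complement basis; Gram matrices are compared because any two orthonormal bases of the complement differ by an orthogonal rotation:

import numpy as np
import scipy.linalg.lapack as lapack

rng = np.random.default_rng(0)
p, r, n = 50, 10, 7
Q, _ = np.linalg.qr(rng.standard_normal((p, r)))      # p x r semi-orthogonal
Y = rng.standard_normal((p, n))

qr_a, tau, _, _ = lapack.dgeqrf(np.asfortranarray(Q))
_, work, _ = lapack.dormqr('L', 'T', qr_a, tau, np.asfortranarray(Y), lwork=-1)
qty, _, _ = lapack.dormqr('L', 'T', qr_a, tau, np.asfortranarray(Y),
                          lwork=int(work[0].real), overwrite_c=True)
vt_y = qty[r:]                                        # (p - r) x n null-space coordinates

Q_full, _ = np.linalg.qr(Q, mode='complete')          # dense reference basis
ref = Q_full[:, r:].T @ Y
assert np.allclose(vt_y.T @ vt_y, ref.T @ ref)        # same projection, different basis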
@@ -52,7 +113,6 @@ class MotifVarianceEstimates:
     fixed_group: int
     loglik: float
     loglik_start: float
-    logratios: np.ndarray

 @dataclass(frozen=True)
 class MotifMeanEstimates:
@@ -87,9 +147,60 @@ def ones_nullspace(n: int):
         res[i - 1, i] = 1 / norm
     return res

+def ones_nullspace_transform(x):
+    n, m = x.shape
+    if n <= 1:
+        return np.zeros((0, m), dtype=x.dtype)
+
+    Y = np.zeros((n - 1, m), dtype=float)
+    current_sum = x[0, :].astype(float)
+
+    for r in range(n - 1):
+        i = r + 1
+        sqrt_i_i_plus_1 = np.sqrt(i * (i + 1))
+
+        # Coefficients for row r of Y (which uses row i-1 = r of H)
+        coeff1 = -1.0 / sqrt_i_i_plus_1
+        coeff2 = np.sqrt(i / (i + 1))
+        Y[r, :] = coeff1 * current_sum + coeff2 * x[r + 1, :]
+
+        # Update current_sum for the next iteration (to become sum_{k=0}^{r+1} X[k,:])
+        if r < n - 2:  # Avoid adding beyond X's bounds on the last iteration
+            current_sum += x[r + 1, :]
+    return Y
+
+def ones_nullspace_transform_transpose(X: np.ndarray) -> np.ndarray:
+    n, m = X.shape
+    n = n + 1
+
+    if n == 1:
+        output_dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else float
+        return np.zeros((1, m), dtype=output_dtype)
+
+    output_dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else float
+    Y = np.zeros((n, m), dtype=output_dtype)
+
+    current_suffix_sum = np.zeros(m, dtype=output_dtype)
+
+    for k in range(n - 2, -1, -1):
+        i = k + 1.0
+
+        sqrt_term_i_ip1 = np.sqrt(i * (i + 1.0))
+        coeff_pos = i / sqrt_term_i_ip1
+        coeff_neg = -1.0 / sqrt_term_i_ip1
+
+
+        Y[k + 1, :] = coeff_pos * X[k, :] + current_suffix_sum
+
+        current_suffix_sum += coeff_neg * X[k, :]
+
+    Y[0, :] = current_suffix_sum
+
+    return Y
+
 def lowrank_decomposition(X: np.ndarray, rel_eps=1e-12) -> LowrankDecomposition:
     svd = jnp.linalg.svd
-    q, s, v = [np.array(t) for t in svd(X)]
+    q, s, v = [np.array(t) for t in svd(X, full_matrices=False)]
     max_sv = max(s)
     n = len(s)
     for r in range(n):
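The two helpers above stream the Helmert-style transform from ones_nullspace row by row, so the (n - 1) x n matrix is never materialized. A self-contained sketch (independent of the package) of the identity they rely on: each row mixes a running prefix sum with the next input row, and the rows form an orthonormal basis of the null space of the all-ones vector:

import numpy as np

def helmert_rows(n: int) -> np.ndarray:
    # Explicit (n-1) x n basis: row i-1 is (-1, ..., -1, i, 0, ..., 0) / sqrt(i(i+1))
    H = np.zeros((n - 1, n))
    for i in range(1, n):
        H[i - 1, :i] = -1.0 / np.sqrt(i * (i + 1))
        H[i - 1, i] = i / np.sqrt(i * (i + 1))
    return H

def streaming_helmert(X: np.ndarray) -> np.ndarray:
    # Same recurrence as ones_nullspace_transform: O(nm) time, O(m) extra memory.
    n, m = X.shape
    Y = np.zeros((n - 1, m))
    prefix = X[0].astype(float).copy()
    for r in range(n - 1):
        i = r + 1
        Y[r] = -prefix / np.sqrt(i * (i + 1)) + np.sqrt(i / (i + 1)) * X[r + 1]
        prefix += X[r + 1]
    return Y

X = np.random.default_rng(1).standard_normal((6, 3))
H = helmert_rows(6)
assert np.allclose(H @ np.ones(6), 0)             # rows annihilate constants
assert np.allclose(H @ H.T, np.eye(5))            # rows are orthonormal
assert np.allclose(streaming_helmert(X), H @ X)   # streaming form matches H @ X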
@@ -98,10 +209,9 @@ def lowrank_decomposition(X: np.ndarray, rel_eps=1e-12) -> LowrankDecomposition:
             break
         r += 1
     s = s[:r]
-    null_q = q[:, r:]
     q = q[:, :r]
     v = v[:r]
-    return LowrankDecomposition(q, s, v
+    return LowrankDecomposition(q, s, v)

 def transform_data(data, std_y=False, std_b=False, helmert=True) -> TransformedData:
     try:
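Dropping null_q pairs with the switch to full_matrices=False: the trailing columns q[:, r:] of the full SVD were only ever used as an explicit null-space basis, and for a tall matrix they dominate memory; that basis is now reached through null_space_transform instead. A shape-only sketch (sizes made up):

import numpy as np

X = np.random.default_rng(2).standard_normal((2000, 150))
q_full, _, _ = np.linalg.svd(X)                       # (2000, 2000): mostly null-space basis
q_thin, _, _ = np.linalg.svd(X, full_matrices=False)  # (2000, 150): all that is kept now
print(q_full.shape, q_thin.shape)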
@@ -115,9 +225,11 @@ def transform_data(data, std_y=False, std_b=False, helmert=True) -> TransformedData:
     if std_b:
         B /= B.std(axis=0, keepdims=True)
     if helmert:
-        F_p = ones_nullspace(len(Y))
-        Y = F_p @ Y
-        B = F_p @ B
+        # F_p = ones_nullspace(len(Y))
+        # Y = F_p @ Y
+        # B = F_p @ B
+        Y = ones_nullspace_transform(Y)
+        B = ones_nullspace_transform(B)
     group_inds_inv = list()
     d = dict()
     for i, items in enumerate(group_inds):
@@ -346,9 +458,24 @@ def loglik_motifs_fim(x: jnp.ndarray, BTB: jnp.ndarray,
     return FIM


+def calc_error_variance_fim(data: TransformedData, error_variance: jnp.ndarray):
+    d = 1 / jnp.array(error_variance).at[data.group_inds_inv].get()
+    d = d / d.sum() ** 0.5
+    D_product_inv = jnp.outer(-d, d)
+    D_product_inv = jnp.fill_diagonal(D_product_inv,
+                                      D_product_inv.diagonal() + d * d.sum(),
+                                      inplace=False)
+    fim = D_product_inv * D_product_inv.T / 2
+    group_inds = data.group_inds
+    group_loadings = np.zeros((len(d), len(group_inds)), dtype=int)
+    for i, indices in enumerate(group_inds):
+        group_loadings[indices, i] = 1
+    group_loadings = jnp.array(group_loadings)
+    return group_loadings.T @ fim @ group_loadings
+
 def estimate_error_variance(data: TransformedData, B_decomposition: LowrankDecomposition,
                             verbose=False) -> ErrorVarianceEstimates:
-    Y = B_decomposition.
+    Y = B_decomposition.null_space_transform(data.Y)
     d0 = jnp.array([np.var(Y[:, inds]) for inds in data.group_inds])

     fun = partial(loglik_error, Qn_Y=Y, group_inds_inv=data.group_inds_inv)
@@ -362,7 +489,8 @@ def estimate_error_variance(data: TransformedData, B_decomposition: LowrankDecomposition,
         print('-' * 15)
         print(res)
         print('-' * 15)
-
+
+    fim = calc_error_variance_fim(data, res.x)
     return ErrorVarianceEstimates(np.array(res.x), np.array(fim),
                                   loglik_start=res.start_loglik,
                                   loglik=res.fun)
@@ -374,13 +502,16 @@ def estimate_promoter_mean(data: TransformedData,

     D = error_variance.variance[data.group_inds_inv]
     Y = jnp.array(data.Y)
-    F_p = jnp.array(ones_nullspace(len(Y) + 1))
-    Q_N = jnp.array(B_decomposition.null_Q)
+    # F_p = jnp.array(ones_nullspace(len(Y) + 1))
+    # Q_N = jnp.array(B_decomposition.null_Q)
+    Q_C = jnp.array(B_decomposition.Q)
     w = (1 / D).sum()
     mean = Y @ (1 / D.reshape(-1, 1))
-    mean =
-    mean = Q_N @ mean
-    mean =
+    mean = mean - Q_C @ (Q_C.T @ mean)
+    # mean = Q_N.T @ mean
+    # mean = Q_N @ mean
+    # mean = F_p.T @ mean
+    mean = ones_nullspace_transform_transpose(mean)
     mean = mean / w
     return PromoterMeanEstimates(mean)

@@ -437,12 +568,14 @@ def estimate_motif_variance(data: TransformedData, B_decomposition: LowrankDecomposition,
     eig = jnp.linalg.eigh(f)[0].min()
     if eig < 0:
         eig = list()
-        epsilons = [1e-15, 1e-12, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
+        epsilons = [1e-23, 1e-15, 1e-12, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
         for eps in epsilons:
             x = res.x.copy()
             x = x.at[:len(BTB)].set(jnp.clip(x.at[:len(BTB)].get(), eps, float('inf')))
             f = fim(x)
             eig.append(jnp.linalg.eigh(f)[0].min())
+            if eig[-1] > 0:
+                break
         i = np.argmax(eig)
         eps = epsilons[i]
         x = res.x.copy()
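The added early exit stops at the first clipping floor that makes the Fisher information matrix positive definite rather than scanning every candidate. The same pattern in generic form (an illustration with a caller-supplied fim callable, not the package's API):

import numpy as np

def first_pd_eps(fim, x, epsilons=(1e-23, 1e-15, 1e-12, 1e-9, 1e-6, 1e-3)):
    mineigs = []
    for eps in epsilons:
        f = fim(np.clip(x, eps, np.inf))
        mineigs.append(np.linalg.eigvalsh(f).min())
        if mineigs[-1] > 0:       # same early exit the hunk adds
            return eps, f
    # No floor worked: fall back to the least-negative candidate,
    # mirroring the surrounding np.argmax(eig) logic.
    return epsilons[int(np.argmax(mineigs))], None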
@@ -450,31 +583,9 @@ def estimate_motif_variance(data: TransformedData, B_decomposition: LowrankDecomposition,
         fim = fim(x)
     else:
         fim = f
-    print('FIM', eig)
-    logliks = list()
-    from tqdm import tqdm
-    for i in tqdm(list(range(len(BTB)))):
-        x = res.x.copy()
-        x = x.at[i].set(0)
-        subfun = partial(fun, _motif_zero=i)
-        subgrad = partial(grad, _motif_zero=i)
-        opt = MetaOptimizer(subfun, subgrad, num_steps_momentum=5, skip_init=False)
-        logliks.append(opt.optimize(x).fun)
-    logliks = np.array(logliks) - float(res.fun)
-    # fim_naive = partial(loglik_motifs_fim_naive, B=data.B, D=D,
-    #                     group_inds_inv=data.group_inds_inv, group_inds=data.group_inds,
-    #                     G_fix_ind=j, G_fix_val=fix)
-    # fim_naive = fim_naive(res.x)
-    # print('FIM')
-    # print(fim)
-    # print('Naive')
-    # print(fim_naive)
-    # print(np.abs(fim - fim_naive) / np.abs(fim_naive))
-    # fim = fim_naive
-    # fim = (fim, fim_naive)
     return MotifVarianceEstimates(motif=np.array(Sigma), group=np.array(G), fim=np.array(fim),
                                   fixed_group=j, loglik_start=res.start_loglik,
-                                  loglik=res.fun
+                                  loglik=res.fun)

 def estimate_motif_mean(data: TransformedData, B_decomposition: LowrankDecomposition,
                         error_variance: ErrorVarianceEstimates,
@@ -494,8 +605,9 @@ def estimate_motif_mean(data: TransformedData, B_decomposition: LowrankDecomposition,

     BTB = B_decomposition.V.T * B_decomposition.S ** 2 @ B_decomposition.V
     A = jnp.sqrt(Sigma).reshape(-1, 1) * BTB
-    Fp = ones_nullspace(len(data.Y) + 1)
-    Y_tilde = (data.Y - Fp @ mu_p.reshape(-1, 1)) / d
+    # Fp = ones_nullspace(len(data.Y) + 1)
+    # Y_tilde = (data.Y - Fp @ mu_p.reshape(-1, 1)) / d
+    Y_tilde = (data.Y - ones_nullspace_transform(mu_p.reshape(-1, 1))) / d
     Y_hat = jnp.sqrt(Sigma).reshape(-1,1) * data.B.T @ Y_tilde * g / d
     D_B, Q_B = jnp.linalg.eigh(jnp.sqrt(Sigma).reshape(-1, 1) * BTB * jnp.sqrt(Sigma))
     At_QB = A.T @ Q_B
maradoner/main.py

@@ -264,14 +264,14 @@ def _export(name: str = Argument(..., help='Project name.'),
             output_folder: Path = Argument(..., help='Output folder.'),
             std_mode: Standardization = Option(Standardization.full, help='Whether to standardize activities with plain variances or also decorrelate them.'),
             anova_mode: ANOVAType = Option(ANOVAType.positive, help='If negative, look for non-variative motifs'),
-
+            weighted_zscore: bool = Option(False, help='Reciprocal variance weighted Z-scores'),
             alpha: float = Option(0.05, help='FDR alpha.')):
     t0 = time()
     p = Progress(SpinnerColumn(speed=0.5), TextColumn("[progress.description]{task.description}"), transient=True)
     p.add_task(description="Exporting results...", total=None)
     p.start()
     export_results(name, output_folder, std_mode=std_mode, anova_mode=anova_mode, alpha=alpha,
-
+                   weighted_zscore=weighted_zscore)
     p.stop()
     dt = time() - t0
     rprint(f'[green][bold]✔️[/bold] Done![/green]\t time: {dt:.2f} s.')
maradoner/mara/export.py

@@ -62,6 +62,8 @@ def export_results(project_name: str, output_folder: str):

     U = act.U
     U_var = act.variance
+
+    U = U / U_var ** 0.5

     # U_grouped = list()
     # U_var_grouped = list()
@@ -74,15 +76,13 @@ def export_results(project_name: str, output_folder: str):
     os.makedirs(output_folder, exist_ok=True)
     DF(np.array([error_variance, motif_variance]).T, index=sample_names,
        columns=['sigma', 'tau']).to_csv(os.path.join(output_folder, 'params.tsv'), sep='\t')
-
-
-    act = np.hstack((U_total, act))
+    U_total = U.mean(axis=1, keepdims=True)  # / (1 / U_var ** 0.5).sum(axis=1, keepdims=True)
+    act = np.hstack((U_total, U))
     DF(act, index=motif_names,
        columns=['overall'] + list(sample_names)).to_csv(os.path.join(output_folder, 'activities.tsv'),
                                                         sep='\t')

-    z = U
-    z = z ** 2
+    z = U ** 2
     U_total = z.mean(axis=1, keepdims=True)  # / (1 / U_var ** 0.5).sum(axis=1, keepdims=True)
     z = np.hstack((U_total, z))
     z = z ** 0.5
@@ -90,5 +90,4 @@ def export_results(project_name: str, output_folder: str):
        columns=['overall'] + list(sample_names)).to_csv(os.path.join(output_folder, 'z_scores.tsv'),
                                                         sep='\t')

-

maradoner/mara/fit.py

@@ -44,6 +44,7 @@ class MotifVarianceEstimates:
 class FitResult:
     error_variance: ErrorVarianceEstimates
     motif_variance: MotifVarianceEstimates
+    B_decomposition: LowrankDecomposition
     group_names: list
     clustering: np.ndarray = None
     clustered_B: np.ndarray = None
@@ -70,7 +71,8 @@ def transform_data(data, std_y=False, std_b=False, helmert=True) -> TransformedData:

 def estimate_error_variance(data: TransformedData,
                             B_decomposition: LowrankDecomposition) -> ErrorVarianceEstimates:
-    Y = B_decomposition.null_Q.T @ data.Y
+    # Y = B_decomposition.null_Q.T @ data.Y
+    Y = B_decomposition.null_space_transform(data.Y)
     variance = (Y ** 2).mean(axis=0)
     return ErrorVarianceEstimates(variance)

@@ -79,7 +81,7 @@ def calc_tau(tau: float, error_variance: np.ndarray, mode: TauMode):
     if mode == mode.mara:
         taus = tau * np.ones_like(error_variance)
     else:
-        taus = tau /
+        taus = tau / error_variance
     return taus

 def loglik_tau(tau: float, Sigma: np.ndarray, Y_hat: np.ndarray,
@@ -88,10 +90,10 @@ def loglik_tau(tau: float, Sigma: np.ndarray, Y_hat: np.ndarray,
     logdet = 0
     taus = calc_tau(tau, error_variance, mode)
     for sigma, tau, y in zip(error_variance, taus, Y_hat.T):
-        S = tau * Sigma +
-        vec += (y ** 2
+        S = tau / sigma * Sigma + 1
+        vec += (y ** 2 / S).sum() * (tau / sigma ** 2)
         logdet += S.sum()
-    return vec + logdet
+    return -vec + logdet

 def estimate_motif_variance(data: TransformedData, B_decomposition: LowrankDecomposition,
                             error_variance: ErrorVarianceEstimates,
@@ -106,7 +108,7 @@ def estimate_motif_variance(data: TransformedData, B_decomposition: LowrankDecomposition,
     Y_hat = Q.T @ data.B.T @ data.Y
     fun = partial(loglik_tau, Sigma=Sigma, Y_hat=Y_hat, error_variance=error_variance.variance,
                   mode=mode)
-    tau = calc_tau(minimize_scalar(fun, bounds=(0.0,
+    tau = calc_tau(minimize_scalar(fun, bounds=(0.0, error_variance.variance.max() * 10)).x, error_variance.variance, mode)
     return MotifVarianceEstimates(tau)


@@ -118,18 +120,11 @@ class ActivitiesPrediction:


 def predict_activities(data: TransformedData, fit: FitResult,
-
+                       gpu=False, verbose=True) -> ActivitiesPrediction:
     U = list()
     variance = list()
-    if gpu_decomposition:
-        device = jax.devices()
-    else:
-        device = jax.devices('cpu')
-    device = next(iter(device))

-
-    with jax.default_device(device):
-        B_decomposition = lowrank_decomposition(data.B)
+    B_decomposition = fit.B_decomposition
     if gpu:
         device = jax.devices()
     else:
@@ -200,7 +195,7 @@ def fit(project: str, tau_mode: TauMode, tau_estimation: TauEstimation,


     res = FitResult(error_variance=error_variance, motif_variance=motif_variance,
-                    clustering=clustering,
+                    clustering=clustering, B_decomposition=B_decomposition,
                     group_names=group_names, promoter_inds_to_drop=promoter_inds_to_drop)
     if dump:
         with openers[fmt](f'{project}.old.fit.{fmt}', 'wb') as f:
@@ -257,10 +252,10 @@ def _cor(a, b, axis=1):
     return numerator / denominator

 def calculate_fov(project: str, gpu: bool,
-                  stat_type: GOFStat, x64=True,
+                  stat_type: GOFStat, keep_motifs: str, x64=True,
                   verbose=True, dump=True):
     def calc_fov(data: TransformedData, fit: FitResult,
-                 activities: ActivitiesPrediction) -> tuple[FOVResult]:
+                 activities: ActivitiesPrediction, keep_motifs=None) -> tuple[FOVResult]:
         def sub(Y, effects) -> FOVResult:
             if stat_type == stat_type.fov:
                 Y1 = Y - effects
@@ -277,17 +272,33 @@ def calculate_fov(project: str, gpu: bool,
             sample = _cor(Y, effects, axis=0)
             return FOVResult(total, prom, sample)
         data = transform_data(data)
-        B = data.B
+        B = data.B if activities.clustering is None else activities.clustering[0]
         Y = data.Y
         U = activities.U
-        if
-
-
-
+        if keep_motifs is not None:
+            B = B[:, keep_motifs]
+            U = U[keep_motifs]
+        d = B @ U
         stat_0 = sub(Y, d)
         return stat_0,
     data = read_init(project)
     fmt = data.fmt
+    motif_names = data.motif_names
+    if keep_motifs:
+        import datatable as dt
+        df = dt.fread(keep_motifs).to_pandas().groupby('status')
+        keep_motifs = list()
+        for name, motifs in df:
+            inds = list()
+            for mot in motifs.iloc[:, 0]:
+                try:
+                    i = motif_names.index(mot)
+                    inds.append(i)
+                except ValueError:
+                    print(f'Motif {mot} not found in the project.')
+            keep_motifs.append((name, np.array(inds, dtype=int)))
+    else:
+        keep_motifs = [(None, None)]
     with openers[fmt](f'{project}.old.fit.{fmt}', 'rb') as f:
         fit = dill.load(f)
     with openers[fmt](f'{project}.old.predict.{fmt}', 'rb') as f:
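For reference, the keep_motifs file is read above with datatable's fread (delimiter auto-detected), grouped by its status column, and its first column is matched against the project's motif names. A plausible input table (the header name 'motif' and the motif identifiers are made up for illustration; only 'status' is required by the code):

motif	status
MOTIF_A	up
MOTIF_B	up
MOTIF_C	down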
@@ -303,17 +314,23 @@ def calculate_fov(project: str, gpu: bool,
     else:
         device = jax.devices('cpu')
     device = next(iter(device))
-
-
-    if
-
-
-
-
-
+    results = list()
+    for status_name, motifs in keep_motifs:
+        if status_name:
+            status_name = f'{status_name} ({len(motifs)})'
+            print(status_name)
+        with jax.default_device(device):
+
+            if data_test is not None:
+                test_FOV = calc_fov(data=data_test, fit=fit, activities=activities, keep_motifs=motifs)
+            train_FOV = calc_fov(data=data, fit=fit, activities=activities, keep_motifs=motifs)
+            if data_test is None:
+                test_FOV = None
+            res = TestResult(train_FOV, test_FOV, grouped=False)
+            results.append((status_name, res))
     with openers[fmt](f'{project}.old.fov.{fmt}', 'wb') as f:
-        dill.dump(
-    return
+        dill.dump(results, f)
+    return results



maradoner/mara/main.py

@@ -51,6 +51,7 @@ def _fit(name: str = Argument(..., help='Project name.'),
 @app_old.command('gof', help='Estimate GOFs given test/train data split. Provides test info only if [orange]test-chromosomes[/orange] is not None in [cyan]fit[/cyan].')
 def _gof(name: str = Argument(..., help='Project name.'),
          # use_groups: bool = Option(False, help='Compute statistic for sammples aggragated across groups.'),
+         keep_motifs: Path = Option(None, help='Table with 2 columns: motif and status'),
          stat_type: GOFStat = Option(GOFStat.fov, help='Statistic type to compute'),
         gpu: bool = Option(False, help='Use GPU if available for most of computations.'),
          x64: bool = Option(True, help='Use high precision algebra.')):
@@ -62,21 +63,25 @@ def _gof(name: str = Argument(..., help='Project name.'),
     p = Progress(SpinnerColumn(speed=0.5), TextColumn("[progress.description]{task.description}"), transient=True)
     p.add_task(description="Calculating FOVs...", total=None)
     p.start()
-    res = calculate_fov(name, stat_type=stat_type, gpu=gpu, x64=x64)
-
-
-
-
-
-
-
-
-
-
-    t.
+    res = calculate_fov(name, stat_type=stat_type, keep_motifs=keep_motifs, gpu=gpu, x64=x64)
+    for name, res in res:
+        print(name)
+        if stat_type == GOFStat.corr:
+            title = 'Pearson correlation'
+        else:
+            title = 'Fraction of variance explained'
+        if name:
+            title = f'({name}) {title}'
+        t = Table('Set', 'stat',
+                  title=title)
+        row = [f'{t.total:.6f}' for t in res.train]
+        t.add_row('train', *row)
+        if res.test is not None:
+            row = [f'{t.total:.6f}' for t in res.test]
+            t.add_row('test', *row)
+        rprint(t)
     p.stop()
     dt = time() - t0
-    rprint(t)
     rprint(f'[green][bold]✔️[/bold] Done![/green]\t time: {dt:.2f} s.')

 @app_old.command('predict', help='Estimate deviations of motif activities from their means.')