maradoner 0.10__tar.gz → 0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maradoner might be problematic.

Files changed (25)
  1. {maradoner-0.10 → maradoner-0.11}/PKG-INFO +1 -1
  2. {maradoner-0.10 → maradoner-0.11}/maradoner/__init__.py +1 -1
  3. {maradoner-0.10 → maradoner-0.11}/maradoner/create.py +2 -0
  4. {maradoner-0.10 → maradoner-0.11}/maradoner/dataset_filter.py +1 -0
  5. {maradoner-0.10 → maradoner-0.11}/maradoner/export.py +45 -18
  6. {maradoner-0.10 → maradoner-0.11}/maradoner/fit.py +153 -41
  7. {maradoner-0.10 → maradoner-0.11}/maradoner/main.py +2 -2
  8. {maradoner-0.10 → maradoner-0.11}/maradoner/mara/export.py +5 -6
  9. {maradoner-0.10 → maradoner-0.11}/maradoner/mara/fit.py +50 -33
  10. {maradoner-0.10 → maradoner-0.11}/maradoner/mara/main.py +18 -13
  11. {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/PKG-INFO +1 -1
  12. {maradoner-0.10 → maradoner-0.11}/README.md +0 -0
  13. {maradoner-0.10 → maradoner-0.11}/maradoner/mara/__init__.py +0 -0
  14. {maradoner-0.10 → maradoner-0.11}/maradoner/mara.py +0 -0
  15. {maradoner-0.10 → maradoner-0.11}/maradoner/meta_optimizer.py +0 -0
  16. {maradoner-0.10 → maradoner-0.11}/maradoner/select.py +0 -0
  17. {maradoner-0.10 → maradoner-0.11}/maradoner/synthetic_data.py +0 -0
  18. {maradoner-0.10 → maradoner-0.11}/maradoner/utils.py +0 -0
  19. {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/SOURCES.txt +0 -0
  20. {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/dependency_links.txt +0 -0
  21. {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/entry_points.txt +0 -0
  22. {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/requires.txt +0 -0
  23. {maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/top_level.txt +0 -0
  24. {maradoner-0.10 → maradoner-0.11}/setup.cfg +0 -0
  25. {maradoner-0.10 → maradoner-0.11}/setup.py +0 -0
{maradoner-0.10 → maradoner-0.11}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: maradoner
-Version: 0.10
+Version: 0.11
 Summary: Variance-adjusted estimation of motif activities.
 Home-page: https://github.com/autosome-ru/nemara
 Author: Georgy Meshcheryakov
{maradoner-0.10 → maradoner-0.11}/maradoner/__init__.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-__version__ = '0.10'
+__version__ = '0.11'
 import importlib
 
 
{maradoner-0.10 → maradoner-0.11}/maradoner/create.py
@@ -88,6 +88,7 @@ def create_project(project_name: str, promoter_expression_filename: str, loading
                      f'{len(loading_matrix_transformations)}.')
 
     logger_print('Filtering promoters of low expression...', verbose)
+    print('aaaaa', len(promoter_expression))
     inds, weights = filter_lowexp(promoter_expression, cutoff=promoter_filter_lowexp_cutoff, fit_plot_filename=promoter_filter_plot_filename)
     promoter_expression = promoter_expression.loc[inds]
     proms = promoter_expression.index
@@ -115,6 +116,7 @@ def create_project(project_name: str, promoter_expression_filename: str, loading
         motif_expression = None
     loading_matrices = pd.concat(loading_matrices, axis=1)
     if motif_names is not None:
+        motif_names = list(set(motif_names) & set(loading_matrices.columns))
         loading_matrices = loading_matrices[motif_names]
     proms = list(promoter_expression.index)
     sample_names = list(promoter_expression.columns)
{maradoner-0.10 → maradoner-0.11}/maradoner/dataset_filter.py
@@ -105,5 +105,6 @@ def filter_lowexp(expression: pd.DataFrame, cutoff=0.95, fit_plot_filename=None,
     inds[:k] = False
     # print(inds)
     # inds[:] = 1
+    print(x[inds].mean(), x[~inds].mean())
     inds = inds[inds_inv]
     return inds, ws
{maradoner-0.10 → maradoner-0.11}/maradoner/export.py
@@ -159,7 +159,7 @@ def export_fov(fovs: tuple[FOVResult], folder: str,
 
 
 def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
-                    B: np.ndarray, corr_stat=False):
+                    B: np.ndarray, corr_stat=False, map_cov=False):
     precs = list()
     istds = list()
     covs = list()
@@ -170,20 +170,28 @@ def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
170
170
  # mot = np.delete(mot, activities.filtered_motifs)
171
171
  # ind = mot * nu < cov.diagonal() + 1e-9
172
172
  # bad_inds[ind] = True
173
-
174
- for cov, U, nu in zip(activities.cov(), activities.U.T, fit.motif_variance.group):
175
- mot = fit.motif_variance.motif
176
- mot = np.delete(mot, activities.filtered_motifs)[~bad_inds]
173
+ # mot = fit.motif_variance.motif
174
+ # mot = np.delete(mot, activities.filtered_motifs)[~bad_inds]
175
+ if map_cov:
176
+ # fit.motif_variance.m
177
+ BTB = B.T @ B
178
+ BTB_s = BTB * fit.motif_variance.motif ** 0.5
179
+ BTB_s = BTB_s @ BTB_s.T
180
+ for cov, U, sigma, n, nu in zip(activities.cov(), activities.U.T,
181
+ activities._cov[-2],
182
+ fit.error_variance.variance, fit.motif_variance.group):
177
183
  # cov = cov[~bad_inds, ~bad_inds]
178
- cov = cov[..., ~bad_inds]
179
- cov = cov[~bad_inds]
184
+ # cov = cov[..., ~bad_inds]
185
+ # cov = cov[~bad_inds]
186
+ if map_cov:
187
+ D = BTB_s * nu + np.identity(len(BTB)) * sigma
188
+ cov = cov @ D @ cov.T * n / sigma ** 2
180
189
  covs.append(cov)
181
- U = U[~bad_inds]
190
+ # U = U[~bad_inds]
182
191
  # prec = np.linalg.inv(np.diag(mot * nu) - cov)
183
192
  prec = np.linalg.inv(cov)
184
193
  mean += prec @ U
185
194
  precs.append(prec)
186
- print(bad_inds.sum())
187
195
  total_prec = sum(precs)
188
196
  total_cov = np.linalg.inv(total_prec)
189
197
  mean = total_cov @ mean
@@ -210,9 +218,7 @@ def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
 def export_results(project_name: str, output_folder: str,
                    std_mode: Standardization,
                    anova_mode: ANOVAType=ANOVAType.positive,
-                   compute_corrected_pvalues=False,
-                   corrected_numerical=False,
-                   corrected_num_samples=1e5,
+                   weighted_zscore=False,
                    alpha=0.05,
                    n_jobs=6):
 
@@ -324,12 +330,11 @@ def export_results(project_name: str, output_folder: str,
         pval = calc_z_test(anova_ass)
 
         fdrs = multitest.multipletests(pval, alpha=0.05, method='fdr_bh')[1]
-        lrt = 2 * fit.motif_variance.logratios
-        lrt_pvalues = chi2.sf(lrt, 1)
-        lrt_fdr = multitest.multipletests(lrt_pvalues, alpha=0.05, method='fdr_bh')[1]
-        anova_ass = DF(np.array([anova_ass, pval, fdrs, lrt, lrt_pvalues, lrt_fdr]).T, index=motif_names_filtered,
-                       columns=['stat', 'p-value', 'FDR',
-                                'logratio', 'lrt_p-value', 'lrt_FDR'])
+        # lrt = 2 * fit.motif_variance.logratios
+        # lrt_pvalues = chi2.sf(lrt, 1)
+        # lrt_fdr = multitest.multipletests(lrt_pvalues, alpha=0.05, method='fdr_bh')[1]
+        anova_ass = DF(np.array([anova_ass, pval, fdrs]).T, index=motif_names_filtered,
+                       columns=['stat', 'p-value', 'FDR'])
         anova_ass.to_csv(os.path.join(folder, 'anova.tsv'), sep='\t')
 
         sign = motif_mean.flatten() / motif_mean_std
@@ -347,6 +352,28 @@ def export_results(project_name: str, output_folder: str,
                        index=motif_names)
     sign_ass.to_csv(os.path.join(folder, 'sign.tsv'), sep='\t')
 
+    folder = os.path.join(output_folder, 'activities')
+    os.makedirs(folder, exist_ok=True)
+    U = list()
+    stds = list()
+    for u, cov in zip(act.U.T, act.cov()):
+        std = cov.diagonal() ** 0.5
+        u = u / std
+        U.append(u)
+        stds.append(std)
+    U = np.array(U).T
+    DF(U, index=motif_names_filtered, columns=group_names).to_csv(os.path.join(folder, 'activity.tsv'), sep='\t')
+    U = U ** 2
+    if weighted_zscore:
+        U_total = U.sum(axis=1, keepdims=True) / (1 / np.array(stds).T ** 2).sum(axis=1, keepdims=True)
+    else:
+        U_total = U.mean(axis=1, keepdims=True)
+
+    U = np.hstack((U_total, U)) ** 0.5
+    DF(U, index=motif_names_filtered,
+       columns=['overall'] + list(group_names)).to_csv(os.path.join(folder, 'z_score.tsv'), sep='\t')
+    DF(act.U_raw, index=motif_names_filtered, columns=data.sample_names).to_csv(os.path.join(folder, 'activity_raw.tsv'), sep='\t')
+
     if os.path.isfile(f'{project_name}.fov.{fmt}'):
         with open(f'{project_name}.fov.{fmt}', 'rb') as f:
             fov = dill.load(f)
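
Note on the new activities export: each group's posterior mean activity is divided by its posterior standard deviation, and the per-group z-scores are combined into an 'overall' column by root mean square, with an optional precision-weighted variant. A minimal sketch of the aggregation on made-up arrays (illustrative, not package code):

    import numpy as np

    # Toy per-group posterior means (motifs x groups) and posterior stds.
    U = np.array([[1.0, 2.0], [0.5, -1.5]])
    stds = np.array([[0.5, 1.0], [0.25, 0.75]])

    Z2 = (U / stds) ** 2                        # squared per-group z-scores
    # Default: overall z-score is the root mean square across groups.
    rms = Z2.mean(axis=1, keepdims=True) ** 0.5
    # weighted_zscore=True: normalize the summed squares by the total
    # precision (sum of 1/std^2) instead of the group count.
    weighted = (Z2.sum(axis=1, keepdims=True)
                / (1 / stds ** 2).sum(axis=1, keepdims=True)) ** 0.5
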
{maradoner-0.10 → maradoner-0.11}/maradoner/fit.py
@@ -1,6 +1,7 @@
 import numpy as np
 import jax.numpy as jnp
 import jax
+import scipy.linalg.lapack as lapack
 from sklearn.cluster import KMeans
 from sklearn.decomposition import NMF
 from dataclasses import dataclass
@@ -27,7 +28,67 @@ class LowrankDecomposition:
     Q: np.ndarray
     S: np.ndarray
     V: np.ndarray
-    null_Q: np.ndarray
+
+    def null_space_transform(self, Y: np.ndarray) -> np.ndarray:
+        """
+        Compute V^T Y where V is the orthogonal complement to Q, using Householder
+        transformations via LAPACK's dormqr. Ensures inputs are compatible.
+
+        Parameters:
+            Q (ndarray): p x r semi-orthogonal matrix where Q^T Q = I_r, r <= p.
+                Should be a standard float array (e.g., float64).
+            Y (ndarray): p x n matrix. Will be converted to float64 if necessary.
+
+        Returns:
+            VT_Y (ndarray): (p - r) x n matrix representing V^T Y (float64).
+        """
+        Y = np.array(Y, order='F', copy=True)
+        Q = np.array(self.Q).astype(np.float64, copy=False)
+
+        p, r = Q.shape
+
+        if r > p:
+            raise ValueError(f"Number of columns r ({r}) cannot exceed number of rows p ({p}) in Q.")
+
+        # 1. Compute QR factorization of Q
+        # Need a copy of Q because 'raw' QR might modify it slightly in some versions/backends,
+        # even though documentation often says it doesn't. Using overwrite_a=True below is safer.
+        Q_copy = np.array(Q, order='F', dtype=np.float64)  # Fortran order often preferred by LAPACK
+        qr_a, tau, work_qr, info_qr = lapack.dgeqrf(Q_copy, overwrite_a=True)
+        if info_qr != 0:
+            raise RuntimeError(f"LAPACK dgeqrf failed with info = {info_qr}")
+        # qr_a now contains R in upper triangle and reflectors below diagonal (overwritten Q_copy)
+
+        # 2. Prepare matrix Z (to be modified by dormqr)
+
+        # 3. Apply Q_full^T to Z using dormqr
+        # Workspace query
+        # try:
+        lwork = -1
+        # Use Z's shape here for the query, pass dummy Z
+        _, work_query, _ = lapack.dormqr('L', 'T', qr_a, tau, np.empty_like(Y), lwork=lwork, overwrite_c=True)
+        optimal_lwork = int(work_query[0].real)
+        lwork = max(1, optimal_lwork)
+
+
+        # Actual application
+        q_mult_y, work_actual, info_ormqr = lapack.dormqr('L', 'T', qr_a, tau, Y,
+                                                          lwork=lwork, overwrite_c=True)
+
+        if info_ormqr != 0:
+            # Add more debug info if it fails
+            print("--- Debug Info Before dormqr Failure ---")
+            print(f"Q shape: {Q.shape}, dtype: {Q.dtype}")
+            print(f"qr_a shape: {qr_a.shape}, dtype: {qr_a.dtype}, order: {'F' if qr_a.flags.f_contiguous else 'C'}")
+            print(f"tau shape: {tau.shape}, dtype: {tau.dtype}")
+            print(f"Y shape: {Y.shape}, dtype: {Y.dtype}, order: {'F' if Y.flags.f_contiguous else 'C'}")
+            print(f"lwork: {lwork}")
+            print("--- End Debug Info ---")
+            raise RuntimeError(f"LAPACK dormqr failed with info = {info_ormqr}")
+
+        VT_Y = q_mult_y[r:, :]
+        return VT_Y
+    #null_Q: np.ndarray
 
 @dataclass
 class TransformedData:
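
Dropping the stored null_Q in favor of this Householder-based transform avoids materializing the p x (p - r) null-space basis. Applying the full orthogonal factor Q_full^T from dgeqrf/dormqr and keeping the last p - r rows yields V^T Y for some orthonormal basis V of the complement of Q's column space; V is only determined up to rotation and sign, but norm-based quantities used downstream (variances of the projected data) are unchanged. A NumPy-only sketch of the identity being relied on (illustrative; not the LAPACK code path):

    import numpy as np

    rng = np.random.default_rng(0)
    p, r, n = 8, 3, 5
    Q, _ = np.linalg.qr(rng.normal(size=(p, r)))   # semi-orthogonal: Q.T @ Q = I_r
    Y = rng.normal(size=(p, n))

    # Householder route: QR of Q, apply the full Q_full^T, keep the last p - r rows.
    Q_full, _ = np.linalg.qr(Q, mode='complete')
    VT_Y = (Q_full.T @ Y)[r:]

    # The basis is defined only up to rotation/sign, but the projection agrees:
    P_null = np.eye(p) - Q @ Q.T
    assert np.allclose(VT_Y.T @ VT_Y, Y.T @ P_null @ Y)
    # Column norms (hence the variances used downstream) are preserved too.
    assert np.allclose((VT_Y ** 2).sum(axis=0), (P_null @ Y * Y).sum(axis=0))
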
@@ -52,7 +113,6 @@ class MotifVarianceEstimates:
     fixed_group: int
     loglik: float
     loglik_start: float
-    logratios: np.ndarray
 
 @dataclass(frozen=True)
 class MotifMeanEstimates:
@@ -87,9 +147,60 @@ def ones_nullspace(n: int):
         res[i - 1, i] = 1 / norm
     return res
 
+def ones_nullspace_transform(x):
+    n, m = x.shape
+    if n <= 1:
+        return np.zeros((0, m), dtype=x.dtype)
+
+    Y = np.zeros((n - 1, m), dtype=float)
+    current_sum = x[0, :].astype(float)
+
+    for r in range(n - 1):
+        i = r + 1
+        sqrt_i_i_plus_1 = np.sqrt(i * (i + 1))
+
+        # Coefficients for row r of Y (which uses row i-1 = r of H)
+        coeff1 = -1.0 / sqrt_i_i_plus_1
+        coeff2 = np.sqrt(i / (i + 1))
+        Y[r, :] = coeff1 * current_sum + coeff2 * x[r + 1, :]
+
+        # Update current_sum for the next iteration (to become sum_{k=0}^{r+1} X[k,:])
+        if r < n - 2:  # Avoid adding beyond X's bounds on the last iteration
+            current_sum += x[r + 1, :]
+    return Y
+
+def ones_nullspace_transform_transpose(X: np.ndarray) -> np.ndarray:
+    n, m = X.shape
+    n = n + 1
+
+    if n == 1:
+        output_dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else float
+        return np.zeros((1, m), dtype=output_dtype)
+
+    output_dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else float
+    Y = np.zeros((n, m), dtype=output_dtype)
+
+    current_suffix_sum = np.zeros(m, dtype=output_dtype)
+
+    for k in range(n - 2, -1, -1):
+        i = k + 1.0
+
+        sqrt_term_i_ip1 = np.sqrt(i * (i + 1.0))
+        coeff_pos = i / sqrt_term_i_ip1
+        coeff_neg = -1.0 / sqrt_term_i_ip1
+
+
+        Y[k + 1, :] = coeff_pos * X[k, :] + current_suffix_sum
+
+        current_suffix_sum += coeff_neg * X[k, :]
+
+    Y[0, :] = current_suffix_sum
+
+    return Y
+
 def lowrank_decomposition(X: np.ndarray, rel_eps=1e-12) -> LowrankDecomposition:
     svd = jnp.linalg.svd
-    q, s, v = [np.array(t) for t in svd(X)]
+    q, s, v = [np.array(t) for t in svd(X, full_matrices=False)]
     max_sv = max(s)
     n = len(s)
     for r in range(n):
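
Both loops apply the Helmert contrast (the basis that ones_nullspace returns) matrix-free via a running prefix/suffix sum, turning an O(n^2 m) multiplication into O(nm) without building the (n - 1) x n matrix. A self-contained sketch of the forward direction checked against an explicit Helmert matrix (re-derived here, not imported from the package):

    import numpy as np

    def helmert_rows(n):
        # Explicit (n-1) x n Helmert basis of the null space of the all-ones
        # vector: row r is (-1, ..., -1, r+1, 0, ..., 0) / sqrt((r+1)(r+2)).
        H = np.zeros((n - 1, n))
        for r in range(n - 1):
            i = r + 1
            H[r, :i] = -1.0 / np.sqrt(i * (i + 1))
            H[r, i] = i / np.sqrt(i * (i + 1))
        return H

    def helmert_apply(x):
        # Matrix-free H @ x with a running prefix sum, as in
        # ones_nullspace_transform above.
        n, m = x.shape
        y = np.zeros((n - 1, m))
        s = x[0].astype(float).copy()
        for r in range(n - 1):
            i = r + 1
            y[r] = (-s + i * x[r + 1]) / np.sqrt(i * (i + 1))
            s += x[r + 1]
        return y

    rng = np.random.default_rng(1)
    X = rng.normal(size=(6, 4))
    H = helmert_rows(6)
    assert np.allclose(H @ H.T, np.eye(5))        # orthonormal rows
    assert np.allclose(H.sum(axis=1), 0)          # orthogonal to the ones vector
    assert np.allclose(helmert_apply(X), H @ X)   # O(nm) loop matches the matmul
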
@@ -98,10 +209,9 @@ def lowrank_decomposition(X: np.ndarray, rel_eps=1e-12) -> LowrankDecomposition:
             break
         r += 1
     s = s[:r]
-    null_q = q[:, r:]
     q = q[:, :r]
     v = v[:r]
-    return LowrankDecomposition(q, s, v, null_q)
+    return LowrankDecomposition(q, s, v)
 
 def transform_data(data, std_y=False, std_b=False, helmert=True) -> TransformedData:
     try:
@@ -115,9 +225,11 @@ def transform_data(data, std_y=False, std_b=False, helmert=True) -> TransformedD
     if std_b:
         B /= B.std(axis=0, keepdims=True)
     if helmert:
-        F_p = ones_nullspace(len(Y))
-        Y = F_p @ Y
-        B = F_p @ B
+        # F_p = ones_nullspace(len(Y))
+        # Y = F_p @ Y
+        # B = F_p @ B
+        Y = ones_nullspace_transform(Y)
+        B = ones_nullspace_transform(B)
     group_inds_inv = list()
     d = dict()
     for i, items in enumerate(group_inds):
@@ -346,9 +458,24 @@ def loglik_motifs_fim(x: jnp.ndarray, BTB: jnp.ndarray,
     return FIM
 
 
+def calc_error_variance_fim(data: TransformedData, error_variance: jnp.ndarray):
+    d = 1 / jnp.array(error_variance).at[data.group_inds_inv].get()
+    d = d / d.sum() ** 0.5
+    D_product_inv = jnp.outer(-d, d)
+    D_product_inv = jnp.fill_diagonal(D_product_inv,
+                                      D_product_inv.diagonal() + d * d.sum(),
+                                      inplace=False)
+    fim = D_product_inv * D_product_inv.T / 2
+    group_inds = data.group_inds
+    group_loadings = np.zeros((len(d), len(group_inds)), dtype=int)
+    for i, indices in enumerate(group_inds):
+        group_loadings[indices, i] = 1
+    group_loadings = jnp.array(group_loadings)
+    return group_loadings.T @ fim @ group_loadings
+
 def estimate_error_variance(data: TransformedData, B_decomposition: LowrankDecomposition,
                             verbose=False) -> ErrorVarianceEstimates:
-    Y = B_decomposition.null_Q.T @ data.Y
+    Y = B_decomposition.null_space_transform(data.Y)
     d0 = jnp.array([np.var(Y[:, inds]) for inds in data.group_inds])
 
     fun = partial(loglik_error, Qn_Y=Y, group_inds_inv=data.group_inds_inv)
@@ -362,7 +489,8 @@ def estimate_error_variance(data: TransformedData, B_decom
         print('-' * 15)
         print(res)
         print('-' * 15)
-    fim = jax.jacrev(grad)(res.x)
+
+    fim = calc_error_variance_fim(data, res.x)
    return ErrorVarianceEstimates(np.array(res.x), np.array(fim),
                                  loglik_start=res.start_loglik,
                                  loglik=res.fun)
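
The observed information was previously formed by reverse-over-reverse autodiff (jax.jacrev of the gradient); calc_error_variance_fim replaces that with a closed-form expression. A toy comparison of the two routes on a hypothetical, separable negative log-likelihood (illustrative only; this is not the model used in the package):

    import jax
    import jax.numpy as jnp

    # Toy objective in two variance parameters, chosen so the analytic
    # Hessian is easy to write down: f(x) = sum(log x) + sum(1/x).
    def negloglik(x):
        return jnp.sum(jnp.log(x)) + jnp.sum(1.0 / x)

    grad = jax.grad(negloglik)
    x0 = jnp.array([0.5, 2.0])
    hessian_autodiff = jax.jacrev(grad)(x0)   # the old jacrev-of-grad route
    # Analytic Hessian of the separable toy objective: diag(-1/x^2 + 2/x^3).
    hessian_analytic = jnp.diag(-1.0 / x0 ** 2 + 2.0 / x0 ** 3)
    assert jnp.allclose(hessian_autodiff, hessian_analytic)
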
@@ -374,13 +502,16 @@ def estimate_promoter_mean(data: TransformedData,
 
     D = error_variance.variance[data.group_inds_inv]
     Y = jnp.array(data.Y)
-    F_p = jnp.array(ones_nullspace(len(Y) + 1))
-    Q_N = jnp.array(B_decomposition.null_Q)
+    # F_p = jnp.array(ones_nullspace(len(Y) + 1))
+    # Q_N = jnp.array(B_decomposition.null_Q)
+    Q_C = jnp.array(B_decomposition.Q)
     w = (1 / D).sum()
     mean = Y @ (1 / D.reshape(-1, 1))
-    mean = Q_N.T @ mean
-    mean = Q_N @ mean
-    mean = F_p.T @ mean
+    mean = mean - Q_C @ (Q_C.T @ mean)
+    # mean = Q_N.T @ mean
+    # mean = Q_N @ mean
+    # mean = F_p.T @ mean
+    mean = ones_nullspace_transform_transpose(mean)
     mean = mean / w
     return PromoterMeanEstimates(mean)
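
The rewritten projection uses the identity Q_N Q_N^T = I - Q Q^T, so only the kept factor Q is needed and null_Q can be dropped entirely. A quick NumPy check of that identity:

    import numpy as np

    rng = np.random.default_rng(2)
    p, r = 7, 3
    Q_full, _ = np.linalg.qr(rng.normal(size=(p, p)))
    Q_C, Q_N = Q_full[:, :r], Q_full[:, r:]   # column space and its complement

    v = rng.normal(size=(p, 1))
    old = Q_N @ (Q_N.T @ v)           # old route: explicit null-space projector
    new = v - Q_C @ (Q_C.T @ v)       # new route: subtract the column-space part
    assert np.allclose(old, new)      # Q_N Q_N^T == I - Q_C Q_C^T
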
 
@@ -437,12 +568,14 @@ def estimate_motif_variance(data: TransformedData, B_decomposition: LowrankDecom
     eig = jnp.linalg.eigh(f)[0].min()
     if eig < 0:
         eig = list()
-        epsilons = [1e-15, 1e-12, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
+        epsilons = [1e-23, 1e-15, 1e-12, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
         for eps in epsilons:
             x = res.x.copy()
             x = x.at[:len(BTB)].set(jnp.clip(x.at[:len(BTB)].get(), eps, float('inf')))
             f = fim(x)
             eig.append(jnp.linalg.eigh(f)[0].min())
+            if eig[-1] > 0:
+                break
         i = np.argmax(eig)
         eps = epsilons[i]
         x = res.x.copy()
@@ -450,31 +583,9 @@ def estimate_motif_variance(data: TransformedData, B_decomposition: LowrankDecom
         fim = fim(x)
     else:
         fim = f
-    print('FIM', eig)
-    logliks = list()
-    from tqdm import tqdm
-    for i in tqdm(list(range(len(BTB)))):
-        x = res.x.copy()
-        x = x.at[i].set(0)
-        subfun = partial(fun, _motif_zero=i)
-        subgrad = partial(grad, _motif_zero=i)
-        opt = MetaOptimizer(subfun, subgrad, num_steps_momentum=5, skip_init=False)
-        logliks.append(opt.optimize(x).fun)
-    logliks = np.array(logliks) - float(res.fun)
-    # fim_naive = partial(loglik_motifs_fim_naive, B=data.B, D=D,
-    #                     group_inds_inv=data.group_inds_inv, group_inds=data.group_inds,
-    #                     G_fix_ind=j, G_fix_val=fix)
-    # fim_naive = fim_naive(res.x)
-    # print('FIM')
-    # print(fim)
-    # print('Naive')
-    # print(fim_naive)
-    # print(np.abs(fim - fim_naive) / np.abs(fim_naive))
-    # fim = fim_naive
-    # fim = (fim, fim_naive)
     return MotifVarianceEstimates(motif=np.array(Sigma), group=np.array(G), fim=np.array(fim),
                                   fixed_group=j, loglik_start=res.start_loglik,
-                                  loglik=res.fun, logratios=logliks)
+                                  loglik=res.fun)
 
 def estimate_motif_mean(data: TransformedData, B_decomposition: LowrankDecomposition,
                         error_variance: ErrorVarianceEstimates,
@@ -494,8 +605,9 @@ def estimate_motif_mean(data: TransformedData, B_decomposi
 
     BTB = B_decomposition.V.T * B_decomposition.S ** 2 @ B_decomposition.V
     A = jnp.sqrt(Sigma).reshape(-1, 1) * BTB
-    Fp = ones_nullspace(len(data.Y) + 1)
-    Y_tilde = (data.Y - Fp @ mu_p.reshape(-1, 1)) / d
+    # Fp = ones_nullspace(len(data.Y) + 1)
+    # Y_tilde = (data.Y - Fp @ mu_p.reshape(-1, 1)) / d
+    Y_tilde = (data.Y - ones_nullspace_transform(mu_p.reshape(-1, 1))) / d
     Y_hat = jnp.sqrt(Sigma).reshape(-1,1) * data.B.T @ Y_tilde * g / d
     D_B, Q_B = jnp.linalg.eigh(jnp.sqrt(Sigma).reshape(-1, 1) * BTB * jnp.sqrt(Sigma))
     At_QB = A.T @ Q_B
{maradoner-0.10 → maradoner-0.11}/maradoner/main.py
@@ -264,14 +264,14 @@ def _export(name: str = Argument(..., help='Project name.'),
             output_folder: Path = Argument(..., help='Output folder.'),
             std_mode: Standardization = Option(Standardization.full, help='Whether to standardize activities with plain variances or also decorrelate them.'),
             anova_mode: ANOVAType = Option(ANOVAType.positive, help='If negative, look for non-variative motifs'),
-            corrected_pvalues: bool = Option(False, help='Compute MVN-based FDR correction.'),
+            weighted_zscore: bool = Option(False, help='Reciprocal variance weighted Z-scores'),
             alpha: float = Option(0.05, help='FDR alpha.')):
    t0 = time()
    p = Progress(SpinnerColumn(speed=0.5), TextColumn("[progress.description]{task.description}"), transient=True)
    p.add_task(description="Exporting results...", total=None)
    p.start()
    export_results(name, output_folder, std_mode=std_mode, anova_mode=anova_mode, alpha=alpha,
-                   compute_corrected_pvalues=corrected_pvalues)
+                   weighted_zscore=weighted_zscore)
    p.stop()
    dt = time() - t0
    rprint(f'[green][bold]✔️[/bold] Done![/green]\t time: {dt:.2f} s.')
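
A hypothetical programmatic call with the new keyword, for orientation only (the import locations of the enums are assumptions, not shown in this diff):

    # Hypothetical usage sketch of the revised export_results signature.
    from maradoner.export import export_results                 # per this diff
    from maradoner.export import Standardization, ANOVAType     # assumed import site

    export_results('my_project', 'out',
                   std_mode=Standardization.full,
                   anova_mode=ANOVAType.positive,
                   weighted_zscore=True,   # new in 0.11: precision-weighted overall z-score
                   alpha=0.05)
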
{maradoner-0.10 → maradoner-0.11}/maradoner/mara/export.py
@@ -62,6 +62,8 @@ def export_results(project_name: str, output_folder: str):
 
     U = act.U
     U_var = act.variance
+
+    U = U / U_var ** 0.5
 
     # U_grouped = list()
     # U_var_grouped = list()
@@ -74,15 +76,13 @@ def export_results(project_name: str, output_folder: str):
     os.makedirs(output_folder, exist_ok=True)
     DF(np.array([error_variance, motif_variance]).T, index=sample_names,
        columns=['sigma', 'tau']).to_csv(os.path.join(output_folder, 'params.tsv'), sep='\t')
-    act = U / U_var ** 0.5
-    U_total = act.sum(axis=1, keepdims=True) / (1 / U_var ** 0.5).sum(axis=1, keepdims=True)
-    act = np.hstack((U_total, act))
+    U_total = U.mean(axis=1, keepdims=True)  # / (1 / U_var ** 0.5).sum(axis=1, keepdims=True)
+    act = np.hstack((U_total, U))
     DF(act, index=motif_names,
        columns=['overall'] + list(sample_names)).to_csv(os.path.join(output_folder, 'activities.tsv'),
                                                         sep='\t')
 
-    z = U / U_var ** 0.5
-    z = z ** 2
+    z = U ** 2
     U_total = z.mean(axis=1, keepdims=True)  # / (1 / U_var ** 0.5).sum(axis=1, keepdims=True)
     z = np.hstack((U_total, z))
     z = z ** 0.5
@@ -90,5 +90,4 @@ def export_results(project_name: str, output_folder: str):
        columns=['overall'] + list(sample_names)).to_csv(os.path.join(output_folder, 'z_scores.tsv'),
                                                         sep='\t')
 
-
 
{maradoner-0.10 → maradoner-0.11}/maradoner/mara/fit.py
@@ -44,6 +44,7 @@ class MotifVarianceEstimates:
 class FitResult:
     error_variance: ErrorVarianceEstimates
     motif_variance: MotifVarianceEstimates
+    B_decomposition: LowrankDecomposition
     group_names: list
     clustering: np.ndarray = None
     clustered_B: np.ndarray = None
@@ -70,7 +71,8 @@ def transform_data(data, std_y=False, std_b=False, helmert=True) -> TransformedD
 
 def estimate_error_variance(data: TransformedData,
                             B_decomposition: LowrankDecomposition) -> ErrorVarianceEstimates:
-    Y = B_decomposition.null_Q.T @ data.Y
+    # Y = B_decomposition.null_Q.T @ data.Y
+    Y = B_decomposition.null_space_transform(data.Y)
     variance = (Y ** 2).mean(axis=0)
     return ErrorVarianceEstimates(variance)
 
@@ -79,7 +81,7 @@ def calc_tau(tau: float, error_variance: np.ndarray, mode: TauMode):
     if mode == mode.mara:
         taus = tau * np.ones_like(error_variance)
     else:
-        taus = tau / (error_variance + tau)
+        taus = tau / error_variance
     return taus
 
 def loglik_tau(tau: float, Sigma: np.ndarray, Y_hat: np.ndarray,
@@ -88,10 +90,10 @@ def loglik_tau(tau: float, Sigma: np.ndarray, Y_hat: np.ndarray,
     logdet = 0
     taus = calc_tau(tau, error_variance, mode)
     for sigma, tau, y in zip(error_variance, taus, Y_hat.T):
-        S = tau * Sigma + sigma
-        vec += (y ** 2 * S).sum()
+        S = tau / sigma * Sigma + 1
+        vec += (y ** 2 / S).sum() * (tau / sigma ** 2)
         logdet += S.sum()
-    return vec + logdet
+    return -vec + logdet
 
 def estimate_motif_variance(data: TransformedData, B_decomposition: LowrankDecom
                             error_variance: ErrorVarianceEstimates,
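
For reference, under the implied Gaussian model \(\hat{y} \sim \mathcal{N}(0,\, \tau \Sigma + \sigma I)\) with diagonal \(\Sigma\), the exact negative log marginal likelihood that such a criterion targets is, up to constants,

    -2 \log L(\tau) \;=\; \sum_{j}\sum_{i}\left[\log\big(\tau_j \Sigma_i + \sigma_j\big) + \frac{\hat{Y}_{ij}^2}{\tau_j \Sigma_i + \sigma_j}\right] + \mathrm{const},
    \qquad \tau_j \Sigma_i + \sigma_j \;=\; \sigma_j\big((\tau_j/\sigma_j)\,\Sigma_i + 1\big).

The second identity is what grounds the rescaled factor S = (tau/sigma) * Sigma + 1 in the revised loglik_tau; the function itself remains a fast surrogate rather than the exact expression above.
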
@@ -106,7 +108,7 @@ def estimate_motif_variance(data: TransformedData, B_decomposition: LowrankDecom
     Y_hat = Q.T @ data.B.T @ data.Y
     fun = partial(loglik_tau, Sigma=Sigma, Y_hat=Y_hat, error_variance=error_variance.variance,
                   mode=mode)
-    tau = calc_tau(minimize_scalar(fun, bounds=(0.0, 5.0)).x, error_variance.variance, mode)
+    tau = calc_tau(minimize_scalar(fun, bounds=(0.0, error_variance.variance.max() * 10)).x, error_variance.variance, mode)
     return MotifVarianceEstimates(tau)
 
 
@@ -118,18 +120,11 @@ class ActivitiesPrediction:
 
 
 def predict_activities(data: TransformedData, fit: FitResult,
-                       gpu_decomposition=False, gpu=False, verbose=True) -> ActivitiesPrediction:
+                       gpu=False, verbose=True) -> ActivitiesPrediction:
     U = list()
     variance = list()
-    if gpu_decomposition:
-        device = jax.devices()
-    else:
-        device = jax.devices('cpu')
-    device = next(iter(device))
 
-    logger_print('Computing low-rank decompositions of the loading matrix...', verbose)
-    with jax.default_device(device):
-        B_decomposition = lowrank_decomposition(data.B)
+    B_decomposition = fit.B_decomposition
     if gpu:
         device = jax.devices()
     else:
@@ -200,7 +195,7 @@ def fit(project: str, tau_mode: TauMode, tau_estimation: TauEstimation,
 
 
     res = FitResult(error_variance=error_variance, motif_variance=motif_variance,
-                    clustering=clustering,
+                    clustering=clustering, B_decomposition=B_decomposition,
                     group_names=group_names, promoter_inds_to_drop=promoter_inds_to_drop)
     if dump:
         with openers[fmt](f'{project}.old.fit.{fmt}', 'wb') as f:
@@ -257,10 +252,10 @@ def _cor(a, b, axis=1):
     return numerator / denominator
 
 def calculate_fov(project: str, gpu: bool,
-                  stat_type: GOFStat, x64=True,
+                  stat_type: GOFStat, keep_motifs: str, x64=True,
                   verbose=True, dump=True):
     def calc_fov(data: TransformedData, fit: FitResult,
-                 activities: ActivitiesPrediction) -> tuple[FOVResult]:
+                 activities: ActivitiesPrediction, keep_motifs=None) -> tuple[FOVResult]:
         def sub(Y, effects) -> FOVResult:
             if stat_type == stat_type.fov:
                 Y1 = Y - effects
@@ -277,17 +272,33 @@ def calculate_fov(project: str, gpu: bool,
             sample = _cor(Y, effects, axis=0)
             return FOVResult(total, prom, sample)
         data = transform_data(data)
-        B = data.B
+        B = data.B if activities.clustering is None else activities.clustering[0]
         Y = data.Y
         U = activities.U
-        if activities.clustering is not None:
-            d = activities.clustering[0] @ U
-        else:
-            d = B @ U
+        if keep_motifs is not None:
+            B = B[:, keep_motifs]
+            U = U[keep_motifs]
+        d = B @ U
         stat_0 = sub(Y, d)
         return stat_0,
     data = read_init(project)
     fmt = data.fmt
+    motif_names = data.motif_names
+    if keep_motifs:
+        import datatable as dt
+        df = dt.fread(keep_motifs).to_pandas().groupby('status')
+        keep_motifs = list()
+        for name, motifs in df:
+            inds = list()
+            for mot in motifs.iloc[:, 0]:
+                try:
+                    i = motif_names.index(mot)
+                    inds.append(i)
+                except ValueError:
+                    print(f'Motif {mot} not found in the project.')
+            keep_motifs.append((name, np.array(inds, dtype=int)))
+    else:
+        keep_motifs = [(None, None)]
     with openers[fmt](f'{project}.old.fit.{fmt}', 'rb') as f:
         fit = dill.load(f)
     with openers[fmt](f'{project}.old.predict.{fmt}', 'rb') as f:
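
The parsing above expects a delimited table with the motif identifier in the first column and a 'status' column to group by, matching the CLI help ("Table with 2 columns: motif and status"). A hypothetical keep-motifs file (tab-separated; motif IDs made up):

    motif	status
    MA0139.1	curated
    MA0079.5	curated
    MA0060.3	background

Each status group is then evaluated as a separate motif subset, and motifs absent from the project are reported and skipped.
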
@@ -303,17 +314,23 @@ def calculate_fov(project: str, gpu: bool,
     else:
         device = jax.devices('cpu')
     device = next(iter(device))
-    with jax.default_device(device):
-
-        if data_test is not None:
-            test_FOV = calc_fov(data=data_test, fit=fit, activities=activities)
-        train_FOV = calc_fov(data=data, fit=fit, activities=activities)
-        if data_test is None:
-            test_FOV = None
-        res = TestResult(train_FOV, test_FOV, grouped=False)
+    results = list()
+    for status_name, motifs in keep_motifs:
+        if status_name:
+            status_name = f'{status_name} ({len(motifs)})'
+            print(status_name)
+        with jax.default_device(device):
+
+            if data_test is not None:
+                test_FOV = calc_fov(data=data_test, fit=fit, activities=activities, keep_motifs=motifs)
+            train_FOV = calc_fov(data=data, fit=fit, activities=activities, keep_motifs=motifs)
+            if data_test is None:
+                test_FOV = None
+            res = TestResult(train_FOV, test_FOV, grouped=False)
+            results.append((status_name, res))
     with openers[fmt](f'{project}.old.fov.{fmt}', 'wb') as f:
-        dill.dump(res, f)
-    return res
+        dill.dump(results, f)
+    return results
 
 
{maradoner-0.10 → maradoner-0.11}/maradoner/mara/main.py
@@ -51,6 +51,7 @@ def _fit(name: str = Argument(..., help='Project name.'),
 @app_old.command('gof', help='Estimate GOFs given test/train data split. Provides test info only if [orange]test-chromosomes[/orange] is not None in [cyan]fit[/cyan].')
 def _gof(name: str = Argument(..., help='Project name.'),
          # use_groups: bool = Option(False, help='Compute statistic for sammples aggragated across groups.'),
+         keep_motifs: Path = Option(None, help='Table with 2 columns: motif and status'),
          stat_type: GOFStat = Option(GOFStat.fov, help='Statistic type to compute'),
         gpu: bool = Option(False, help='Use GPU if available for most of computations.'),
         x64: bool = Option(True, help='Use high precision algebra.')):
@@ -62,21 +63,25 @@ def _gof(name: str = Argument(..., help='Project name.'),
     p = Progress(SpinnerColumn(speed=0.5), TextColumn("[progress.description]{task.description}"), transient=True)
     p.add_task(description="Calculating FOVs...", total=None)
     p.start()
-    res = calculate_fov(name, stat_type=stat_type, gpu=gpu, x64=x64)
-    if stat_type == GOFStat.corr:
-        title = 'Pearson correlation'
-    else:
-        title = 'Fraction of variance explained'
-    t = Table('Set', 'stat',
-              title=title)
-    row = [f'{t.total:.6f}' for t in res.train]
-    t.add_row('train', *row)
-    if res.test is not None:
-        row = [f'{t.total:.6f}' for t in res.test]
-        t.add_row('test', *row)
+    res = calculate_fov(name, stat_type=stat_type, keep_motifs=keep_motifs, gpu=gpu, x64=x64)
+    for name, res in res:
+        print(name)
+        if stat_type == GOFStat.corr:
+            title = 'Pearson correlation'
+        else:
+            title = 'Fraction of variance explained'
+        if name:
+            title = f'({name}) {title}'
+        t = Table('Set', 'stat',
+                  title=title)
+        row = [f'{t.total:.6f}' for t in res.train]
+        t.add_row('train', *row)
+        if res.test is not None:
+            row = [f'{t.total:.6f}' for t in res.test]
+            t.add_row('test', *row)
+        rprint(t)
     p.stop()
     dt = time() - t0
-    rprint(t)
     rprint(f'[green][bold]✔️[/bold] Done![/green]\t time: {dt:.2f} s.')
 
 @app_old.command('predict', help='Estimate deviations of motif activities from their means.')
{maradoner-0.10 → maradoner-0.11}/maradoner.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: maradoner
-Version: 0.10
+Version: 0.11
 Summary: Variance-adjusted estimation of motif activities.
 Home-page: https://github.com/autosome-ru/nemara
 Author: Georgy Meshcheryakov