maradoner 0.11.tar.gz → 0.13.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of maradoner might be problematic.

Files changed (26)
  1. {maradoner-0.11 → maradoner-0.13}/PKG-INFO +4 -11
  2. {maradoner-0.11 → maradoner-0.13}/maradoner/__init__.py +3 -1
  3. {maradoner-0.11 → maradoner-0.13}/maradoner/create.py +3 -3
  4. {maradoner-0.11 → maradoner-0.13}/maradoner/dataset_filter.py +38 -2
  5. {maradoner-0.11 → maradoner-0.13}/maradoner/export.py +65 -14
  6. {maradoner-0.11 → maradoner-0.13}/maradoner/fit.py +15 -16
  7. maradoner-0.13/maradoner/grn.py +177 -0
  8. {maradoner-0.11 → maradoner-0.13}/maradoner/main.py +61 -6
  9. {maradoner-0.11 → maradoner-0.13}/maradoner/mara/fit.py +29 -16
  10. {maradoner-0.11 → maradoner-0.13}/maradoner/mara/main.py +2 -1
  11. {maradoner-0.11 → maradoner-0.13}/maradoner/utils.py +1 -1
  12. {maradoner-0.11 → maradoner-0.13}/maradoner.egg-info/PKG-INFO +4 -11
  13. {maradoner-0.11 → maradoner-0.13}/maradoner.egg-info/SOURCES.txt +1 -0
  14. {maradoner-0.11 → maradoner-0.13}/maradoner.egg-info/requires.txt +2 -0
  15. {maradoner-0.11 → maradoner-0.13}/README.md +0 -0
  16. {maradoner-0.11 → maradoner-0.13}/maradoner/mara/__init__.py +0 -0
  17. {maradoner-0.11 → maradoner-0.13}/maradoner/mara/export.py +0 -0
  18. {maradoner-0.11 → maradoner-0.13}/maradoner/mara.py +0 -0
  19. {maradoner-0.11 → maradoner-0.13}/maradoner/meta_optimizer.py +0 -0
  20. {maradoner-0.11 → maradoner-0.13}/maradoner/select.py +0 -0
  21. {maradoner-0.11 → maradoner-0.13}/maradoner/synthetic_data.py +0 -0
  22. {maradoner-0.11 → maradoner-0.13}/maradoner.egg-info/dependency_links.txt +0 -0
  23. {maradoner-0.11 → maradoner-0.13}/maradoner.egg-info/entry_points.txt +0 -0
  24. {maradoner-0.11 → maradoner-0.13}/maradoner.egg-info/top_level.txt +0 -0
  25. {maradoner-0.11 → maradoner-0.13}/setup.cfg +0 -0
  26. {maradoner-0.11 → maradoner-0.13}/setup.py +0 -0

{maradoner-0.11 → maradoner-0.13}/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.1
  Name: maradoner
- Version: 0.11
+ Version: 0.13
  Summary: Variance-adjusted estimation of motif activities.
  Home-page: https://github.com/autosome-ru/nemara
  Author: Georgy Meshcheryakov
@@ -25,15 +25,8 @@ Requires-Dist: statsmodels>=0.14
  Requires-Dist: datatable>=1.0.0
  Requires-Dist: dill>=0.3.9
  Requires-Dist: rich>=12.6.0
- Dynamic: author
- Dynamic: author-email
- Dynamic: classifier
- Dynamic: description
- Dynamic: description-content-type
- Dynamic: home-page
- Dynamic: requires-dist
- Dynamic: requires-python
- Dynamic: summary
+ Requires-Dist: tqdm>=4.0
+ Requires-Dist: scikit-learn>=1.6


  **MARADONER**

{maradoner-0.11 → maradoner-0.13}/maradoner/__init__.py

@@ -1,5 +1,5 @@
  # -*- coding: utf-8 -*-
- __version__ = '0.11'
+ __version__ = '0.13'
  import importlib


@@ -16,6 +16,8 @@ __min_reqs__ = [
      'datatable>=1.0.0',
      'dill>=0.3.9',
      'rich>=12.6.0',
+     'tqdm>=4.0',
+     'scikit-learn>=1.6'
  ]

  def versiontuple(v):

{maradoner-0.11 → maradoner-0.13}/maradoner/create.py

@@ -37,7 +37,7 @@ def transform_loadings(df, mode: str, zero_cutoff=1e-9, prom_inds=None):

  def create_project(project_name: str, promoter_expression_filename: str, loading_matrix_filenames: list[str],
                     motif_expression_filenames=None, loading_matrix_transformations=None, sample_groups=None, motif_postfixes=None,
-                    promoter_filter_lowexp_cutoff=0.95, promoter_filter_plot_filename=None,
+                    promoter_filter_lowexp_cutoff=0.95, promoter_filter_plot_filename=None, promoter_filter_max=True,
                     motif_names_filename=None, compression='raw', dump=True, verbose=True):
      if not os.path.isfile(promoter_expression_filename):
          raise FileNotFoundError(f'Promoter expression file {promoter_expression_filename} not found.')
@@ -88,8 +88,8 @@ def create_project(project_name: str, promoter_expression_filename: str, loading
                           f'{len(loading_matrix_transformations)}.')

      logger_print('Filtering promoters of low expression...', verbose)
-     print('aaaaa', len(promoter_expression))
-     inds, weights = filter_lowexp(promoter_expression, cutoff=promoter_filter_lowexp_cutoff, fit_plot_filename=promoter_filter_plot_filename)
+     inds, weights = filter_lowexp(promoter_expression, cutoff=promoter_filter_lowexp_cutoff, fit_plot_filename=promoter_filter_plot_filename,
+                                   max_mode=promoter_filter_max)
      promoter_expression = promoter_expression.loc[inds]
      proms = promoter_expression.index
      loading_matrices = [transform_loadings(df, mode, prom_inds=inds) for df, mode in zip(loading_matrices, loading_matrix_transformations)]

{maradoner-0.11 → maradoner-0.13}/maradoner/dataset_filter.py

@@ -6,7 +6,19 @@ import pandas as pd
  import numpy as np
  from scipy.optimize import minimize
  from functools import partial
+ from sklearn.mixture import GaussianMixture

+ def compute_leftmost_probability(Y):
+     Y = Y.reshape(-1, 1)
+     gmm = GaussianMixture(n_components=2, random_state=0)
+     gmm.fit(Y)
+
+     means = gmm.means_.flatten()
+     leftmost_component_index = np.argmin(means)
+     probas = gmm.predict_proba(Y)
+     leftmost_probs = probas[:, leftmost_component_index]
+
+     return leftmost_probs, gmm

  def normax_logpdf(x: jnp.ndarray, mu: float, sigma: float, n: int):
      x = (x - mu) / sigma
@@ -39,9 +51,33 @@ def loglik(params: jnp.ndarray, x: jnp.ndarray, n: int):
      w = params[-1]
      return -logmixture(x, mu, sigma, w, n).sum()

- def filter_lowexp(expression: pd.DataFrame, cutoff=0.95, fit_plot_filename=None, plot_dpi=200):
+ def filter_lowexp(expression: pd.DataFrame, cutoff=0.95, max_mode=True,
+                   fit_plot_filename=None, plot_dpi=200):
      expression = (expression - expression.mean()) / expression.std()
-
+     if not max_mode:
+         expression = expression.mean(axis=1).values
+         probs, gmm = compute_leftmost_probability(expression)
+         inds = probs < (1 - cutoff)
+         if fit_plot_filename:
+             import matplotlib.pyplot as plt
+             from matplotlib.collections import LineCollection
+             import seaborn as sns
+             x = np.array(sorted(expression))
+             pdf = np.exp(gmm.score_samples(expression[:, None]))
+             points = np.array([x, pdf]).T.reshape(-1, 1, 2)
+             segments = np.concatenate([points[:-1], points[1:]], axis=1)
+             plt.figure(dpi=plot_dpi)
+             sns.histplot(expression, stat='density', color='grey')
+             lc = LineCollection(segments, cmap='winter')
+             lc.set_array(probs)
+             lc.set_linewidth(3)
+             line = plt.gca().add_collection(lc)
+             plt.colorbar(line)
+             plt.xlabel('Standardized expression')
+             plt.tight_layout()
+             plt.savefig(fit_plot_filename)
+         return inds, probs
+
      expression_max = expression.max(axis=1).values

      mu = [-1.0, 0.0]

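Note: the new non-max filtering path fits a two-component Gaussian mixture to the averaged standardized expression and keeps promoters whose posterior probability of the low-expression (leftmost-mean) component stays below 1 - cutoff. A minimal standalone sketch of that logic on synthetic data (illustrative only, not the package's API):

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.default_rng(0)
# Two modes: a low-expression component and a high-expression one.
expr = np.concatenate([rng.normal(-1.5, 0.4, 300), rng.normal(0.5, 0.8, 700)])

gmm = GaussianMixture(n_components=2, random_state=0).fit(expr.reshape(-1, 1))
leftmost = np.argmin(gmm.means_.flatten())
probs = gmm.predict_proba(expr.reshape(-1, 1))[:, leftmost]

cutoff = 0.95
keep = probs < (1 - cutoff)  # same inequality as the non-max branch of filter_lowexp
print(f'kept {keep.sum()} of {len(expr)} promoters')
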
{maradoner-0.11 → maradoner-0.13}/maradoner/export.py

@@ -2,8 +2,9 @@
  # -*- coding: utf-8 -*-
  from pandas import DataFrame as DF
  # add dot
- from .utils import read_init, openers
+ from .utils import read_init, openers, ProjectData
  from .fit import FOVResult, ActivitiesPrediction, FitResult
+ from .grn import grn
  from scipy.stats import norm, chi2, multivariate_normal, Covariance
  from scipy.linalg import eigh, lapack, cholesky, solve
  from statsmodels.stats import multitest
@@ -80,7 +81,9 @@ class Information():
          try:
              x = chol_inv(x)
          except:
-             print('alarm')
+             print('Failed to compute inverse using Cholesky decomposition. ')
+             print('This can be a sign of a numerical errors during parameters estimation.')
+             print('Will use pseudo-inverse now. The minimal and maximal eigenvalues are:')
              # print(x.diagonal().min())
              assert np.allclose(x, x.T), x - x.T
              x = np.linalg.eigh(x)
@@ -155,7 +158,8 @@ def export_fov(fovs: tuple[FOVResult], folder: str,
      samples = [fov_null.sample[:, None], fov_means.sample[:, None], fov_motif_means.sample[:, None]]
      samples = np.concatenate(samples, axis=-1)
      DF(samples, index=sample_names, columns=cols).to_csv(os.path.join(folder, 'samples.tsv'), sep='\t')
-
+
+


  def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
@@ -172,12 +176,17 @@ def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
      # bad_inds[ind] = True
      # mot = fit.motif_variance.motif
      # mot = np.delete(mot, activities.filtered_motifs)[~bad_inds]
+     motif_variance = fit.motif_variance.motif
+     if activities.filtered_motifs is not None:
+         motif_variance = np.delete(motif_variance, activities.filtered_motifs)
+         B = np.delete(B, activities.filtered_motifs, axis=1)
+     U = activities.U
      if map_cov:
          # fit.motif_variance.m
          BTB = B.T @ B
-         BTB_s = BTB * fit.motif_variance.motif ** 0.5
+         BTB_s = BTB * motif_variance ** 0.5
          BTB_s = BTB_s @ BTB_s.T
-     for cov, U, sigma, n, nu in zip(activities.cov(), activities.U.T,
+     for cov, U, sigma, n, nu in zip(activities.cov(), U.T,
                                      activities._cov[-2],
                                      fit.error_variance.variance, fit.motif_variance.group):
          # cov = cov[~bad_inds, ~bad_inds]
@@ -189,11 +198,11 @@ def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
          covs.append(cov)
          # U = U[~bad_inds]
          # prec = np.linalg.inv(np.diag(mot * nu) - cov)
-         prec = np.linalg.inv(cov)
+         prec = np.linalg.pinv(cov, hermitian=True)
          mean += prec @ U
          precs.append(prec)
      total_prec = sum(precs)
-     total_cov = np.linalg.inv(total_prec)
+     total_cov = np.linalg.pinv(total_prec, hermitian=True)
      mean = total_cov @ mean
      stats = activities.U[~bad_inds] - mean.reshape(-1, 1)
      # if corr_stat:
@@ -211,9 +220,6 @@ def posterior_anova(activities: ActivitiesPrediction, fit: FitResult,
      fdr = multitest.multipletests(pvalues, alpha=0.05, method='fdr_by')[1]
      return stats, pvalues, fdr, bad_inds

-
-
-

  def export_results(project_name: str, output_folder: str,
                     std_mode: Standardization,
@@ -249,7 +255,7 @@ def export_results(project_name: str, output_folder: str,
          motif_names_filtered = motif_names

      os.makedirs(output_folder, exist_ok=True)
-
+     # grn(data, act, fit, os.path.join(output_folder, 'grn'))
      error_variance = fit.error_variance.variance
      error_variance_fim = Information(fit.error_variance.fim)
      error_variance_stat, error_variance_std = error_variance_fim.standardize(error_variance,
@@ -278,9 +284,12 @@ def export_results(project_name: str, output_folder: str,

      folder = os.path.join(output_folder, 'params')
      os.makedirs(folder, exist_ok=True)
+     if os.path.isfile(f'{project_name}.promvar.{fmt}'):
+         with openers[fmt](f'{project_name}.promvar.{fmt}', 'rb') as f:
+             promvar: np.ndarray = dill.load(f)
+         DF(promvar, index=prom_names, columns=group_names).to_csv(os.path.join(folder, 'promoter_variances.tsv'), sep='\t')
      if excluded_motif_group is not None:
          motif_group_variance_std = np.insert(motif_group_variance_std, excluded_motif_group, np.nan)
-     print(error_variance.shape, error_variance_std.shape, motif_group_variance.shape, motif_group_variance_std.shape)
      DF(np.array([error_variance, error_variance_std, motif_group_variance, motif_group_variance_std]).T,
         index=group_names,
         columns=['sigma', 'sigma_std', 'nu', 'nu_std']).to_csv(os.path.join(folder, 'group_variances.tsv'),
@@ -400,6 +409,48 @@
                 sample_names=sample_names)


+ def export_loadings_product(project_name: str, output_folder: str,
+                             use_hdf: bool = True, intercepts: bool = True,
+                             tsv_truncation=4):

-     # return {'z-test': z_test, 'anova': anova, 'off_test': off_test,
-     #         'anova_ass': anova_ass, 'sign_ass': sign_ass}
+
+     data = read_init(project_name)
+     fmt = data.fmt
+     motif_names = data.motif_names
+     prom_names = data.promoter_names
+     # del data
+     with openers[fmt](f'{project_name}.fit.{fmt}', 'rb') as f:
+         fit: FitResult = dill.load(f)
+     if fit.promoter_inds_to_drop:
+         prom_names = np.delete(prom_names, fit.promoter_inds_to_drop)
+     group_names = fit.group_names
+     with openers[fmt](f'{project_name}.predict.{fmt}', 'rb') as f:
+         act: ActivitiesPrediction = dill.load(f)
+
+     output_folder = os.path.join(output_folder, 'loadings-product')
+     os.makedirs(output_folder, exist_ok=True)
+
+     U = act.U
+     B = data.B
+     mu = fit.motif_mean.mean
+
+     if act.filtered_motifs is not None:
+         motif_names = np.delete(motif_names, act.filtered_motifs)
+         B = np.delete(B, act.filtered_motifs, axis=1)
+         mu = np.delete(mu, act.filtered_motifs)
+     BM = B * mu
+     for name, U in zip(group_names, U.T):
+         effect = B * U
+         if intercepts:
+             effect += BM
+         if use_hdf:
+             effect = effect.astype(np.half)
+             filename = os.path.join(output_folder, f'{name}.hdf')
+             DF(data=effect, index=prom_names, columns=motif_names).to_hdf(filename, key='lrt', mode='w', complevel=4)
+         else:
+             filename = os.path.join(output_folder, f'{name}.tsv')
+             DF(data=effect, index=prom_names, columns=motif_names).to_csv(filename, sep='\t',
+                                                                           float_format=f'%.{tsv_truncation}f')

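Note: each per-group loadings-product table produced by export_loadings_product is the elementwise product B * U (plus B * motif means when intercepts are enabled), stored as float16 under the HDF key 'lrt'. A minimal sketch of reading one back, assuming pytables is installed; the path and group name below are hypothetical:

import pandas as pd

effect = pd.read_hdf('output/loadings-product/groupA.hdf', key='lrt')
print(effect.shape)         # (n_promoters, n_motifs), float16
print(effect.iloc[:5, :5])  # promoter-by-motif effects for this group
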
{maradoner-0.11 → maradoner-0.13}/maradoner/fit.py

@@ -198,7 +198,7 @@ def ones_nullspace_transform_transpose(X: np.ndarray) -> np.ndarray:

      return Y

- def lowrank_decomposition(X: np.ndarray, rel_eps=1e-12) -> LowrankDecomposition:
+ def lowrank_decomposition(X: np.ndarray, rel_eps=1e-15) -> LowrankDecomposition:
      svd = jnp.linalg.svd
      q, s, v = [np.array(t) for t in svd(X, full_matrices=False)]
      max_sv = max(s)
@@ -449,12 +449,6 @@ def loglik_motifs_fim(x: jnp.ndarray, BTB: jnp.ndarray,
      FIM_tau_nu = jnp.delete(FIM_tau_nu, G_fix_ind, axis=1)
      FIM = jnp.block([[FIM_tau, FIM_tau_nu],
                       [FIM_tau_nu.T, FIM_nu]])
-     t = FIM[:len(Sigma), :len(Sigma)]
-     t = jnp.linalg.eigh(t)[0]
-     print('FIM_tau', np.min(t), np.max(t), np.min(np.abs(t)))
-     t = FIM[len(Sigma):, len(Sigma):]
-     t = jnp.linalg.eigh(t)[0]
-     print('FIM_nu', np.min(t), np.max(t), np.min(np.abs(t)))
      return FIM


@@ -483,7 +477,7 @@ def estimate_error_variance(data: TransformedData, B_decomposition: LowrankDecom
                              group_inds=data.group_inds)
      fun = jax.jit(fun)
      grad = jax.jit(grad)
-     opt = MetaOptimizer(fun, grad, num_steps_momentum=10)
+     opt = MetaOptimizer(fun, grad, num_steps_momentum=15)
      res = opt.optimize(d0)
      if verbose:
          print('-' * 15)
@@ -539,9 +533,7 @@ def estimate_motif_variance(data: TransformedData, B_decomposition: LowrankDecom
                              G_fix_ind=j, G_fix_val=fix)
      fun = jax.jit(fun)
      grad = jax.jit(grad)
-     opt = MetaOptimizer(fun, grad, num_steps_momentum=80,
-                         # scaling_set=(slice(len(BTB)), slice(len(BTB), None))
-                         )
+     opt = MetaOptimizer(fun, grad, num_steps_momentum=50)
      try:
          res = opt.optimize(x0)
      except ValueError as E:
@@ -566,14 +558,17 @@
                  G_fix_ind=j, G_fix_val=fix)
      f = fim(res.x)
      eig = jnp.linalg.eigh(f)[0].min()
+     print('FIM min eig', eig)
      if eig < 0:
          eig = list()
-         epsilons = [1e-23, 1e-15, 1e-12, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
+         epsilons = [1e-23, 1e-18, 1e-15, 1e-12, 1e-9, 1e-8,
+                     1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
          for eps in epsilons:
              x = res.x.copy()
              x = x.at[:len(BTB)].set(jnp.clip(x.at[:len(BTB)].get(), eps, float('inf')))
              f = fim(x)
              eig.append(jnp.linalg.eigh(f)[0].min())
+             print(eps, eig[-1])
              if eig[-1] > 0:
                  break
          i = np.argmax(eig)
@@ -870,8 +865,12 @@ def fit(project: str, clustering: ClusteringMode,
      data.B, clustering = cluster_data(data.B, mode=clustering,
                                        num_clusters=num_clusters)
      if test_chromosomes:
-         test_chromosomes = tuple([c + '_' for c in test_chromosomes])
-         promoter_inds_to_drop = [i for i, p in enumerate(data.promoter_names) if p.startswith(test_chromosomes)]
+         import re
+         pattern = re.compile(r'chr([0-9XYM]+|\d+)')
+
+         test_chromosomes = set(test_chromosomes)
+         promoter_inds_to_drop = [i for i, p in enumerate(data.promoter_names)
+                                  if pattern.search(p).group() in test_chromosomes]
          data.Y = np.delete(data.Y, promoter_inds_to_drop, axis=0)
          data.B = np.delete(data.B, promoter_inds_to_drop, axis=0)
      else:
@@ -942,12 +941,12 @@ def split_data(data: ProjectData, inds: list) -> tuple[ProjectData, ProjectData]
      data_d = ProjectData(Y=Y_d, B=B_d, K=data.K, weights=data.weights,
                           group_inds=data.group_inds, group_names=data.group_names,
                           motif_names=data.motif_names, promoter_names=promoter_names_d,
-                          motif_postfixes=data.motif_postfixes,
+                          motif_postfixes=data.motif_postfixes, sample_names=data.sample_names,
                           fmt=data.fmt)
      data = ProjectData(Y=Y, B=B, K=data.K, weights=data.weights,
                         group_inds=data.group_inds, group_names=data.group_names,
                         motif_names=data.motif_names, promoter_names=promoter_names,
-                        motif_postfixes=data.motif_postfixes,
+                        motif_postfixes=data.motif_postfixes, sample_names=data.sample_names,
                         fmt=data.fmt)
      return data_d, data
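
Note: test-chromosome matching changed from prefix matching on 'chr<name>_' to extracting the chromosome token with a regex and testing set membership, which also tolerates separators other than '_'. A small contrast sketch (promoter names here are hypothetical):

import re

names = ['chr1:1000-2000', 'chr19_777_fwd', 'chrX_42_fwd']
test_chromosomes = {'chr1'}

# 0.11 behaviour: prefix match on 'chr1_' misses 'chr1:1000-2000'.
old = [n for n in names if n.startswith(tuple(c + '_' for c in test_chromosomes))]

# 0.13 behaviour: extract the chromosome token, then test set membership.
pattern = re.compile(r'chr([0-9XYM]+|\d+)')
new = [n for n in names if pattern.search(n).group() in test_chromosomes]

print(old)  # []
print(new)  # ['chr1:1000-2000']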
 
maradoner-0.13/maradoner/grn.py (new file)

@@ -0,0 +1,177 @@
+ # -*- coding: utf-8 -*-
+ import numpy as np
+ import jax.numpy as jnp
+ import jax
+ from .utils import read_init, openers, ProjectData
+ from .fit import FOVResult, ActivitiesPrediction, FitResult
+ from scipy.optimize import minimize_scalar, minimize
+ import os
+ import dill
+ from pandas import DataFrame as DF
+ from scipy.stats import norm
+ from functools import partial
+ from tqdm import tqdm
+
+
+ def estimate_promoter_prior_variance(data: ProjectData, activities: ActivitiesPrediction,
+                                      fit: FitResult, top=0.90, eps=1e-6):
+     B = data.B
+     Y = data.Y
+     group_inds = data.group_inds
+     Y = Y - fit.promoter_mean.mean.reshape(-1, 1) - fit.sample_mean.mean.reshape(1, -1)
+     Y = Y - B @ fit.motif_mean.mean.reshape(-1, 1)
+     Y = np.concatenate([Y[:, inds].mean(axis=1, keepdims=True) - B @ U.reshape(-1, 1)
+                         for inds, U in zip(group_inds, activities.U.T)],
+                        axis=1)
+
+     var = (Y ** 2).mean(axis=1)
+     var = var[var > eps]
+     inds = np.argsort(var)
+     inds = inds[:int(len(inds) * top)]
+     return np.var(var[inds])
+
+ def estimate_promoter_variance(project_name: str, prior_top=0.90):
+
+     def fun(sigma, y: jnp.ndarray, b: jnp.ndarray, s: int,
+             prior_mean: float, prior_var: float):
+         if jnp.iterable(sigma):
+             sigma = sigma[0]
+         theta = prior_var / prior_mean
+         alpha = prior_var / theta ** 2
+         penalty = sigma / theta - (alpha - 1) * jnp.log(sigma)
+         return y / (b + sigma) + s * jnp.log(b + sigma) + penalty
+     data = read_init(project_name)
+     fmt = data.fmt
+     with openers[fmt](f'{project_name}.fit.{fmt}', 'rb') as f:
+         fit: FitResult = dill.load(f)
+     with openers[fmt](f'{project_name}.predict.{fmt}', 'rb') as f:
+         activities: ActivitiesPrediction = dill.load(f)
+     B = data.B
+     Y = data.Y
+     group_inds = data.group_inds
+     prior_var = estimate_promoter_prior_variance(data, activities, fit,
+                                                  top=prior_top)
+     print('Piror standard deviation:', prior_var ** 0.5)
+     prior_means = fit.error_variance.variance
+
+     Y = Y - fit.promoter_mean.mean.reshape(-1, 1) - fit.sample_mean.mean.reshape(1, -1)
+     Y = Y - B @ fit.motif_mean.mean.reshape(-1, 1)
+     Y = Y ** 2
+     B_hat = B ** 2 * fit.motif_variance.motif
+     B_hat = B_hat.sum(axis=1)
+     var = list()
+     for inds, prior_mean, nu in tqdm(list(zip(group_inds, prior_means, fit.motif_variance.group))):
+         Yt = Y[:, inds].sum(axis=1)
+         s = len(inds)
+         f_ = jax.jit(partial(fun, prior_mean=prior_mean, prior_var=prior_var, s=s))
+         g_ = jax.jit(jax.grad(f_))
+         var_g = list()
+         for y, b in zip(Yt, B_hat * nu):
+             res = minimize(partial(f_, b=b, y=y), x0=jnp.array([prior_mean]),
+                            method='SLSQP', bounds=[(0, None)],
+                            jac=partial(g_, b=b, y=y))
+             var_g.append(res.x[0] ** 2)
+         var.append(var_g)
+     var = np.array(var, dtype=float).T
+     with openers[fmt](f'{project_name}.promvar.{fmt}', 'wb') as f:
+         dill.dump(var, f)
+     return var
+
+
+ def grn(project_name: str, output: str, use_hdf=False, save_stat=True,
+         prior_h1=1/100):
+     data = read_init(project_name)
+     fmt = data.fmt
+     with openers[fmt](f'{project_name}.fit.{fmt}', 'rb') as f:
+         fit: FitResult = dill.load(f)
+     with openers[fmt](f'{project_name}.predict.{fmt}', 'rb') as f:
+         activities: ActivitiesPrediction = dill.load(f)
+
+     dtype = np.float32
+     B = data.B.astype(dtype)
+     Y = data.Y.astype(dtype)
+     group_inds = data.group_inds
+     group_names = data.group_names
+     nus = fit.motif_variance.group.astype(dtype)
+     motif_names = data.motif_names
+     prom_names = data.promoter_names
+     U = activities.U_raw.astype(dtype)
+     motif_mean = fit.motif_mean.mean.flatten().astype(dtype)
+     motif_variance = fit.motif_variance.motif.astype(dtype)
+     promoter_mean = fit.promoter_mean.mean.astype(dtype)
+     sample_mean = fit.sample_mean.mean.astype(dtype)
+
+     try:
+         with openers[fmt](f'{project_name}.promvar.{fmt}', 'rb') as f:
+             promvar: np.ndarray = dill.load(f)
+     except FileNotFoundError:
+         print('WARNING')
+         print('It seems that promoter variances were not estimated prior to running GRN.')
+         print('All promoter-wise variances will be assumed to be equal to the average error variance.')
+         print('Consider estimating promoter-wise variances before running GRN in the future.')
+         promvar = np.zeros((len(B), len(group_names)))
+         for i, sigma in enumerate(fit.error_variance.variance):
+             promvar[:, i] = sigma
+
+     Y = Y - promoter_mean.reshape(-1, 1) - sample_mean.reshape(1, -1)
+     Y = Y - B @ motif_mean.reshape(-1, 1)
+
+     if activities.filtered_motifs is not None:
+         motif_names = np.delete(motif_names, activities.filtered_motifs)
+         B = np.delete(B, activities.filtered_motifs, axis=1)
+         motif_mean = np.delete(motif_mean, activities.filtered_motifs)
+         motif_variance = np.delete(motif_variance, activities.filtered_motifs)
+
+     BM = B * motif_mean
+     BM = BM[..., None]
+     # BU = BU[..., None]
+     B_hat = B ** 2 * motif_variance
+     B_hat = B_hat.sum(axis=1, keepdims=True) - B_hat
+     B_pow = B ** 2
+
+     folder_stat = os.path.join(output, 'lr')
+     folder_belief = os.path.join(output, 'belief')
+     if save_stat:
+         os.makedirs(folder_stat, exist_ok=True)
+     os.makedirs(folder_belief, exist_ok=True)
+     for sigma, nu, name, inds in zip(promvar.T[..., None], nus, group_names, group_inds):
+         # if name != 'anconeus':
+         #     continue
+         print(name)
+         var = (B_hat * nu + sigma)
+         Y_ = Y[:, inds][..., None, :] + BM
+         # theta = U[:, inds][..., None, :] + BM
+         theta = B[..., None] * U[:, inds] + BM
+         loglr = 2 * B * (Y_ * theta).sum(axis=-1) - B_pow * (theta ** 2).sum(axis=-1)
+         del Y_
+         del theta
+         loglr = loglr / (2 * var)
+         del var
+         lr = np.exp(loglr)
+         belief = lr * prior_h1 / ((1 - prior_h1) + lr * prior_h1)
+         inds = sigma.flatten() > 1e-3
+         lr = lr[inds]
+         belief = belief[inds]
+         belief = belief.astype(np.half)
+
+         proms = list(np.array(prom_names)[inds])
+
+         # pvalue = n.sf(lr) * (theta > 0) + n.cdf(lr) * (theta <= 0)
+         if use_hdf:
+             if save_stat:
+                 lr = lr.astype(np.half)
+                 filename = os.path.join(folder_stat, f'{name}.hdf')
+                 DF(data=lr, index=proms, columns=motif_names).to_hdf(filename, key='zscore', mode='w', complevel=4)
+             filename = os.path.join(folder_belief, f'{name}.hdf')
+             DF(data=belief, index=proms, columns=motif_names).to_hdf(filename, key='lrt', mode='w', complevel=4)
+         else:
+             if save_stat:
+                 lr = lr.astype(np.half)
+                 filename = os.path.join(folder_stat, f'{name}.tsv')
+                 DF(data=lr, index=proms, columns=motif_names).to_csv(filename, sep='\t',
+                                                                      float_format='%.3f')
+             filename = os.path.join(folder_belief, f'{name}.tsv')
+             DF(data=belief, index=proms, columns=motif_names).to_csv(filename, sep='\t',
+                                                                      float_format='%.3f')

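Note: the belief written by grn() is a two-hypothesis posterior computed from the likelihood ratio lr and the prior prior_h1, i.e. belief = lr * prior_h1 / ((1 - prior_h1) + lr * prior_h1). A quick numeric check of the formula (values are illustrative):

import numpy as np

prior_h1 = 1 / 10
lr = np.array([0.5, 1.0, 10.0, 100.0])
belief = lr * prior_h1 / ((1 - prior_h1) + lr * prior_h1)
print(belief.round(3))  # [0.053 0.1   0.526 0.917]
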
{maradoner-0.11 → maradoner-0.13}/maradoner/main.py

@@ -13,10 +13,11 @@ from rich.table import Table
  from .create import create_project
  from pathlib import Path
  from .fit import fit, ClusteringMode, calculate_fov, predict, GOFStat, GOFStatMode
+ from .grn import estimate_promoter_variance, grn
  from .synthetic_data import generate_dataset
  from time import time
  from dill import __version__ as dill_version
- from .export import export_results, Standardization, ANOVAType
+ from .export import export_results, export_loadings_product, Standardization, ANOVAType
  from . import __version__ as project_version
  from .select import select_motifs_single
  import json
@@ -105,7 +106,7 @@ def _create(name: str = Argument(..., help='Project name. [bold]MARADONER[/bold]
                  'name[/cyan].'),
              expression: Path = Argument(..., help='A path to the promoter expression table. Expression values are assumed to be in a log-scale.'),
              loading: List[Path] = Argument(..., help='A list (if applicable, separated by space) of filenames containing loading matrices. '),
-             loading_transform: List[LoadingTransform] = Option([LoadingTransform.none], '--loading-transform', '-t',
+             loading_transform: List[LoadingTransform] = Option([LoadingTransform.esf], '--loading-transform', '-t',
                  help='A type of transformation to apply to loading '
                       'matrices. [orange]ecdf[/orange] substitutes values in the table with empricical CDF,'
                       ' [orange]esf[/orange] with negative logarithm of the empirical survival function.'),
@@ -115,6 +116,8 @@ def _create(name: str = Argument(..., help='Project name. [bold]MARADONER[/bold]
                  ' contain. If a text file, each line must start with a group name followed by space-separated sample names.'),
              filter_lowexp_w: float = Option(0.9, help='Truncation boundary for filtering out low-expressed promoters. The closer [orange]w[/orange]'
                  ' to 1, the more promoters will be left in the dataset.'),
+             filter_max_mode: bool = Option(True, help='Use max-mode of filtering. Max-mode keeps promoters that are active at least for some samples.'
+                 ' If disabled, filtration using GMM on the averages will be ran instead.'),
              filter_plot: Path = Option(None, help='Expression plot with a fitted mixture that is used for filtering.'),
              loading_postfix: List[str] = Option(None, '--loading-postfix', '-p',
                  help='String postfixes will be appeneded to the motifs from each of the supplied loading matrices'),
@@ -133,7 +136,8 @@ def _create(name: str = Argument(..., help='Project name. [bold]MARADONER[/bold]
      r = create_project(name, expression, loading_matrix_filenames=loading, motif_expression_filenames=motif_expression,
                         loading_matrix_transformations=loading_transform, sample_groups=sample_groups,
                         promoter_filter_lowexp_cutoff=filter_lowexp_w,
-                        promoter_filter_plot_filename=filter_plot,
+                        promoter_filter_plot_filename=filter_plot,
+                        promoter_filter_max=filter_max_mode,
                         compression=compression,
                         motif_postfixes=loading_postfix,
                         motif_names_filename=motif_filename,
@@ -208,7 +212,7 @@ def _gof(name: str = Argument(..., help='Project name.'),

  @app.command('predict', help='Estimate deviations of motif activities from their means.')
  def _predict(name: str = Argument(..., help='Project name.'),
-              filter_motifs: bool = Option(False, help='Do not predict deviations from motifs whose variance is low.'),
+              filter_motifs: bool = Option(True, help='Do not predict deviations from motifs whose variance is low.'),
               filter_order: int = Option(7, help='Motif variance is considered low if it is [orange]filter-order[/orange] orders of magnitude smaller that a median motif variance.'),
               tau_search: bool = Option(False, help='Search for tau multiplier using CV'),
               cv_repeats: int = Option(3, help='CV repeats in [orange]RepeatedKFold[/orange]'),
@@ -265,7 +269,11 @@ def _export(name: str = Argument(..., help='Project name.'),
              std_mode: Standardization = Option(Standardization.full, help='Whether to standardize activities with plain variances or also decorrelate them.'),
              anova_mode: ANOVAType = Option(ANOVAType.positive, help='If negative, look for non-variative motifs'),
              weighted_zscore: bool = Option(False, help='Reciprocal variance weighted Z-scores'),
-             alpha: float = Option(0.05, help='FDR alpha.')):
+             alpha: float = Option(0.05, help='FDR alpha.'),
+             loadings_product: bool = Option(False, help='Export loading matrix-acitvity 3D tensor. This will produce num_of_groups tabular files.'),
+             lp_hdf: bool = Option(True, help='Each loadings-product table will be stored in hdf format (occupies much less space than plain tsv) using float16 precision.'),
+             lp_intercepts: bool = Option(True, help='Include motif means in the 3D tensor.'),
+             lp_tsv_truncation: int = Option(4, help='Number of digits after a floating point to truncate. Decreases the output size of a tabular if [orange]lp-hdf[/orange] is disabled.')):
      t0 = time()
      p = Progress(SpinnerColumn(speed=0.5), TextColumn("[progress.description]{task.description}"), transient=True)
      p.add_task(description="Exporting results...", total=None)
@@ -273,8 +281,17 @@ def _export(name: str = Argument(..., help='Project name.'),
      export_results(name, output_folder, std_mode=std_mode, anova_mode=anova_mode, alpha=alpha,
                     weighted_zscore=weighted_zscore)
      p.stop()
+
+     if loadings_product:
+         p = Progress(SpinnerColumn(speed=0.5), TextColumn("[progress.description]{task.description}"), transient=True)
+         p.add_task(description="Exporting results...", total=None)
+         p.start()
+         export_loadings_product(name, output_folder, use_hdf=lp_hdf, intercepts=lp_intercepts)
+         p.stop()
+
      dt = time() - t0
      rprint(f'[green][bold]✔️[/bold] Done![/green]\t time: {dt:.2f} s.')
+


  __select_motif_doc = 'Selects best motif variants when the project was created from multiple loading matrices, each with an unique postfix.'\
@@ -287,13 +304,51 @@ def _select_motifs(name: str = Argument(..., help='Project name'),
                     filename: Path = Argument(..., help='Filename where a list of best motif variants will be stored')):
      t0 = time()
      p = Progress(SpinnerColumn(speed=0.5), TextColumn("[progress.description]{task.description}"), transient=True)
-     p.add_task(description="Exporting results...", total=None)
+     p.add_task(description="Selecting motifs...", total=None)
      p.start()
      select_motifs_single(name, filename)
      p.stop()
      dt = time() - t0
      rprint(f'[green][bold]✔️[/bold] Done![/green]\t time: {dt:.2f} s.')

+
+ __grn_doc = 'Tests each promoter against each motif per each group. Some people call it GRN.'
+ @app.command('grn',
+              help=__select_motif_doc)
+ def _grn(name: str = Argument(..., help='Project name'),
+          folder: Path = Argument(..., help='Output folder where results will be stored. In total, expect number_of_groups tables of size'
+                                  ' comparable to the expression file size.'),
+          hdf: bool = Option(True, help='Use HDF format instead of tar.gz files. Typically eats much less space'),
+          stat: bool = Option(True, help='Save statistics alongside probabilities.'),
+          prior_h1: float = Option(1/10, help='Prior belief on the expected fraction of motifs active per promoter.')):
+     t0 = time()
+     p = Progress(SpinnerColumn(speed=0.5), TextColumn("[progress.description]{task.description}"), transient=True)
+     p.add_task(description="Building GRN...", total=None)
+     p.start()
+     grn(name, output=folder, use_hdf=hdf, save_stat=stat, prior_h1=prior_h1)
+     p.stop()
+     dt = time() - t0
+     rprint(f'[green][bold]✔️[/bold] Done![/green]\t time: {dt:.2f} s.')
+
+ __estimate_promvar_doc = 'Estimates each promoter variance for each group using empirical Bayesian shrinkage.'\
+     ' A necessary step before computing GRN.'
+ @app.command('estimate-promoter-variance',
+              help=__estimate_promvar_doc)
+ def _estimate_promoter_variance(name: str = Argument(..., help='Project name'),
+                                 prior_top: float = Option(0.90,
+                                     help='The fraction from the bottom as ranked by sample'
+                                          ' variance of promoters to be used for estimating global group-wise variance.'
+                                          ' Higher values result in higher prior variance and weaken the prior.')):
+     t0 = time()
+     p = Progress(SpinnerColumn(speed=0.5), TextColumn("[progress.description]{task.description}"), transient=True)
+     p.add_task(description="Estimating each promoter's variance...", total=None)
+     p.start()
+     estimate_promoter_variance(name, prior_top=prior_top)
+     p.stop()
+     dt = time() - t0
+     rprint(f'[green][bold]✔️[/bold] Done![/green]\t time: {dt:.2f} s.')
+
  def main():
      check_packages()
      app()

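Note: per the new command docs, estimate-promoter-variance is a necessary step before grn; grn itself falls back to group-average error variances when the promvar file is missing. Equivalent programmatic calls, as a sketch ('myproj' is a hypothetical project name created beforehand):

from maradoner.grn import estimate_promoter_variance, grn

estimate_promoter_variance('myproj', prior_top=0.90)  # writes myproj.promvar.<fmt>
grn('myproj', output='grn_out', use_hdf=True, save_stat=True, prior_h1=1/10)
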
{maradoner-0.11 → maradoner-0.13}/maradoner/mara/fit.py

@@ -51,7 +51,7 @@ class FitResult:
      promoter_inds_to_drop: list = None


- def transform_data(data, std_y=False, std_b=False, helmert=True) -> TransformedData:
+ def transform_data(data, std_y=False, std_b=False) -> TransformedData:
      Y = data.Y - (data.Y.mean(axis=0, keepdims=True) + data.Y.mean(axis=1, keepdims=True) - data.Y.mean())
      B = data.B - data.B.mean(axis=0, keepdims=True)
      group_inds_inv = list()
@@ -159,8 +159,12 @@ def fit(project: str, tau_mode: TauMode, tau_estimation: TauEstimation,
      data.B, clustering = cluster_data(data.B, mode=clustering,
                                        num_clusters=num_clusters)
      if test_chromosomes:
-         test_chromosomes = tuple([c + '_' for c in test_chromosomes])
-         promoter_inds_to_drop = [i for i, p in enumerate(data.promoter_names) if p.startswith(test_chromosomes)]
+         import re
+         pattern = re.compile(r'chr([0-9XYM]+|\d+)')
+
+         test_chromosomes = set(test_chromosomes)
+         promoter_inds_to_drop = [i for i, p in enumerate(data.promoter_names)
+                                  if pattern.search(p).group() in test_chromosomes]
          data.Y = np.delete(data.Y, promoter_inds_to_drop, axis=0)
          data.B = np.delete(data.B, promoter_inds_to_drop, axis=0)
      else:
@@ -214,12 +218,12 @@ def split_data(data: ProjectData, inds: list) -> tuple[ProjectData, ProjectData]
      data_d = ProjectData(Y=Y_d, B=B_d, K=data.K, weights=data.weights,
                           group_inds=data.group_inds, group_names=data.group_names,
                           motif_names=data.motif_names, promoter_names=promoter_names_d,
-                          motif_postfixes=data.motif_postfixes,
+                          motif_postfixes=data.motif_postfixes, sample_names=data.sample_names,
                           fmt=data.fmt)
      data = ProjectData(Y=Y, B=B, K=data.K, weights=data.weights,
                         group_inds=data.group_inds, group_names=data.group_names,
                         motif_names=data.motif_names, promoter_names=promoter_names,
-                        motif_postfixes=data.motif_postfixes,
+                        motif_postfixes=data.motif_postfixes, sample_names=data.sample_names,
                         fmt=data.fmt)
      return data_d, data

@@ -255,7 +259,7 @@ def calculate_fov(project: str, gpu: bool,
                    stat_type: GOFStat, keep_motifs: str, x64=True,
                    verbose=True, dump=True):
      def calc_fov(data: TransformedData, fit: FitResult,
-                  activities: ActivitiesPrediction, keep_motifs=None) -> tuple[FOVResult]:
+                  activities: ActivitiesPrediction, keep_motifs=None, Bs=None) -> tuple[FOVResult]:
          def sub(Y, effects) -> FOVResult:
              if stat_type == stat_type.fov:
                  Y1 = Y - effects
@@ -271,10 +275,16 @@
              prom = _cor(Y, effects, axis=1)
              sample = _cor(Y, effects, axis=0)
              return FOVResult(total, prom, sample)
-         data = transform_data(data)
-         B = data.B if activities.clustering is None else activities.clustering[0]
-         Y = data.Y
-         U = activities.U
+         if Bs is None:
+             data = transform_data(data)
+             B = data.B if activities.clustering is None else activities.clustering[0]
+             Y = data.Y
+             U = activities.U
+         else:
+             B = data.B
+             Y = data.Y
+             B = np.hstack((B, np.ones((len(B), 1))))
+             U = np.linalg.pinv(np.hstack((Bs[0], np.ones((len(Bs[0]), 1))))) @ Bs[1]
          if keep_motifs is not None:
              B = B[:, keep_motifs]
              U = U[keep_motifs]
@@ -306,9 +316,9 @@
      data, data_test = split_data(data, fit.promoter_inds_to_drop)
      if x64:
          jax.config.update("jax_enable_x64", True)
-     data = transform_data(data, helmert=False)
-     if data_test is not None:
-         data_test = transform_data(data_test, helmert=False)
+     # data = transform_data(data, helmert=False)
+     # if data_test is not None:
+     #     data_test = transform_data(data_test, helmert=False)
      if gpu:
          device = jax.devices()
      else:
@@ -318,12 +328,15 @@
      for status_name, motifs in keep_motifs:
          if status_name:
              status_name = f'{status_name} ({len(motifs)})'
-             print(status_name)
          with jax.default_device(device):

              if data_test is not None:
-                 test_FOV = calc_fov(data=data_test, fit=fit, activities=activities, keep_motifs=motifs)
-                 train_FOV = calc_fov(data=data, fit=fit, activities=activities, keep_motifs=motifs)
+                 test_FOV = calc_fov(data=data_test, fit=fit, activities=activities, keep_motifs=motifs,
+                                     Bs=(data.B, data.Y))
+                 train_FOV = calc_fov(data=data, fit=fit, activities=activities, keep_motifs=motifs,
+                                      Bs=(data.B, data.Y))
              if data_test is None:
                  test_FOV = None
              res = TestResult(train_FOV, test_FOV, grouped=False)
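
Note: when Bs=(data.B, data.Y) is supplied, calc_fov now re-derives activities by ordinary least squares with an appended intercept column instead of reusing the posterior U. A shape-level sketch of that baseline on random data (illustrative only):

import numpy as np

rng = np.random.default_rng(1)
B_train, Y_train = rng.normal(size=(50, 4)), rng.normal(size=(50, 3))
B_test = rng.normal(size=(20, 4))

ones = np.ones((len(B_train), 1))
U = np.linalg.pinv(np.hstack((B_train, ones))) @ Y_train      # (5, 3) coefficients
effects = np.hstack((B_test, np.ones((len(B_test), 1)))) @ U  # (20, 3) predictions
print(effects.shape)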

{maradoner-0.11 → maradoner-0.13}/maradoner/mara/main.py

@@ -65,7 +65,8 @@ def _gof(name: str = Argument(..., help='Project name.'),
      p.start()
      res = calculate_fov(name, stat_type=stat_type, keep_motifs=keep_motifs, gpu=gpu, x64=x64)
      for name, res in res:
-         print(name)
+         if name:
+             print(name)
          if stat_type == GOFStat.corr:
              title = 'Pearson correlation'
          else:

{maradoner-0.11 → maradoner-0.13}/maradoner/utils.py

@@ -75,6 +75,7 @@ class ProjectData:
      motif_postfixes: list
      fmt: str

+
  def read_init(project_name: str) -> ProjectData:
      if type(project_name) is str:
          filename, fmt = get_init_file(project_name)
@@ -86,7 +87,6 @@ def read_init(project_name: str) -> ProjectData:
      group_inds = list()
      for name in group_names:
          group_inds.append(np.array(init['groups'][name]))
-
      r = ProjectData(
          Y=init['expression'],
          B=init['loadings'],

{maradoner-0.11 → maradoner-0.13}/maradoner.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.1
  Name: maradoner
- Version: 0.11
+ Version: 0.13
  Summary: Variance-adjusted estimation of motif activities.
  Home-page: https://github.com/autosome-ru/nemara
  Author: Georgy Meshcheryakov
@@ -25,15 +25,8 @@ Requires-Dist: statsmodels>=0.14
  Requires-Dist: datatable>=1.0.0
  Requires-Dist: dill>=0.3.9
  Requires-Dist: rich>=12.6.0
- Dynamic: author
- Dynamic: author-email
- Dynamic: classifier
- Dynamic: description
- Dynamic: description-content-type
- Dynamic: home-page
- Dynamic: requires-dist
- Dynamic: requires-python
- Dynamic: summary
+ Requires-Dist: tqdm>=4.0
+ Requires-Dist: scikit-learn>=1.6


  **MARADONER**

{maradoner-0.11 → maradoner-0.13}/maradoner.egg-info/SOURCES.txt

@@ -5,6 +5,7 @@ maradoner/create.py
  maradoner/dataset_filter.py
  maradoner/export.py
  maradoner/fit.py
+ maradoner/grn.py
  maradoner/main.py
  maradoner/mara.py
  maradoner/meta_optimizer.py

{maradoner-0.11 → maradoner-0.13}/maradoner.egg-info/requires.txt

@@ -10,3 +10,5 @@ statsmodels>=0.14
  datatable>=1.0.0
  dill>=0.3.9
  rich>=12.6.0
+ tqdm>=4.0
+ scikit-learn>=1.6