flexsweep 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. flexsweep-0.1.0/PKG-INFO +25 -0
  2. flexsweep-0.1.0/README.md +0 -0
  3. flexsweep-0.1.0/data/constant.yaml +5 -0
  4. flexsweep-0.1.0/data/discoal +0 -0
  5. flexsweep-0.1.0/data/twoepoch.yaml +7 -0
  6. flexsweep-0.1.0/flexsweep/__init__.py +12 -0
  7. flexsweep-0.1.0/flexsweep/abc.py +291 -0
  8. flexsweep-0.1.0/flexsweep/cnn.py +433 -0
  9. flexsweep-0.1.0/flexsweep/data.py +348 -0
  10. flexsweep-0.1.0/flexsweep/flipped.py +475 -0
  11. flexsweep-0.1.0/flexsweep/fv.py +3217 -0
  12. flexsweep-0.1.0/flexsweep/main.py +185 -0
  13. flexsweep-0.1.0/flexsweep/relate.py +752 -0
  14. flexsweep-0.1.0/flexsweep/rf.py +544 -0
  15. flexsweep-0.1.0/flexsweep/simulate.py +1341 -0
  16. flexsweep-0.1.0/flexsweep/simulate_discoal.py +436 -0
  17. flexsweep-0.1.0/flexsweep/src/FDAclass.R +88 -0
  18. flexsweep-0.1.0/flexsweep/src/FDAparam.R +91 -0
  19. flexsweep-0.1.0/flexsweep/src/calcstats_surf.py +175 -0
  20. flexsweep-0.1.0/flexsweep/src/calcstats_surf_v2.py +185 -0
  21. flexsweep-0.1.0/flexsweep/src/lassi.py +334 -0
  22. flexsweep-0.1.0/flexsweep/src/predclass.R +52 -0
  23. flexsweep-0.1.0/flexsweep/src/predparam.R +61 -0
  24. flexsweep-0.1.0/flexsweep/src/tmp_simulator.py +919 -0
  25. flexsweep-0.1.0/flexsweep/src/tmp_stats.py +517 -0
  26. flexsweep-0.1.0/flexsweep/v0.1/simulations.py +893 -0
  27. flexsweep-0.1.0/flexsweep/v0.1/simulations_old.py +842 -0
  28. flexsweep-0.1.0/flexsweep/v0.1/summaries.py +1127 -0
  29. flexsweep-0.1.0/flexsweep/v0.2/fv_021024.py +4171 -0
  30. flexsweep-0.1.0/flexsweep/v0.2/gan.py +1769 -0
  31. flexsweep-0.1.0/flexsweep/v0.2/safe_custom.py +303 -0
  32. flexsweep-0.1.0/flexsweep/v0.2/simulations.py +893 -0
  33. flexsweep-0.1.0/flexsweep/v0.2/stats_171024.py +609 -0
  34. flexsweep-0.1.0/flexsweep/v0.2/summaries_nb.py +2024 -0
  35. flexsweep-0.1.0/flexsweep/v0.2/utils.py +759 -0
  36. flexsweep-0.1.0/pyproject.toml +34 -0
@@ -0,0 +1,25 @@
1
+ Metadata-Version: 2.1
2
+ Name: flexsweep
3
+ Version: 0.1.0
4
+ Summary:
5
+ Author: Jesus Murga-Moreno
6
+ Author-email: murgamoreno@gmail.com
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Dist: click (>=8.1.7,<9.0.0)
14
+ Requires-Dist: joblib (>=1.4.2,<2.0.0)
15
+ Requires-Dist: numba (>=0.60.0,<0.61.0)
16
+ Requires-Dist: pandas (>=2.2.2,<3.0.0)
17
+ Requires-Dist: scikit-allel (>=1.3.8,<2.0.0)
18
+ Requires-Dist: scikit-learn (>=1.5.1,<2.0.0)
19
+ Requires-Dist: scipy (>=1.14.0,<2.0.0)
20
+ Requires-Dist: tensorflow (>=2.17.0,<3.0.0)
21
+ Requires-Dist: threadpoolctl (>=3.5.0,<4.0.0)
22
+ Requires-Dist: tqdm (>=4.66.5,<5.0.0)
23
+ Description-Content-Type: text/markdown
24
+
25
+
File without changes
@@ -0,0 +1,5 @@
1
+ time_units: generations
2
+ demes:
3
+ - name: YRI
4
+ epochs:
5
+ - start_size: 10000
Binary file
@@ -0,0 +1,7 @@
1
+ time_units: generations
2
+ demes:
3
+ - name: YRI
4
+ epochs:
5
+ - {end_time: 5920.0, start_size: 7310}
6
+ - {end_time: 0, start_size: 10000}
7
+
@@ -0,0 +1,12 @@
1
+ __version__ = "0.2"
2
+ try:
3
+ from . import _version
4
+
5
+ __version__ = _version.version
6
+ except ImportError:
7
+ pass
8
+
9
+ from .simulate_discoal import Simulator, DISCOAL
10
+ from .fv import summary_statistics
11
+ from .data import Data
12
+ from .cnn import CNN
@@ -0,0 +1,291 @@
1
+ import numpy as np
2
+ from scipy.stats import median_abs_deviation
3
+ from sklearn.neighbors import DistanceMetric
4
+ import numpy as np
5
+ from numba import njit, float64
6
+ import pandas as pd
7
+
8
+
9
@njit
def median_absolute_deviation(arr, scale=1.4826):
    """Return the scaled median absolute deviation (MAD) of `arr`.

    The default scale factor 1.4826 makes the MAD a consistent estimator
    of the standard deviation for normally distributed data.
    """
    center = np.median(arr)
    return np.median(np.abs(arr - center)) * scale
16
+
17
+
18
@njit
def normalise(x, y):
    """Scale `x` by the scaled median absolute deviation of `y`.

    Returns `x` unchanged when the MAD of `y` is zero, which would
    otherwise divide by zero.
    """
    # Compute the MAD once instead of twice (each call is O(n log n)).
    mad = median_absolute_deviation(y)
    if mad != 0:
        return x / mad
    return x
24
+
25
+
26
# @njit
def distances(target, sumstat, param):
    """MAD-scale statistics column-wise and compute Euclidean distances.

    Each column of `sumstat` (and the matching entry of `target`) is
    divided by that column's scaled MAD via `normalise`; zero-MAD columns
    are left unscaled.  `param` is unused but kept for interface
    compatibility.

    Returns (target_scaled, sumstat_scaled, dist).
    """
    scaled_sims = np.zeros(sumstat.shape)
    scaled_target = np.zeros(target.shape)
    sq_dist = np.zeros(sumstat.shape[0])

    for col in range(target.shape[0]):
        column = sumstat[:, col]
        scaled_sims[:, col] = normalise(column, column)
        scaled_target[col] = normalise(target[col], column)
        sq_dist += np.square(scaled_sims[:, col] - scaled_target[col])

    return scaled_target, scaled_sims, np.sqrt(sq_dist)
40
+
41
+
42
def rejection(target, sumstat, param, tol, kernel="epanechnikov"):
    """ABC rejection step: accept the `tol` fraction of simulations
    closest to `target` and attach kernel weights.

    Parameters
    ----------
    target, sumstat, param : arrays as in `distances`.
    tol : float
        Fraction of simulations to accept (0 < tol <= 1).
    kernel : str
        One of "epanechnikov", "rectangular", "gaussian", "triangular",
        "biweight", "cosine".

    Returns
    -------
    (target_scaled, sumstat_accepted, sumstat_scaled_accepted,
     param_accepted, dist_accepted, wts) — all row-aligned on the
    accepted simulations.

    Raises
    ------
    ValueError
        For an unrecognized kernel name (previously fell through and
        raised NameError on `wts`).
    """
    target_scaled, sumstat_scaled, dist = distances(target, sumstat, param)

    # Sort and get the cutoff distance covering the tolerance range.
    n_accept = int(np.ceil(len(dist) * tol))
    n_limit = np.sort(dist)[n_accept]
    # Ensure getting exactly n_accept rows even with ties at the boundary.
    n_idx = np.where(dist <= n_limit)[0][:n_accept]

    rel = dist[n_idx] / n_limit  # relative distance of accepted rows

    # Kernel weights on the accepted simulations.
    if kernel == "epanechnikov":
        wts = 1 - np.square(rel)
    elif kernel == "rectangular":
        # NOTE(review): this weights FAR points more than near ones; a
        # rectangular kernel is usually uniform — confirm intent.
        wts = rel
    elif kernel == "gaussian":
        # NOTE(review): DistanceMetric moved to sklearn.metrics in recent
        # scikit-learn releases — confirm the import location used here.
        d = DistanceMetric.get_metric("euclidean")
        ds = np.median(d.pairwise(sumstat))
        # Fixed: weight only the accepted rows (dist[n_idx]); the original
        # used the full `dist` vector, so `wts` had N rows while every
        # other returned array had n_accept.
        wts = (
            1
            / np.sqrt(2 * np.pi)
            * np.exp(-0.5 * np.square(dist[n_idx] / (ds / 2)))
        )
    elif kernel == "triangular":
        wts = 1 - np.abs(rel)
    elif kernel == "biweight":
        wts = np.square(1 - np.square(rel))
    elif kernel == "cosine":
        wts = np.cos(np.pi / 2 * rel)
    else:
        raise ValueError(f"unsupported kernel: {kernel}")

    return (
        target_scaled,
        sumstat[n_idx],
        sumstat_scaled[n_idx],
        param[n_idx],
        dist[n_idx],
        wts,
    )
75
+
76
+
77
def lsfit(x, y, wt=None, intercept=True, tolerance=1e-07):
    """Weighted least-squares fit of `y` on `x`, QR based (mirrors R's lsfit).

    Parameters
    ----------
    x : (n, p) array
        Design matrix.
    y : (n,) or (n, k) array
        Response vector or matrix.
    wt : (n,) array, optional
        Non-negative case weights.
    intercept : bool
        Prepend a column of ones when True.
    tolerance : float
        Unused; kept for interface compatibility.

    Returns
    -------
    (coef, residuals) where residuals = y - x @ coef, computed on the
    (weight-scaled) design, as in the original implementation.
    """
    if intercept:
        x = np.column_stack((np.ones(len(x)), x))

    # Apply weights if provided.  Fixed: for a 1-D `y`, the original
    # `y * np.sqrt(wt[:, None])` silently broadcast to an (n, n) matrix;
    # the weight shape must match y's dimensionality.
    if wt is not None:
        sqrt_wt = np.sqrt(wt)
        x = x * sqrt_wt[:, None]
        y = y * (sqrt_wt[:, None] if np.ndim(y) > 1 else sqrt_wt)

    # Least squares via QR; lstsq on the triangular factor is stable even
    # for rank-deficient designs.
    q, r = np.linalg.qr(x)
    coef = np.linalg.lstsq(r, q.T @ y, rcond=None)[0]
    residuals = y - x @ coef

    return coef, residuals
102
+
103
+
104
def abc_loclinear(
    target_file,
    param_summaries_file,
    P,
    tol,
    transformation="none",
    kernel="epanechnikov",
):
    """Approximate Bayesian Computation with local-linear adjustment.

    Rejection sampling followed by the weighted local-linear regression
    correction of Beaumont et al. (2002) with the heteroscedasticity
    correction of Blum & François (2010), mirroring R's abc()
    method="loclinear".

    Parameters
    ----------
    target_file : str
        CSV file with one row of observed summary statistics.
    param_summaries_file : str
        Tab-separated file whose first `P` columns are simulated
        parameters followed by the simulated summary statistics.
    P : int
        Number of parameter columns in `param_summaries_file`.
    tol : float
        Acceptance fraction passed to `rejection`.
    transformation : {"none", "log", "tan"}
        Optional transformation applied to accepted parameters before the
        regression and undone afterwards.
    kernel : str
        Weighting kernel name, see `rejection`.

    Returns
    -------
    (param_accepted, param_adjusted)
        Accepted raw parameter draws and their regression-adjusted values.
    """
    target = pd.read_csv(target_file).values
    summaries = pd.read_csv(param_summaries_file, sep="\t").values

    # NOTE(review): statistics are assumed to start at column 48 in both
    # files — confirm against the summary-statistics layout.
    target = target[0, 48:]
    param = summaries[:, :P]
    # First parameter is treated as a rate on a log10 scale.
    param[:, 0] = -np.log10(param[:, 0])
    sumstat = summaries[:, 48:]

    assert kernel.lower() in [
        "gaussian",
        "epanechnikov",
        "rectangular",
        "triangular",
        "biweight",
        "cosine",
    ], "Kernel is incorrectly defined. Use gaussian, epanechnikov, rectangular, triangular, biweight, or cosine"
    assert transformation.lower() in [
        "none",
        "log",
        "tan",
    ], "Apply one of the following transformations: none, log, tan"

    assert (
        len(target) == sumstat.shape[1]
    ), "Number of summary statistics in target has to be the same as in sumstat."

    num_params = param.shape[1]
    num_stats = sumstat.shape[1]

    mins = np.min(param, axis=0)
    maxs = np.max(param, axis=0)

    # Scale, compute Euclidean distance, and reject outside the tolerance.
    (
        target_scaled,
        sumstat_accepted,
        sumstat_scaled,
        param_accepted,
        dist_accepted,
        wts,
    ) = rejection(target, sumstat, param, tol, kernel)

    num_accepted = sumstat_accepted.shape[0]

    # Transform accepted parameters column-wise.
    # Fixed: the original tested `transformation.lower() == "log" & np.any(...)`,
    # where `&` binds tighter than `==` (TypeError at runtime), and never
    # wrote the transformed values back into param_accepted.
    if transformation.lower() == "log":
        for i in range(num_params):
            col = param_accepted[:, i]
            if np.any(col <= 0):
                # Replace non-positive draws with the smallest non-zero
                # one so the log is defined.
                col[col <= 0] = np.min(col[np.nonzero(col)])
            param_accepted[:, i] = np.log(col)
    elif transformation.lower() == "tan":
        # NOTE(review): tangent_transformation is not defined in this
        # module — confirm where it should come from.
        for i in range(num_params):
            param_accepted[:, i] = tangent_transformation(
                param_accepted[:, i], mins[i], maxs[i]
            )

    sumstat_intercept = np.hstack((np.ones((num_accepted, 1)), sumstat_scaled))
    design_target = np.hstack((1.0, target_scaled))

    # Weighted local-linear regression of parameters on scaled statistics.
    # The design matrix already carries the intercept column, so lsfit must
    # not prepend a second one (the original double-stacked intercepts).
    lm_coefficients, lm_residuals = lsfit(
        sumstat_intercept, param_accepted, wts, intercept=False
    )

    # Regression prediction at the observed statistics, replicated to one
    # row per accepted simulation.
    pred = np.tile(design_target @ lm_coefficients, (num_accepted, 1))

    rsdl_mean = np.mean(lm_residuals, axis=0)
    rsdl_corrected = lm_residuals - rsdl_mean
    pred_corrected = pred + rsdl_mean

    # Weighted residual variance per parameter (diagnostics only; aic/bic
    # are computed but not returned, as in the original).
    # Fixed: the original applied the weighting across rows (axis=1),
    # mismatching the length of `wts`.
    sigma = np.array(
        [
            np.sum(np.square(rsdl_corrected[:, i]) * wts) / np.sum(wts)
            for i in range(num_params)
        ]
    )
    aic = num_accepted * np.sum(np.log(sigma)) + 2 * (num_stats + 1) * num_params
    bic = (
        num_accepted * np.sum(np.log(sigma))
        + np.log(num_accepted) * (num_stats + 1) * num_params
    )

    # Heteroscedasticity correction: regress the log squared residuals on
    # the same design.  Fixed: `regression` was undefined; the linear
    # solver used throughout this module is lsfit.
    rsdl_log = np.log(np.square(lm_residuals))
    het_coefficients, _ = lsfit(sumstat_intercept, rsdl_log, wts, intercept=False)

    # Predicted residual SD at the target and at each accepted simulation.
    pred_sd = np.tile(
        np.sqrt(np.exp(design_target @ het_coefficients)), (num_accepted, 1)
    )
    pred_si = np.sqrt(np.exp(sumstat_intercept @ het_coefficients))

    param_adjusted = pred + (pred_sd * rsdl_corrected) / pred_si
    rsdl_adjusted = (pred_sd * rsdl_corrected) / pred_si

    # Back-transform parameter values.
    if transformation.lower() == "log":
        param_accepted = np.exp(param_accepted)
        param_adjusted = np.exp(param_adjusted)
    elif transformation.lower() == "tan":
        # NOTE(review): undo_tangent_transformation is also undefined in
        # this module — confirm its origin.
        for i in range(num_params):
            param_accepted[:, i] = undo_tangent_transformation(
                param_accepted[:, i], mins[i], maxs[i]
            )
            param_adjusted[:, i] = undo_tangent_transformation(
                param_adjusted[:, i], mins[i], maxs[i]
            )

    return param_accepted, param_adjusted
222
+
223
+
224
# ---------------------------------------------------------------------------
# NOTE(review): everything below was module-level scratch/experimental code
# (an MLP adjustment and a ridge-regression "neuralnet"-style adjustment).
# It referenced names that only exist inside abc_loclinear (sumstat_accepted,
# param_accepted, target_scaled, wts, sumstat_scaled, num_params) plus names
# defined nowhere in this file (mlp_regressor, X_test, y_test, wt1,
# scaled_sumstat, target), so importing this module raised NameError at
# import time.  It is preserved commented out until it is turned into a real
# function (e.g. an abc_ridge / abc_neuralnet adjustment step).
# ---------------------------------------------------------------------------
# import numpy as np
# from sklearn.neural_network import MLPRegressor
# from sklearn.preprocessing import StandardScaler
# from sklearn.pipeline import make_pipeline
# from sklearn.metrics import mean_squared_error
#
# # Create an MLP-based regressor
# mlp = MLPRegressor(hidden_layer_sizes=(64, 32), activation="relu", max_iter=2000)
# mlp.fit(sumstat_accepted, param_accepted)
#
# # Predict using the trained MLP
# target_pred = mlp.predict(target_scaled.reshape(-1, 1))
#
# # Predict using the trained regressor
# y_pred = mlp_regressor.predict(X_test)
#
# # Calculate the Mean Squared Error (MSE)
# mse = mean_squared_error(y_test, y_pred)
# print("Mean Squared Error:", mse)
#
#
# ####################################################
#
# ## normalise parameters
#
# import numpy as np
# from sklearn.linear_model import Ridge
#
# # Assuming data loaded as NumPy arrays: param, lambda, wt1, gwt,
# # scaled_sumstat, target
#
# # Median Absolute Deviation (MAD) for each parameter
# param_mad = np.zeros(num_params)
# param_scaled = np.empty_like(param_accepted)
# # Compute MAD and normalize the parameters
# for i in range(num_params):
#     param_mad[i] = median_absolute_deviation(param_accepted)
#     param_scaled[:, i] = normalise(param_accepted[:, i], param_accepted[:, i])
#
# # Ridge penalties to average over
# lambdas = np.array([0.0001, 0.001, 0.01])
#
# numnet = lambdas.size
# fv = np.zeros((wts.size, num_params, numnet))
# pred = np.zeros((num_params, numnet))
# mataux = np.sqrt(np.diag(wts))
# paramaux = np.dot(mataux, param_scaled)
# scaledaux = np.dot(mataux, sumstat_scaled)
#
# # Perform ridge regression for each parameter
# for i in range(num_params):
#     for j in range(numnet):
#         alpha = lambdas[j]
#         ridge_model = Ridge(alpha=alpha)
#         ridge_model.fit(scaledaux, paramaux[:, i])
#         coef_i = ridge_model.coef_
#
#         fv[:, i, j] = np.dot(
#             np.hstack((np.ones((np.sum(wt1), 1)), scaled_sumstat[wt1, :])), coef_i
#         )
#         pred[i, j] = np.dot(np.hstack((1, target)), coef_i)
#
# pred_med = np.median(pred, axis=1)
# pred_med = np.tile(pred_med, (np.sum(wt1), 1))
#
# fitted_values = np.median(fv, axis=2)
# residuals = param[wt1, :] - fitted_values