flexsweep 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flexsweep-0.1.0/PKG-INFO +25 -0
- flexsweep-0.1.0/README.md +0 -0
- flexsweep-0.1.0/data/constant.yaml +5 -0
- flexsweep-0.1.0/data/discoal +0 -0
- flexsweep-0.1.0/data/twoepoch.yaml +7 -0
- flexsweep-0.1.0/flexsweep/__init__.py +12 -0
- flexsweep-0.1.0/flexsweep/abc.py +291 -0
- flexsweep-0.1.0/flexsweep/cnn.py +433 -0
- flexsweep-0.1.0/flexsweep/data.py +348 -0
- flexsweep-0.1.0/flexsweep/flipped.py +475 -0
- flexsweep-0.1.0/flexsweep/fv.py +3217 -0
- flexsweep-0.1.0/flexsweep/main.py +185 -0
- flexsweep-0.1.0/flexsweep/relate.py +752 -0
- flexsweep-0.1.0/flexsweep/rf.py +544 -0
- flexsweep-0.1.0/flexsweep/simulate.py +1341 -0
- flexsweep-0.1.0/flexsweep/simulate_discoal.py +436 -0
- flexsweep-0.1.0/flexsweep/src/FDAclass.R +88 -0
- flexsweep-0.1.0/flexsweep/src/FDAparam.R +91 -0
- flexsweep-0.1.0/flexsweep/src/calcstats_surf.py +175 -0
- flexsweep-0.1.0/flexsweep/src/calcstats_surf_v2.py +185 -0
- flexsweep-0.1.0/flexsweep/src/lassi.py +334 -0
- flexsweep-0.1.0/flexsweep/src/predclass.R +52 -0
- flexsweep-0.1.0/flexsweep/src/predparam.R +61 -0
- flexsweep-0.1.0/flexsweep/src/tmp_simulator.py +919 -0
- flexsweep-0.1.0/flexsweep/src/tmp_stats.py +517 -0
- flexsweep-0.1.0/flexsweep/v0.1/simulations.py +893 -0
- flexsweep-0.1.0/flexsweep/v0.1/simulations_old.py +842 -0
- flexsweep-0.1.0/flexsweep/v0.1/summaries.py +1127 -0
- flexsweep-0.1.0/flexsweep/v0.2/fv_021024.py +4171 -0
- flexsweep-0.1.0/flexsweep/v0.2/gan.py +1769 -0
- flexsweep-0.1.0/flexsweep/v0.2/safe_custom.py +303 -0
- flexsweep-0.1.0/flexsweep/v0.2/simulations.py +893 -0
- flexsweep-0.1.0/flexsweep/v0.2/stats_171024.py +609 -0
- flexsweep-0.1.0/flexsweep/v0.2/summaries_nb.py +2024 -0
- flexsweep-0.1.0/flexsweep/v0.2/utils.py +759 -0
- flexsweep-0.1.0/pyproject.toml +34 -0
flexsweep-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: flexsweep
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary:
|
|
5
|
+
Author: Jesus Murga-Moreno
|
|
6
|
+
Author-email: murgamoreno@gmail.com
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
14
|
+
Requires-Dist: joblib (>=1.4.2,<2.0.0)
|
|
15
|
+
Requires-Dist: numba (>=0.60.0,<0.61.0)
|
|
16
|
+
Requires-Dist: pandas (>=2.2.2,<3.0.0)
|
|
17
|
+
Requires-Dist: scikit-allel (>=1.3.8,<2.0.0)
|
|
18
|
+
Requires-Dist: scikit-learn (>=1.5.1,<2.0.0)
|
|
19
|
+
Requires-Dist: scipy (>=1.14.0,<2.0.0)
|
|
20
|
+
Requires-Dist: tensorflow (>=2.17.0,<3.0.0)
|
|
21
|
+
Requires-Dist: threadpoolctl (>=3.5.0,<4.0.0)
|
|
22
|
+
Requires-Dist: tqdm (>=4.66.5,<5.0.0)
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
|
|
File without changes
|
|
Binary file
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from scipy.stats import median_abs_deviation
|
|
3
|
+
from sklearn.neighbors import DistanceMetric
|
|
4
|
+
import numpy as np
|
|
5
|
+
from numba import njit, float64
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@njit
def median_absolute_deviation(arr, scale=1.4826):
    """Return the scaled median absolute deviation (MAD) of *arr*.

    The default scale factor of 1.4826 makes the MAD a consistent
    estimator of the standard deviation under normality.
    """
    center = np.median(arr)
    return np.median(np.abs(arr - center)) * scale
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@njit
def normalise(x, y):
    """Divide *x* by the MAD of *y*; pass *x* through unchanged when that MAD is zero."""
    mad = median_absolute_deviation(y)
    if mad == 0:
        return x
    return x / mad
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# @njit
def distances(target, sumstat, param):
    """MAD-scale each summary statistic and compute Euclidean distances.

    Each column of ``sumstat`` (and the matching entry of ``target``) is
    divided by that column's MAD, then squared differences are accumulated
    column by column.  ``param`` is accepted for interface compatibility
    but is not used here.

    Returns (scaled target, scaled sumstat, per-row Euclidean distance).
    """
    sumstat_scaled = np.zeros(sumstat.shape)
    target_scaled = np.zeros(target.shape)
    sq_acc = np.zeros(sumstat.shape[0])

    for col in range(target.shape[0]):
        column = sumstat[:, col]
        sumstat_scaled[:, col] = normalise(column, column)
        target_scaled[col] = normalise(target[col], column)
        diff = sumstat_scaled[:, col] - target_scaled[col]
        sq_acc += np.square(diff)

    return target_scaled, sumstat_scaled, np.sqrt(sq_acc)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def rejection(target, sumstat, param, tol, kernel="epanechnikov"):
    """ABC rejection step: keep the ``tol`` fraction of simulations closest to the target.

    Parameters
    ----------
    target : 1-D array of observed summary statistics.
    sumstat : 2-D array (simulations x statistics) of simulated statistics.
    param : 2-D array of simulated parameters, row-aligned with ``sumstat``.
    tol : float, fraction of simulations to accept.
    kernel : str, smoothing kernel used to weight the accepted simulations.

    Returns
    -------
    (scaled target, accepted sumstat, accepted scaled sumstat,
     accepted params, accepted distances, kernel weights)

    Raises
    ------
    ValueError if ``kernel`` is not a supported name.
    """
    target_scaled, sumstat_scaled, dist = distances(target, sumstat, param)

    # Sort and get minimum distance to return values inside tolerance range
    n_accept = int(np.ceil(len(dist) * tol))
    n_limit = np.sort(dist)[n_accept]
    # Ensure getting only n_accept rows if there are ties at the boundary
    n_idx = np.where(dist <= n_limit)[0][:n_accept]

    # Kernel-weighted distances for the accepted simulations only
    if kernel == "epanechnikov":
        wts = 1 - np.square(dist[n_idx] / n_limit)
    elif kernel == "rectangular":
        wts = dist[n_idx] / n_limit
    elif kernel == "gaussian":
        d = DistanceMetric.get_metric("euclidean")
        ds = np.median(d.pairwise(sumstat))
        # BUG FIX: weight only the accepted distances so that ``wts`` is
        # row-aligned with the accepted simulations (previously the full
        # ``dist`` array was used, giving a length mismatch).
        wts = 1 / np.sqrt(2 * np.pi) * np.exp(-0.5 * np.square(dist[n_idx] / (ds / 2)))
    elif kernel == "triangular":
        wts = 1 - np.abs(dist[n_idx] / n_limit)
    elif kernel == "biweight":
        wts = np.square(1 - np.square(dist[n_idx] / n_limit))
    elif kernel == "cosine":
        wts = np.cos(np.pi / 2 * dist[n_idx] / n_limit)
    else:
        # Previously an unknown kernel fell through and raised
        # UnboundLocalError on ``wts``; fail loudly and clearly instead.
        raise ValueError(f"Unsupported kernel: {kernel}")

    return (
        target_scaled,
        sumstat[n_idx],
        sumstat_scaled[n_idx],
        param[n_idx],
        dist[n_idx],
        wts,
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def lsfit(x, y, wt=None, intercept=True, tolerance=1e-07):
    """Weighted least-squares fit of ``y`` on ``x`` via a QR decomposition.

    Parameters
    ----------
    x : 2-D design matrix (observations x predictors).
    y : 1-D or 2-D response array, row-aligned with ``x``.
    wt : optional 1-D array of per-observation weights.
    intercept : prepend a column of ones to ``x`` when True.
    tolerance : kept for interface compatibility; unused.

    Returns
    -------
    (coef, residuals) where ``residuals`` are computed against the
    (weighted) design.
    """
    # Check if the intercept term should be included
    if intercept:
        x = np.column_stack((np.ones(len(x)), x))

    # Weighted least squares: scale rows of design and response by
    # sqrt(wt).  BUG FIX: the old code always used ``wt[:, None]`` on
    # ``y``, which mis-broadcast a 1-D response into an (n, n) matrix;
    # broadcast according to the response's dimensionality instead.
    if wt is not None:
        sw = np.sqrt(np.asarray(wt))
        x = x * sw[:, None]
        y = y * (sw[:, None] if np.ndim(y) == 2 else sw)

    # Solve min ||x @ coef - y|| through the QR factors; lstsq handles
    # rank deficiency (e.g. a duplicated intercept column) gracefully.
    q, r = np.linalg.qr(x)
    coef = np.linalg.lstsq(r, q.T @ y, rcond=None)[0]
    residuals = y - x @ coef

    return coef, residuals
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def abc_loclinear(
    target_file,
    param_summaries_file,
    P,
    tol,
    transformation="none",
    kernel="epanechnikov",
):
    """ABC with local-linear regression adjustment (R `abc` style).

    Reads observed statistics from ``target_file`` (CSV) and joint
    parameter/statistic simulations from ``param_summaries_file`` (TSV),
    runs a rejection step, then regression-adjusts the accepted
    parameters with a weighted linear model and a heteroscedasticity
    correction.

    Parameters
    ----------
    target_file : path to a CSV whose first row holds the observed stats.
    param_summaries_file : path to a TSV with parameters in the first
        ``P`` columns and summary statistics afterwards.
    P : number of parameter columns in the simulation table.
    tol : acceptance tolerance passed to ``rejection``.
    transformation : one of "none", "log", "tan".
    kernel : kernel name passed to ``rejection``.

    Returns
    -------
    (param_accepted, param_adjusted)
    """
    # Reading into matrix using np.loadtxt
    target = pd.read_csv(target_file).values
    summaries = pd.read_csv(param_summaries_file, sep="\t").values

    # NOTE(review): column offset 48 is hard-coded here and below for
    # ``sumstat`` — presumably the number of non-statistic columns in
    # both files; confirm against the file writers.
    target = target[0, 48:]
    param = summaries[:, :P]
    # First parameter is put on a -log10 scale before adjustment.
    param[:, 0] = -np.log10(param[:, 0])
    sumstat = summaries[:, 48:]

    # transformation="none"; kernel="epanechnikov"; tol=0.025

    assert kernel.lower() in [
        "gaussian",
        "epanechnikov",
        "rectangular",
        "triangular",
        "biweight",
        "cosine",
    ], "Kernel is incorrectly defined. Use gaussian, epanechnikov, rectangular, triangular, biweight, or cosine"
    assert transformation.lower() in [
        "none",
        "log",
        "tan",
    ], "Apply one of the following transformations: none, log, tan"

    assert (
        len(target) == sumstat.shape[1]
    ), "Number of summary statistics in target has to be the same as in sumstat."

    num_params = param.shape[1]
    num_stats = sumstat.shape[1]

    # Parameter bounds, used by the "tan" transformation branches.
    mins = np.min(param, axis=0)
    maxs = np.max(param, axis=0)

    # Scale and compute Euclidean distance
    (
        target_scaled,
        sumstat_accepted,
        sumstat_scaled,
        param_accepted,
        dist_accepted,
        wts,
    ) = rejection(target, sumstat, param, tol, kernel)

    num_accepted = sumstat_accepted.shape[0]

    # Transform parameters
    # NOTE(review): this loop has two latent bugs.  (1) ``&`` binds
    # tighter than ``==`` so the condition evaluates
    # "log" & np.any(...) and raises TypeError — it should be
    # (transformation.lower() == "log") and np.any(param <= 0).
    # (2) rebinding ``v = np.log(v)`` / ``v = tangent_transformation(...)``
    # does NOT write back into ``param_accepted``; the transform is lost.
    # Also ``tangent_transformation`` is not defined in this module.
    if transformation != "none":
        for i, v in enumerate(param_accepted.T):
            if transformation.lower() == "log" & np.any(param <= 0):
                v[v <= 0] = np.min(v[np.nonzero(v)])
                v = np.log(v)
            elif transformation.lower() == "tan":
                v = tangent_transformation(v, mins[i], maxs[i])

    # NOTE(review): ``lsfit`` also prepends an intercept column by
    # default, so the design ends up with two ones columns; lstsq
    # tolerates the rank deficiency but this looks unintended.
    sumstat_intercept = np.hstack((np.ones((num_accepted, 1)), sumstat_scaled))

    # Linear regression
    lm_coefficients, lm_residuals = lsfit(sumstat_intercept, param_accepted, wts)

    # Point prediction at the observed (scaled) target.
    pred = np.dot(
        lm_coefficients, np.vstack((np.ones_like(target_scaled), target_scaled))
    )
    pred = np.repeat(pred.T, num_accepted, axis=0)

    # Centre the residuals before the regression adjustment.
    rsdl_mean = np.mean(lm_residuals, axis=0)
    rsdl_corrected = lm_residuals - rsdl_mean

    pred_corrected = pred + rsdl_mean

    def f(x, wts):
        # Weighted mean square of a residual vector.
        return np.sum(np.square(x) * wts) / np.sum(wts)

    # NOTE(review): rsdl_corrected is (num_accepted, num_params) and
    # ``wts`` has length num_accepted, so applying f along axis=1
    # (rows of length num_params) mismatches the weight vector;
    # axis=0 looks intended — confirm against the R abc reference.
    σ = np.apply_along_axis(f, axis=1, arr=rsdl_corrected, wts=wts)
    aic = num_accepted * np.sum(np.log(σ)) + 2 * (num_stats + 1) * num_params
    bic = (
        num_accepted * np.sum(np.log(σ))
        + np.log(np.sum(num_accepted)) * (num_stats + 1) * num_params
    )

    # Heteroscedasticity correction
    rsdl_log = np.log(np.square(lm_residuals))
    # NOTE(review): ``regression`` is not defined anywhere in this
    # module — presumably this should call ``lsfit``; as written this
    # line raises NameError.
    lm_coefficients, lm_residuals = regression(sumstat_intercept, rsdl_log, wts)

    pred_sd = np.dot(
        lm_coefficients, np.vstack((np.ones_like(target_scaled), target_scaled))
    )
    pred_sd = np.sqrt(np.exp(pred_sd))
    pred_sd = np.repeat(pred_sd.T, num_accepted, axis=0)
    pred_si = np.dot(lm_coefficients, sumstat_intercept.T)
    pred_si = np.sqrt(np.exp(pred_si))

    # Regression-adjusted parameters with variance rescaling.
    param_adjusted = pred + (pred_sd * rsdl_corrected) / pred_si
    rsdl_adjusted = (pred_sd * rsdl_corrected) / pred_si

    # Back transform parameter values
    # NOTE(review): ``undo_tangent_transformation`` is not defined in
    # this module either — the "tan" branch would raise NameError.
    for i in range(num_params):
        if transformation.lower() == "log":
            param_accepted[:, i] = np.exp(param_accepted[:, i])
            param_adjusted[:, i] = np.exp(param_adjusted[:, i])
        elif transformation.lower() == "tan":
            param_accepted[:, i] = undo_tangent_transformation(
                param_accepted[:, i], mins[i], maxs[i]
            )
            param_adjusted[:, i] = undo_tangent_transformation(
                param_adjusted[:, i], mins[i], maxs[i]
            )

    return param_accepted, param_adjusted
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
# NOTE(review): everything below is module-level scratch/exploratory code,
# not reachable as a function.  It references names that only exist inside
# abc_loclinear (sumstat_accepted, param_accepted, target_scaled, wts,
# num_params, sumstat_scaled, target, param) and names defined nowhere at
# all (mlp_regressor, X_test, y_test, wt1, scaled_sumstat), so importing
# this module as-is raises NameError.  It should be deleted or moved into
# a function before release — left byte-identical here, only annotated.
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Create an MLP-based ridge regressor
mlp = MLPRegressor(hidden_layer_sizes=(64, 32), activation="relu", max_iter=2000)
mlp.fit(sumstat_accepted, param_accepted)

# Predict using the trained MLP
# NOTE(review): reshape(-1, 1) turns the target vector into one-feature
# rows, which conflicts with training on full statistic rows — verify.
target_pred = mlp.predict(target_scaled.reshape(-1, 1))

# Predict using the trained regressor
# NOTE(review): mlp_regressor / X_test are undefined — NameError.
y_pred = mlp_regressor.predict(X_test)

# Calculate the Mean Squared Error (MSE)
# NOTE(review): y_test is undefined — NameError.
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


####################################################

## normalise parameters

import numpy as np
from sklearn.linear_model import Ridge

# Assuming you have the data loaded as NumPy arrays: param, lambda, wt1, gwt, scaled_sumstat, target

# Get the number of parameters

# Initialize an array to store the Median Absolute Deviation (MAD) for each parameter
param_mad = np.zeros(num_params)
param_scaled = np.empty_like(param_accepted)
# Compute MAD and normalize the parameters
# NOTE(review): the MAD is computed over the whole matrix rather than
# column i — looks like it should be param_accepted[:, i].
for i in range(num_params):
    param_mad[i] = median_absolute_deviation(param_accepted)
    param_scaled[:, i] = normalise(param_accepted[:, i], param_accepted[:, i])

# Convert the lambda values to a numpy array
lambdas = np.array([0.0001, 0.001, 0.01])

numnet = lambdas.size
fv = np.zeros((wts.size, num_params, numnet))
pred = np.zeros((num_params, numnet))
# sqrt of the diagonal weight matrix for weighted ridge regression.
mataux = np.sqrt(np.diag(wts))
paramaux = np.dot(mataux, param_scaled)
scaledaux = np.dot(mataux, sumstat_scaled)

# Perform ridge regression for each parameter
for i in range(num_params):
    for j in range(numnet):
        alpha = lambdas[j]
        ridge_model = Ridge(alpha=alpha)
        ridge_model.fit(scaledaux, paramaux[:, i])
        coef_i = ridge_model.coef_

        # NOTE(review): wt1 and scaled_sumstat are undefined here, and
        # the hstack prepends an intercept column while coef_ has no
        # intercept term — dimension mismatch; likely needs
        # ridge_model.intercept_ as the first coefficient.
        fv[:, i, j] = np.dot(
            np.hstack((np.ones((np.sum(wt1), 1)), scaled_sumstat[wt1, :])), coef_i
        )
        pred[i, j] = np.dot(np.hstack((1, target)), coef_i)

# Median prediction across the ridge penalties.
pred_med = np.median(pred, axis=1)
pred_med = np.tile(pred_med, (np.sum(wt1), 1))

fitted_values = np.median(fv, axis=2)
residuals = param[wt1, :] - fitted_values
|