lisaanalysistools 1.0.0__cp312-cp312-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lisaanalysistools might be problematic. Click here for more details.

Files changed (37) hide show
  1. lisaanalysistools-1.0.0.dist-info/LICENSE +201 -0
  2. lisaanalysistools-1.0.0.dist-info/METADATA +80 -0
  3. lisaanalysistools-1.0.0.dist-info/RECORD +37 -0
  4. lisaanalysistools-1.0.0.dist-info/WHEEL +5 -0
  5. lisaanalysistools-1.0.0.dist-info/top_level.txt +2 -0
  6. lisatools/__init__.py +0 -0
  7. lisatools/_version.py +4 -0
  8. lisatools/analysiscontainer.py +438 -0
  9. lisatools/cutils/detector.cpython-312-darwin.so +0 -0
  10. lisatools/datacontainer.py +292 -0
  11. lisatools/detector.py +410 -0
  12. lisatools/diagnostic.py +976 -0
  13. lisatools/glitch.py +193 -0
  14. lisatools/sampling/__init__.py +0 -0
  15. lisatools/sampling/likelihood.py +882 -0
  16. lisatools/sampling/moves/__init__.py +0 -0
  17. lisatools/sampling/moves/gbgroupstretch.py +53 -0
  18. lisatools/sampling/moves/gbmultipletryrj.py +1287 -0
  19. lisatools/sampling/moves/gbspecialgroupstretch.py +671 -0
  20. lisatools/sampling/moves/gbspecialstretch.py +1836 -0
  21. lisatools/sampling/moves/mbhspecialmove.py +286 -0
  22. lisatools/sampling/moves/placeholder.py +16 -0
  23. lisatools/sampling/moves/skymodehop.py +110 -0
  24. lisatools/sampling/moves/specialforegroundmove.py +564 -0
  25. lisatools/sampling/prior.py +508 -0
  26. lisatools/sampling/stopping.py +320 -0
  27. lisatools/sampling/utility.py +324 -0
  28. lisatools/sensitivity.py +888 -0
  29. lisatools/sources/__init__.py +0 -0
  30. lisatools/sources/emri/__init__.py +1 -0
  31. lisatools/sources/emri/tdiwaveform.py +72 -0
  32. lisatools/stochastic.py +291 -0
  33. lisatools/utils/__init__.py +0 -0
  34. lisatools/utils/constants.py +40 -0
  35. lisatools/utils/multigpudataholder.py +730 -0
  36. lisatools/utils/pointeradjust.py +106 -0
  37. lisatools/utils/utility.py +240 -0
@@ -0,0 +1,730 @@
1
+ import cupy as xp
2
+ import numpy as np
3
+ from lisatools.sensitivity import get_sensitivity
4
+ import time
5
+
6
+
7
class MultiGPUDataHolder:
    """Frequency-domain data / PSD / sensitivity buffers distributed over several GPUs.

    Each channel is stored per GPU as one flat 1D device array. The data buffers
    hold 2 * nwalkers slots: the first half is the "even" set (input temperature
    row 0) and the second half the "odd" set (row 1). Base-data, PSD and
    sensitivity buffers hold one slot per walker.

    Args:
        gpus: CUDA device id (int) or list of ids.
        channel1_data, channel2_data: complex arrays of shape
            (ntemps, nwalkers, data_length); only rows 0 and 1 are copied.
        channel1_base_data, channel2_base_data: base (injection + fixed template)
            arrays, same shape; only row 0 is copied.
        channel1_psd, channel2_psd: PSD arrays, same shape; only row 0 is copied.
        channel1_lisasens, channel2_lisasens: sensitivity arrays, same shape;
            only row 0 is copied.
        df: frequency bin width (Hz).
        base_injections, base_psd: optional host-side arrays used by
            ``restore_base_injections`` / ``get_injection_inner_product``.
    """

    def __init__(self, gpus, channel1_data, channel2_data, channel1_base_data, channel2_base_data, channel1_psd, channel2_psd, channel1_lisasens, channel2_lisasens, df, base_injections=None, base_psd=None):

        # allow a single device id
        if isinstance(gpus, int):
            gpus = [gpus]

        self.df = df

        if not isinstance(gpus, list) or not isinstance(gpus[0], int):
            raise ValueError("gpus must be an integer or a list of integers.")

        self.gpus = gpus
        self.num_gpus = len(gpus)
        # need to be numpy coming in to now make memory large
        self.ntemps, self.nwalkers, self.data_length = channel1_data.shape
        self.total_number = self.nwalkers
        self.walker_indices = np.arange(self.nwalkers)
        self.overall_indices_flat = np.arange(2 * self.nwalkers) # evens and odds

        # frequency grid implied by df
        self.fd = np.arange(self.data_length) * df

        self.base_injections = base_injections
        self.base_psd = base_psd

        # identity permutation to start (validated by the `map` setter)
        self.map = self.overall_indices_flat.copy()

        # ceil(total_number / num_gpus) walkers per GPU
        num_per_split = self.total_number // self.num_gpus + 1 * (self.total_number % self.num_gpus != 0)
        # gpu arangement
        self.gpu_split_inds = np.arange(num_per_split, self.total_number, num_per_split)

        # pair each GPU's even-slot indices with its corresponding odd-slot indices
        self.gpu_splits = [
            np.split(self.overall_indices_flat[:self.nwalkers], self.gpu_split_inds),
            np.split(self.overall_indices_flat[self.nwalkers:], self.gpu_split_inds)
        ]
        self.gpu_splits = [np.concatenate([self.gpu_splits[0][i], self.gpu_splits[1][i]]) for i in range(len(self.gpu_splits[0]))]

        self.gpus_for_each_data = [np.full_like(gpu_split, gpu) for gpu_split, gpu in zip( self.gpu_splits, self.gpus)]
        self.mempool = xp.get_default_memory_pool()

        # per-GPU flat device buffers, filled below
        self.channel1_data = [None for _ in range(self.num_gpus)]
        self.channel2_data = [None for _ in range(self.num_gpus)]
        self.channel1_base_data = [None for _ in range(self.num_gpus)]
        self.channel2_base_data = [None for _ in range(self.num_gpus)]
        self.channel1_psd = [None for _ in range(self.num_gpus)]
        self.channel2_psd = [None for _ in range(self.num_gpus)]
        self.channel1_lisasens = [None for _ in range(self.num_gpus)]
        self.channel2_lisasens = [None for _ in range(self.num_gpus)]
        return_to_main = xp.cuda.runtime.getDevice()
        for gpu_i, (gpu, gpu_split_tmp) in enumerate(zip(self.gpus, self.gpu_splits)):
            # even-slot entries (< nwalkers) double as this GPU's walker indices
            gpu_split = gpu_split_tmp[gpu_split_tmp < self.nwalkers]
            walker_inds_gpu_here = self.walker_indices[gpu_split]

            with xp.cuda.device.Device(gpu):

                # data buffers hold even+odd halves (factor 2); others one slot per walker
                self.channel1_data[gpu_i] = xp.zeros(2 * walker_inds_gpu_here.shape[0] * channel1_data.shape[-1], dtype=channel1_data.dtype)
                self.channel2_data[gpu_i] = xp.zeros(2 * walker_inds_gpu_here.shape[0] * channel1_data.shape[-1], dtype=channel1_data.dtype)
                self.channel1_base_data[gpu_i] = xp.zeros(walker_inds_gpu_here.shape[0] * channel1_data.shape[-1], dtype=channel1_data.dtype)
                self.channel2_base_data[gpu_i] = xp.zeros(walker_inds_gpu_here.shape[0] * channel1_data.shape[-1], dtype=channel1_data.dtype)
                self.channel1_psd[gpu_i] = xp.zeros(walker_inds_gpu_here.shape[0] * channel1_data.shape[-1], dtype=channel1_psd.dtype)
                self.channel2_psd[gpu_i] = xp.zeros(walker_inds_gpu_here.shape[0] * channel1_data.shape[-1], dtype=channel2_psd.dtype)
                self.channel1_lisasens[gpu_i] = xp.zeros(walker_inds_gpu_here.shape[0] * channel1_data.shape[-1], dtype=channel1_lisasens.dtype)
                self.channel2_lisasens[gpu_i] = xp.zeros(walker_inds_gpu_here.shape[0] * channel1_data.shape[-1], dtype=channel2_lisasens.dtype)

                for data_i, walker_ind in enumerate(walker_inds_gpu_here):
                    # NOTE(review): inds_slice and inds_slice_even are identical, and
                    # inds_slice_odd offsets by the FULL self.nwalkers rather than this
                    # GPU's walker count — looks single-GPU-shaped; confirm for multi-GPU.
                    inds_slice = slice(data_i * channel1_data.shape[-1], (data_i + 1) * channel1_data.shape[-1])
                    inds_slice_even = slice(data_i * channel1_data.shape[-1], (data_i + 1) * channel1_data.shape[-1])
                    inds_slice_odd = slice((self.nwalkers + data_i) * channel1_data.shape[-1], (self.nwalkers + data_i + 1) * channel1_data.shape[-1])

                    # stage each host slice through a short-lived device array, then
                    # free it immediately to keep peak device memory low
                    tmp_data1 = xp.asarray(channel1_data[0, walker_ind])
                    self.channel1_data[gpu_i][inds_slice_even] = tmp_data1
                    del tmp_data1
                    self.mempool.free_all_blocks()

                    tmp_data1 = xp.asarray(channel1_data[1, walker_ind])
                    self.channel1_data[gpu_i][inds_slice_odd] = tmp_data1
                    del tmp_data1
                    self.mempool.free_all_blocks()

                    tmp_data2 = xp.asarray(channel2_data[0, walker_ind])
                    self.channel2_data[gpu_i][inds_slice_even] = tmp_data2
                    del tmp_data2
                    self.mempool.free_all_blocks()

                    tmp_data2 = xp.asarray(channel2_data[1, walker_ind])
                    self.channel2_data[gpu_i][inds_slice_odd] = tmp_data2
                    del tmp_data2
                    self.mempool.free_all_blocks()

                    # TODO: reconsider use of this data since it is just for checking LL
                    tmp_base_data1 = xp.asarray(channel1_base_data[0, walker_ind])
                    self.channel1_base_data[gpu_i][inds_slice_even] = tmp_base_data1
                    del tmp_base_data1
                    self.mempool.free_all_blocks()

                    tmp_base_data2 = xp.asarray(channel2_base_data[0, walker_ind])
                    self.channel2_base_data[gpu_i][inds_slice_even] = tmp_base_data2
                    del tmp_base_data2
                    self.mempool.free_all_blocks()

                    tmp_psd1 = xp.asarray(channel1_psd[0, walker_ind])
                    self.channel1_psd[gpu_i][inds_slice] = tmp_psd1
                    del tmp_psd1
                    self.mempool.free_all_blocks()

                    tmp_psd2 = xp.asarray(channel2_psd[0, walker_ind])
                    self.channel2_psd[gpu_i][inds_slice] = tmp_psd2
                    del tmp_psd2
                    self.mempool.free_all_blocks()

                    tmp_lisasens1 = xp.asarray(channel1_lisasens[0, walker_ind])
                    self.channel1_lisasens[gpu_i][inds_slice] = tmp_lisasens1
                    del tmp_lisasens1
                    self.mempool.free_all_blocks()

                    tmp_lisasens2 = xp.asarray(channel2_lisasens[0, walker_ind])
                    self.channel2_lisasens[gpu_i][inds_slice] = tmp_lisasens2
                    del tmp_lisasens2
                    self.mempool.free_all_blocks()

        xp.cuda.runtime.setDevice(return_to_main)
        xp.cuda.runtime.deviceSynchronize()
128
+
129
+ def reshape_list(self, input_value):
130
+ return [
131
+ self.reshape(tmp) for tmp in input_value
132
+ ]
133
+
134
+ def reshape(self, input_value):
135
+ return input_value.reshape(-1, self.data_length)
136
+
137
+ @property
138
+ def data_list(self):
139
+ return [self.channel1_data, self.channel2_data]
140
+
141
+ @property
142
+ def base_data_list(self):
143
+ return [self.channel1_base_data, self.channel2_base_data]
144
+
145
+ @property
146
+ def psd_list(self):
147
+ return [self.channel1_psd, self.channel2_psd]
148
+
149
+ @property
150
+ def lisasens_list(self):
151
+ return [self.channel1_lisasens, self.channel2_lisasens]
152
+
153
+ @property
154
+ def data_shaped(self):
155
+ tmp1 = [self.channel1_data[i][:self.nwalkers * self.data_length] + self.channel1_data[i][self.nwalkers * self.data_length:] - self.channel1_base_data[i][:] for i in range(len(self.channel1_data))]
156
+ tmp2 = [self.channel2_data[i][:self.nwalkers * self.data_length] + self.channel2_data[i][self.nwalkers * self.data_length:] - self.channel2_base_data[i][:] for i in range(len(self.channel2_data))]
157
+
158
+ return [
159
+ self.reshape_list(tmp1),
160
+ self.reshape_list(tmp2),
161
+ ]
162
+
163
+ @property
164
+ def data_shaped_2_parts(self):
165
+ return [
166
+ self.reshape_list(self.channel1_data),
167
+ self.reshape_list(self.channel2_data),
168
+ ]
169
+
170
+ @property
171
+ def data_shaped_base(self):
172
+ return [
173
+ self.reshape_list(self.channel1_base_data),
174
+ self.reshape_list(self.channel2_base_data),
175
+ ]
176
+
177
+ @property
178
+ def psd_shaped(self):
179
+ return [
180
+ self.reshape_list(self.channel1_psd),
181
+ self.reshape_list(self.channel2_psd),
182
+ ]
183
+
184
+ @property
185
+ def lisasens_shaped(self):
186
+ return [
187
+ self.reshape_list(self.channel1_lisasens),
188
+ self.reshape_list(self.channel2_lisasens),
189
+ ]
190
+
191
+ @property
192
+ def map(self):
193
+ return self._map
194
+
195
+ @map.setter
196
+ def map(self, map):
197
+ if not isinstance(map, np.ndarray) or len(map) != 2 * self.total_number or map.dtype != np.int64:
198
+ raise ValueError("map input must be a numpy array of np.int64 that is the same length as the number of gpu holder slots.")
199
+ self._map = map
200
+
201
+ @property
202
+ def full_length(self):
203
+ return self.ntemps * self.nwalkers * self.data_length
204
+
205
+ def get_mapped_indices(self, inds_in):
206
+ if (not isinstance(inds_in, np.ndarray) and not isinstance(inds_in, xp.ndarray)) or ((inds_in.dtype != np.int64 and inds_in.dtype != xp.int32)):
207
+ raise ValueError("inds_in input must be a numpy array of np.int64.")
208
+
209
+ if isinstance(inds_in, np.ndarray):
210
+ xp_here = np
211
+ else:
212
+ xp_here = xp
213
+ return xp_here.asarray(self.map)[inds_in]
214
+
215
+ def set_psd_from_arrays(self, A_vals_in, E_vals_in, overall_inds=None):
216
+
217
+ if overall_inds is None:
218
+ overall_inds = np.arange(self.ntemps * self.nwalkers)
219
+
220
+ assert len(A_vals_in) == len(E_vals_in) == len(overall_inds)
221
+ return_to_main = xp.cuda.runtime.getDevice()
222
+
223
+ fd_gpu = [None for _ in self.gpus]
224
+ A_tmp = [None for _ in self.gpus]
225
+ E_tmp = [None for _ in self.gpus]
226
+ # st = time.perf_counter()
227
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
228
+ with xp.cuda.device.Device(gpu):
229
+ xp.cuda.runtime.deviceSynchronize()
230
+
231
+ fd_gpu[gpu_i] = xp.asarray(self.fd)
232
+ for i, (overall_index) in enumerate(overall_inds):
233
+
234
+ if overall_index not in gpu_split:
235
+ continue
236
+
237
+ overall_index_here = overall_index - gpu_split.min().item()
238
+
239
+ A_tmp[gpu_i] = xp.asarray(A_vals_in[i])
240
+ A_tmp[gpu_i][0] = A_tmp[gpu_i][1]
241
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
242
+ self.channel1_psd[gpu_i][inds_slice] = A_tmp[gpu_i]
243
+ if xp.any(A_tmp[gpu_i] < 0.0):
244
+ breakpoint()
245
+
246
+ E_tmp[gpu_i] = xp.asarray(E_vals_in[i])
247
+ E_tmp[gpu_i][0] = E_tmp[gpu_i][1]
248
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
249
+ self.channel2_psd[gpu_i][inds_slice] = E_tmp[gpu_i]
250
+ if xp.any(E_tmp[gpu_i] < 0.0):
251
+ breakpoint()
252
+
253
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
254
+ with xp.cuda.device.Device(gpu):
255
+ xp.cuda.runtime.deviceSynchronize()
256
+
257
+ del fd_gpu[gpu_i], A_tmp[gpu_i], E_tmp[gpu_i]
258
+ xp.get_default_memory_pool().free_all_blocks()
259
+
260
+ xp.cuda.runtime.setDevice(return_to_main)
261
+ xp.cuda.runtime.deviceSynchronize()
262
+
263
+ def set_lisasens_from_arrays(self, A_vals_in, E_vals_in, overall_inds=None):
264
+
265
+ if overall_inds is None:
266
+ overall_inds = np.arange(self.ntemps * self.nwalkers)
267
+
268
+ assert len(A_vals_in) == len(E_vals_in) == len(overall_inds)
269
+ return_to_main = xp.cuda.runtime.getDevice()
270
+
271
+ fd_gpu = [None for _ in self.gpus]
272
+ A_tmp = [None for _ in self.gpus]
273
+ E_tmp = [None for _ in self.gpus]
274
+ # st = time.perf_counter()
275
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
276
+ with xp.cuda.device.Device(gpu):
277
+ xp.cuda.runtime.deviceSynchronize()
278
+
279
+ fd_gpu[gpu_i] = xp.asarray(self.fd)
280
+ for i, (overall_index) in enumerate(overall_inds):
281
+
282
+ if overall_index not in gpu_split:
283
+ continue
284
+
285
+ overall_index_here = overall_index - gpu_split.min().item()
286
+
287
+ A_tmp[gpu_i] = xp.asarray(A_vals_in[i])
288
+ A_tmp[gpu_i][0] = A_tmp[gpu_i][1]
289
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
290
+ self.channel1_lisasens[gpu_i][inds_slice] = A_tmp[gpu_i]
291
+ if xp.any(A_tmp[gpu_i] < 0.0):
292
+ breakpoint()
293
+
294
+ E_tmp[gpu_i] = xp.asarray(E_vals_in[i])
295
+ E_tmp[gpu_i][0] = E_tmp[gpu_i][1]
296
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
297
+ self.channel2_lisasens[gpu_i][inds_slice] = E_tmp[gpu_i]
298
+ if xp.any(E_tmp[gpu_i] < 0.0):
299
+ breakpoint()
300
+
301
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
302
+ with xp.cuda.device.Device(gpu):
303
+ xp.cuda.runtime.deviceSynchronize()
304
+
305
+ del fd_gpu[gpu_i], A_tmp[gpu_i], E_tmp[gpu_i]
306
+ xp.get_default_memory_pool().free_all_blocks()
307
+
308
+ xp.cuda.runtime.setDevice(return_to_main)
309
+ xp.cuda.runtime.deviceSynchronize()
310
+
311
+ def add_templates_from_arrays_to_residuals(self, A_vals_in, E_vals_in, overall_inds=None):
312
+
313
+ if overall_inds is None:
314
+ overall_inds = np.arange(self.ntemps * self.nwalkers)
315
+
316
+ assert len(A_vals_in) == len(E_vals_in) == len(overall_inds)
317
+ return_to_main = xp.cuda.runtime.getDevice()
318
+
319
+ fd_gpu = [None for _ in self.gpus]
320
+ A_tmp = [None for _ in self.gpus]
321
+ E_tmp = [None for _ in self.gpus]
322
+ # st = time.perf_counter()
323
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
324
+ with xp.cuda.device.Device(gpu):
325
+ xp.cuda.runtime.deviceSynchronize()
326
+
327
+ for i, (overall_index) in enumerate(overall_inds):
328
+
329
+ if overall_index not in gpu_split:
330
+ continue
331
+
332
+ overall_index_here = overall_index - gpu_split.min().item()
333
+
334
+ A_tmp[gpu_i] = xp.asarray(A_vals_in[i])
335
+ A_tmp[gpu_i][0] = A_tmp[gpu_i][1]
336
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
337
+ self.channel1_data[gpu_i][inds_slice] -= A_tmp[gpu_i]
338
+ if xp.any(xp.isnan(A_tmp[gpu_i] < 0.0)):
339
+ breakpoint()
340
+
341
+ E_tmp[gpu_i] = xp.asarray(E_vals_in[i])
342
+ E_tmp[gpu_i][0] = E_tmp[gpu_i][1]
343
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
344
+ self.channel2_data[gpu_i][inds_slice] -= E_tmp[gpu_i]
345
+ if xp.any(xp.isnan(E_tmp[gpu_i] < 0.0)):
346
+ breakpoint()
347
+
348
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
349
+ with xp.cuda.device.Device(gpu):
350
+ xp.cuda.runtime.deviceSynchronize()
351
+
352
+ del A_tmp[gpu_i], E_tmp[gpu_i]
353
+ xp.get_default_memory_pool().free_all_blocks()
354
+
355
+ xp.cuda.runtime.setDevice(return_to_main)
356
+ xp.cuda.runtime.deviceSynchronize()
357
+
358
+
359
+ def set_psd_vals(self, psd_params, overall_inds=None, foreground_params=None):
360
+
361
+ if overall_inds is None:
362
+ overall_inds = np.arange(self.ntemps * self.nwalkers)
363
+ return_to_main = xp.cuda.runtime.getDevice()
364
+
365
+ fd_gpu = [None for _ in self.gpus]
366
+ A_tmp = [None for _ in self.gpus]
367
+ E_tmp = [None for _ in self.gpus]
368
+ # st = time.perf_counter()
369
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
370
+ with xp.cuda.device.Device(gpu):
371
+ xp.cuda.runtime.deviceSynchronize()
372
+
373
+ fd_gpu[gpu_i] = xp.asarray(self.fd)
374
+ for i, (overall_index) in enumerate(overall_inds):
375
+
376
+ if overall_index not in gpu_split:
377
+ continue
378
+
379
+ overall_index_here = overall_index - gpu_split.min().item()
380
+
381
+ if foreground_params is not None:
382
+ foreground_pars_in = foreground_params[i]
383
+ else:
384
+ foreground_pars_in = None
385
+
386
+ psd_params_A_in = psd_params[i][:2]
387
+
388
+ A_tmp[gpu_i] = get_sensitivity(fd_gpu[gpu_i], sens_fn="noisepsd_AE", model=psd_params_A_in, foreground_params=foreground_pars_in, xp=xp)
389
+ A_tmp[gpu_i][0] = A_tmp[gpu_i][1]
390
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
391
+ self.channel1_psd[gpu_i][inds_slice] = A_tmp[gpu_i]
392
+ if xp.any(A_tmp[gpu_i] < 0.0):
393
+ breakpoint()
394
+
395
+ psd_params_E_in = psd_params[i][2:]
396
+
397
+ E_tmp[gpu_i] = get_sensitivity(fd_gpu[gpu_i], sens_fn="noisepsd_AE", model=psd_params_E_in, foreground_params=foreground_pars_in, xp=xp)
398
+ E_tmp[gpu_i][0] = E_tmp[gpu_i][1]
399
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
400
+ self.channel2_psd[gpu_i][inds_slice] = E_tmp[gpu_i]
401
+ if xp.any(E_tmp[gpu_i] < 0.0):
402
+ breakpoint()
403
+
404
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
405
+ with xp.cuda.device.Device(gpu):
406
+ xp.cuda.runtime.deviceSynchronize()
407
+
408
+ del fd_gpu[gpu_i], A_tmp[gpu_i], E_tmp[gpu_i]
409
+ xp.get_default_memory_pool().free_all_blocks()
410
+
411
+ xp.cuda.runtime.setDevice(return_to_main)
412
+ xp.cuda.runtime.deviceSynchronize()
413
+
414
+ # et = time.perf_counter()
415
+ # print("fill", et - st)
416
+
417
+ def set_lisasens_vals(self, lisasens_params, overall_inds=None, foreground_params=None):
418
+
419
+ if overall_inds is None:
420
+ overall_inds = np.arange(self.ntemps * self.nwalkers)
421
+ return_to_main = xp.cuda.runtime.getDevice()
422
+
423
+ fd_gpu = [None for _ in self.gpus]
424
+ A_tmp = [None for _ in self.gpus]
425
+ E_tmp = [None for _ in self.gpus]
426
+ # st = time.perf_counter()
427
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
428
+ with xp.cuda.device.Device(gpu):
429
+ xp.cuda.runtime.deviceSynchronize()
430
+
431
+ fd_gpu[gpu_i] = xp.asarray(self.fd)
432
+ for i, (overall_index) in enumerate(overall_inds):
433
+
434
+ if overall_index not in gpu_split:
435
+ continue
436
+
437
+ overall_index_here = overall_index - gpu_split.min().item()
438
+
439
+ if foreground_params is not None:
440
+ foreground_pars_in = foreground_params[i]
441
+ else:
442
+ foreground_pars_in = None
443
+
444
+ lisasens_params_A_in = lisasens_params[i][:2]
445
+
446
+ A_tmp[gpu_i] = get_sensitivity(fd_gpu[gpu_i], sens_fn="lisasens", model=lisasens_params_A_in, foreground_params=foreground_pars_in, xp=xp)
447
+ A_tmp[gpu_i][0] = A_tmp[gpu_i][1]
448
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
449
+ self.channel1_lisasens[gpu_i][inds_slice] = A_tmp[gpu_i]
450
+ if xp.any(A_tmp[gpu_i] < 0.0):
451
+ breakpoint()
452
+
453
+ lisasens_params_E_in = lisasens_params[i][2:]
454
+
455
+ E_tmp[gpu_i] = get_sensitivity(fd_gpu[gpu_i], sens_fn="lisasens", model=lisasens_params_E_in, foreground_params=foreground_pars_in, xp=xp)
456
+ E_tmp[gpu_i][0] = E_tmp[gpu_i][1]
457
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
458
+ self.channel2_lisasens[gpu_i][inds_slice] = E_tmp[gpu_i]
459
+ if xp.any(E_tmp[gpu_i] < 0.0):
460
+ breakpoint()
461
+
462
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
463
+ with xp.cuda.device.Device(gpu):
464
+ xp.cuda.runtime.deviceSynchronize()
465
+
466
+ del fd_gpu[gpu_i], A_tmp[gpu_i], E_tmp[gpu_i]
467
+ xp.get_default_memory_pool().free_all_blocks()
468
+
469
+ xp.cuda.runtime.setDevice(return_to_main)
470
+ xp.cuda.runtime.deviceSynchronize()
471
+
472
+ # et = time.perf_counter()
473
+ # print("fill", et - st)
474
+
475
    def get_psd_term(self, overall_inds=None):
        """Return sum(log S_A + log S_E) over frequency for each requested slot.

        Args:
            overall_inds: flat slot indices; defaults to ``arange(nwalkers)``.

        Returns:
            1D numpy array, one entry per element of ``overall_inds``.
        """
        # NOTE(review): `reshape` is computed but only used by the commented-out
        # reshape block below — currently dead.
        reshape = False
        if overall_inds is None:
            reshape = True
            overall_inds = np.arange(self.nwalkers)

        return_to_main = xp.cuda.runtime.getDevice()

        psd_term = np.zeros_like(overall_inds, dtype=float)

        # st = time.perf_counter()
        for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
            with xp.cuda.device.Device(gpu):
                xp.cuda.runtime.deviceSynchronize()
                for i, (overall_index) in enumerate(overall_inds):
                    # only handle slots that live on this GPU
                    if overall_index not in gpu_split:
                        continue

                    # index local to this GPU's flat buffer
                    overall_index_here = overall_index - gpu_split.min().item()
                    inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)

                    # .get().item() pulls the scalar back to the host
                    psd_term_here = xp.sum((xp.log(self.channel1_psd[gpu_i][inds_slice]) + xp.log(self.channel2_psd[gpu_i][inds_slice]))).get().item()
                    xp.cuda.runtime.deviceSynchronize()
                    if np.isnan(psd_term_here):
                        breakpoint()
                    psd_term[i] = psd_term_here


        for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
            with xp.cuda.device.Device(gpu):
                xp.cuda.runtime.deviceSynchronize()

                xp.get_default_memory_pool().free_all_blocks()

        xp.cuda.runtime.setDevice(return_to_main)
        xp.cuda.runtime.deviceSynchronize()

        # if reshape:
        #     psd_term = psd_term.reshape(self.ntemps, self.nwalkers)

        # et = time.perf_counter()
        # print("get psd term", et - st)
        return psd_term
519
+
520
    def sub_in_data_and_psd(self, data, psd, lisasens):
        """Swap in new base data, PSD, and sensitivity buffers (single GPU only).

        ``data``, ``psd`` and ``lisasens`` are 2-element (channel) sequences whose
        flattened sizes must match the current per-GPU buffers. The old base data
        is subtracted from both the even and odd data halves before the new base
        is written and re-added, so accumulated templates are preserved.
        """
        assert len(self.gpus) == 1
        gpu_i = 0

        # adjust psd
        self.channel1_psd[gpu_i][:] = xp.asarray(psd[0].flatten())
        self.channel2_psd[gpu_i][:] = xp.asarray(psd[1].flatten())

        # adjust lisasens
        self.channel1_lisasens[gpu_i][:] = xp.asarray(lisasens[0].flatten())
        self.channel2_lisasens[gpu_i][:] = xp.asarray(lisasens[1].flatten())

        # remove injected data + previous templates
        self.channel1_data[gpu_i][:self.nwalkers * self.data_length] -= self.channel1_base_data[gpu_i][:]
        self.channel1_data[gpu_i][self.nwalkers * self.data_length:] -= self.channel1_base_data[gpu_i][:]

        self.channel2_data[gpu_i][:self.nwalkers * self.data_length] -= self.channel2_base_data[gpu_i][:]
        self.channel2_data[gpu_i][self.nwalkers * self.data_length:] -= self.channel2_base_data[gpu_i][:]

        # change injected data + other templates in base
        self.channel1_base_data[gpu_i][:] = xp.asarray(data[0].flatten())
        self.channel2_base_data[gpu_i][:] = xp.asarray(data[1].flatten())

        # re-add to channel data
        self.channel1_data[gpu_i][:self.nwalkers * self.data_length] += self.channel1_base_data[gpu_i][:]
        self.channel1_data[gpu_i][self.nwalkers * self.data_length:] += self.channel1_base_data[gpu_i][:]

        self.channel2_data[gpu_i][:self.nwalkers * self.data_length] += self.channel2_base_data[gpu_i][:]
        self.channel2_data[gpu_i][self.nwalkers * self.data_length:] += self.channel2_base_data[gpu_i][:]

        return
555
+
556
+
557
+ def get_inner_product(self, *args, overall_inds=None, band_edge_inds=None, **kwargs):
558
+ reshape = False
559
+ if overall_inds is None:
560
+ reshape = True
561
+ overall_inds = np.arange(self.nwalkers)
562
+
563
+ return_to_main = xp.cuda.runtime.getDevice()
564
+
565
+ if band_edge_inds is None:
566
+ inner_term = np.zeros_like(overall_inds, dtype=float)
567
+ else:
568
+ inner_term = np.zeros((overall_inds.shape[0], band_edge_inds.shape[0] - 1), dtype=float)
569
+
570
+ data_tmp1 = [None for _ in self.gpus]
571
+ data_tmp2 = [None for _ in self.gpus]
572
+ psd_tmp1 = [None for _ in self.gpus]
573
+ psd_tmp2 = [None for _ in self.gpus]
574
+
575
+ # st = time.perf_counter()
576
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
577
+ with xp.cuda.device.Device(gpu):
578
+ xp.cuda.runtime.deviceSynchronize()
579
+ for i, (overall_index) in enumerate(overall_inds):
580
+ if overall_index not in gpu_split:
581
+ continue
582
+
583
+ overall_index_here = overall_index - gpu_split.min().item()
584
+ inds_slice = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
585
+ inds_slice_even = slice(overall_index_here * self.data_length, (overall_index_here + 1) * self.data_length)
586
+ inds_slice_odd = slice((self.nwalkers + overall_index_here) * self.data_length, (self.nwalkers + overall_index_here + 1) * self.data_length)
587
+
588
+ data_tmp1[gpu_i] = self.channel1_data[gpu_i][inds_slice_even] + self.channel1_data[gpu_i][inds_slice_odd] - self.channel1_base_data[gpu_i][inds_slice]
589
+ psd_tmp1[gpu_i] = self.channel1_psd[gpu_i][inds_slice]
590
+ data_tmp2[gpu_i] = self.channel2_data[gpu_i][inds_slice_even] + self.channel2_data[gpu_i][inds_slice_odd] - self.channel2_base_data[gpu_i][inds_slice]
591
+ psd_tmp2[gpu_i] = self.channel2_psd[gpu_i][inds_slice]
592
+
593
+ if band_edge_inds is None:
594
+ inner_here = self.df * 4 * xp.sum(
595
+ data_tmp1[gpu_i].conj() * data_tmp1[gpu_i] / psd_tmp1[gpu_i]
596
+ + data_tmp2[gpu_i].conj() * data_tmp2[gpu_i] / psd_tmp2[gpu_i],
597
+ ).real.item()
598
+
599
+ else:
600
+ inner_here_tmp = self.df * 4 * xp.cumsum(
601
+ data_tmp1[gpu_i].conj() * data_tmp1[gpu_i] / psd_tmp1[gpu_i]
602
+ + data_tmp2[gpu_i].conj() * data_tmp2[gpu_i] / psd_tmp2[gpu_i],
603
+ ).real[band_edge_inds]
604
+ inner_here_tmp[1:] -= inner_here_tmp[:-1]
605
+ inner_here = inner_here_tmp[1:]
606
+
607
+ # if overall_index_here == 11:
608
+ # # for w in range(3951, 3951 + 420, 25):
609
+ # # print(f"INCHECKIT : {w} {data_tmp1[gpu_i][w].real} {data_tmp1[gpu_i][w].imag}, {self.channel1_data[gpu_i][inds_slice_even][w].real} {self.channel1_data[gpu_i][inds_slice_even][w].imag}, {self.channel1_data[gpu_i][inds_slice_odd][w].real} {self.channel1_data[gpu_i][inds_slice_odd][w].imag}, {self.channel1_base_data[gpu_i][inds_slice][w].real} {self.channel1_base_data[gpu_i][inds_slice][w].imag}")
610
+
611
+ # inner_here_check = self.df * 4 * xp.cumsum(
612
+ # data_tmp1[gpu_i][3951:3951 + 420].conj() * data_tmp1[gpu_i][3951:3951 + 420] / psd_tmp1[gpu_i][3951:3951 + 420]
613
+ # + data_tmp2[gpu_i][3951:3951 + 420].conj() * data_tmp2[gpu_i][3951:3951 + 420] / psd_tmp2[gpu_i][3951:3951 + 420],
614
+ # ).real
615
+ # # print("INSIDE INNER: ", -1/2 * inner_here_check) # , data_tmp1[gpu_i][3811], self.channel1_data[gpu_i][inds_slice_even][3811], self.channel1_data[gpu_i][inds_slice_odd][3811], self.channel1_base_data[gpu_i][inds_slice][3811])
616
+ # if "stop" in kwargs and kwargs["stop"]:
617
+ # breakpoint()
618
+ xp.cuda.runtime.deviceSynchronize()
619
+ if np.all(np.isnan(inner_here)):
620
+ breakpoint()
621
+
622
+ try:
623
+ inner_term[i] = inner_here.get()
624
+ except AttributeError:
625
+ inner_term[i] = inner_here
626
+
627
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
628
+ with xp.cuda.device.Device(gpu):
629
+ xp.cuda.runtime.deviceSynchronize()
630
+
631
+ del data_tmp1[gpu_i], data_tmp2[gpu_i], psd_tmp1[gpu_i], psd_tmp2[gpu_i]
632
+
633
+ xp.get_default_memory_pool().free_all_blocks()
634
+
635
+ xp.cuda.runtime.setDevice(return_to_main)
636
+ xp.cuda.runtime.deviceSynchronize()
637
+
638
+ # if reshape:
639
+ # inner_term = inner_term.reshape(self.ntemps, self.nwalkers)
640
+
641
+ # et = time.perf_counter()
642
+ # print("inner prod", et - st)
643
+ return inner_term
644
+
645
+ def get_ll(self, *args, include_psd_info=False, overall_inds=None, **kwargs):
646
+ inner_product = self.get_inner_product(*args, overall_inds=overall_inds, **kwargs)
647
+ ll_out = -1/2 * inner_product
648
+
649
+ if include_psd_info:
650
+ ll_out += -self.get_psd_term(overall_inds=overall_inds)
651
+ return ll_out
652
+
653
+ def multiply_data(self, val):
654
+ return_to_main = xp.cuda.runtime.getDevice()
655
+ if not isinstance(val, int) and not isinstance(val, float):
656
+ raise NotImplementedError("val must be an int or float.")
657
+
658
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
659
+ with xp.cuda.device.Device(gpu):
660
+ for chan in range(len(self.data_list)):
661
+ self.data_list[chan][gpu_i] *= val
662
+
663
+ for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
664
+ with xp.cuda.device.Device(gpu):
665
+ xp.cuda.runtime.deviceSynchronize()
666
+
667
+ xp.cuda.runtime.setDevice(return_to_main)
668
+ xp.cuda.runtime.deviceSynchronize()
669
+
670
    def restore_base_injections(self):
        """Reset every data and PSD buffer back to the stored base injections / PSD.

        Requires ``base_injections`` and ``base_psd`` to have been passed to
        ``__init__``; raises ValueError otherwise.
        """
        return_to_main = xp.cuda.runtime.getDevice()
        if self.base_injections is None or self.base_psd is None:
            raise ValueError("Must give base_injections and base_psd kwarg to __init__ to restore.")

        for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
            with xp.cuda.device.Device(gpu):
                for chan in range(len(self.data_list)):
                    # reshape produces a view; broadcasting the base injection
                    # over all rows overwrites every slot of the flat buffer
                    tmp = self.data_list[chan][gpu_i].reshape(-1, self.data_length)
                    tmp[:] = xp.asarray(self.base_injections[chan])[None, :]
                    self.data_list[chan][gpu_i] = tmp.flatten()

                    tmp = self.psd_list[chan][gpu_i].reshape(-1, self.data_length)
                    tmp[:] = xp.asarray(self.base_psd[chan])[None, :]
                    self.psd_list[chan][gpu_i] = tmp.flatten()

        for gpu_i, (gpu, gpu_split) in enumerate(zip(self.gpus, self.gpu_splits)):
            with xp.cuda.device.Device(gpu):
                xp.cuda.runtime.deviceSynchronize()

        xp.cuda.runtime.setDevice(return_to_main)
        xp.cuda.runtime.deviceSynchronize()
692
+
693
+ def get_injection_inner_product(self, *args, **kwargs):
694
+
695
+ inner_out = self.df * 4 * np.sum(
696
+ self.base_injections[0].conj() * self.base_injections[0] / self.base_psd[0]
697
+ + self.base_injections[1].conj() * self.base_injections[1] / self.base_psd[1],
698
+ )
699
+ return inner_out
700
+
701
+
702
+
703
+
704
if __name__ == "__main__":
    # Smoke test: requires CUDA devices with the ids listed in `gpus`.
    ntemps = 2
    nwalkers = 100
    data_length = int(1.6e5)
    nchannels = 2
    df = 3e-8

    data_A = np.ones((ntemps, nwalkers, data_length), dtype=complex)
    data_E = np.ones((ntemps, nwalkers, data_length), dtype=complex)

    base_A = np.zeros((ntemps, nwalkers, data_length), dtype=complex)
    base_E = np.zeros((ntemps, nwalkers, data_length), dtype=complex)

    psd_A = np.ones((ntemps, nwalkers, data_length), dtype=float)
    psd_E = np.ones((ntemps, nwalkers, data_length), dtype=float)

    gpus = [5, 6]
    # The constructor takes (gpus, channel1_data, channel2_data, channel1_base_data,
    # channel2_base_data, channel1_psd, channel2_psd, channel1_lisasens,
    # channel2_lisasens, df); the previous call passed only five arrays and
    # raised TypeError.
    mg = MultiGPUDataHolder(
        gpus,
        data_A,
        data_E,
        base_A,
        base_E,
        psd_A,
        psd_E,
        psd_A.copy(),
        psd_E.copy(),
        df,
    )

    check1 = mg.get_mapped_indices(np.arange(len(mg.overall_indices_flat)))

    # random permutation of the slot map
    mg.map = np.random.choice(mg.overall_indices_flat, len(mg.overall_indices_flat), replace=False)

    check2 = mg.get_mapped_indices(np.arange(len(mg.overall_indices_flat)))

    check3 = mg.get_ll()
    breakpoint()  # interactive inspection of check1/check2/check3
728
+
729
+
730
+