lisaanalysistools 1.0.10__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lisaanalysistools might be problematic. Click here for more details.

Files changed (43)
  1. lisaanalysistools-1.0.10.dist-info/LICENSE +201 -0
  2. lisaanalysistools-1.0.10.dist-info/METADATA +101 -0
  3. lisaanalysistools-1.0.10.dist-info/RECORD +43 -0
  4. lisaanalysistools-1.0.10.dist-info/WHEEL +5 -0
  5. lisaanalysistools-1.0.10.dist-info/top_level.txt +1 -0
  6. lisatools/__init__.py +0 -0
  7. lisatools/_version.py +4 -0
  8. lisatools/analysiscontainer.py +451 -0
  9. lisatools/cutils/__init__.py +0 -0
  10. lisatools/cutils/detector_cpu.cpython-312-darwin.so +0 -0
  11. lisatools/cutils/include/Detector.hpp +84 -0
  12. lisatools/cutils/include/__init__.py +0 -0
  13. lisatools/cutils/include/global.hpp +28 -0
  14. lisatools/cutils/src/Detector.cpp +307 -0
  15. lisatools/cutils/src/Detector.cu +307 -0
  16. lisatools/cutils/src/__init__.py +0 -0
  17. lisatools/cutils/src/pycppdetector.pyx +255 -0
  18. lisatools/datacontainer.py +309 -0
  19. lisatools/detector.py +704 -0
  20. lisatools/diagnostic.py +977 -0
  21. lisatools/sampling/__init__.py +0 -0
  22. lisatools/sampling/likelihood.py +882 -0
  23. lisatools/sampling/moves/__init__.py +0 -0
  24. lisatools/sampling/moves/skymodehop.py +110 -0
  25. lisatools/sampling/prior.py +646 -0
  26. lisatools/sampling/stopping.py +320 -0
  27. lisatools/sampling/utility.py +411 -0
  28. lisatools/sensitivity.py +899 -0
  29. lisatools/sources/__init__.py +6 -0
  30. lisatools/sources/bbh/__init__.py +1 -0
  31. lisatools/sources/bbh/waveform.py +91 -0
  32. lisatools/sources/defaultresponse.py +36 -0
  33. lisatools/sources/emri/__init__.py +1 -0
  34. lisatools/sources/emri/waveform.py +79 -0
  35. lisatools/sources/gb/__init__.py +1 -0
  36. lisatools/sources/gb/waveform.py +67 -0
  37. lisatools/sources/utils.py +456 -0
  38. lisatools/sources/waveformbase.py +41 -0
  39. lisatools/stochastic.py +291 -0
  40. lisatools/utils/__init__.py +0 -0
  41. lisatools/utils/constants.py +40 -0
  42. lisatools/utils/pointeradjust.py +106 -0
  43. lisatools/utils/utility.py +245 -0
@@ -0,0 +1,307 @@
1
+ #include "stdio.h"
2
+ #include "global.hpp"
3
+ #include "Detector.hpp"
4
+ #include <iostream>
5
+ #include <stdexcept>
6
+ #include <string>
7
+ #include <sstream>
8
+
9
+ CUDA_DEVICE
10
+ int Orbits::get_window(double t)
11
+ {
12
+ int out = int(t / dt);
13
+ if ((out < 0) || (out >= N))
14
+ return -1;
15
+ else
16
+ return out;
17
+ }
18
+
19
+ CUDA_DEVICE
20
+ int Orbits::get_link_ind(int link)
21
+ {
22
+ if (link == 12)
23
+ return 0;
24
+ else if (link == 23)
25
+ return 1;
26
+ else if (link == 31)
27
+ return 2;
28
+ else if (link == 13)
29
+ return 3;
30
+ else if (link == 32)
31
+ return 4;
32
+ else if (link == 21)
33
+ return 5;
34
+ else
35
+ #ifdef __CUDACC__
36
+ printf("BAD link ind. Must be 12, 23, 31, 13, 32, 21.");
37
+ #else
38
+ throw std::invalid_argument("Bad link ind. Must be 12, 23, 31, 13, 32, 21.");
39
+ #endif // __CUDACC__
40
+ return -1;
41
+ }
42
+
43
+ CUDA_DEVICE
44
+ int Orbits::get_sc_ind(int sc)
45
+ {
46
+ if (sc == 1)
47
+ return 0;
48
+ else if (sc == 2)
49
+ return 1;
50
+ else if (sc == 3)
51
+ return 2;
52
+ else
53
+ {
54
+ #ifdef __CUDACC__
55
+ printf("BAD sc ind. Must be 1,2,3. %d\n", sc);
56
+ #else
57
+ std::ostringstream oss;
58
+ oss << "Bad sc ind. Must be 1,2,3. Input sc is " << sc << " " << std::endl;
59
+ std::string var = oss.str();
60
+ throw std::invalid_argument(var);
61
+ #endif // __CUDACC__
62
+ }
63
+ return 0;
64
+ }
65
+
66
+ CUDA_DEVICE
67
+ double Orbits::interpolate(double t, double *in_arr, int window, int major_ndim, int major_ind, int ndim, int pos)
68
+ {
69
+ double up = in_arr[((window + 1) * major_ndim + major_ind) * ndim + pos]; // down_ind * ndim + pos];
70
+ double down = in_arr[(window * major_ndim + major_ind) * ndim + pos];
71
+
72
+ // m *(x - x0) + y0
73
+ double fin = ((up - down) / dt) * (t - (dt * window)) + down;
74
+ // if ((ndim == 1))
75
+ // printf("%d %e %e %e %e \n", window, fin, down, up, (t - (dt * window)));
76
+
77
+ return fin;
78
+ }
79
+
80
+ CUDA_DEVICE
81
+ void Orbits::get_normal_unit_vec_ptr(Vec *vec, double t, int link)
82
+ {
83
+ Vec _tmp = get_normal_unit_vec(t, link);
84
+ vec->x = _tmp.x;
85
+ vec->y = _tmp.y;
86
+ vec->z = _tmp.z;
87
+ }
88
+
89
+ CUDA_DEVICE
90
+ Vec Orbits::get_normal_unit_vec(double t, int link)
91
+ {
92
+ int window = get_window(t);
93
+ if (window == -1)
94
+ {
95
+ // out of bounds
96
+ return Vec(0.0, 0.0, 0.0);
97
+ }
98
+
99
+ int link_ind = get_link_ind(link);
100
+
101
+ int up_ind = (window + 1) * nlinks + link_ind;
102
+ int down_ind = window * nlinks + link_ind;
103
+
104
+ // x (pos = 0) ndim = 3
105
+ double x_out = interpolate(t, n_arr, window, nlinks, link_ind, 3, 0);
106
+ // y (pos = 1)
107
+ double y_out = interpolate(t, n_arr, window, nlinks, link_ind, 3, 1);
108
+ // z (pos = 2)
109
+ double z_out = interpolate(t, n_arr, window, nlinks, link_ind, 3, 2);
110
+
111
+ return Vec(x_out, y_out, z_out);
112
+ }
113
+
114
+ CUDA_DEVICE
115
+ double Orbits::get_light_travel_time(double t, int link)
116
+ {
117
+ int window = get_window(t);
118
+ if (window == -1)
119
+ {
120
+ // out of bounds
121
+ return 0.0;
122
+ }
123
+
124
+ int link_ind = get_link_ind(link);
125
+ if ((link_ind < 0) || (link_ind >= 6))
126
+ printf("BAD %d\n", link_ind);
127
+ int up_ind = (window + 1) * (nlinks + link_ind);
128
+ int down_ind = window * (nlinks + link_ind);
129
+
130
+ // x (pos = 0), ndim = 1
131
+ double ltt_out = interpolate(t, ltt_arr, window, nlinks, link_ind, 1, 0);
132
+
133
+ return ltt_out;
134
+ }
135
+
136
+ CUDA_DEVICE
137
+ Vec Orbits::get_pos(double t, int sc)
138
+ {
139
+ int window = get_window(t);
140
+ if (window == -1)
141
+ {
142
+ // out of bounds
143
+ return Vec(0.0, 0.0, 0.0);
144
+ }
145
+
146
+ int sc_ind = get_sc_ind(sc);
147
+
148
+ // x (pos = 0), ndim = 3
149
+ double x_out = interpolate(t, x_arr, window, nspacecraft, sc_ind, 3, 0);
150
+ // y (pos = 1), ndim = 3
151
+ double y_out = interpolate(t, x_arr, window, nspacecraft, sc_ind, 3, 1);
152
+ // z (pos = 2), ndim = 3
153
+ double z_out = interpolate(t, x_arr, window, nspacecraft, sc_ind, 3, 2);
154
+ return Vec(x_out, y_out, z_out);
155
+ }
156
+
157
+ CUDA_DEVICE
158
+ void Orbits::get_pos_ptr(Vec *vec, double t, int sc)
159
+ {
160
+ Vec _tmp = get_pos(t, sc);
161
+ vec->x = _tmp.x;
162
+ vec->y = _tmp.y;
163
+ vec->z = _tmp.z;
164
+ }
165
+
166
+ #define NUM_THREADS 64
167
+
168
+
169
+ CUDA_KERNEL
170
+ void get_light_travel_time_kernel(double *ltt, double *t, int *link, int num, Orbits &orbits)
171
+ {
172
+ int start, end, increment;
173
+ #ifdef __CUDACC__
174
+ start = blockIdx.x * blockDim.x + threadIdx.x;
175
+ end = num;
176
+ increment = gridDim.x * blockDim.x;
177
+ #else // __CUDACC__
178
+ start = 0;
179
+ end = num;
180
+ increment = 1;
181
+ #endif // __CUDACC__
182
+
183
+ for (int i = start; i < end; i += increment)
184
+ {
185
+ ltt[i] = orbits.get_light_travel_time(t[i], link[i]);
186
+ }
187
+ }
188
+
189
+ void Orbits::get_light_travel_time_arr(double *ltt, double *t, int *link, int num)
190
+ {
191
+ #ifdef __CUDACC__
192
+ int num_blocks = std::ceil((num + NUM_THREADS - 1) / NUM_THREADS);
193
+
194
+ // copy self to GPU
195
+ Orbits *orbits_gpu;
196
+ gpuErrchk(cudaMalloc(&orbits_gpu, sizeof(Orbits)));
197
+ gpuErrchk(cudaMemcpy(orbits_gpu, this, sizeof(Orbits), cudaMemcpyHostToDevice));
198
+
199
+ get_light_travel_time_kernel<<<num_blocks, NUM_THREADS>>>(ltt, t, link, num, *orbits_gpu);
200
+ cudaDeviceSynchronize();
201
+ gpuErrchk(cudaGetLastError());
202
+
203
+ gpuErrchk(cudaFree(orbits_gpu));
204
+
205
+ #else // __CUDACC__
206
+
207
+ get_light_travel_time_kernel(ltt, t, link, num, *this);
208
+
209
+ #endif // __CUDACC__
210
+ }
211
+
212
+
213
+ CUDA_KERNEL
214
+ void get_pos_kernel(double *pos_x, double *pos_y, double *pos_z, double *t, int *sc, int num, Orbits &orbits)
215
+ {
216
+ int start, end, increment;
217
+ #ifdef __CUDACC__
218
+ start = blockIdx.x * blockDim.x + threadIdx.x;
219
+ end = num;
220
+ increment = gridDim.x * blockDim.x;
221
+ #else // __CUDACC__
222
+ start = 0;
223
+ end = num;
224
+ increment = 1;
225
+ #endif // __CUDACC__
226
+ Vec _tmp(0.0, 0.0, 0.0);
227
+
228
+ for (int i = start; i < end; i += increment)
229
+ {
230
+ _tmp = orbits.get_pos(t[i], sc[i]);
231
+ pos_x[i] = _tmp.x;
232
+ pos_y[i] = _tmp.y;
233
+ pos_z[i] = _tmp.z;
234
+ }
235
+ }
236
+
237
+ void Orbits::get_pos_arr(double *pos_x, double *pos_y, double *pos_z, double *t, int *sc, int num)
238
+ {
239
+ #ifdef __CUDACC__
240
+ int num_blocks = std::ceil((num + NUM_THREADS - 1) / NUM_THREADS);
241
+
242
+ // copy self to GPU
243
+ Orbits *orbits_gpu;
244
+ gpuErrchk(cudaMalloc(&orbits_gpu, sizeof(Orbits)));
245
+ gpuErrchk(cudaMemcpy(orbits_gpu, this, sizeof(Orbits), cudaMemcpyHostToDevice));
246
+
247
+ get_pos_kernel<<<num_blocks, NUM_THREADS>>>(pos_x, pos_y, pos_z, t, sc, num, *orbits_gpu);
248
+ cudaDeviceSynchronize();
249
+ gpuErrchk(cudaGetLastError());
250
+
251
+ gpuErrchk(cudaFree(orbits_gpu));
252
+
253
+ #else // __CUDACC__
254
+
255
+ get_pos_kernel(pos_x, pos_y, pos_z, t, sc, num, *this);
256
+
257
+ #endif // __CUDACC__
258
+ }
259
+
260
+
261
+ CUDA_KERNEL
262
+ void get_normal_unit_vec_kernel(double *normal_unit_vec_x, double *normal_unit_vec_y, double *normal_unit_vec_z, double *t, int *link, int num, Orbits &orbits)
263
+ {
264
+ int start, end, increment;
265
+ #ifdef __CUDACC__
266
+ start = blockIdx.x * blockDim.x + threadIdx.x;
267
+ end = num;
268
+ increment = gridDim.x * blockDim.x;
269
+ #else // __CUDACC__
270
+ start = 0;
271
+ end = num;
272
+ increment = 1;
273
+ #endif // __CUDACC__
274
+ Vec _tmp(0.0, 0.0, 0.0);
275
+
276
+ for (int i = start; i < end; i += increment)
277
+ {
278
+ _tmp = orbits.get_normal_unit_vec(t[i], link[i]);
279
+ normal_unit_vec_x[i] = _tmp.x;
280
+ normal_unit_vec_y[i] = _tmp.y;
281
+ normal_unit_vec_z[i] = _tmp.z;
282
+ }
283
+ }
284
+
285
+ void Orbits::get_normal_unit_vec_arr(double *normal_unit_vec_x, double *normal_unit_vec_y, double *normal_unit_vec_z, double *t, int *link, int num)
286
+ {
287
+ #ifdef __CUDACC__
288
+ int num_blocks = std::ceil((num + NUM_THREADS - 1) / NUM_THREADS);
289
+
290
+ // copy self to GPU
291
+ Orbits *orbits_gpu;
292
+ gpuErrchk(cudaMalloc(&orbits_gpu, sizeof(Orbits)));
293
+ gpuErrchk(cudaMemcpy(orbits_gpu, this, sizeof(Orbits), cudaMemcpyHostToDevice));
294
+
295
+ get_normal_unit_vec_kernel<<<num_blocks, NUM_THREADS>>>(normal_unit_vec_x, normal_unit_vec_y, normal_unit_vec_z, t, link, num, *orbits_gpu);
296
+ cudaDeviceSynchronize();
297
+ gpuErrchk(cudaGetLastError());
298
+
299
+ gpuErrchk(cudaFree(orbits_gpu));
300
+
301
+ #else // __CUDACC__
302
+
303
+ get_normal_unit_vec_kernel(normal_unit_vec_x, normal_unit_vec_y, normal_unit_vec_z, t, link, num, *this);
304
+
305
+ #endif // __CUDACC__
306
+ }
307
+
@@ -0,0 +1,307 @@
1
+ #include "stdio.h"
2
+ #include "global.hpp"
3
+ #include "Detector.hpp"
4
+ #include <iostream>
5
+ #include <stdexcept>
6
+ #include <string>
7
+ #include <sstream>
8
+
9
/**
 * Map a time onto the index of the orbit-sample interval that contains it.
 *
 * The index is t / dt truncated toward zero. The range check is performed on
 * the floating-point quotient *before* the conversion to int, so NaN,
 * negative and very large inputs are rejected instead of being cast
 * (converting a double that does not fit in int is undefined behaviour, and
 * truncation toward zero would otherwise turn t in (-dt, 0) into window 0).
 *
 * @param t  Time to locate; it is divided by the member dt.
 * @return   Interval index in [0, N), or -1 when t lies outside that range.
 */
CUDA_DEVICE
int Orbits::get_window(double t)
{
    const double scaled = t / dt;

    // Written as !(scaled >= 0.0) so that NaN also lands in the reject branch.
    if (!(scaled >= 0.0) || (scaled >= static_cast<double>(N)))
        return -1;

    // NOTE(review): interpolate() reads row (window + 1), so the last accepted
    // window (N - 1) touches row N -- confirm the tables really hold that row.
    return static_cast<int>(scaled);
}
18
+
19
/**
 * Translate a two-digit link label into its zero-based slot.
 *
 * Mapping: 12 -> 0, 23 -> 1, 31 -> 2, 13 -> 3, 32 -> 4, 21 -> 5.
 *
 * @param link  Link label.
 * @return      Slot in [0, 5]. For any other label: when __CUDACC__ is
 *              defined a message is printed and -1 is returned; otherwise
 *              std::invalid_argument is thrown.
 */
CUDA_DEVICE
int Orbits::get_link_ind(int link)
{
    switch (link)
    {
    case 12:
        return 0;
    case 23:
        return 1;
    case 31:
        return 2;
    case 13:
        return 3;
    case 32:
        return 4;
    case 21:
        return 5;
    default:
        break;
    }

    // Unknown label: report it in whichever way the current build supports.
#ifdef __CUDACC__
    printf("BAD link ind. Must be 12, 23, 31, 13, 32, 21.");
#else
    throw std::invalid_argument("Bad link ind. Must be 12, 23, 31, 13, 32, 21.");
#endif // __CUDACC__
    return -1;
}
42
+
43
/**
 * Translate a one-based spacecraft number into its zero-based slot.
 *
 * @param sc  Spacecraft number; 1, 2 and 3 are accepted.
 * @return    sc - 1 for a valid number. For anything else: when __CUDACC__
 *            is defined a message is printed and 0 is returned (i.e. the
 *            same slot as spacecraft 1); otherwise std::invalid_argument is
 *            thrown.
 */
CUDA_DEVICE
int Orbits::get_sc_ind(int sc)
{
    if ((sc >= 1) && (sc <= 3))
        return sc - 1;

#ifdef __CUDACC__
    printf("BAD sc ind. Must be 1,2,3. %d\n", sc);
#else
    std::ostringstream message;
    message << "Bad sc ind. Must be 1,2,3. Input sc is " << sc << " " << std::endl;
    std::string text = message.str();
    throw std::invalid_argument(text);
#endif // __CUDACC__

    return 0;
}
65
+
66
/**
 * Linearly interpolate one component of a flattened table at time t.
 *
 * The element for (row, major_ind, pos) is read from
 * in_arr[(row * major_ndim + major_ind) * ndim + pos]; the rows used are
 * `window` and `window + 1`.
 *
 * @param t           Time at which to evaluate.
 * @param in_arr      Flattened table to read from.
 * @param window      Index of the lower row (see get_window()).
 * @param major_ndim  Number of entries per row along the middle axis.
 * @param major_ind   Entry selected along the middle axis.
 * @param ndim        Number of components per entry.
 * @param pos         Component selected within the entry.
 * @return            Value on the straight line through the two samples,
 *                    evaluated at t, with the lower sample placed at
 *                    dt * window and the samples dt apart.
 */
CUDA_DEVICE
double Orbits::interpolate(double t, double *in_arr, int window, int major_ndim, int major_ind, int ndim, int pos)
{
    // Flat offset of the lower sample; the upper sample sits one full row on.
    const int lower_offset = (window * major_ndim + major_ind) * ndim + pos;
    const int row_stride = major_ndim * ndim;

    const double down = in_arr[lower_offset];
    const double up = in_arr[lower_offset + row_stride];

    // y = m * (x - x0) + y0
    const double slope = (up - down) / dt;
    const double elapsed = t - (dt * window);

    return slope * elapsed + down;
}
79
+
80
/**
 * Pointer-output variant of get_normal_unit_vec().
 *
 * @param vec   Destination; its x, y and z members are overwritten.
 * @param t     Time passed through to get_normal_unit_vec().
 * @param link  Link label passed through to get_normal_unit_vec().
 */
CUDA_DEVICE
void Orbits::get_normal_unit_vec_ptr(Vec *vec, double t, int link)
{
    const Vec result = get_normal_unit_vec(t, link);

    // Copy component by component into the caller's object.
    vec->x = result.x;
    vec->y = result.y;
    vec->z = result.z;
}
88
+
89
/**
 * Interpolate the three components stored in n_arr for a link at time t.
 *
 * @param t     Time at which to evaluate.
 * @param link  Link label (see get_link_ind()).
 * @return      Interpolated (x, y, z). Vec(0, 0, 0) is returned when t is
 *              outside the table (get_window() == -1) or when
 *              get_link_ind() reports an invalid link by returning a
 *              negative index. In builds where get_link_ind() throws, the
 *              exception propagates instead.
 */
CUDA_DEVICE
Vec Orbits::get_normal_unit_vec(double t, int link)
{
    const int window = get_window(t);
    if (window == -1)
    {
        // out of bounds
        return Vec(0.0, 0.0, 0.0);
    }

    const int link_ind = get_link_ind(link);
    if (link_ind < 0)
    {
        // get_link_ind() already reported the problem; never index n_arr
        // with a negative slot.
        return Vec(0.0, 0.0, 0.0);
    }

    // ndim = 3; pos selects x (0), y (1), z (2)
    const double x_out = interpolate(t, n_arr, window, nlinks, link_ind, 3, 0);
    const double y_out = interpolate(t, n_arr, window, nlinks, link_ind, 3, 1);
    const double z_out = interpolate(t, n_arr, window, nlinks, link_ind, 3, 2);

    return Vec(x_out, y_out, z_out);
}
113
+
114
/**
 * Interpolate the value stored in ltt_arr for a link at time t.
 *
 * @param t     Time at which to evaluate.
 * @param link  Link label (see get_link_ind()).
 * @return      Interpolated value. 0.0 is returned when t is outside the
 *              table (get_window() == -1) or when the link index is not in
 *              [0, 6); in the latter case "BAD <index>" is also printed.
 *              In builds where get_link_ind() throws, the exception
 *              propagates instead.
 */
CUDA_DEVICE
double Orbits::get_light_travel_time(double t, int link)
{
    const int window = get_window(t);
    if (window == -1)
    {
        // out of bounds
        return 0.0;
    }

    const int link_ind = get_link_ind(link);
    if ((link_ind < 0) || (link_ind >= 6))
    {
        printf("BAD %d\n", link_ind);
        // Stop here: continuing would index ltt_arr with an invalid slot.
        return 0.0;
    }

    // pos = 0, ndim = 1: a single scalar per (row, link)
    return interpolate(t, ltt_arr, window, nlinks, link_ind, 1, 0);
}
135
+
136
/**
 * Interpolate the three components stored in x_arr for a spacecraft at time t.
 *
 * @param t   Time at which to evaluate.
 * @param sc  Spacecraft number (see get_sc_ind()).
 * @return    Interpolated (x, y, z), or Vec(0, 0, 0) when t is outside the
 *            table (get_window() == -1).
 */
CUDA_DEVICE
Vec Orbits::get_pos(double t, int sc)
{
    const int window = get_window(t);
    if (window == -1)
        return Vec(0.0, 0.0, 0.0); // out of bounds

    const int sc_ind = get_sc_ind(sc);

    // ndim = 3; component 0 is x, 1 is y, 2 is z
    double xyz[3];
    for (int axis = 0; axis < 3; axis++)
        xyz[axis] = interpolate(t, x_arr, window, nspacecraft, sc_ind, 3, axis);

    return Vec(xyz[0], xyz[1], xyz[2]);
}
156
+
157
/**
 * Pointer-output variant of get_pos().
 *
 * @param vec  Destination; its x, y and z members are overwritten.
 * @param t    Time passed through to get_pos().
 * @param sc   Spacecraft number passed through to get_pos().
 */
CUDA_DEVICE
void Orbits::get_pos_ptr(Vec *vec, double t, int sc)
{
    const Vec result = get_pos(t, sc);

    // Copy component by component into the caller's object.
    vec->x = result.x;
    vec->y = result.y;
    vec->z = result.z;
}
165
+
166
+ #define NUM_THREADS 64
167
+
168
+
169
/**
 * Fill ltt[i] = orbits.get_light_travel_time(t[i], link[i]) for i in [0, num).
 *
 * When __CUDACC__ is defined each thread starts at its global thread index
 * and advances by the total number of launched threads (grid-stride loop);
 * otherwise a single caller walks every element in order.
 *
 * @param ltt     Output array, num entries.
 * @param t       Input times, num entries.
 * @param link    Input link labels, num entries.
 * @param num     Number of entries to process.
 * @param orbits  Object whose get_light_travel_time() is evaluated.
 */
CUDA_KERNEL
void get_light_travel_time_kernel(double *ltt, double *t, int *link, int num, Orbits &orbits)
{
#ifdef __CUDACC__
    const int first = blockIdx.x * blockDim.x + threadIdx.x;
    const int stride = gridDim.x * blockDim.x;
#else  // __CUDACC__
    const int first = 0;
    const int stride = 1;
#endif // __CUDACC__

    for (int idx = first; idx < num; idx += stride)
        ltt[idx] = orbits.get_light_travel_time(t[idx], link[idx]);
}
188
+
189
/**
 * Evaluate get_light_travel_time() for num (t, link) pairs.
 *
 * When __CUDACC__ is defined, this object is copied byte-for-byte into a
 * temporary device allocation and get_light_travel_time_kernel is launched
 * with NUM_THREADS threads per block; otherwise the kernel function is
 * called directly on *this.
 *
 * NOTE(review): in the CUDA build ltt, t and link are handed to the kernel
 * unchanged, so they are presumably device-accessible pointers, as are the
 * pointer members copied with *this -- confirm against the caller.
 *
 * @param ltt   Output array, num entries.
 * @param t     Input times, num entries.
 * @param link  Input link labels, num entries.
 * @param num   Number of entries; nothing is done when num <= 0.
 */
void Orbits::get_light_travel_time_arr(double *ltt, double *t, int *link, int num)
{
    // An empty request would otherwise launch a zero-block grid, which the
    // CUDA runtime rejects as an invalid configuration.
    if (num <= 0)
        return;

#ifdef __CUDACC__
    // Integer ceiling division; no floating-point ceil is needed.
    const int num_blocks = (num + NUM_THREADS - 1) / NUM_THREADS;

    // copy self to GPU
    Orbits *orbits_gpu;
    gpuErrchk(cudaMalloc(&orbits_gpu, sizeof(Orbits)));
    gpuErrchk(cudaMemcpy(orbits_gpu, this, sizeof(Orbits), cudaMemcpyHostToDevice));

    get_light_travel_time_kernel<<<num_blocks, NUM_THREADS>>>(ltt, t, link, num, *orbits_gpu);
    // Launch-configuration errors surface here ...
    gpuErrchk(cudaGetLastError());
    // ... and faults raised while the kernel runs surface at the sync.
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaFree(orbits_gpu));

#else // __CUDACC__

    get_light_travel_time_kernel(ltt, t, link, num, *this);

#endif // __CUDACC__
}
211
+
212
+
213
/**
 * For i in [0, num), evaluate orbits.get_pos(t[i], sc[i]) and scatter the
 * result into pos_x[i], pos_y[i], pos_z[i].
 *
 * When __CUDACC__ is defined each thread starts at its global thread index
 * and advances by the total number of launched threads (grid-stride loop);
 * otherwise a single caller walks every element in order.
 *
 * @param pos_x   Output x components, num entries.
 * @param pos_y   Output y components, num entries.
 * @param pos_z   Output z components, num entries.
 * @param t       Input times, num entries.
 * @param sc      Input spacecraft numbers, num entries.
 * @param num     Number of entries to process.
 * @param orbits  Object whose get_pos() is evaluated.
 */
CUDA_KERNEL
void get_pos_kernel(double *pos_x, double *pos_y, double *pos_z, double *t, int *sc, int num, Orbits &orbits)
{
#ifdef __CUDACC__
    const int first = blockIdx.x * blockDim.x + threadIdx.x;
    const int stride = gridDim.x * blockDim.x;
#else  // __CUDACC__
    const int first = 0;
    const int stride = 1;
#endif // __CUDACC__

    Vec current(0.0, 0.0, 0.0);
    for (int idx = first; idx < num; idx += stride)
    {
        current = orbits.get_pos(t[idx], sc[idx]);
        pos_x[idx] = current.x;
        pos_y[idx] = current.y;
        pos_z[idx] = current.z;
    }
}
236
+
237
/**
 * Evaluate get_pos() for num (t, sc) pairs.
 *
 * When __CUDACC__ is defined, this object is copied byte-for-byte into a
 * temporary device allocation and get_pos_kernel is launched with
 * NUM_THREADS threads per block; otherwise the kernel function is called
 * directly on *this.
 *
 * NOTE(review): in the CUDA build the array arguments are handed to the
 * kernel unchanged, so they are presumably device-accessible pointers, as
 * are the pointer members copied with *this -- confirm against the caller.
 *
 * @param pos_x  Output x components, num entries.
 * @param pos_y  Output y components, num entries.
 * @param pos_z  Output z components, num entries.
 * @param t      Input times, num entries.
 * @param sc     Input spacecraft numbers, num entries.
 * @param num    Number of entries; nothing is done when num <= 0.
 */
void Orbits::get_pos_arr(double *pos_x, double *pos_y, double *pos_z, double *t, int *sc, int num)
{
    // An empty request would otherwise launch a zero-block grid, which the
    // CUDA runtime rejects as an invalid configuration.
    if (num <= 0)
        return;

#ifdef __CUDACC__
    // Integer ceiling division; no floating-point ceil is needed.
    const int num_blocks = (num + NUM_THREADS - 1) / NUM_THREADS;

    // copy self to GPU
    Orbits *orbits_gpu;
    gpuErrchk(cudaMalloc(&orbits_gpu, sizeof(Orbits)));
    gpuErrchk(cudaMemcpy(orbits_gpu, this, sizeof(Orbits), cudaMemcpyHostToDevice));

    get_pos_kernel<<<num_blocks, NUM_THREADS>>>(pos_x, pos_y, pos_z, t, sc, num, *orbits_gpu);
    // Launch-configuration errors surface here ...
    gpuErrchk(cudaGetLastError());
    // ... and faults raised while the kernel runs surface at the sync.
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaFree(orbits_gpu));

#else // __CUDACC__

    get_pos_kernel(pos_x, pos_y, pos_z, t, sc, num, *this);

#endif // __CUDACC__
}
259
+
260
+
261
/**
 * For i in [0, num), evaluate orbits.get_normal_unit_vec(t[i], link[i]) and
 * scatter the result into the three output arrays.
 *
 * When __CUDACC__ is defined each thread starts at its global thread index
 * and advances by the total number of launched threads (grid-stride loop);
 * otherwise a single caller walks every element in order.
 *
 * @param normal_unit_vec_x  Output x components, num entries.
 * @param normal_unit_vec_y  Output y components, num entries.
 * @param normal_unit_vec_z  Output z components, num entries.
 * @param t                  Input times, num entries.
 * @param link               Input link labels, num entries.
 * @param num                Number of entries to process.
 * @param orbits             Object whose get_normal_unit_vec() is evaluated.
 */
CUDA_KERNEL
void get_normal_unit_vec_kernel(double *normal_unit_vec_x, double *normal_unit_vec_y, double *normal_unit_vec_z, double *t, int *link, int num, Orbits &orbits)
{
#ifdef __CUDACC__
    const int first = blockIdx.x * blockDim.x + threadIdx.x;
    const int stride = gridDim.x * blockDim.x;
#else  // __CUDACC__
    const int first = 0;
    const int stride = 1;
#endif // __CUDACC__

    Vec current(0.0, 0.0, 0.0);
    for (int idx = first; idx < num; idx += stride)
    {
        current = orbits.get_normal_unit_vec(t[idx], link[idx]);
        normal_unit_vec_x[idx] = current.x;
        normal_unit_vec_y[idx] = current.y;
        normal_unit_vec_z[idx] = current.z;
    }
}
284
+
285
/**
 * Evaluate get_normal_unit_vec() for num (t, link) pairs.
 *
 * When __CUDACC__ is defined, this object is copied byte-for-byte into a
 * temporary device allocation and get_normal_unit_vec_kernel is launched
 * with NUM_THREADS threads per block; otherwise the kernel function is
 * called directly on *this.
 *
 * NOTE(review): in the CUDA build the array arguments are handed to the
 * kernel unchanged, so they are presumably device-accessible pointers, as
 * are the pointer members copied with *this -- confirm against the caller.
 *
 * @param normal_unit_vec_x  Output x components, num entries.
 * @param normal_unit_vec_y  Output y components, num entries.
 * @param normal_unit_vec_z  Output z components, num entries.
 * @param t                  Input times, num entries.
 * @param link               Input link labels, num entries.
 * @param num                Number of entries; nothing is done when num <= 0.
 */
void Orbits::get_normal_unit_vec_arr(double *normal_unit_vec_x, double *normal_unit_vec_y, double *normal_unit_vec_z, double *t, int *link, int num)
{
    // An empty request would otherwise launch a zero-block grid, which the
    // CUDA runtime rejects as an invalid configuration.
    if (num <= 0)
        return;

#ifdef __CUDACC__
    // Integer ceiling division; no floating-point ceil is needed.
    const int num_blocks = (num + NUM_THREADS - 1) / NUM_THREADS;

    // copy self to GPU
    Orbits *orbits_gpu;
    gpuErrchk(cudaMalloc(&orbits_gpu, sizeof(Orbits)));
    gpuErrchk(cudaMemcpy(orbits_gpu, this, sizeof(Orbits), cudaMemcpyHostToDevice));

    get_normal_unit_vec_kernel<<<num_blocks, NUM_THREADS>>>(normal_unit_vec_x, normal_unit_vec_y, normal_unit_vec_z, t, link, num, *orbits_gpu);
    // Launch-configuration errors surface here ...
    gpuErrchk(cudaGetLastError());
    // ... and faults raised while the kernel runs surface at the sync.
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaFree(orbits_gpu));

#else // __CUDACC__

    get_normal_unit_vec_kernel(normal_unit_vec_x, normal_unit_vec_y, normal_unit_vec_z, t, link, num, *this);

#endif // __CUDACC__
}
307
+
File without changes