pyopencl 2024.2.2__cp311-cp311-macosx_11_0_arm64.whl → 2024.2.4__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (102)
  1. pyopencl/__init__.py +16 -4
  2. pyopencl/_cl.cpython-311-darwin.so +0 -0
  3. pyopencl/algorithm.py +3 -1
  4. pyopencl/bitonic_sort.py +2 -0
  5. pyopencl/characterize/__init__.py +23 -0
  6. pyopencl/compyte/.git +1 -0
  7. pyopencl/compyte/.github/workflows/autopush.yml +21 -0
  8. pyopencl/compyte/.github/workflows/ci.yml +30 -0
  9. pyopencl/compyte/.gitignore +21 -0
  10. pyopencl/compyte/ndarray/Makefile +31 -0
  11. pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
  12. pyopencl/compyte/ndarray/pygpu_language.h +207 -0
  13. pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
  14. pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
  15. pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
  16. pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
  17. pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
  18. pyopencl/compyte/setup.cfg +9 -0
  19. pyopencl/tools.py +60 -56
  20. pyopencl/version.py +7 -3
  21. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.4.dist-info}/METADATA +14 -14
  22. pyopencl-2024.2.4.dist-info/RECORD +59 -0
  23. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.4.dist-info}/WHEEL +1 -1
  24. pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
  25. pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
  26. pyopencl-2024.2.2.data/data/Makefile.in +0 -21
  27. pyopencl-2024.2.2.data/data/README.rst +0 -70
  28. pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
  29. pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
  30. pyopencl-2024.2.2.data/data/configure.py +0 -6
  31. pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
  32. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
  33. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
  34. pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
  35. pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
  36. pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
  37. pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
  38. pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
  39. pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
  40. pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
  41. pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
  42. pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
  43. pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
  44. pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
  45. pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
  46. pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
  47. pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
  48. pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
  49. pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
  50. pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
  51. pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
  52. pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
  53. pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
  54. pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
  55. pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
  56. pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
  57. pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
  58. pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
  59. pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
  60. pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
  61. pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
  62. pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
  63. pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
  64. pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
  65. pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
  66. pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
  67. pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
  68. pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
  69. pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
  70. pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
  71. pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
  72. pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
  73. pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
  74. pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
  75. pyopencl-2024.2.2.data/data/pytest.ini +0 -3
  76. pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
  77. pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
  78. pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
  79. pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
  80. pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
  81. pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
  82. pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
  83. pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
  84. pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
  85. pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
  86. pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
  87. pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
  88. pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
  89. pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
  90. pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
  91. pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
  92. pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
  93. pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
  94. pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
  95. pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
  96. pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
  97. pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
  98. pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
  99. pyopencl-2024.2.2.dist-info/LICENSE +0 -282
  100. pyopencl-2024.2.2.dist-info/RECORD +0 -123
  101. pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
  102. {pyopencl-2024.2.2.data/data → pyopencl-2024.2.4.dist-info/licenses}/LICENSE +0 -0
@@ -1,2227 +0,0 @@
1
- #!/usr/bin/env python3
2
- #
3
- # TrouNoir model using PyOpenCL or PyCUDA
4
- #
5
- # CC BY-NC-SA 2019 : <emmanuel.quemener@ens-lyon.fr>
6
- #
7
- # Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/
8
- #
9
- # Thanks to Andreas Klockner for PyOpenCL and PyCUDA:
10
- # http://mathema.tician.de/software/pyopencl
11
- #
12
- # Original code programmed in Fortran 77 in March 1994
13
- # for Practical Work of Numerical Simulation
14
- # DEA (old Master2) in astrophysics and spatial techniques in Meudon
15
- # by Herve Aussel & Emmanuel Quemener
16
- #
17
- # Conversion in C done by Emmanuel Quemener in august 1997
18
- # GPUfication in OpenCL under Python in july 2019
19
- # GPUfication in CUDA under Python in august 2019
20
- #
21
- # Thanks to :
22
- #
23
- # - Herve Aussel for his part of code of black body spectrum
24
- # - Didier Pelat for his help to perform this work
25
- # - Jean-Pierre Luminet for his article published in 1979
26
- # - Numerical Recipes for Runge Kutta recipes
27
- # - Luc Blanchet for his availability to answer my questions about General Relativity
28
- # - Pierre Lena for his passion about science and vulgarisation
29
-
30
- # If crash on OpenCL Intel implementation, add following options and force
31
- # export PYOPENCL_COMPILER_OUTPUT=1
32
- # export CL_CONFIG_USE_VECTORIZER=True
33
- # export CL_CONFIG_CPU_VECTORIZER_MODE=16
34
-
35
- import getopt
36
- import sys
37
- import time
38
- from socket import gethostname
39
-
40
- import numpy
41
-
42
- import pyopencl as cl
43
-
44
-
45
def DictionariesAPI():
    """Return the mapping from physics-model name to its integer code.

    The codes match the ``EINSTEIN`` (0) and ``NEWTON`` (1) macros defined
    at the top of the kernel source strings in this file.
    """
    return {"Einstein": 0, "Newton": 1}
48
-
49
-
50
- #
51
- # Blank space below to simplify debugging on OpenCL code
52
- #
53
-
54
-
55
- BlobOpenCL = """
56
-
57
- #define PI (float)3.14159265359e0f
58
- #define nbr 256
59
-
60
- #define EINSTEIN 0
61
- #define NEWTON 1
62
-
63
- #ifdef SETTRACKPOINTS
64
- #define TRACKPOINTS SETTRACKPOINTS
65
- #else
66
- #define TRACKPOINTS 2048
67
- #endif
68
-
69
- float atanp(float x,float y)
70
- {
71
- float angle;
72
-
73
- angle=atan2(y,x);
74
-
75
- if (angle<0.e0f)
76
- {
77
- angle+=(float)2.e0f*PI;
78
- }
79
-
80
- return angle;
81
- }
82
-
83
// Slope function used by calcul() for the u increments: returns v unchanged.
float f(float v)
{
  float slope=v;
  return slope;
}
87
-
88
// Slope function used by calcul() for the v increments.
// The model is selected at compile time through the PHYSICS macro:
//   NEWTON   : -u
//   otherwise: 3*m/b*u^2 - u
float g(float u,float m,float b)
{
#if PHYSICS == NEWTON
  return (-u);
#else
  return (3.e0f*m/b*pow(u,2)-u);
#endif
}
99
-
100
// One integration step of size h for the pair (u,v).
//   us, vs : outputs, the advanced values of u and v
//   up, vp : values of u and v at the start of the step
//   m, b   : forwarded unchanged to g()
// Each variable is advanced with its own four staged increments combined
// with weights 1,2,2,1 over 6.
void calcul(float *us,float *vs,float up,float vp,
            float h,float m,float b)
{
  // Staged increments for u: f() evaluated on vp offset by the previous stage.
  float du0=h*f(vp);
  float du1=h*f(vp+du0/2.e0f);
  float du2=h*f(vp+du1/2.e0f);
  float du3=h*f(vp+du2);

  // Staged increments for v: g() evaluated on up offset by the previous stage.
  float dv0=h*g(up,m,b);
  float dv1=h*g(up+dv0/2.e0f,m,b);
  float dv2=h*g(up+dv1/2.e0f,m,b);
  float dv3=h*g(up+dv2,m,b);

  *us=up+(du0+2.e0f*du1+2.e0f*du2+du3)/6.e0f;
  *vs=vp+(dv0+2.e0f*dv1+2.e0f*dv2+dv3)/6.e0f;
}
117
-
118
// Advance the state (pp,up,vp) by one step h and store it in (*ps,*us,*vs).
// The first component simply grows by h; u and v are advanced by calcul().
void rungekutta(float *ps,float *us,float *vs,
                float pp,float up,float vp,
                float h,float m,float b)
{
  *ps=pp+h;
  calcul(us,vs,up,vp,h,m,b);
}
125
-
126
- float decalage_spectral(float r,float b,float phi,
127
- float tho,float m)
128
- {
129
- return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
130
- }
131
-
132
- float spectre(float rf,int q,float b,float db,
133
- float h,float r,float m,float bss)
134
- {
135
- float flx;
136
-
137
- // flx=exp(q*log(r/m))*pow(rf,4)*b*db*h;
138
- flx=exp(q*log(r/m)+4.e0f*log(rf))*b*db*h;
139
- return(flx);
140
- }
141
-
142
- float spectre_cn(float rf32,float b32,float db32,
143
- float h32,float r32,float m32,float bss32)
144
- {
145
-
146
- #define MYFLOAT float
147
-
148
- MYFLOAT rf=(MYFLOAT)(rf32);
149
- MYFLOAT b=(MYFLOAT)(b32);
150
- MYFLOAT db=(MYFLOAT)(db32);
151
- MYFLOAT h=(MYFLOAT)(h32);
152
- MYFLOAT r=(MYFLOAT)(r32);
153
- MYFLOAT m=(MYFLOAT)(m32);
154
- MYFLOAT bss=(MYFLOAT)(bss32);
155
-
156
- MYFLOAT flx;
157
- MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int;
158
- int fi,posfreq;
159
-
160
- #define planck 6.62e-34f
161
- #define k 1.38e-23f
162
- #define c2 9.e16f
163
- #define temp 3.e7f
164
- #define m_point 1.e0f
165
-
166
- #define lplanck (log(6.62e0f)-34.e0f*log(10.e0f))
167
- #define lk (log(1.38e0f)-23.e0f*log(10.e0f))
168
- #define lc2 (log(9.e0f)+16.e0f*log(10.e0f))
169
-
170
- MYFLOAT v=1.e0f-3.e0f/r;
171
-
172
- qu=1.e0f/sqrt((1.e0f-3.e0f/r)*r)*(sqrt(r)-sqrt(6.e0f)+sqrt(3.e0f)/2.e0f*log((sqrt(r)+sqrt(3.e0f))/(sqrt(r)-sqrt(3.e0f))* 0.17157287525380988e0f )); // # noqa: E501
173
-
174
- temp_em=temp*sqrt(m)*exp(0.25e0f*log(m_point)-0.75e0f*log(r)-0.125e0f*log(v)+0.25e0f*log(fabs(qu)));
175
-
176
- flux_int=0.e0f;
177
- flx=0.e0f;
178
-
179
- for (fi=0;fi<nbr;fi++)
180
- {
181
- nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr;
182
- nu_rec=nu_em*rf;
183
- posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss);
184
- if ((posfreq>0)&&(posfreq<nbr))
185
- {
186
- // Initial version
187
- // flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.);
188
- // Version with log used
189
- //flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.);
190
- // flux_int*=pow(rf,3)*b*db*h;
191
- //flux_int*=exp(3.e0f*log(rf))*b*db*h;
192
- flux_int=2.e0f*exp(lplanck-lc2+3.e0f*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.e0f)*exp(3.e0f*log(rf))*b*db*h;
193
-
194
- flx+=flux_int;
195
- }
196
- }
197
-
198
- return((float)(flx));
199
- }
200
-
201
// Compute the two image values for a hit at radius r.
//   *zp receives 1/rf, where rf is the factor from decalage_spectral()
//   *fp receives the flux: spectre_cn() when raie==0 (with bss=1e19),
//       spectre() otherwise (with bss=2)
void impact(float phi,float r,float b,float tho,float m,
            float *zp,float *fp,
            int q,float db,
            float h,int raie)
{
  float rf=decalage_spectral(r,b,phi,tho,m);

  *zp=1.e0f/rf;
  *fp=(raie==0) ? spectre_cn(rf,b,db,h,r,m,1.e19f)
                : spectre(rf,q,b,db,h,r,m,2.e0f);
}
225
-
226
- __kernel void EachPixel(__global float *zImage,__global float *fImage,
227
- float Mass,float InternalRadius,
228
- float ExternalRadius,float Angle,
229
- int Line)
230
- {
231
- uint xi=(uint)get_global_id(0);
232
- uint yi=(uint)get_global_id(1);
233
- uint sizex=(uint)get_global_size(0);
234
- uint sizey=(uint)get_global_size(1);
235
-
236
- // Perform trajectory for each pixel, exit on hit
237
-
238
- float m,rs,ri,re,tho;
239
- int q,raie;
240
-
241
- m=Mass;
242
- rs=2.e0f*m;
243
- ri=InternalRadius;
244
- re=ExternalRadius;
245
- tho=Angle;
246
- q=-2;
247
- raie=Line;
248
-
249
- float bmx,db,b,h;
250
- float rp0,rps;
251
- float phi,phd;
252
- uint nh=0;
253
- float zp=0.e0f,fp=0.e0f;
254
-
255
- // Autosize for image
256
- bmx=1.25e0f*re;
257
-
258
- h=4.e0f*PI/(float)TRACKPOINTS;
259
-
260
- // set origin as center of image
261
- float x=(float)xi-(float)(sizex/2)+(float)5.e-1f;
262
- float y=(float)yi-(float)(sizey/2)+(float)5.e-1f;
263
- // angle extracted from cylindric symmetry
264
- phi=atanp(x,y);
265
- phd=atanp(cos(phi)*sin(tho),cos(tho));
266
-
267
-
268
- float up,vp,pp,us,vs,ps;
269
-
270
- // impact parameter
271
- b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
272
- // step of impact parameter;
273
- db=bmx/(float)(sizex);
274
-
275
- up=0.e0f;
276
- vp=1.e0f;
277
- pp=0.e0f;
278
-
279
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
280
-
281
- rps=fabs(b/us);
282
- rp0=rps;
283
-
284
- int ExitOnImpact=0;
285
-
286
- do
287
- {
288
- nh++;
289
- pp=ps;
290
- up=us;
291
- vp=vs;
292
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
293
- rps=fabs(b/us);
294
- ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>=ri)&&(rps<=re)?1:0;
295
-
296
- } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0)&&(nh<TRACKPOINTS));
297
-
298
-
299
- if (ExitOnImpact==1) {
300
- impact(phi,rps,b,tho,m,&zp,&fp,q,db,h,raie);
301
- }
302
- else
303
- {
304
- zp=0.e0f;
305
- fp=0.e0f;
306
- }
307
-
308
- barrier(CLK_GLOBAL_MEM_FENCE);
309
-
310
- zImage[yi+sizex*xi]=(float)zp;
311
- fImage[yi+sizex*xi]=(float)fp;
312
- }
313
-
314
- __kernel void Pixel(__global float *zImage,__global float *fImage,
315
- __global float *Trajectories,__global int *IdLast,
316
- uint ImpactParameter,
317
- float Mass,float InternalRadius,
318
- float ExternalRadius,float Angle,
319
- int Line)
320
- {
321
- uint xi=(uint)get_global_id(0);
322
- uint yi=(uint)get_global_id(1);
323
- uint sizex=(uint)get_global_size(0);
324
- uint sizey=(uint)get_global_size(1);
325
-
326
- // Perform trajectory for each pixel
327
-
328
- float m,ri,re,tho;
329
- int q,raie;
330
-
331
- m=Mass;
332
- ri=InternalRadius;
333
- re=ExternalRadius;
334
- tho=Angle;
335
- q=-2;
336
- raie=Line;
337
-
338
- float bmx,db,b,h;
339
- float phi,phd,php,nr,r;
340
- float zp=0.e0f,fp=0.e0f;
341
-
342
- // Autosize for image, 25% greater than external radius
343
- bmx=1.25e0f*re;
344
-
345
- // Angular step of integration
346
- h=4.e0f*PI/(float)TRACKPOINTS;
347
-
348
- // Step of Impact Parameter
349
- db=bmx/(2.e0f*(float)ImpactParameter);
350
-
351
- // set origin as center of image
352
- float x=(float)xi-(float)(sizex/2)+(float)5.e-1f;
353
- float y=(float)yi-(float)(sizey/2)+(float)5.e-1f;
354
-
355
- // angle extracted from cylindric symmetry
356
- phi=atanp(x,y);
357
- phd=atanp(cos(phi)*sin(tho),cos(tho));
358
-
359
- // Real Impact Parameter
360
- b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter;
361
-
362
- // Integer Impact Parameter
363
- uint bi=(uint)sqrt(x*x+y*y);
364
-
365
- int HalfLap=0,ExitOnImpact=0,ni;
366
-
367
- if (bi<ImpactParameter)
368
- {
369
- do
370
- {
371
- php=phd+(float)HalfLap*PI;
372
- nr=php/h;
373
- ni=(int)nr;
374
-
375
- if (ni<IdLast[bi])
376
- {
377
- r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
378
- }
379
- else
380
- {
381
- r=Trajectories[bi*TRACKPOINTS+ni];
382
- }
383
-
384
- if ((r<=re)&&(r>=ri))
385
- {
386
- ExitOnImpact=1;
387
- impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
388
- }
389
-
390
- HalfLap++;
391
- } while ((HalfLap<=2)&&(ExitOnImpact==0));
392
-
393
- }
394
-
395
- barrier(CLK_GLOBAL_MEM_FENCE);
396
-
397
- zImage[yi+sizex*xi]=zp;
398
- fImage[yi+sizex*xi]=fp;
399
- }
400
-
401
- __kernel void Circle(__global float *Trajectories,__global int *IdLast,
402
- __global float *zImage,__global float *fImage,
403
- float Mass,float InternalRadius,
404
- float ExternalRadius,float Angle,
405
- int Line)
406
- {
407
- // Integer Impact Parameter ID
408
- int bi=get_global_id(0);
409
- // Integer points on circle
410
- int i=get_global_id(1);
411
- // Integer Impact Parameter Size (half of image)
412
- int bmaxi=get_global_size(0);
413
- // Integer Points on circle
414
- int imx=get_global_size(1);
415
-
416
- // Perform trajectory for each pixel
417
-
418
- float m,ri,re,tho;
419
- int q,raie;
420
-
421
- m=Mass;
422
- ri=InternalRadius;
423
- re=ExternalRadius;
424
- tho=Angle;
425
- raie=Line;
426
-
427
- float bmx,db,b,h;
428
- float phi,phd;
429
- float zp=0.e0f,fp=0.e0f;
430
-
431
- // Autosize for image
432
- bmx=1.25e0f*re;
433
-
434
- // Angular step of integration
435
- h=4.e0f*PI/(float)TRACKPOINTS;
436
-
437
- // impact parameter
438
- b=(float)bi/(float)bmaxi*bmx;
439
- db=bmx/(2.e0f*(float)bmaxi);
440
-
441
- phi=2.e0f*PI/(float)imx*(float)i;
442
- phd=atanp(cos(phi)*sin(tho),cos(tho));
443
- int yi=(int)((float)bi*sin(phi))+bmaxi;
444
- int xi=(int)((float)bi*cos(phi))+bmaxi;
445
-
446
- int HalfLap=0,ExitOnImpact=0,ni;
447
- float php,nr,r;
448
-
449
- do
450
- {
451
- php=phd+(float)HalfLap*PI;
452
- nr=php/h;
453
- ni=(int)nr;
454
-
455
- if (ni<IdLast[bi])
456
- {
457
- r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
458
- }
459
- else
460
- {
461
- r=Trajectories[bi*TRACKPOINTS+ni];
462
- }
463
-
464
- if ((r<=re)&&(r>=ri))
465
- {
466
- ExitOnImpact=1;
467
- impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
468
- }
469
-
470
- HalfLap++;
471
- } while ((HalfLap<=2)&&(ExitOnImpact==0));
472
-
473
- zImage[yi+2*bmaxi*xi]=zp;
474
- fImage[yi+2*bmaxi*xi]=fp;
475
-
476
- barrier(CLK_GLOBAL_MEM_FENCE);
477
-
478
- }
479
-
480
- __kernel void Trajectory(__global float *Trajectories,__global int *IdLast,
481
- float Mass,float InternalRadius,
482
- float ExternalRadius,float Angle,
483
- int Line)
484
- {
485
- // Integer Impact Parameter ID
486
- int bi=get_global_id(0);
487
- // Integer Impact Parameter Size (half of image)
488
- int bmaxi=get_global_size(0);
489
-
490
- // Perform trajectory for each pixel
491
-
492
- float m,rs,re;
493
-
494
- m=Mass;
495
- rs=2.e0f*m;
496
- re=ExternalRadius;
497
-
498
- float bmx,b,h;
499
- int nh;
500
-
501
- // Autosize for image
502
- bmx=1.25e0f*re;
503
-
504
- // Angular step of integration
505
- h=4.e0f*PI/(float)TRACKPOINTS;
506
-
507
- // impact parameter
508
- b=(float)bi/(float)bmaxi*bmx;
509
-
510
- float up,vp,pp,us,vs,ps;
511
-
512
- up=0.e0f;
513
- vp=1.e0f;
514
-
515
- pp=0.e0f;
516
- nh=0;
517
-
518
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
519
-
520
- // b versus us
521
- float bvus=fabs(b/us);
522
- float bvus0=bvus;
523
- Trajectories[bi*TRACKPOINTS+nh]=bvus;
524
-
525
- do
526
- {
527
- nh++;
528
- pp=ps;
529
- up=us;
530
- vp=vs;
531
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
532
- bvus=fabs(b/us);
533
- Trajectories[bi*TRACKPOINTS+nh]=bvus;
534
-
535
- } while ((bvus>=rs)&&(bvus<=bvus0));
536
-
537
- IdLast[bi]=nh;
538
-
539
- barrier(CLK_GLOBAL_MEM_FENCE);
540
-
541
- }
542
-
543
- __kernel void EachCircle(__global float *zImage,__global float *fImage,
544
- float Mass,float InternalRadius,
545
- float ExternalRadius,float Angle,
546
- int Line)
547
- {
548
- // Integer Impact Parameter ID
549
- uint bi=(uint)get_global_id(0);
550
- // Integer Impact Parameter Size (half of image)
551
- uint bmaxi=(uint)get_global_size(0);
552
-
553
- private float Trajectory[TRACKPOINTS];
554
-
555
- float m,rs,ri,re,tho;
556
- int raie,q;
557
-
558
- m=Mass;
559
- rs=2.e0f*m;
560
- ri=InternalRadius;
561
- re=ExternalRadius;
562
- tho=Angle;
563
- q=-2;
564
- raie=Line;
565
-
566
- float bmx,db,b,h;
567
- uint nh;
568
-
569
-
570
- // Autosize for image
571
- bmx=1.25e0f*re;
572
-
573
- // Angular step of integration
574
- h=4.e0f*PI/(float)TRACKPOINTS;
575
-
576
- // impact parameter
577
- b=(float)bi/(float)bmaxi*bmx;
578
- db=bmx/(2.e0f*(float)bmaxi);
579
-
580
- float up,vp,pp,us,vs,ps;
581
-
582
- up=0.e0f;
583
- vp=1.e0f;
584
-
585
- pp=0.e0f;
586
- nh=0;
587
-
588
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
589
-
590
- // b versus us
591
- float bvus=fabs(b/us);
592
- float bvus0=bvus;
593
- Trajectory[nh]=bvus;
594
-
595
- do
596
- {
597
- nh++;
598
- pp=ps;
599
- up=us;
600
- vp=vs;
601
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
602
- bvus=(float)fabs(b/us);
603
- Trajectory[nh]=bvus;
604
-
605
- } while ((bvus>=rs)&&(bvus<=bvus0));
606
-
607
-
608
- for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
609
- Trajectory[i]=0.e0f;
610
- }
611
-
612
-
613
- uint imx=(uint)(16*bi);
614
-
615
- for (uint i=0;i<imx;i++)
616
- {
617
- float zp=0.e0f,fp=0.e0f;
618
- float phi=2.e0f*PI/(float)imx*(float)i;
619
- float phd=atanp(cos(phi)*sin(tho),cos(tho));
620
- uint yi=(uint)((float)bi*sin(phi)+bmaxi);
621
- uint xi=(uint)((float)bi*cos(phi)+bmaxi);
622
-
623
- uint HalfLap=0,ExitOnImpact=0,ni;
624
- float php,nr,r;
625
-
626
- do
627
- {
628
- php=phd+(float)HalfLap*PI;
629
- nr=php/h;
630
- ni=(int)nr;
631
-
632
- if (ni<nh)
633
- {
634
- r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni];
635
- }
636
- else
637
- {
638
- r=Trajectory[ni];
639
- }
640
-
641
- if ((r<=re)&&(r>=ri))
642
- {
643
- ExitOnImpact=1;
644
- impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
645
- }
646
-
647
- HalfLap++;
648
-
649
- } while ((HalfLap<=2)&&(ExitOnImpact==0));
650
-
651
- zImage[yi+2*bmaxi*xi]=zp;
652
- fImage[yi+2*bmaxi*xi]=fp;
653
-
654
- }
655
-
656
- barrier(CLK_GLOBAL_MEM_FENCE);
657
-
658
- }
659
-
660
- __kernel void Original(__global float *zImage,__global float *fImage,
661
- uint Size,float Mass,float InternalRadius,
662
- float ExternalRadius,float Angle,
663
- int Line)
664
- {
665
- // Integer Impact Parameter Size (half of image)
666
- uint bmaxi=(uint)Size;
667
-
668
- float Trajectory[TRACKPOINTS];
669
-
670
- // Perform trajectory for each pixel
671
-
672
- float m,rs,ri,re,tho;
673
- int raie,q;
674
-
675
- m=Mass;
676
- rs=2.e0f*m;
677
- ri=InternalRadius;
678
- re=ExternalRadius;
679
- tho=Angle;
680
- q=-2;
681
- raie=Line;
682
-
683
- float bmx,db,b,h;
684
- uint nh;
685
-
686
- // Autosize for image
687
- bmx=1.25e0f*re;
688
-
689
- // Angular step of integration
690
- h=4.e0f*PI/(float)TRACKPOINTS;
691
-
692
- // Integer Impact Parameter ID
693
- for (int bi=0;bi<bmaxi;bi++)
694
- {
695
- // impact parameter
696
- b=(float)bi/(float)bmaxi*bmx;
697
- db=bmx/(2.e0f*(float)bmaxi);
698
-
699
- float up,vp,pp,us,vs,ps;
700
-
701
- up=0.e0f;
702
- vp=1.e0f;
703
-
704
- pp=0.e0f;
705
- nh=0;
706
-
707
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
708
-
709
- // b versus us
710
- float bvus=fabs(b/us);
711
- float bvus0=bvus;
712
- Trajectory[nh]=bvus;
713
-
714
- do
715
- {
716
- nh++;
717
- pp=ps;
718
- up=us;
719
- vp=vs;
720
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
721
- bvus=fabs(b/us);
722
- Trajectory[nh]=bvus;
723
-
724
- } while ((bvus>=rs)&&(bvus<=bvus0));
725
-
726
- for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
727
- Trajectory[i]=0.e0f;
728
- }
729
-
730
- int imx=(int)(16*bi);
731
-
732
- for (int i=0;i<imx;i++)
733
- {
734
- float zp=0.e0f,fp=0.e0f;
735
- float phi=2.e0f*PI/(float)imx*(float)i;
736
- float phd=atanp(cos(phi)*sin(tho),cos(tho));
737
- uint yi=(uint)((float)bi*sin(phi)+bmaxi);
738
- uint xi=(uint)((float)bi*cos(phi)+bmaxi);
739
-
740
- uint HalfLap=0,ExitOnImpact=0,ni;
741
- float php,nr,r;
742
-
743
- do
744
- {
745
- php=phd+(float)HalfLap*PI;
746
- nr=php/h;
747
- ni=(int)nr;
748
-
749
- if (ni<nh)
750
- {
751
- r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni];
752
- }
753
- else
754
- {
755
- r=Trajectory[ni];
756
- }
757
-
758
- if ((r<=re)&&(r>=ri))
759
- {
760
- ExitOnImpact=1;
761
- impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
762
- }
763
-
764
- HalfLap++;
765
-
766
- } while ((HalfLap<=2)&&(ExitOnImpact==0));
767
-
768
- zImage[yi+2*bmaxi*xi]=zp;
769
- fImage[yi+2*bmaxi*xi]=fp;
770
-
771
- }
772
-
773
- }
774
-
775
- barrier(CLK_GLOBAL_MEM_FENCE);
776
-
777
- }
778
- """
779
-
780
-
781
- def KernelCodeCuda():
782
- BlobCUDA = """
783
-
784
- #define PI (float)3.14159265359
785
- #define nbr 256
786
-
787
- #define EINSTEIN 0
788
- #define NEWTON 1
789
-
790
- #ifdef SETTRACKPOINTS
791
- #define TRACKPOINTS SETTRACKPOINTS
792
- #else
793
- #define TRACKPOINTS
794
- #endif
795
- __device__ float nothing(float x)
796
- {
797
- return(x);
798
- }
799
-
800
- __device__ float atanp(float x,float y)
801
- {
802
- float angle;
803
-
804
- angle=atan2(y,x);
805
-
806
- if (angle<0.e0f)
807
- {
808
- angle+=(float)2.e0f*PI;
809
- }
810
-
811
- return(angle);
812
- }
813
-
814
- __device__ float f(float v)
815
- {
816
- return(v);
817
- }
818
-
819
- #if PHYSICS == NEWTON
820
- __device__ float g(float u,float m,float b)
821
- {
822
- return (-u);
823
- }
824
- #else
825
- __device__ float g(float u,float m,float b)
826
- {
827
- return (3.e0f*m/b*pow(u,2)-u);
828
- }
829
- #endif
830
-
831
- __device__ void calcul(float *us,float *vs,float up,float vp,
832
- float h,float m,float b)
833
- {
834
- float c0,c1,c2,c3,d0,d1,d2,d3;
835
-
836
- c0=h*f(vp);
837
- c1=h*f(vp+c0/2.);
838
- c2=h*f(vp+c1/2.);
839
- c3=h*f(vp+c2);
840
- d0=h*g(up,m,b);
841
- d1=h*g(up+d0/2.,m,b);
842
- d2=h*g(up+d1/2.,m,b);
843
- d3=h*g(up+d2,m,b);
844
-
845
- *us=up+(c0+2.*c1+2.*c2+c3)/6.;
846
- *vs=vp+(d0+2.*d1+2.*d2+d3)/6.;
847
- }
848
-
849
- __device__ void rungekutta(float *ps,float *us,float *vs,
850
- float pp,float up,float vp,
851
- float h,float m,float b)
852
- {
853
- calcul(us,vs,up,vp,h,m,b);
854
- *ps=pp+h;
855
- }
856
-
857
- __device__ float decalage_spectral(float r,float b,float phi,
858
- float tho,float m)
859
- {
860
- return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
861
- }
862
-
863
- __device__ float spectre(float rf,int q,float b,float db,
864
- float h,float r,float m,float bss)
865
- {
866
- float flx;
867
-
868
- // flx=exp(q*log(r/m))*pow(rf,4)*b*db*h;
869
- flx=exp(q*log(r/m)+4.*log(rf))*b*db*h;
870
- return(flx);
871
- }
872
-
873
- __device__ float spectre_cn(float rf32,float b32,float db32,
874
- float h32,float r32,float m32,float bss32)
875
- {
876
-
877
- #define MYFLOAT float
878
-
879
- MYFLOAT rf=(MYFLOAT)(rf32);
880
- MYFLOAT b=(MYFLOAT)(b32);
881
- MYFLOAT db=(MYFLOAT)(db32);
882
- MYFLOAT h=(MYFLOAT)(h32);
883
- MYFLOAT r=(MYFLOAT)(r32);
884
- MYFLOAT m=(MYFLOAT)(m32);
885
- MYFLOAT bss=(MYFLOAT)(bss32);
886
-
887
- MYFLOAT flx;
888
- MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int;
889
- int fi,posfreq;
890
-
891
- #define planck 6.62e-34
892
- #define k 1.38e-23
893
- #define c2 9.e16
894
- #define temp 3.e7
895
- #define m_point 1.
896
-
897
- #define lplanck (log(6.62)-34.*log(10.))
898
- #define lk (log(1.38)-23.*log(10.))
899
- #define lc2 (log(9.)+16.*log(10.))
900
-
901
- MYFLOAT v=1.-3./r;
902
-
903
- qu=1./sqrt((1.-3./r)*r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))* 0.17157287525380988 )); // # noqa: #051
904
-
905
- temp_em=temp*sqrt(m)*exp(0.25*log(m_point)-0.75*log(r)-0.125*log(v)+0.25*log(fabs(qu)));
906
-
907
- flux_int=0.;
908
- flx=0.;
909
-
910
- for (fi=0;fi<nbr;fi++)
911
- {
912
- nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr;
913
- nu_rec=nu_em*rf;
914
- posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss);
915
- if ((posfreq>0)&&(posfreq<nbr))
916
- {
917
- // Initial version
918
- // flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.);
919
- // Version with log used
920
- //flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.);
921
- // flux_int*=pow(rf,3)*b*db*h;
922
- //flux_int*=exp(3.*log(rf))*b*db*h;
923
- flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.)*exp(3.*log(rf))*b*db*h;
924
-
925
- flx+=flux_int;
926
- }
927
- }
928
-
929
- return((float)(flx));
930
- }
931
-
932
- __device__ void impact(float phi,float r,float b,float tho,float m,
933
- float *zp,float *fp,
934
- int q,float db,
935
- float h,int raie)
936
- {
937
- float flx,rf,bss;
938
-
939
- rf=decalage_spectral(r,b,phi,tho,m);
940
-
941
- if (raie==0)
942
- {
943
- bss=1.e19;
944
- flx=spectre_cn(rf,b,db,h,r,m,bss);
945
- }
946
- else
947
- {
948
- bss=2.;
949
- flx=spectre(rf,q,b,db,h,r,m,bss);
950
- }
951
-
952
- *zp=1./rf;
953
- *fp=flx;
954
-
955
- }
956
-
957
- __global__ void EachPixel(float *zImage,float *fImage,
958
- float Mass,float InternalRadius,
959
- float ExternalRadius,float Angle,
960
- int Line)
961
- {
962
- uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x);
963
- uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y);
964
- uint sizex=(uint)gridDim.x*blockDim.x;
965
- uint sizey=(uint)gridDim.y*blockDim.y;
966
-
967
-
968
- // Perform trajectory for each pixel, exit on hit
969
-
970
- float m,rs,ri,re,tho;
971
- int q,raie;
972
-
973
- m=Mass;
974
- rs=2.*m;
975
- ri=InternalRadius;
976
- re=ExternalRadius;
977
- tho=Angle;
978
- q=-2;
979
- raie=Line;
980
-
981
- float bmx,db,b,h;
982
- float rp0,rpp,rps;
983
- float phi,phd;
984
- int nh;
985
- float zp,fp;
986
-
987
- // Autosize for image
988
- bmx=1.25*re;
989
- b=0.;
990
-
991
- h=4.e0f*PI/(float)TRACKPOINTS;
992
-
993
- // set origin as center of image
994
- float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
995
- float y=(float)yi-(float)(sizey/2)+(float)5e-1f;
996
- // angle extracted from cylindric symmetry
997
- phi=atanp(x,y);
998
- phd=atanp(cos(phi)*sin(tho),cos(tho));
999
-
1000
- float up,vp,pp,us,vs,ps;
1001
-
1002
- // impact parameter
1003
- b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
1004
- // step of impact parameter;
1005
- // db=bmx/(float)(sizex/2);
1006
- db=bmx/(float)(sizex);
1007
-
1008
- up=0.;
1009
- vp=1.;
1010
- pp=0.;
1011
- nh=0;
1012
-
1013
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
1014
-
1015
- rps=fabs(b/us);
1016
- rp0=rps;
1017
-
1018
- int ExitOnImpact=0;
1019
-
1020
- do
1021
- {
1022
- nh++;
1023
- pp=ps;
1024
- up=us;
1025
- vp=vs;
1026
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
1027
- rpp=rps;
1028
- rps=fabs(b/us);
1029
- ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>ri)&&(rps<re)?1:0;
1030
-
1031
- } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0));
1032
-
1033
- if (ExitOnImpact==1) {
1034
- impact(phi,rpp,b,tho,m,&zp,&fp,q,db,h,raie);
1035
- }
1036
- else
1037
- {
1038
- zp=0.e0f;
1039
- fp=0.e0f;
1040
- }
1041
-
1042
- __syncthreads();
1043
-
1044
- zImage[yi+sizex*xi]=(float)zp;
1045
- fImage[yi+sizex*xi]=(float)fp;
1046
- }
1047
-
1048
- __global__ void Pixel(float *zImage,float *fImage,
1049
- float *Trajectories,int *IdLast,
1050
- uint ImpactParameter,
1051
- float Mass,float InternalRadius,
1052
- float ExternalRadius,float Angle,
1053
- int Line)
1054
- {
1055
- uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x);
1056
- uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y);
1057
- uint sizex=(uint)gridDim.x*blockDim.x;
1058
- uint sizey=(uint)gridDim.y*blockDim.y;
1059
-
1060
- // Perform trajectory for each pixel
1061
-
1062
- float m,ri,re,tho;
1063
- int q,raie;
1064
-
1065
- m=Mass;
1066
- ri=InternalRadius;
1067
- re=ExternalRadius;
1068
- tho=Angle;
1069
- q=-2;
1070
- raie=Line;
1071
-
1072
- float bmx,db,b,h;
1073
- float phi,phd,php,nr,r;
1074
- float zp=0,fp=0;
1075
- // Autosize for image, 25% greater than external radius
1076
- bmx=1.25e0f*re;
1077
-
1078
- // Angular step of integration
1079
- h=4.e0f*PI/(float)TRACKPOINTS;
1080
-
1081
- // Step of Impact Parameter
1082
- db=bmx/(2.e0f*(float)ImpactParameter);
1083
-
1084
- // set origin as center of image
1085
- float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
1086
- float y=(float)yi-(float)(sizey/2)+(float)5e-1f;
1087
- // angle extracted from cylindric symmetry
1088
- phi=atanp(x,y);
1089
- phd=atanp(cos(phi)*sin(tho),cos(tho));
1090
-
1091
- // Real Impact Parameter
1092
- b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter;
1093
-
1094
- // Integer Impact Parameter
1095
- uint bi=(uint)sqrt(x*x+y*y);
1096
-
1097
- int HalfLap=0,ExitOnImpact=0,ni;
1098
-
1099
- if (bi<ImpactParameter)
1100
- {
1101
- do
1102
- {
1103
- php=phd+(float)HalfLap*PI;
1104
- nr=php/h;
1105
- ni=(int)nr;
1106
-
1107
- if (ni<IdLast[bi])
1108
- {
1109
- r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
1110
- }
1111
- else
1112
- {
1113
- r=Trajectories[bi*TRACKPOINTS+ni];
1114
- }
1115
-
1116
- if ((r<=re)&&(r>=ri))
1117
- {
1118
- ExitOnImpact=1;
1119
- impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
1120
- }
1121
-
1122
- HalfLap++;
1123
- } while ((HalfLap<=2)&&(ExitOnImpact==0));
1124
-
1125
- }
1126
-
1127
- zImage[yi+sizex*xi]=zp;
1128
- fImage[yi+sizex*xi]=fp;
1129
- }
1130
-
1131
- __global__ void Circle(float *Trajectories,int *IdLast,
1132
- float *zImage,float *fImage,
1133
- float Mass,float InternalRadius,
1134
- float ExternalRadius,float Angle,
1135
- int Line)
1136
- {
1137
- // Integer Impact Parameter ID
1138
- int bi=blockIdx.x*blockDim.x+threadIdx.x;
1139
- // Integer points on circle
1140
- int i=blockIdx.y*blockDim.y+threadIdx.y;
1141
- // Integer Impact Parameter Size (half of image)
1142
- int bmaxi=gridDim.x*blockDim.x;
1143
- // Integer Points on circle
1144
- int imx=gridDim.y*blockDim.y;
1145
-
1146
- // Perform trajectory for each pixel
1147
-
1148
- float m,ri,re,tho;
1149
- int q,raie;
1150
-
1151
- m=Mass;
1152
- ri=InternalRadius;
1153
- re=ExternalRadius;
1154
- tho=Angle;
1155
- raie=Line;
1156
-
1157
- float bmx,db,b,h;
1158
- float phi,phd;
1159
- float zp=0,fp=0;
1160
-
1161
- // Autosize for image
1162
- bmx=1.25e0f*re;
1163
-
1164
- // Angular step of integration
1165
- h=4.e0f*PI/(float)TRACKPOINTS;
1166
-
1167
- // impact parameter
1168
- b=(float)bi/(float)bmaxi*bmx;
1169
- db=bmx/(2.e0f*(float)bmaxi);
1170
-
1171
- phi=2.e0f*PI/(float)imx*(float)i;
1172
- phd=atanp(cos(phi)*sin(tho),cos(tho));
1173
- int yi=(int)((float)bi*sin(phi))+bmaxi;
1174
- int xi=(int)((float)bi*cos(phi))+bmaxi;
1175
-
1176
- int HalfLap=0,ExitOnImpact=0,ni;
1177
- float php,nr,r;
1178
-
1179
- do
1180
- {
1181
- php=phd+(float)HalfLap*PI;
1182
- nr=php/h;
1183
- ni=(int)nr;
1184
-
1185
- if (ni<IdLast[bi])
1186
- {
1187
- r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
1188
- }
1189
- else
1190
- {
1191
- r=Trajectories[bi*TRACKPOINTS+ni];
1192
- }
1193
-
1194
- if ((r<=re)&&(r>=ri))
1195
- {
1196
- ExitOnImpact=1;
1197
- impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
1198
- }
1199
-
1200
- HalfLap++;
1201
- } while ((HalfLap<=2)&&(ExitOnImpact==0));
1202
-
1203
- zImage[yi+2*bmaxi*xi]=zp;
1204
- fImage[yi+2*bmaxi*xi]=fp;
1205
-
1206
- }
1207
-
1208
- __global__ void Trajectory(float *Trajectories,int *IdLast,
1209
- float Mass,float InternalRadius,
1210
- float ExternalRadius,float Angle,
1211
- int Line)
1212
- {
1213
- // Integer Impact Parameter ID
1214
- int bi=blockIdx.x*blockDim.x+threadIdx.x;
1215
- // Integer Impact Parameter Size (half of image)
1216
- int bmaxi=gridDim.x*blockDim.x;
1217
-
1218
- // Perform trajectory for each pixel
1219
-
1220
- float m,rs,re;
1221
-
1222
- m=Mass;
1223
- rs=2.e0f*m;
1224
- re=ExternalRadius;
1225
-
1226
- float bmx,b,h;
1227
- int nh;
1228
-
1229
- // Autosize for image
1230
- bmx=1.25e0f*re;
1231
-
1232
- // Angular step of integration
1233
- h=4.e0f*PI/(float)TRACKPOINTS;
1234
-
1235
- // impact parameter
1236
- b=(float)bi/(float)bmaxi*bmx;
1237
-
1238
- float up,vp,pp,us,vs,ps;
1239
-
1240
- up=0.e0f;
1241
- vp=1.e0f;
1242
- pp=0.e0f;
1243
- nh=0;
1244
-
1245
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
1246
-
1247
- // b versus us
1248
- float bvus=fabs(b/us);
1249
- float bvus0=bvus;
1250
- Trajectories[bi*TRACKPOINTS+nh]=bvus;
1251
-
1252
- do
1253
- {
1254
- nh++;
1255
- pp=ps;
1256
- up=us;
1257
- vp=vs;
1258
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
1259
- bvus=fabs(b/us);
1260
- Trajectories[bi*TRACKPOINTS+nh]=bvus;
1261
-
1262
- } while ((bvus>=rs)&&(bvus<=bvus0));
1263
-
1264
- IdLast[bi]=nh;
1265
-
1266
- }
1267
-
1268
- __global__ void EachCircle(float *zImage,float *fImage,
1269
- float Mass,float InternalRadius,
1270
- float ExternalRadius,float Angle,
1271
- int Line)
1272
- {
1273
- // Integer Impact Parameter ID
1274
- int bi=blockIdx.x*blockDim.x+threadIdx.x;
1275
-
1276
- // Integer Impact Parameter Size (half of image)
1277
- int bmaxi=gridDim.x*blockDim.x;
1278
-
1279
- float Trajectory[2048];
1280
-
1281
- // Perform trajectory for each pixel
1282
-
1283
- float m,rs,ri,re,tho;
1284
- int raie,q;
1285
-
1286
- m=Mass;
1287
- rs=2.*m;
1288
- ri=InternalRadius;
1289
- re=ExternalRadius;
1290
- tho=Angle;
1291
- q=-2;
1292
- raie=Line;
1293
-
1294
- float bmx,db,b,h;
1295
- int nh;
1296
-
1297
- // Autosize for image
1298
- bmx=1.25e0f*re;
1299
-
1300
- // Angular step of integration
1301
- h=4.e0f*PI/(float)TRACKPOINTS;
1302
-
1303
- // impact parameter
1304
- b=(float)bi/(float)bmaxi*bmx;
1305
- db=bmx/(2.e0f*(float)bmaxi);
1306
-
1307
- float up,vp,pp,us,vs,ps;
1308
-
1309
- up=0.;
1310
- vp=1.;
1311
- pp=0.;
1312
- nh=0;
1313
-
1314
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
1315
-
1316
- // b versus us
1317
- float bvus=fabs(b/us);
1318
- float bvus0=bvus;
1319
- Trajectory[nh]=bvus;
1320
-
1321
- do
1322
- {
1323
- nh++;
1324
- pp=ps;
1325
- up=us;
1326
- vp=vs;
1327
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
1328
- bvus=fabs(b/us);
1329
- Trajectory[nh]=bvus;
1330
-
1331
- } while ((bvus>=rs)&&(bvus<=bvus0));
1332
-
1333
- int imx=(int)(16*bi);
1334
-
1335
- for (int i=0;i<imx;i++)
1336
- {
1337
- float zp=0,fp=0;
1338
- float phi=2.*PI/(float)imx*(float)i;
1339
- float phd=atanp(cos(phi)*sin(tho),cos(tho));
1340
- uint yi=(uint)((float)bi*sin(phi)+bmaxi);
1341
- uint xi=(uint)((float)bi*cos(phi)+bmaxi);
1342
-
1343
- int HalfLap=0,ExitOnImpact=0,ni;
1344
- float php,nr,r;
1345
-
1346
- do
1347
- {
1348
- php=phd+(float)HalfLap*PI;
1349
- nr=php/h;
1350
- ni=(int)nr;
1351
-
1352
- if (ni<nh)
1353
- {
1354
- r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni];
1355
- }
1356
- else
1357
- {
1358
- r=Trajectory[ni];
1359
- }
1360
-
1361
- if ((r<=re)&&(r>=ri))
1362
- {
1363
- ExitOnImpact=1;
1364
- impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
1365
- }
1366
-
1367
- HalfLap++;
1368
-
1369
- } while ((HalfLap<=2)&&(ExitOnImpact==0));
1370
-
1371
- __syncthreads();
1372
-
1373
- zImage[yi+2*bmaxi*xi]=zp;
1374
- fImage[yi+2*bmaxi*xi]=fp;
1375
-
1376
- }
1377
-
1378
- }
1379
-
1380
- __global__ void Original(float *zImage,float *fImage,
1381
- uint Size,float Mass,float InternalRadius,
1382
- float ExternalRadius,float Angle,
1383
- int Line)
1384
- {
1385
- // Integer Impact Parameter Size (half of image)
1386
- uint bmaxi=(uint)Size;
1387
-
1388
- float Trajectory[TRACKPOINTS];
1389
-
1390
- // Perform trajectory for each pixel
1391
-
1392
- float m,rs,ri,re,tho;
1393
- int raie,q;
1394
-
1395
- m=Mass;
1396
- rs=2.e0f*m;
1397
- ri=InternalRadius;
1398
- re=ExternalRadius;
1399
- tho=Angle;
1400
- q=-2;
1401
- raie=Line;
1402
-
1403
- float bmx,db,b,h;
1404
- int nh;
1405
-
1406
- // Autosize for image
1407
- bmx=1.25e0f*re;
1408
-
1409
- // Angular step of integration
1410
- h=4.e0f*PI/(float)TRACKPOINTS;
1411
-
1412
- // Integer Impact Parameter ID
1413
- for (int bi=0;bi<bmaxi;bi++)
1414
- {
1415
- // impact parameter
1416
- b=(float)bi/(float)bmaxi*bmx;
1417
- db=bmx/(2.e0f*(float)bmaxi);
1418
-
1419
- float up,vp,pp,us,vs,ps;
1420
-
1421
- up=0.;
1422
- vp=1.;
1423
- pp=0.;
1424
- nh=0;
1425
-
1426
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
1427
-
1428
- // b versus us
1429
- float bvus=fabs(b/us);
1430
- float bvus0=bvus;
1431
- Trajectory[nh]=bvus;
1432
-
1433
- do
1434
- {
1435
- nh++;
1436
- pp=ps;
1437
- up=us;
1438
- vp=vs;
1439
- rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
1440
- bvus=fabs(b/us);
1441
- Trajectory[nh]=bvus;
1442
-
1443
- } while ((bvus>=rs)&&(bvus<=bvus0));
1444
-
1445
- for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
1446
- Trajectory[i]=0.e0f;
1447
- }
1448
-
1449
- int imx=(int)(16*bi);
1450
-
1451
- for (int i=0;i<imx;i++)
1452
- {
1453
- float zp=0,fp=0;
1454
- float phi=2.e0f*PI/(float)imx*(float)i;
1455
- float phd=atanp(cos(phi)*sin(tho),cos(tho));
1456
- uint yi=(uint)((float)bi*sin(phi)+bmaxi);
1457
- uint xi=(uint)((float)bi*cos(phi)+bmaxi);
1458
-
1459
- int HalfLap=0,ExitOnImpact=0,ni;
1460
- float php,nr,r;
1461
-
1462
- do
1463
- {
1464
- php=phd+(float)HalfLap*PI;
1465
- nr=php/h;
1466
- ni=(int)nr;
1467
-
1468
- if (ni<nh)
1469
- {
1470
- r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni];
1471
- }
1472
- else
1473
- {
1474
- r=Trajectory[ni];
1475
- }
1476
-
1477
- if ((r<=re)&&(r>=ri))
1478
- {
1479
- ExitOnImpact=1;
1480
- impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
1481
- }
1482
-
1483
- HalfLap++;
1484
-
1485
- } while ((HalfLap<=2)&&(ExitOnImpact==0));
1486
-
1487
- zImage[yi+2*bmaxi*xi]=zp;
1488
- fImage[yi+2*bmaxi*xi]=fp;
1489
-
1490
- }
1491
-
1492
- }
1493
-
1494
- }
1495
- """
1496
- return BlobCUDA
1497
-
1498
-
1499
- # def ImageOutput(sigma,prefix,Colors):
1500
- # import matplotlib.pyplot as plt
1501
- # start_time=time.time()
1502
- # if Colors == 'Red2Yellow':
1503
- # plt.imsave("%s.png" % prefix, sigma, cmap='afmhot')
1504
- # else:
1505
- # plt.imsave("%s.png" % prefix, sigma, cmap='Greys_r')
1506
- # save_time = time.time()-start_time
1507
- # print("Save image as %s.png file" % prefix)
1508
- # print("Save Time : %f" % save_time)
1509
-
1510
-
1511
def ImageOutput(sigma, prefix, Colors):
    """Save a 2-D array as an 8-bit JPEG named ``<prefix>.jpg``.

    The values of *sigma* are rescaled linearly so that its minimum maps to
    0 and its maximum to 255 before being written with Pillow.

    Parameters
    ----------
    sigma : numpy.ndarray
        Image values to export.
    prefix : str
        Output path without extension; ``.jpg`` is appended.
    Colors : str
        Accepted for interface compatibility only; this implementation does
        not read it.
    """
    from PIL import Image

    Min = sigma.min()
    Span = sigma.max() - Min

    if Span > 0:
        # Normalize values as 8-bit integers
        SigmaInt = (255 * (sigma - Min) / Span).astype("uint8")
    else:
        # Flat (or all-NaN) image: the original 0/0 division produced NaN
        # pixels and a RuntimeWarning; emit a uniformly black image instead.
        SigmaInt = numpy.zeros(sigma.shape, dtype="uint8")

    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)
1520
-
1521
-
1522
def BlackHoleCL(zImage, fImage, InputCL):
    """Compute the black-hole images on an OpenCL device.

    Parameters
    ----------
    zImage, fImage : numpy.ndarray
        2-D float32 arrays, filled in place with the kernel results.
    InputCL : dict
        Run settings. Keys read here: ``Device``, ``Mass``,
        ``InternalRadius``, ``ExternalRadius``, ``Angle``, ``Method``,
        ``TrackPoints``, ``Physics``, ``NoImage``, ``TrackSave``,
        ``BlackBody`` and ``Size``.

    Returns
    -------
    float
        Wall-clock seconds spent on kernel execution plus copy-back.

    Notes
    -----
    Exits the process (status 1) when no device has index ``Device``.
    Any ``Method`` other than ``EachPixel``, ``Original``, ``EachCircle`` or
    ``TrajectoCircle`` is run as ``TrajectoPixel``.  When trajectories are
    computed and ``TrackSave`` is set (and ``NoImage`` is not), they are
    written to ``TrouNoirTrajectories.csv``.
    """
    Device = InputCL["Device"]
    Mass = InputCL["Mass"]
    InternalRadius = InputCL["InternalRadius"]
    ExternalRadius = InputCL["ExternalRadius"]
    Angle = InputCL["Angle"]
    Method = InputCL["Method"]
    TrackPoints = InputCL["TrackPoints"]
    Physics = InputCL["Physics"]
    NoImage = InputCL["NoImage"]
    TrackSave = InputCL["TrackSave"]

    PhysicsList = DictionariesAPI()

    # Spectrum selector passed to the kernels:
    # 0 = black body, 1 = monochromatic line
    Line = 0 if InputCL["BlackBody"] else 1

    HalfSize = int(InputCL["Size"] / 2)
    Trajectories = numpy.zeros((HalfSize, TrackPoints), dtype=numpy.float32)
    IdLast = numpy.zeros(HalfSize, dtype=numpy.int32)

    # Every method except these three goes through the Trajectory kernel
    # (the final ``else`` below is the TrajectoPixel default), so the
    # trajectory buffers must exist for all of them.
    UsesTrajectories = Method not in ("EachPixel", "Original", "EachCircle")

    # Look for the requested device in the flat list of all devices
    Id = 0
    XPU = None
    PF4XPU = None
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id == Device:
                PF4XPU = platform.name
                XPU = device
                print("CPU/GPU selected: ", device.name.lstrip())
            Id += 1

    if XPU is None:
        # Id now holds the total number of devices enumerated
        print("No XPU #%i found in all of %i devices, sorry..." % (Device, Id))
        sys.exit(1)

    ctx = cl.Context([XPU])
    queue = cl.CommandQueue(
        ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
    )

    BuildOptions = "-DPHYSICS=%i -DSETTRACKPOINTS=%i " % (
        PhysicsList[Physics],
        TrackPoints,
    )

    print("My Platform is ", PF4XPU)

    if any(
        tag in PF4XPU for tag in ("Intel", "Experimental", "Clover", "Portable")
    ):
        print("No extra options for Intel and Clover!")
    else:
        BuildOptions = BuildOptions + " -cl-mad-enable"

    # Named Program (not BlackHoleCL) so the function name is not shadowed
    Program = cl.Program(ctx, BlobOpenCL).build(options=BuildOptions)

    # Memory flags available for buffers
    mf = cl.mem_flags

    if UsesTrajectories:
        # Written by the Trajectory kernel and read back by Circle/Pixel,
        # hence READ_WRITE rather than WRITE_ONLY.
        TrajectoriesCL = cl.Buffer(
            ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=Trajectories
        )
        IdLastCL = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=IdLast)

    zImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=zImage)
    fImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=fImage)

    # Scalar arguments shared, in this order, by every kernel
    PhysicsArgs = (
        numpy.float32(Mass),
        numpy.float32(InternalRadius),
        numpy.float32(ExternalRadius),
        numpy.float32(Angle),
        numpy.int32(Line),
    )
    ImageShape = (zImage.shape[0], zImage.shape[1])

    start_time = time.time()

    if Method == "EachPixel":
        CLLaunch = Program.EachPixel(
            queue, ImageShape, None, zImageCL, fImageCL, *PhysicsArgs
        )
    elif Method == "Original":
        CLLaunch = Program.Original(
            queue,
            (1,),
            None,
            zImageCL,
            fImageCL,
            numpy.uint32(zImage.shape[0] / 2),
            *PhysicsArgs
        )
    elif Method == "EachCircle":
        CLLaunch = Program.EachCircle(
            queue,
            (int(zImage.shape[0] / 2),),
            None,
            zImageCL,
            fImageCL,
            *PhysicsArgs
        )
    else:
        # Both remaining methods first fill the trajectory table.  The queue
        # is created without the out-of-order property, so the second kernel
        # is enqueued behind this one.
        Program.Trajectory(
            queue,
            (Trajectories.shape[0],),
            None,
            TrajectoriesCL,
            IdLastCL,
            *PhysicsArgs
        )

        if Method == "TrajectoCircle":
            CLLaunch = Program.Circle(
                queue,
                (Trajectories.shape[0], int(zImage.shape[0] * 4)),
                None,
                TrajectoriesCL,
                IdLastCL,
                zImageCL,
                fImageCL,
                *PhysicsArgs
            )
        else:
            # Default method: TrajectoPixel
            CLLaunch = Program.Pixel(
                queue,
                ImageShape,
                None,
                zImageCL,
                fImageCL,
                TrajectoriesCL,
                IdLastCL,
                numpy.uint32(Trajectories.shape[0]),
                *PhysicsArgs
            )
    CLLaunch.wait()

    compute = time.time() - start_time

    cl.enqueue_copy(queue, zImage, zImageCL).wait()
    cl.enqueue_copy(queue, fImage, fImageCL).wait()
    if UsesTrajectories:
        cl.enqueue_copy(queue, Trajectories, TrajectoriesCL).wait()
        cl.enqueue_copy(queue, IdLast, IdLastCL).wait()
    elapsed = time.time() - start_time
    print("\nCompute Time : %f" % compute)
    print("Elapsed Time : %f\n" % elapsed)

    # Report the position (relative to image centre) and value of each maximum
    for Label, Picture in (("Z", zImage), ("Flux", fImage)):
        Peak = Picture.max()
        Rows, Cols = numpy.where(Picture[:, :] == Peak)
        print(
            "%s max @(%f,%f) : %f"
            % (
                Label,
                1.0 * Cols[0] / Picture.shape[1] - 0.5,
                1.0 * Rows[0] / Picture.shape[0] - 0.5,
                Peak,
            )
        )
    zImageCL.release()
    fImageCL.release()

    if UsesTrajectories:
        if TrackSave and not NoImage:
            # Integer-based sampling always yields exactly TrackPoints angles;
            # arange(0, 4*pi, step) can produce one extra sample through
            # floating-point rounding and break the reshape.
            AngleStep = 4 * numpy.pi / TrackPoints
            Angles = (numpy.arange(TrackPoints) * AngleStep).reshape(1, TrackPoints)
            numpy.savetxt(
                "TrouNoirTrajectories.csv",
                numpy.transpose(numpy.concatenate((Angles, Trajectories), axis=0)),
                delimiter=" ",
                fmt="%.2e",
            )

        TrajectoriesCL.release()
        IdLastCL.release()

    return elapsed
1763
-
1764
-
1765
def BlackHoleCUDA(zImage, fImage, InputCL):
    """Compute the black-hole images on a CUDA device through PyCUDA.

    Parameters
    ----------
    zImage, fImage : numpy.ndarray
        2-D float32 arrays, filled in place with the kernel results.
    InputCL : dict
        Run settings. Keys read here: ``Device``, ``Mass``,
        ``InternalRadius``, ``ExternalRadius``, ``Angle``, ``Method``,
        ``TrackPoints``, ``Physics``, ``Threads``, ``BlackBody``, ``Size``
        and, optionally, ``NoImage`` and ``TrackSave`` (both default to
        ``False``).

    Returns
    -------
    float
        Wall-clock seconds spent on kernel execution plus copy-back.

    Notes
    -----
    Exits the process (status 1) when PyCUDA cannot be imported or when no
    CUDA device has index ``Device``.  A kernel compilation failure is
    reported and re-raised.  Any ``Method`` other than ``EachPixel``,
    ``EachCircle``, ``Original`` or ``TrajectoCircle`` is run as
    ``TrajectoPixel``.  As in the OpenCL path, trajectories are written to
    ``TrouNoirTrajectories.csv`` only when ``TrackSave`` is set and
    ``NoImage`` is not.
    """
    Device = InputCL["Device"]
    Mass = InputCL["Mass"]
    InternalRadius = InputCL["InternalRadius"]
    ExternalRadius = InputCL["ExternalRadius"]
    Angle = InputCL["Angle"]
    Method = InputCL["Method"]
    TrackPoints = InputCL["TrackPoints"]
    Physics = InputCL["Physics"]
    Threads = InputCL["Threads"]
    # Previously NoImage was picked up from a script-level global, which
    # raised NameError when this function was called from another module.
    NoImage = InputCL.get("NoImage", False)
    TrackSave = InputCL.get("TrackSave", False)

    PhysicsList = DictionariesAPI()

    # Spectrum selector passed to the kernels:
    # 0 = black body, 1 = monochromatic line
    Line = 0 if InputCL["BlackBody"] else 1

    HalfSize = int(InputCL["Size"] / 2)
    Trajectories = numpy.zeros((HalfSize, TrackPoints), dtype=numpy.float32)
    IdLast = numpy.zeros(HalfSize, dtype=numpy.int32)

    # Every method except these three runs the Trajectory kernel
    UsesTrajectories = Method not in ("EachPixel", "EachCircle", "Original")

    try:
        # For PyCUDA import
        import pycuda.driver as cuda
        from pycuda.compiler import SourceModule
    except ImportError:
        print("Platform does not seem to support CUDA")
        sys.exit(1)

    cuda.init()
    DeviceCount = cuda.Device.count()
    if not 0 <= Device < DeviceCount:
        print(
            "No CUDA device #%i found in all of %i devices, sorry..."
            % (Device, DeviceCount)
        )
        sys.exit(1)
    XPU = cuda.Device(Device)
    print("GPU selected %s" % XPU.name())

    Context = XPU.make_context()
    Allocations = []

    def Upload(HostArray):
        # Allocate a device buffer of the same byte size and copy the data in
        DeviceBuffer = cuda.mem_alloc(HostArray.size * HostArray.dtype.itemsize)
        Allocations.append(DeviceBuffer)
        cuda.memcpy_htod(DeviceBuffer, HostArray)
        return DeviceBuffer

    try:
        try:
            mod = SourceModule(
                KernelCodeCuda(),
                options=[
                    "--compiler-options",
                    "-DPHYSICS=%i -DSETTRACKPOINTS=%i"
                    % (PhysicsList[Physics], TrackPoints),
                ],
            )
        except Exception:
            # Report, then let the real compiler error propagate instead of
            # failing later on an undefined module.
            print("Compilation seems to break")
            raise
        print("Compilation seems to be OK")

        TrajectoriesCU = Upload(Trajectories)
        zImageCU = Upload(zImage)
        # The original uploaded fImage into zImageCU, leaving fImageCU
        # uninitialised.
        fImageCU = Upload(fImage)
        IdLastCU = Upload(IdLast)

        # Scalar arguments shared, in this order, by every kernel
        PhysicsArgs = (
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
        )
        # NOTE(review): grid sizes are truncated integer divisions, so sizes
        # that are not multiples of Threads leave part of the image
        # uncovered -- confirm callers always use compatible sizes.
        GridX = int(zImage.shape[0] / Threads)
        GridY = int(zImage.shape[1] / Threads)
        TrackGrid = (int(Trajectories.shape[0] / Threads), 1)

        start_time = time.time()

        if Method == "EachPixel":
            mod.get_function("EachPixel")(
                zImageCU,
                fImageCU,
                *PhysicsArgs,
                grid=(GridX, GridY),
                block=(Threads, Threads, 1)
            )
        elif Method == "EachCircle":
            mod.get_function("EachCircle")(
                zImageCU,
                fImageCU,
                *PhysicsArgs,
                grid=(int(zImage.shape[0] / Threads / 2), 1),
                block=(Threads, 1, 1)
            )
        elif Method == "Original":
            mod.get_function("Original")(
                zImageCU,
                fImageCU,
                numpy.uint32(zImage.shape[0] / 2),
                *PhysicsArgs,
                grid=(1, 1),
                block=(1, 1, 1)
            )
        else:
            # Both remaining methods first fill the trajectory table
            mod.get_function("Trajectory")(
                TrajectoriesCU,
                IdLastCU,
                *PhysicsArgs,
                grid=TrackGrid,
                block=(Threads, 1, 1)
            )

            if Method == "TrajectoCircle":
                mod.get_function("Circle")(
                    TrajectoriesCU,
                    IdLastCU,
                    zImageCU,
                    fImageCU,
                    *PhysicsArgs,
                    grid=(TrackGrid[0], int(zImage.shape[0] * 4 / Threads)),
                    block=(Threads, Threads, 1)
                )
            else:
                # Default method: TrajectoPixel
                mod.get_function("Pixel")(
                    zImageCU,
                    fImageCU,
                    TrajectoriesCU,
                    IdLastCU,
                    numpy.uint32(Trajectories.shape[0]),
                    *PhysicsArgs,
                    grid=(GridX, GridY, 1),
                    block=(Threads, Threads, 1)
                )

        Context.synchronize()

        compute = time.time() - start_time

        cuda.memcpy_dtoh(zImage, zImageCU)
        cuda.memcpy_dtoh(fImage, fImageCU)
        if UsesTrajectories:
            cuda.memcpy_dtoh(Trajectories, TrajectoriesCU)
        elapsed = time.time() - start_time
    finally:
        # Release device memory while the context is still current, then
        # drop the context even when a step above failed.
        for DeviceBuffer in Allocations:
            DeviceBuffer.free()
        Context.pop()
        Context.detach()

    print("\nCompute Time : %f" % compute)
    print("Elapsed Time : %f\n" % elapsed)

    # Report the position (relative to image centre) and value of each maximum
    for Label, Picture in (("Z", zImage), ("Flux", fImage)):
        Peak = Picture.max()
        Rows, Cols = numpy.where(Picture[:, :] == Peak)
        print(
            "%s max @(%f,%f) : %f"
            % (
                Label,
                1.0 * Cols[0] / Picture.shape[1] - 0.5,
                1.0 * Rows[0] / Picture.shape[0] - 0.5,
                Peak,
            )
        )

    if UsesTrajectories and TrackSave and not NoImage:
        # Integer-based sampling always yields exactly TrackPoints angles;
        # arange(0, 4*pi, step) can produce one extra sample through
        # floating-point rounding and break the reshape.
        AngleStep = 4 * numpy.pi / TrackPoints
        Angles = (numpy.arange(TrackPoints) * AngleStep).reshape(1, TrackPoints)
        numpy.savetxt(
            "TrouNoirTrajectories.csv",
            numpy.transpose(numpy.concatenate((Angles, Trajectories), axis=0)),
            delimiter=" ",
            fmt="%.2e",
        )

    return elapsed
1989
-
1990
-
1991
if __name__ == "__main__":
    # Command-line driver: parse options, list the available devices, run the
    # selected backend and (unless disabled) export both images.
    # The settings are deliberately kept as script-level names rather than
    # wrapped in a function.

    # Default device: first one!
    Device = 0
    # Default implementation: OpenCL, most versatile!
    GpuStyle = "OpenCL"
    Mass = 1.0
    # Internal radius of the disc: 6 * Mass
    InternalRadius = 6.0 * Mass
    # External radius of the disc
    ExternalRadius = 12.0
    # Default viewing angle of 10 degrees, stored as (90 - value) in radians
    Angle = numpy.pi / 180.0 * (90.0 - 10.0)
    # Radiation of disc : BlackBody or Monochromatic
    BlackBody = False
    # Size of image
    Size = 1024
    # Variable Type
    VariableType = "FP32"
    # NOTE(review): not read anywhere in this block -- confirm it is unused
    # elsewhere before removing.
    q = -2
    # Method of resolution
    Method = "TrajectoPixel"
    # Colors for output image
    Colors = "Greyscale"
    # Physics
    Physics = "Einstein"
    # No output as image
    NoImage = False
    # Threads in CUDA
    Threads = 32
    # Trackpoints of trajectories
    TrackPoints = 2048
    # Tracksave of trajectories
    TrackSave = False

    HowToUse = "%s -h [Help] -b [BlackBodyEmission] -j [TrackSave] -n [NoImage] -p <Einstein/Newton> -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -a <AngleAboveDisc> -d <DeviceId> -c <Greyscale/Red2Yellow> -g <CUDA/OpenCL> -o <EachPixel/TrajectoCircle/TrajectoPixel/EachCircle/Original> -t <ThreadsInCuda> -v <FP32/FP64> -k <TrackPoints>"  # noqa: E501

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            "hbnjs:m:i:x:a:d:g:v:o:t:c:p:k:",
            [
                "tracksave",
                "blackbody",
                "noimage",
                "camera",
                "size=",
                "mass=",
                "internal=",
                "external=",
                "angle=",
                "device=",
                "gpustyle=",
                "variabletype=",
                "method=",
                "threads=",
                "colors=",
                "physics=",
                "trackpoints=",
            ],
        )
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    # NOTE(review): Devices is never populated in this block, so Alu stays
    # empty; kept in case other code reads these names.
    Devices = []
    Alu = {}

    for opt, arg in opts:
        if opt == "-h":
            print(HowToUse % sys.argv[0])

            print("\nInformations about devices detected under OpenCL API:")
            # For PyOpenCL import
            try:
                Id = 0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        # deviceType=cl.device_type.to_string(device.type)
                        deviceType = "xPU"
                        print(
                            "Device #%i from %s of type %s : %s"
                            % (
                                Id,
                                platform.vendor.lstrip(),
                                deviceType,
                                device.name.lstrip(),
                            )
                        )
                        Id = Id + 1

            except Exception:
                print("Your platform does not seem to support OpenCL")

            print("\nInformations about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda

                cuda.init()
                for Id in range(cuda.Device.count()):
                    device = cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id, device.name()))
            except Exception:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt in ("-d", "--device"):
            # Devices.append(int(arg))
            Device = int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-v", "--variabletype"):
            VariableType = arg
        elif opt in ("-s", "--size"):
            Size = int(arg)
        elif opt in ("-k", "--trackpoints"):
            TrackPoints = int(arg)
        elif opt in ("-m", "--mass"):
            Mass = float(arg)
        elif opt in ("-i", "--internal"):
            InternalRadius = float(arg)
        elif opt in ("-x", "--external"):
            # The short option declared to getopt (and shown in the usage
            # text) is -x; the handler used to test "-e", so -x was silently
            # ignored.
            ExternalRadius = float(arg)
        elif opt in ("-a", "--angle"):
            Angle = numpy.pi / 180.0 * (90.0 - float(arg))
        elif opt in ("-b", "--blackbody"):
            BlackBody = True
        elif opt in ("-j", "--tracksave"):
            TrackSave = True
        elif opt in ("-n", "--noimage"):
            NoImage = True
        elif opt in ("-o", "--method"):
            Method = arg
        elif opt in ("-t", "--threads"):
            Threads = int(arg)
        elif opt in ("-c", "--colors"):
            Colors = arg
        elif opt in ("-p", "--physics"):
            Physics = arg

    print("Device Identification selected : %s" % Device)
    print("GpuStyle used : %s" % GpuStyle)
    print("VariableType : %s" % VariableType)
    print("Size : %i" % Size)
    print("Mass : %f" % Mass)
    print("Internal Radius : %f" % InternalRadius)
    print("External Radius : %f" % ExternalRadius)
    print("Angle with normal of (in radians) : %f" % Angle)
    print("Black Body Disc Emission (monochromatic instead) : %s" % BlackBody)
    print("Method of resolution : %s" % Method)
    print("Colors for output images : %s" % Colors)
    print("Physics used for Trajectories : %s" % Physics)
    print("Trackpoints of Trajectories : %i" % TrackPoints)
    print("Tracksave of Trajectories : %i" % TrackSave)

    if GpuStyle == "CUDA":
        print("\nSelection of CUDA device")
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device = cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id, device.name()))
                if Id in Devices:
                    Alu[Id] = "GPU"

        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle == "OpenCL":
        print("\nSelection of OpenCL device")
        try:
            # For PyOpenCL import
            import pyopencl as cl

            Id = 0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    # deviceType=cl.device_type.to_string(device.type)
                    deviceType = "xPU"
                    print(
                        "Device #%i from %s of type %s : %s"
                        % (
                            Id,
                            platform.vendor.lstrip().rstrip(),
                            deviceType,
                            device.name.lstrip().rstrip(),
                        )
                    )

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id] = deviceType
                    Id = Id + 1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    zImage = numpy.zeros((Size, Size), dtype=numpy.float32)
    fImage = numpy.zeros((Size, Size), dtype=numpy.float32)

    # Settings bundle handed to the backend functions
    InputCL = {}
    InputCL["Device"] = Device
    InputCL["GpuStyle"] = GpuStyle
    InputCL["VariableType"] = VariableType
    InputCL["Size"] = Size
    InputCL["Mass"] = Mass
    InputCL["InternalRadius"] = InternalRadius
    InputCL["ExternalRadius"] = ExternalRadius
    InputCL["Angle"] = Angle
    InputCL["BlackBody"] = BlackBody
    InputCL["Method"] = Method
    InputCL["TrackPoints"] = TrackPoints
    InputCL["Physics"] = Physics
    InputCL["Threads"] = Threads
    InputCL["NoImage"] = NoImage
    InputCL["TrackSave"] = TrackSave

    # Anything other than "OpenCL" is routed to the CUDA backend
    if GpuStyle == "OpenCL":
        duration = BlackHoleCL(zImage, fImage, InputCL)
    else:
        duration = BlackHoleCUDA(zImage, fImage, InputCL)

    Hostname = gethostname()
    Date = time.strftime("%Y%m%d_%H%M%S")
    ImageInfo = "%s_Device%i_%s_%s" % (Method, Device, Hostname, Date)

    if not NoImage:
        ImageOutput(zImage, "TrouNoirZ_%s" % ImageInfo, Colors)
        ImageOutput(fImage, "TrouNoirF_%s" % ImageInfo, Colors)