pyopencl 2024.2.2__cp39-cp39-win_amd64.whl → 2024.2.5__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (99) hide show
  1. pyopencl/__init__.py +16 -4
  2. pyopencl/_cl.cp39-win_amd64.pyd +0 -0
  3. pyopencl/algorithm.py +3 -1
  4. pyopencl/bitonic_sort.py +2 -0
  5. pyopencl/characterize/__init__.py +23 -0
  6. pyopencl/compyte/.git +1 -0
  7. pyopencl/compyte/.gitignore +21 -0
  8. pyopencl/compyte/ndarray/Makefile +31 -0
  9. pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
  10. pyopencl/compyte/ndarray/pygpu_language.h +207 -0
  11. pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
  12. pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
  13. pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
  14. pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
  15. pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
  16. pyopencl/tools.py +60 -56
  17. pyopencl/version.py +9 -3
  18. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/METADATA +105 -105
  19. pyopencl-2024.2.5.dist-info/RECORD +56 -0
  20. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/WHEEL +1 -1
  21. pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
  22. pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
  23. pyopencl-2024.2.2.data/data/Makefile.in +0 -21
  24. pyopencl-2024.2.2.data/data/README.rst +0 -70
  25. pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
  26. pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
  27. pyopencl-2024.2.2.data/data/configure.py +0 -6
  28. pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
  29. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
  30. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
  31. pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
  32. pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
  33. pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
  34. pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
  35. pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
  36. pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
  37. pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
  38. pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
  39. pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
  40. pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
  41. pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
  42. pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
  43. pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
  44. pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
  45. pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
  46. pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
  47. pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
  48. pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
  49. pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
  50. pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
  51. pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
  52. pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
  53. pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
  54. pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
  55. pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
  56. pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
  57. pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
  58. pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
  59. pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
  60. pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
  61. pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
  62. pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
  63. pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
  64. pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
  65. pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
  66. pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
  67. pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
  68. pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
  69. pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
  70. pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
  71. pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
  72. pyopencl-2024.2.2.data/data/pytest.ini +0 -3
  73. pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
  74. pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
  75. pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
  76. pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
  77. pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
  78. pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
  79. pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
  80. pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
  81. pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
  82. pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
  83. pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
  84. pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
  85. pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
  86. pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
  87. pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
  88. pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
  89. pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
  90. pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
  91. pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
  92. pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
  93. pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
  94. pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
  95. pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
  96. pyopencl-2024.2.2.dist-info/LICENSE +0 -282
  97. pyopencl-2024.2.2.dist-info/RECORD +0 -123
  98. pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
  99. {pyopencl-2024.2.2.data/data → pyopencl-2024.2.5.dist-info/licenses}/LICENSE +0 -0
@@ -1,1070 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- NBody Demonstrator implemented in OpenCL, rendering OpenGL
5
-
6
- By default, rendering in OpenGL is disabled. Add -g option to activate.
7
-
8
- Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/
9
-
10
- CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
11
- Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
12
-
13
- Thanks to Andreas Klockner for PyOpenCL:
14
- http://mathema.tician.de/software/pyopencl
15
-
16
- """
17
- import getopt
18
- import sys
19
- import time
20
-
21
- import numpy as np
22
-
23
- import pyopencl as cl
24
- import pyopencl.array
25
-
26
-
27
def DictionariesAPI():
    """Return the name-to-integer-code lookup tables used by the demo.

    Returns:
        A 4-tuple ``(Marsaglia, Computing, Interaction, Artevasion)`` of
        dictionaries, each mapping an option name (str) to a small integer
        code.
    """
    rng_codes = dict(CONG=0, SHR3=1, MWC=2, KISS=3)
    precision_codes = dict(FP32=0, FP64=1)
    interaction_codes = dict(Force=0, Potential=1)
    # "None" is a string key here, so it cannot be spelled as a keyword
    # argument; build this table from explicit pairs instead.
    artevasion_codes = dict([("None", 0), ("NegExp", 1), ("CorRad", 2)])
    return rng_codes, precision_codes, interaction_codes, artevasion_codes
33
-
34
-
35
- BlobOpenCL = """
36
- #define TFP32 0
37
- #define TFP64 1
38
-
39
- #define TFORCE 0
40
- #define TPOTENTIAL 1
41
-
42
- #define NONE 0
43
- #define NEGEXP 1
44
- #define CORRAD 2
45
-
46
- #if TYPE == TFP32
47
- #define MYFLOAT4 float4
48
- #define MYFLOAT8 float8
49
- #define MYFLOAT float
50
- #define DISTANCE fast_distance
51
- #else
52
- #define MYFLOAT4 double4
53
- #define MYFLOAT8 double8
54
- #define MYFLOAT double
55
- #define DISTANCE distance
56
- #if defined(cl_khr_fp64) // Khronos extension available?
57
- #pragma OPENCL EXTENSION cl_khr_fp64 : enable
58
- #endif
59
- #endif
60
-
61
- #define znew ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
62
- #define wnew ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
63
- #define MWC (znew+wnew)
64
- #define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
65
- #define CONG (jcong=69069*jcong+1234567)
66
- #define KISS ((MWC^CONG)+SHR3)
67
-
68
- #define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
69
- #define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
70
- #define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
71
- #define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
72
-
73
- #define PI (MYFLOAT)3.141592653589793238e0f
74
-
75
- #define SMALL_NUM (MYFLOAT)1.e-9f
76
-
77
- #define CoreRadius (MYFLOAT)(1.e0f)
78
-
79
- // Create my own Distance implementation: distance buggy on Oland AMD chipset
80
-
81
- MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
82
- {
83
- private MYFLOAT x2,y2,z2;
84
- x2=n.s0-m.s0;
85
- x2*=x2;
86
- y2=n.s1-m.s1;
87
- y2*=y2;
88
- z2=n.s2-m.s2;
89
- z2*=z2;
90
- return(sqrt(x2+y2+z2));
91
- }
92
-
93
- // Potential between 2 m,n bodies
94
- MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
95
- #if ARTEVASION == NEGEXP
96
- // Add exp(-r) to numerator to avoid divergence for low distances
97
- {
98
- MYFLOAT r=DISTANCE(n,m);
99
- return((-1.e0f+exp(-r))/r);
100
- }
101
- #elif ARTEVASION == CORRAD
102
- // Add Core Radius to avoid divergence for low distances
103
- {
104
- MYFLOAT r=DISTANCE(n,m);
105
- return(-1.e0f/sqrt(r*r+CoreRadius*CoreRadius));
106
- }
107
- #else
108
- // Classical potential in 1/r
109
- {
110
- // return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
111
- return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
112
- }
113
- #endif
114
-
115
- // Interaction based of Force as gradient of Potential
116
- MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
117
- #if INTERACTION == TFORCE
118
- #if ARTEVASION == NEGEXP
119
- // Force gradient of potential, set as (1-exp(-r))/r
120
- {
121
- private MYFLOAT r=MyDistance(n,m);
122
- private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f);
123
- return((n-m)*num/(MYFLOAT)(r*r*r));
124
- }
125
- #elif ARTEVASION == CORRAD
126
- // Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2)
127
- {
128
- private MYFLOAT r=MyDistance(n,m);
129
- private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius);
130
- return((n-m)/(MYFLOAT)(den*den*den));
131
- }
132
- #else
133
- // Simplest implementation of force (equals to acceleration)
134
- // seems to bo bad (numerous artevasions)
135
- // MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n)
136
- {
137
- private MYFLOAT r=MyDistance(n,m);
138
- return((n-m)/(MYFLOAT)(r*r*r));
139
- }
140
- #endif
141
- #else
142
- // Force definited as gradient of potential
143
- // Estimate potential and proximate potential to estimate force
144
- {
145
- // 1/1024 seems to be a good factor: larger one provides bad results
146
- private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
147
- private MYFLOAT4 er=normalize(n-m);
148
- private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
149
-
150
- return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
151
- }
152
- #endif
153
-
154
- MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
155
- {
156
- private MYFLOAT potential=(MYFLOAT)0.e0f;
157
- private MYFLOAT4 x=clDataX[gid];
158
-
159
- for (int i=0;i<get_global_size(0);i++)
160
- {
161
- if (gid != i)
162
- potential+=PairPotential(x,clDataX[i]);
163
- }
164
-
165
- barrier(CLK_GLOBAL_MEM_FENCE);
166
- return(potential);
167
- }
168
-
169
- MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid) // # noqa: E501
170
- {
171
- return(PairPotential(clDataX[gid],clCoM[0]));
172
- }
173
-
174
- // Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
175
-
176
- MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
177
- {
178
- private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
179
- MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
180
-
181
- a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
182
- v0=(MYFLOAT4)clDataInV[gid];
183
- x0=(MYFLOAT4)clDataInX[gid];
184
- int N = get_global_size(0);
185
-
186
- for (private int i=0;i<N;i++)
187
- {
188
- if (gid != i)
189
- a0+=Interaction(x0,clDataInX[i]);
190
- }
191
-
192
- a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
193
- v1=a0*dt+v0;
194
- x1=v0*dt+x0;
195
- for (private int j=0;j<N;j++)
196
- {
197
- if (gid != j)
198
- a1+=Interaction(x1,clDataInX[j]);
199
- }
200
-
201
- a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
202
- v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
203
- x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
204
- for (private int k=0;k<N;k++)
205
- {
206
- if (gid != k)
207
- a2+=Interaction(x2,clDataInX[k]);
208
- }
209
-
210
- a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
211
- v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
212
- x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
213
- for (private int l=0;l<N;l++)
214
- {
215
- if (gid != l)
216
- a3+=Interaction(x3,clDataInX[l]);
217
- }
218
-
219
- a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
220
- v4=a3*dt+v0;
221
- x4=v3*dt+x0;
222
- for (private int m=0;m<N;m++)
223
- {
224
- if (gid != m)
225
- a4+=Interaction(x4,clDataInX[m]);
226
- }
227
-
228
- xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
229
- vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
230
-
231
- return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
232
- }
233
-
234
- MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
235
- {
236
- private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
237
- MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
238
-
239
- x0=(MYFLOAT4)clDataInX[gid];
240
- v0=(MYFLOAT4)clDataInV[gid];
241
- a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
242
-
243
- for (private int i=0;i<get_global_size(0);i++)
244
- {
245
- if (gid != i)
246
- a0+=Interaction(x0,clDataInX[i]);
247
- }
248
-
249
- a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
250
- //v1=v0+dt*a0;
251
- //x1=x0+dt*v0;
252
- v1=dt*a0+v0;
253
- x1=dt*v0+x0;
254
-
255
- for (private int j=0;j<get_global_size(0);j++)
256
- {
257
- if (gid != j)
258
- a1+=Interaction(x1,clDataInX[j]);
259
- }
260
-
261
- vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
262
- xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
263
-
264
- return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
265
- }
266
-
267
- MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
268
- {
269
- MYFLOAT4 x0,v0,a,xf,vf;
270
-
271
- x0=(MYFLOAT4)clDataInX[gid];
272
- v0=(MYFLOAT4)clDataInV[gid];
273
- a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
274
-
275
- for (private int i=0;i<get_global_size(0);i++)
276
- {
277
- if (gid != i)
278
- a+=Interaction(x0,clDataInX[i]);
279
- }
280
-
281
- vf=v0+dt*a;
282
- xf=x0+dt*vf;
283
-
284
- return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
285
- }
286
-
287
- MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
288
- {
289
- MYFLOAT4 x0,v0,a,xf,vf;
290
-
291
- x0=(MYFLOAT4)clDataInX[gid];
292
- v0=(MYFLOAT4)clDataInV[gid];
293
- a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
294
-
295
- for (private int i=0;i<get_global_size(0);i++)
296
- {
297
- if (gid != i)
298
- a+=Interaction(x0,clDataInX[i]);
299
- }
300
-
301
- vf=v0+dt*a;
302
- xf=x0+dt*v0;
303
-
304
- return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
305
- }
306
-
307
- __kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX,
308
- MYFLOAT diameter,uint seed_z,uint seed_w)
309
- {
310
- private int gid=get_global_id(0);
311
- private uint zmwc=seed_z+gid;
312
- private uint wmwc=seed_w+(gid+1)%2;
313
- private MYFLOAT Heat;
314
-
315
- for (int i=0;i<gid;i++)
316
- {
317
- Heat=MWCfp;
318
- }
319
-
320
- // More accurate distribution based on spherical coordonates
321
- // Disactivated because of AMD Oland GPU crash on launch
322
- // private MYFLOAT Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
323
- // Radius=MWCfp*diameter/2.e0f;
324
- // Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
325
- // Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
326
- // SinTheta=sin((float)Theta);
327
- // PosX=cos((float)Phi)*Radius*SinTheta;
328
- // PosY=sin((float)Phi)*Radius*SinTheta;
329
- // PosZ=cos((float)Theta)*Radius;
330
- // clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
331
-
332
- private MYFLOAT Radius=diameter/2.e0f;
333
- private MYFLOAT Length=diameter;
334
- private MYFLOAT4 Position;
335
- while (Length>Radius) {
336
- Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f);
337
- Length=(MYFLOAT)length((MYFLOAT4)Position);
338
- }
339
-
340
- clDataX[gid]=Position;
341
-
342
- barrier(CLK_GLOBAL_MEM_FENCE);
343
- }
344
-
345
- __kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
346
- uint seed_z,uint seed_w)
347
- {
348
- int gid=get_global_id(0);
349
- uint zmwc=seed_z+gid;
350
- uint wmwc=seed_w-gid;
351
- private MYFLOAT Heat;
352
-
353
- for (int i=0;i<gid;i++)
354
- {
355
- Heat=MWCfp;
356
- }
357
-
358
- clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
359
-
360
- barrier(CLK_GLOBAL_MEM_FENCE);
361
- }
362
-
363
- __kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
364
- {
365
- int gid = get_global_id(0);
366
- MYFLOAT N = (MYFLOAT)get_global_size(0);
367
- uint zmwc=seed_z+(uint)gid;
368
- uint wmwc=seed_w-(uint)gid;
369
- MYFLOAT4 CrossVector,SpeedVector,FromCoM;
370
- MYFLOAT Heat,ThetaA,PhiA,ThetaB,PhiB,Length,tA,tB,Polar;
371
-
372
- for (int i=0;i<gid;i++)
373
- {
374
- Heat=MWCfp;
375
- }
376
-
377
- // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
378
- // Implemention on AMD Oland are probably broken in float
379
-
380
- FromCoM=(MYFLOAT4)(clDataX[gid]-clCoM[0]);
381
- Length=length(FromCoM);
382
- //Theta=acos(FromCoM.z/Length);
383
- //Phi=atan(FromCoM.y/FromCoM.x);
384
- // First tangential vector to sphere of length radius
385
- ThetaA=acos(FromCoM.x/Length)+5.e-1f*PI;
386
- PhiA=atan(FromCoM.y/FromCoM.z);
387
- // Second tangential vector to sphere of length radius
388
- ThetaB=acos((float)(FromCoM.x/Length));
389
- PhiB=atan((float)(FromCoM.y/FromCoM.z))+5.e-1f*PI;
390
- // (x,y) random coordonates to plane tangential to sphere
391
- Polar=MWCfp*2.e0f*PI;
392
- tA=cos((float)Polar);
393
- tB=sin((float)Polar);
394
-
395
- // Exception for 2 particules to ovoid shifting
396
- if (get_global_size(0)==2) {
397
- CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
398
- } else {
399
- CrossVector.s0=tA*cos((float)ThetaA)+tB*cos((float)ThetaB);
400
- CrossVector.s1=tA*sin((float)ThetaA)*sin((float)PhiA)+tB*sin((float)ThetaB)*sin((float)PhiB);
401
- CrossVector.s2=tA*sin((float)ThetaA)*cos((float)PhiA)+tB*sin((float)ThetaB)*cos((float)PhiB);
402
- CrossVector.s3=0.e0f;
403
- }
404
-
405
- if (velocity<SMALL_NUM) {
406
- SpeedVector=(MYFLOAT4)normalize(cross(FromCoM,CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
407
- }
408
- else
409
- {
410
-
411
- SpeedVector=(MYFLOAT4)((MWCfp-5e-1f)*velocity,(MWCfp-5e-1f)*velocity,
412
- (MWCfp-5e-1f)*velocity,0.e0f);
413
- }
414
- clDataV[gid]=SpeedVector;
415
- barrier(CLK_GLOBAL_MEM_FENCE);
416
- }
417
-
418
- __kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
419
- {
420
- private int gid = get_global_id(0);
421
- private MYFLOAT8 clDataGid;
422
-
423
- clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
424
- barrier(CLK_GLOBAL_MEM_FENCE);
425
- clDataX[gid]=clDataGid.s0123;
426
- clDataV[gid]=clDataGid.s4567;
427
- }
428
-
429
- __kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
430
- {
431
- private int gid = get_global_id(0);
432
- private MYFLOAT8 clDataGid;
433
-
434
- clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
435
- barrier(CLK_GLOBAL_MEM_FENCE);
436
- clDataX[gid]=clDataGid.s0123;
437
- clDataV[gid]=clDataGid.s4567;
438
- }
439
-
440
- __kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
441
- {
442
- private int gid = get_global_id(0);
443
- private MYFLOAT8 clDataGid;
444
-
445
- clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
446
- barrier(CLK_GLOBAL_MEM_FENCE);
447
- clDataX[gid]=clDataGid.s0123;
448
- clDataV[gid]=clDataGid.s4567;
449
- }
450
-
451
- __kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
452
- {
453
- private int gid = get_global_id(0);
454
- private MYFLOAT8 clDataGid;
455
-
456
- clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
457
- barrier(CLK_GLOBAL_MEM_FENCE);
458
- clDataX[gid]=clDataGid.s0123;
459
- clDataV[gid]=clDataGid.s4567;
460
- }
461
-
462
- __kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
463
- {
464
- int gid = get_global_id(0);
465
-
466
- clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
467
- }
468
-
469
- __kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
470
- {
471
- int gid = get_global_id(0);
472
-
473
- MYFLOAT potential=(MYFLOAT)0.e0f;
474
- MYFLOAT4 x=clDataX[gid];
475
-
476
- for (int i=0;i<get_global_size(0);i++)
477
- {
478
- if (gid != i)
479
- potential+=PairPotential(x,clDataX[i]);
480
- }
481
-
482
- barrier(CLK_GLOBAL_MEM_FENCE);
483
- clPotential[gid]=potential*(MYFLOAT)5.e-1f;
484
- }
485
-
486
- __kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
487
- {
488
- MYFLOAT4 CoM=clDataX[0];
489
-
490
- for (int i=1;i<Size;i++)
491
- {
492
- CoM+=clDataX[i];
493
- }
494
-
495
- barrier(CLK_GLOBAL_MEM_FENCE);
496
- clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
497
- }
498
-
499
- __kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
500
- {
501
- int gid = get_global_id(0);
502
-
503
- barrier(CLK_GLOBAL_MEM_FENCE);
504
- MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
505
- clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
506
- }
507
-
508
- """
509
-
510
-
511
def MainOpenCL(clDataX, clDataV, Step, Method):
    """Run one integration kernel on the device and return how long it took.

    The kernel is looked up by name on the module-level ``MyRoutines``
    program and enqueued on the module-level ``queue`` with a global size
    of ``(Number, 1)`` and no explicit local size. The call blocks until
    the returned event has completed.

    Parameters:
        clDataX: passed to the kernel as its first argument.
        clDataV: passed to the kernel as its second argument.
        Step: passed to the kernel as its third argument.
        Method: name of the integration scheme. ``"RungeKutta"``,
            ``"ExplicitEuler"`` and ``"Heun"`` select the kernel of the
            same name; any other value selects ``"ImplicitEuler"``.

    Returns:
        Elapsed time in seconds (float) between just before the launch and
        the completion of the kernel's event.
    """
    # perf_counter() is monotonic, so the measured duration cannot be
    # distorted (or turn negative) if the system clock is adjusted.
    time_start = time.perf_counter()

    if Method in ("RungeKutta", "ExplicitEuler", "Heun"):
        kernel_name = Method
    else:
        # Unknown names fall back to the implicit Euler scheme.
        kernel_name = "ImplicitEuler"

    CLLaunch = getattr(MyRoutines, kernel_name)(
        queue, (Number, 1), None, clDataX, clDataV, Step
    )
    CLLaunch.wait()
    return time.perf_counter() - time_start
530
-
531
-
532
def display(*args):
    """GLUT display callback: advance the simulation one step and draw it.

    Each call clears the colour buffer, runs one integration step through
    ``MainOpenCL`` and appends its duration to the global ``Durations``
    array, copies the velocities (when ``SpeedRendering`` is true) or the
    positions (otherwise) back to the host, draws them as ``Number`` points
    and swaps buffers. The global ``Iterations`` counter is incremented
    once per call.

    Parameters:
        *args: ignored.
    """
    global MyDataX, MyDataV, clDataX, clDataV, Step, Method, Number, Iterations, \
        Durations, Verbose, SpeedRendering

    gl.glClearColor(0.0, 0.0, 0.0, 0.0)
    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
    gl.glColor3f(1.0, 1.0, 1.0)

    # Integrate exactly once per frame and record that step's duration.
    # Previously the kernel was launched twice per frame (the first timing
    # was thrown away), so the simulation advanced two steps for every
    # counted iteration and every recorded duration.
    Durations = np.append(Durations, MainOpenCL(clDataX, clDataV, Step, Method))

    # Choose which device buffer to visualise and its host-side mirror.
    if SpeedRendering:
        device_buffer, host_array = clDataV, MyDataV
    else:
        device_buffer, host_array = clDataX, MyDataX

    cl.enqueue_copy(queue, host_array, device_buffer)
    vertices = host_array.reshape(Number, 4)
    # Force the fourth component of every 4-component vertex to 1 before
    # handing the array to OpenGL.
    vertices[:, 3] = 1
    gl.glVertexPointerf(vertices)

    if Verbose:
        print("Positions for #%s iteration: %s" % (Iterations, MyDataX))
    else:
        # Progress indicator: one dot per rendered frame.
        sys.stdout.write(".")
        sys.stdout.flush()

    gl.glEnableClientState(gl.GL_VERTEX_ARRAY)
    gl.glDrawArrays(gl.GL_POINTS, 0, Number)
    gl.glDisableClientState(gl.GL_VERTEX_ARRAY)
    gl.glFlush()
    Iterations += 1
    glut.glutSwapBuffers()
562
-
563
-
564
def halt():
    """Do nothing and return ``None``."""
    return None
566
-
567
-
568
def keyboard(k, x, y):
    """GLUT keyboard callback for rotation, zoom, render mode and exit.

    Key bindings:
        ``z`` / ``Z``: add / subtract 1.0 to the global ``ViewRZ`` angle.
        ``+`` / ``-``: use a scale factor of 2.0 / 0.5 for this key press.
        ``s``: toggle the global ``SpeedRendering`` flag.
        Escape (code 27): leave the GLUT main loop and return ``False``.

    Any other key is ignored. After a handled key other than Escape, the
    current matrix is rotated by ``ViewRZ`` about the z axis, scaled
    uniformly by the scale factor (1 unless ``+``/``-`` was pressed) and a
    redisplay is requested.

    Parameters:
        k: the key as delivered by GLUT.
        x: unused.
        y: unused.

    Returns:
        ``False`` for Escape, otherwise ``None``.
    """
    global ViewRZ, SpeedRendering

    key_z_lower, key_z_upper, key_plus, key_minus, key_switch = [
        glut.as_8_bit(char) for char in ("z", "Z", "+", "-", "s")
    ]

    scale = 1
    if k == key_z_lower:
        ViewRZ += 1.0
    elif k == key_z_upper:
        ViewRZ -= 1.0
    elif k == key_plus:
        scale = 2.0
    elif k == key_minus:
        scale = 0.5
    elif k == key_switch:
        SpeedRendering = not SpeedRendering
    elif ord(k) == 27:  # Escape
        glut.glutLeaveMainLoop()
        return False
    else:
        return None

    gl.glRotatef(ViewRZ, 0.0, 0.0, 1.0)
    gl.glScalef(scale, scale, scale)
    glut.glutPostRedisplay()
598
-
599
-
600
def special(k, x, y):
    """GLUT special-key callback: rotate the view with the arrow keys.

    Up / Down add / subtract 1.0 to the global ``ViewRX``; Left / Right add
    / subtract 1.0 to the global ``ViewRY``. Any other key is ignored.
    After a handled key the current matrix is rotated by ``ViewRX`` about
    the x axis and by ``ViewRY`` about the y axis, and a redisplay is
    requested.

    Parameters:
        k: the GLUT special-key code.
        x: unused.
        y: unused.
    """
    global ViewRX, ViewRY

    delta = 1.0
    if k == glut.GLUT_KEY_UP:
        ViewRX = ViewRX + delta
    elif k == glut.GLUT_KEY_DOWN:
        ViewRX = ViewRX - delta
    elif k == glut.GLUT_KEY_LEFT:
        ViewRY = ViewRY + delta
    elif k == glut.GLUT_KEY_RIGHT:
        ViewRY = ViewRY - delta
    else:
        # Not an arrow key: leave the view untouched.
        return

    for angle, axis in ((ViewRX, (1.0, 0.0, 0.0)), (ViewRY, (0.0, 1.0, 0.0))):
        gl.glRotatef(angle, *axis)
    glut.glutPostRedisplay()
617
-
618
-
619
def setup_viewport():
    """Reset the projection to an orthographic box and request a redraw.

    The projection matrix is replaced by the identity and then by an
    orthographic volume spanning ``-SizeOfBox`` to ``+SizeOfBox`` on each
    of the three axes, where ``SizeOfBox`` is a module-level global.
    """
    half_extent = SizeOfBox
    lower, upper = -half_extent, half_extent

    gl.glMatrixMode(gl.GL_PROJECTION)
    gl.glLoadIdentity()
    gl.glOrtho(lower, upper, lower, upper, lower, upper)
    glut.glutPostRedisplay()
625
-
626
-
627
def reshape(w, h):
    """GLUT reshape callback: resize the viewport and rebuild the projection.

    Parameters:
        w: new viewport width, passed to ``glViewport``.
        h: new viewport height, passed to ``glViewport``.
    """
    origin_x, origin_y = 0, 0
    gl.glViewport(origin_x, origin_y, w, h)
    setup_viewport()
630
-
631
-
632
- if __name__ == "__main__":
633
-
634
- global Number, Step, clDataX, clDataV, MyDataX, MyDataV, Method, SizeOfBox, \
635
- Iterations, Verbose, Durations
636
-
637
- # ValueType
638
- ValueType = "FP32"
639
-
640
- class MyFloat(np.float32):
641
- pass
642
-
643
- # clType8=cl_array.vec.float8
644
- # Set defaults values
645
- np.set_printoptions(precision=2)
646
- # Id of Device : 1 is for first find !
647
- Device = 0
648
- # Number of bodies is integer
649
- Number = 2
650
- # Number of iterations (for standalone execution)
651
- Iterations = 10
652
- # Size of shape
653
- SizeOfShape = MyFloat(1.0)
654
- # Initial velocity of particules
655
- Velocity = MyFloat(1.0)
656
- # Step
657
- Step = MyFloat(1.0 / 32)
658
- # Method of integration
659
- Method = "ImplicitEuler"
660
- # InitialRandom
661
- InitialRandom = False
662
- # RNG Marsaglia Method
663
- RNG = "MWC"
664
- # Viriel Distribution of stress
665
- VirielStress = True
666
- # Verbose
667
- Verbose = False
668
- # OpenGL real time rendering
669
- OpenGL = False
670
- # Speed rendering
671
- SpeedRendering = False
672
- # Counter ArtEvasions Measures (artefact evasion)
673
- CoArEv = "None"
674
- # Shape to distribute
675
- Shape = "Ball"
676
- # Type of Interaction
677
- InterType = "Force"
678
-
679
- HowToUse = "%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>" # noqa: E501
680
-
681
- try:
682
- opts, args = getopt.getopt(
683
- sys.argv[1:],
684
- "rpgehod:n:i:z:v:s:m:t:b:x:",
685
- [
686
- "random",
687
- "potential",
688
- "coarev",
689
- "opengl",
690
- "viriel",
691
- "verbose",
692
- "device=",
693
- "number=",
694
- "iterations=",
695
- "size=",
696
- "velocity=",
697
- "step=",
698
- "method=",
699
- "valuetype=",
700
- "shape=",
701
- ],
702
- )
703
- except getopt.GetoptError:
704
- print(HowToUse % sys.argv[0])
705
- sys.exit(2)
706
-
707
- for opt, arg in opts:
708
- if opt == "-h":
709
- print(HowToUse % sys.argv[0])
710
-
711
- print("\nInformations about devices detected under OpenCL:")
712
- try:
713
- Id = 0
714
- for platform in cl.get_platforms():
715
- for device in platform.get_devices():
716
- # Failed now because of POCL implementation
717
- # deviceType=cl.device_type.to_string(device.type)
718
- deviceType = "xPU"
719
- print(
720
- "Device #%i from %s of type %s : %s"
721
- % (
722
- Id,
723
- platform.vendor.lstrip(),
724
- deviceType,
725
- device.name.lstrip(),
726
- )
727
- )
728
- Id = Id + 1
729
- sys.exit()
730
- except ImportError:
731
- print("Your platform does not seem to support OpenCL")
732
- sys.exit()
733
-
734
- elif opt in ("-t", "--valuetype"):
735
- if arg == "FP64":
736
-
737
- class MyFloat(np.float64):
738
- pass
739
-
740
- else:
741
-
742
- class MyFloat(np.float32):
743
- pass
744
-
745
- ValueType = arg
746
- elif opt in ("-d", "--device"):
747
- Device = int(arg)
748
- elif opt in ("-m", "--method"):
749
- Method = arg
750
- elif opt in ("-b", "--shape"):
751
- Shape = arg
752
- if Shape != "Ball" or Shape != "Box":
753
- print("Wrong argument: set to Ball")
754
- elif opt in ("-n", "--number"):
755
- Number = int(arg)
756
- elif opt in ("-i", "--iterations"):
757
- Iterations = int(arg)
758
- elif opt in ("-z", "--size"):
759
- SizeOfShape = MyFloat(arg)
760
- elif opt in ("-v", "--velocity"):
761
- Velocity = MyFloat(arg)
762
- VirielStress = False
763
- elif opt in ("-s", "--step"):
764
- Step = MyFloat(arg)
765
- elif opt in ("-r", "--random"):
766
- InitialRandom = True
767
- elif opt in ("-c", "--check"):
768
- CheckEnergies = True
769
- elif opt in ("-e", "--viriel"):
770
- VirielStress = True
771
- elif opt in ("-g", "--opengl"):
772
- OpenGL = True
773
- elif opt in ("-p", "--potential"):
774
- InterType = "Potential"
775
- elif opt in ("-x", "--coarev"):
776
- CoArEv = arg
777
- elif opt in ("-o", "--verbose"):
778
- Verbose = True
779
-
780
- SizeOfShape = np.sqrt(MyFloat(SizeOfShape * Number))
781
- Velocity = MyFloat(Velocity)
782
- Step = MyFloat(Step)
783
-
784
- print("Device choosed : %s" % Device)
785
- print("Number of particules : %s" % Number)
786
- print("Size of Shape : %s" % SizeOfShape)
787
- print("Initial velocity : %s" % Velocity)
788
- print("Step of iteration : %s" % Step)
789
- print("Number of iterations : %s" % Iterations)
790
- print("Method of resolution : %s" % Method)
791
- print("Initial Random for RNG Seed : %s" % InitialRandom)
792
- print("ValueType is : %s" % ValueType)
793
- print("Viriel distribution of stress : %s" % VirielStress)
794
- print("OpenGL real time rendering : %s" % OpenGL)
795
- print("Speed rendering : %s" % SpeedRendering)
796
- print("Interaction type : %s" % InterType)
797
- print("Counter Artevasion type : %s" % CoArEv)
798
-
799
- # Create Numpy array of CL vector with 8 FP32
800
- MyCoM = np.zeros(4, dtype=MyFloat)
801
- MyDataX = np.zeros(Number * 4, dtype=MyFloat)
802
- MyDataV = np.zeros(Number * 4, dtype=MyFloat)
803
- MyPotential = np.zeros(Number, dtype=MyFloat)
804
- MyKinetic = np.zeros(Number, dtype=MyFloat)
805
-
806
- Marsaglia, Computing, Interaction, Artevasion = DictionariesAPI()
807
-
808
- # Scan the OpenCL arrays
809
- Id = 0
810
- HasXPU = False
811
- for platform in cl.get_platforms():
812
- for device in platform.get_devices():
813
- if Id == Device:
814
- PlatForm = platform
815
- XPU = device
816
- print("CPU/GPU selected: ", device.name.lstrip())
817
- print("Platform selected: ", platform.name)
818
- HasXPU = True
819
- Id += 1
820
-
821
- if not HasXPU:
822
- print("No XPU #%i found in all of %i devices, sorry..." % (Device, Id - 1))
823
- sys.exit()
824
-
825
- # Create Context
826
- try:
827
- ctx = cl.Context([XPU])
828
- queue = cl.CommandQueue(
829
- ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
830
- )
831
- except Exception:
832
- print("Crash during context creation")
833
-
834
- # Build all routines used for the computing
835
-
836
- # BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType]) # noqa: E501
837
- BuildOptions = "-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % ( # noqa: E501
838
- Marsaglia[RNG],
839
- Computing[ValueType],
840
- Interaction[InterType],
841
- Artevasion[CoArEv],
842
- )
843
-
844
- if (
845
- "Intel" in PlatForm.name
846
- or "Experimental" in PlatForm.name
847
- or "Clover" in PlatForm.name
848
- or "Portable" in PlatForm.name
849
- ):
850
- MyRoutines = cl.Program(ctx, BlobOpenCL).build(options=BuildOptions)
851
- else:
852
- MyRoutines = cl.Program(ctx, BlobOpenCL).build(
853
- options=BuildOptions + " -cl-strict-aliasing"
854
- )
855
-
856
- mf = cl.mem_flags
857
- # Read/Write approach for buffering
858
- clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
859
- clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
860
- clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
861
- clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
862
- clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
863
-
864
- # Write/HostPointer approach for buffering
865
- # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
866
- # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
867
- # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential) # noqa: E501
868
- # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
869
- # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
870
-
871
- print("All particles superimposed.")
872
-
873
- # Set particles to RNG points
874
- if InitialRandom:
875
- rng = np.random.default_rng()
876
- seed_w = np.uint32(rng.integers(2 ** 32))
877
- seed_z = np.uint32(rng.integers(2 ** 32))
878
- else:
879
- seed_w = np.uint32(19710211)
880
- seed_z = np.uint32(20081010)
881
-
882
- if Shape == "Ball":
883
- MyRoutines.InBallSplutterPoints(
884
- queue, (Number, 1), None, clDataX, SizeOfShape, seed_w, seed_z
885
- )
886
- else:
887
- MyRoutines.InBoxSplutterPoints(
888
- queue, (Number, 1), None, clDataX, SizeOfShape, seed_w, seed_z
889
- )
890
-
891
- print("All particules distributed")
892
-
893
- CLLaunch = MyRoutines.CenterOfMass(
894
- queue, (1, 1), None, clDataX, clCoM, np.int32(Number)
895
- )
896
- CLLaunch.wait()
897
- cl.enqueue_copy(queue, MyCoM, clCoM)
898
- print("Center Of Mass estimated: (%s,%s,%s)" % (MyCoM[0], MyCoM[1], MyCoM[2]))
899
-
900
- if VirielStress:
901
- CLLaunch = MyRoutines.SplutterStress(
902
- queue,
903
- (Number, 1),
904
- None,
905
- clDataX,
906
- clDataV,
907
- clCoM,
908
- MyFloat(0.0),
909
- np.uint32(110271),
910
- np.uint32(250173),
911
- )
912
- else:
913
- CLLaunch = MyRoutines.SplutterStress(
914
- queue,
915
- (Number, 1),
916
- None,
917
- clDataX,
918
- clDataV,
919
- clCoM,
920
- Velocity,
921
- np.uint32(110271),
922
- np.uint32(250173),
923
- )
924
- CLLaunch.wait()
925
-
926
- print("All particules stressed")
927
-
928
- CLLaunch = MyRoutines.Potential(queue, (Number, 1), None, clDataX, clPotential)
929
- CLLaunch = MyRoutines.Kinetic(queue, (Number, 1), None, clDataV, clKinetic)
930
- CLLaunch.wait()
931
- cl.enqueue_copy(queue, MyPotential, clPotential)
932
- cl.enqueue_copy(queue, MyKinetic, clKinetic)
933
- print(
934
- "Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n"
935
- % (
936
- np.sum(MyPotential) + 2 * np.sum(MyKinetic),
937
- np.sum(MyPotential),
938
- np.sum(MyKinetic),
939
- )
940
- )
941
-
942
- if SpeedRendering:
943
- SizeOfBox = max(2 * MyKinetic)
944
- else:
945
- SizeOfBox = SizeOfShape
946
-
947
- if OpenGL:
948
- print("\tTiny documentation to interact OpenGL rendering:\n")
949
- print("\t<Left|Right> Rotate around X axis")
950
- print("\t <Up|Down> Rotate around Y axis")
951
- print("\t <z|Z> Rotate around Z axis")
952
- print("\t <-|+> Unzoom/Zoom")
953
- print("\t <s> Toggle to display Positions or Velocities")
954
- print("\t <Esc> Quit\n")
955
-
956
- wall_time_start = time.time()
957
-
958
- Durations = np.array([], dtype=MyFloat)
959
- print("Starting!")
960
- if OpenGL:
961
- import OpenGL.GL as gl
962
- import OpenGL.GLUT as glut
963
-
964
- global ViewRX, ViewRY, ViewRZ
965
- Iterations = 0
966
- ViewRX, ViewRY, ViewRZ = 0.0, 0.0, 0.0
967
- # Launch OpenGL Loop
968
- glut.glutInit(sys.argv)
969
- glut.glutInitDisplayMode(glut.GLUT_DOUBLE | glut.GLUT_RGB)
970
- glut.glutSetOption(glut.GLUT_ACTION_ON_WINDOW_CLOSE,
971
- glut.GLUT_ACTION_CONTINUE_EXECUTION)
972
- glut.glutInitWindowSize(512, 512)
973
- glut.glutCreateWindow(b"NBodyGL")
974
- setup_viewport()
975
- glut.glutReshapeFunc(reshape)
976
- glut.glutDisplayFunc(display)
977
- glut.glutIdleFunc(display)
978
- # glutMouseFunc(mouse)
979
- glut.glutSpecialFunc(special)
980
- glut.glutKeyboardFunc(keyboard)
981
- glut.glutMainLoop()
982
- else:
983
- for iteration in range(Iterations):
984
- Elapsed = MainOpenCL(clDataX, clDataV, Step, Method)
985
- if Verbose:
986
- # print("Duration of #%s iteration: %s" % (iteration,Elapsed))
987
- cl.enqueue_copy(queue, MyDataX, clDataX)
988
- print("Positions for #%s iteration: %s" % (iteration, MyDataX))
989
- else:
990
- sys.stdout.write(".")
991
- sys.stdout.flush()
992
- Durations = np.append(Durations, Elapsed)
993
-
994
- print("\nEnding!")
995
-
996
- MyRoutines.CenterOfMass(queue, (1, 1), None, clDataX, clCoM, np.int32(Number))
997
- CLLaunch = MyRoutines.Potential(queue, (Number, 1), None, clDataX, clPotential)
998
- CLLaunch = MyRoutines.Kinetic(queue, (Number, 1), None, clDataV, clKinetic)
999
- CLLaunch.wait()
1000
- cl.enqueue_copy(queue, MyCoM, clCoM)
1001
- cl.enqueue_copy(queue, MyPotential, clPotential)
1002
- cl.enqueue_copy(queue, MyKinetic, clKinetic)
1003
- print("\nCenter Of Mass estimated: (%s,%s,%s)" % (MyCoM[0], MyCoM[1], MyCoM[2]))
1004
- print(
1005
- "Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n"
1006
- % (
1007
- np.sum(MyPotential) + 2.0 * np.sum(MyKinetic),
1008
- np.sum(MyPotential),
1009
- np.sum(MyKinetic),
1010
- )
1011
- )
1012
-
1013
- print(
1014
- "Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" # noqa: E501
1015
- % (
1016
- Device,
1017
- Iterations,
1018
- np.mean(Durations),
1019
- np.median(Durations),
1020
- np.std(Durations),
1021
- np.min(Durations),
1022
- np.max(Durations),
1023
- np.std(Durations) / np.median(Durations),
1024
- )
1025
- )
1026
-
1027
- # FPS: 1/Elapsed
1028
- FPS = np.ones(len(Durations))
1029
- FPS /= Durations
1030
-
1031
- print(
1032
- "FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" # noqa: E501
1033
- % (
1034
- Device,
1035
- Iterations,
1036
- np.mean(FPS),
1037
- np.median(FPS),
1038
- np.std(FPS),
1039
- np.min(FPS),
1040
- np.max(FPS),
1041
- )
1042
- )
1043
-
1044
- # Contraction of Square*Size*Hertz: Size*Size/Elapsed
1045
- Squertz = np.ones(len(Durations))
1046
- Squertz *= Number * Number
1047
- Squertz /= Durations
1048
-
1049
- print(
1050
- "Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" # noqa: E501
1051
- % (
1052
- Device,
1053
- Iterations,
1054
- np.log10(np.mean(Squertz)),
1055
- np.mean(Squertz),
1056
- np.log10(np.median(Squertz)),
1057
- np.median(Squertz),
1058
- np.log10(np.std(Squertz)),
1059
- np.std(Squertz),
1060
- np.log10(np.min(Squertz)),
1061
- np.min(Squertz),
1062
- np.log10(np.max(Squertz)),
1063
- np.max(Squertz),
1064
- )
1065
- )
1066
-
1067
- clDataX.release()
1068
- clDataV.release()
1069
- clKinetic.release()
1070
- clPotential.release()