pyopencl 2024.2.2__cp38-cp38-macosx_11_0_arm64.whl → 2024.2.5__cp38-cp38-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +16 -4
- pyopencl/_cl.cpython-38-darwin.so +0 -0
- pyopencl/algorithm.py +3 -1
- pyopencl/bitonic_sort.py +2 -0
- pyopencl/characterize/__init__.py +23 -0
- pyopencl/compyte/.git +1 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/ndarray/Makefile +31 -0
- pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
- pyopencl/compyte/ndarray/pygpu_language.h +207 -0
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
- pyopencl/tools.py +60 -56
- pyopencl/version.py +9 -3
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/METADATA +14 -14
- pyopencl-2024.2.5.dist-info/RECORD +56 -0
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/WHEEL +1 -1
- pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
- pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
- pyopencl-2024.2.2.data/data/Makefile.in +0 -21
- pyopencl-2024.2.2.data/data/README.rst +0 -70
- pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
- pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
- pyopencl-2024.2.2.data/data/configure.py +0 -6
- pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
- pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
- pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
- pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
- pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
- pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
- pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
- pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
- pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
- pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
- pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
- pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
- pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
- pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
- pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
- pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
- pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
- pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
- pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
- pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
- pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
- pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
- pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
- pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
- pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
- pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
- pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
- pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
- pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
- pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
- pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
- pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
- pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
- pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
- pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
- pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
- pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
- pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
- pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
- pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
- pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
- pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
- pyopencl-2024.2.2.data/data/pytest.ini +0 -3
- pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
- pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
- pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
- pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
- pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
- pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
- pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
- pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
- pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
- pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
- pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
- pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
- pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
- pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
- pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
- pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
- pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
- pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
- pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
- pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
- pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
- pyopencl-2024.2.2.dist-info/LICENSE +0 -282
- pyopencl-2024.2.2.dist-info/RECORD +0 -123
- pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
- {pyopencl-2024.2.2.data/data → pyopencl-2024.2.5.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,1070 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
NBody Demonstrator implemented in OpenCL, rendering OpenGL
|
|
5
|
-
|
|
6
|
-
By default, rendering in OpenGL is disabled. Add -g option to activate.
|
|
7
|
-
|
|
8
|
-
Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/
|
|
9
|
-
|
|
10
|
-
CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
|
|
11
|
-
Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
|
|
12
|
-
|
|
13
|
-
Thanks to Andreas Klockner for PyOpenCL:
|
|
14
|
-
http://mathema.tician.de/software/pyopencl
|
|
15
|
-
|
|
16
|
-
"""
|
|
17
|
-
import getopt
|
|
18
|
-
import sys
|
|
19
|
-
import time
|
|
20
|
-
|
|
21
|
-
import numpy as np
|
|
22
|
-
|
|
23
|
-
import pyopencl as cl
|
|
24
|
-
import pyopencl.array
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def DictionariesAPI():
    """Return the option-name to integer-code lookup tables.

    Returns:
        A 4-tuple ``(Marsaglia, Computing, Interaction, Artevasion)`` of
        dictionaries.  Each dictionary maps an option name to its integer
        code, numbered from 0 in the order the names are listed below.
    """

    def _number_from_zero(*names):
        # Codes are simply the position of each name in the argument list.
        return {name: code for code, name in enumerate(names)}

    return (
        _number_from_zero("CONG", "SHR3", "MWC", "KISS"),
        _number_from_zero("FP32", "FP64"),
        _number_from_zero("Force", "Potential"),
        _number_from_zero("None", "NegExp", "CorRad"),
    )
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
BlobOpenCL = """
|
|
36
|
-
#define TFP32 0
|
|
37
|
-
#define TFP64 1
|
|
38
|
-
|
|
39
|
-
#define TFORCE 0
|
|
40
|
-
#define TPOTENTIAL 1
|
|
41
|
-
|
|
42
|
-
#define NONE 0
|
|
43
|
-
#define NEGEXP 1
|
|
44
|
-
#define CORRAD 2
|
|
45
|
-
|
|
46
|
-
#if TYPE == TFP32
|
|
47
|
-
#define MYFLOAT4 float4
|
|
48
|
-
#define MYFLOAT8 float8
|
|
49
|
-
#define MYFLOAT float
|
|
50
|
-
#define DISTANCE fast_distance
|
|
51
|
-
#else
|
|
52
|
-
#define MYFLOAT4 double4
|
|
53
|
-
#define MYFLOAT8 double8
|
|
54
|
-
#define MYFLOAT double
|
|
55
|
-
#define DISTANCE distance
|
|
56
|
-
#if defined(cl_khr_fp64) // Khronos extension available?
|
|
57
|
-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
|
58
|
-
#endif
|
|
59
|
-
#endif
|
|
60
|
-
|
|
61
|
-
#define znew ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
|
|
62
|
-
#define wnew ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
|
|
63
|
-
#define MWC (znew+wnew)
|
|
64
|
-
#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
|
|
65
|
-
#define CONG (jcong=69069*jcong+1234567)
|
|
66
|
-
#define KISS ((MWC^CONG)+SHR3)
|
|
67
|
-
|
|
68
|
-
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
|
|
69
|
-
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
|
|
70
|
-
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
|
|
71
|
-
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
|
|
72
|
-
|
|
73
|
-
#define PI (MYFLOAT)3.141592653589793238e0f
|
|
74
|
-
|
|
75
|
-
#define SMALL_NUM (MYFLOAT)1.e-9f
|
|
76
|
-
|
|
77
|
-
#define CoreRadius (MYFLOAT)(1.e0f)
|
|
78
|
-
|
|
79
|
-
// Create my own Distance implementation: distance buggy on Oland AMD chipset
|
|
80
|
-
|
|
81
|
-
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
|
|
82
|
-
{
|
|
83
|
-
private MYFLOAT x2,y2,z2;
|
|
84
|
-
x2=n.s0-m.s0;
|
|
85
|
-
x2*=x2;
|
|
86
|
-
y2=n.s1-m.s1;
|
|
87
|
-
y2*=y2;
|
|
88
|
-
z2=n.s2-m.s2;
|
|
89
|
-
z2*=z2;
|
|
90
|
-
return(sqrt(x2+y2+z2));
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
// Potential between 2 m,n bodies
|
|
94
|
-
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
|
|
95
|
-
#if ARTEVASION == NEGEXP
|
|
96
|
-
// Add exp(-r) to numerator to avoid divergence for low distances
|
|
97
|
-
{
|
|
98
|
-
MYFLOAT r=DISTANCE(n,m);
|
|
99
|
-
return((-1.e0f+exp(-r))/r);
|
|
100
|
-
}
|
|
101
|
-
#elif ARTEVASION == CORRAD
|
|
102
|
-
// Add Core Radius to avoid divergence for low distances
|
|
103
|
-
{
|
|
104
|
-
MYFLOAT r=DISTANCE(n,m);
|
|
105
|
-
return(-1.e0f/sqrt(r*r+CoreRadius*CoreRadius));
|
|
106
|
-
}
|
|
107
|
-
#else
|
|
108
|
-
// Classical potential in 1/r
|
|
109
|
-
{
|
|
110
|
-
// return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
|
|
111
|
-
return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
|
|
112
|
-
}
|
|
113
|
-
#endif
|
|
114
|
-
|
|
115
|
-
// Interaction based of Force as gradient of Potential
|
|
116
|
-
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
|
|
117
|
-
#if INTERACTION == TFORCE
|
|
118
|
-
#if ARTEVASION == NEGEXP
|
|
119
|
-
// Force gradient of potential, set as (1-exp(-r))/r
|
|
120
|
-
{
|
|
121
|
-
private MYFLOAT r=MyDistance(n,m);
|
|
122
|
-
private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f);
|
|
123
|
-
return((n-m)*num/(MYFLOAT)(r*r*r));
|
|
124
|
-
}
|
|
125
|
-
#elif ARTEVASION == CORRAD
|
|
126
|
-
// Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2)
|
|
127
|
-
{
|
|
128
|
-
private MYFLOAT r=MyDistance(n,m);
|
|
129
|
-
private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius);
|
|
130
|
-
return((n-m)/(MYFLOAT)(den*den*den));
|
|
131
|
-
}
|
|
132
|
-
#else
|
|
133
|
-
// Simplest implementation of force (equals to acceleration)
|
|
134
|
-
// seems to bo bad (numerous artevasions)
|
|
135
|
-
// MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n)
|
|
136
|
-
{
|
|
137
|
-
private MYFLOAT r=MyDistance(n,m);
|
|
138
|
-
return((n-m)/(MYFLOAT)(r*r*r));
|
|
139
|
-
}
|
|
140
|
-
#endif
|
|
141
|
-
#else
|
|
142
|
-
// Force definited as gradient of potential
|
|
143
|
-
// Estimate potential and proximate potential to estimate force
|
|
144
|
-
{
|
|
145
|
-
// 1/1024 seems to be a good factor: larger one provides bad results
|
|
146
|
-
private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
|
|
147
|
-
private MYFLOAT4 er=normalize(n-m);
|
|
148
|
-
private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
|
|
149
|
-
|
|
150
|
-
return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
|
|
151
|
-
}
|
|
152
|
-
#endif
|
|
153
|
-
|
|
154
|
-
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
|
|
155
|
-
{
|
|
156
|
-
private MYFLOAT potential=(MYFLOAT)0.e0f;
|
|
157
|
-
private MYFLOAT4 x=clDataX[gid];
|
|
158
|
-
|
|
159
|
-
for (int i=0;i<get_global_size(0);i++)
|
|
160
|
-
{
|
|
161
|
-
if (gid != i)
|
|
162
|
-
potential+=PairPotential(x,clDataX[i]);
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
166
|
-
return(potential);
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid) // # noqa: E501
|
|
170
|
-
{
|
|
171
|
-
return(PairPotential(clDataX[gid],clCoM[0]));
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
|
|
175
|
-
|
|
176
|
-
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
|
|
177
|
-
{
|
|
178
|
-
private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
|
|
179
|
-
MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
|
|
180
|
-
|
|
181
|
-
a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
|
|
182
|
-
v0=(MYFLOAT4)clDataInV[gid];
|
|
183
|
-
x0=(MYFLOAT4)clDataInX[gid];
|
|
184
|
-
int N = get_global_size(0);
|
|
185
|
-
|
|
186
|
-
for (private int i=0;i<N;i++)
|
|
187
|
-
{
|
|
188
|
-
if (gid != i)
|
|
189
|
-
a0+=Interaction(x0,clDataInX[i]);
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
|
|
193
|
-
v1=a0*dt+v0;
|
|
194
|
-
x1=v0*dt+x0;
|
|
195
|
-
for (private int j=0;j<N;j++)
|
|
196
|
-
{
|
|
197
|
-
if (gid != j)
|
|
198
|
-
a1+=Interaction(x1,clDataInX[j]);
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
|
|
202
|
-
v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
|
|
203
|
-
x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
|
|
204
|
-
for (private int k=0;k<N;k++)
|
|
205
|
-
{
|
|
206
|
-
if (gid != k)
|
|
207
|
-
a2+=Interaction(x2,clDataInX[k]);
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
|
|
211
|
-
v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
|
|
212
|
-
x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
|
|
213
|
-
for (private int l=0;l<N;l++)
|
|
214
|
-
{
|
|
215
|
-
if (gid != l)
|
|
216
|
-
a3+=Interaction(x3,clDataInX[l]);
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
|
|
220
|
-
v4=a3*dt+v0;
|
|
221
|
-
x4=v3*dt+x0;
|
|
222
|
-
for (private int m=0;m<N;m++)
|
|
223
|
-
{
|
|
224
|
-
if (gid != m)
|
|
225
|
-
a4+=Interaction(x4,clDataInX[m]);
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
|
|
229
|
-
vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
|
|
230
|
-
|
|
231
|
-
return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
|
|
235
|
-
{
|
|
236
|
-
private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
|
|
237
|
-
MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
|
|
238
|
-
|
|
239
|
-
x0=(MYFLOAT4)clDataInX[gid];
|
|
240
|
-
v0=(MYFLOAT4)clDataInV[gid];
|
|
241
|
-
a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
|
|
242
|
-
|
|
243
|
-
for (private int i=0;i<get_global_size(0);i++)
|
|
244
|
-
{
|
|
245
|
-
if (gid != i)
|
|
246
|
-
a0+=Interaction(x0,clDataInX[i]);
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
|
|
250
|
-
//v1=v0+dt*a0;
|
|
251
|
-
//x1=x0+dt*v0;
|
|
252
|
-
v1=dt*a0+v0;
|
|
253
|
-
x1=dt*v0+x0;
|
|
254
|
-
|
|
255
|
-
for (private int j=0;j<get_global_size(0);j++)
|
|
256
|
-
{
|
|
257
|
-
if (gid != j)
|
|
258
|
-
a1+=Interaction(x1,clDataInX[j]);
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
|
|
262
|
-
xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
|
|
263
|
-
|
|
264
|
-
return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
-
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
|
|
268
|
-
{
|
|
269
|
-
MYFLOAT4 x0,v0,a,xf,vf;
|
|
270
|
-
|
|
271
|
-
x0=(MYFLOAT4)clDataInX[gid];
|
|
272
|
-
v0=(MYFLOAT4)clDataInV[gid];
|
|
273
|
-
a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
|
|
274
|
-
|
|
275
|
-
for (private int i=0;i<get_global_size(0);i++)
|
|
276
|
-
{
|
|
277
|
-
if (gid != i)
|
|
278
|
-
a+=Interaction(x0,clDataInX[i]);
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
vf=v0+dt*a;
|
|
282
|
-
xf=x0+dt*vf;
|
|
283
|
-
|
|
284
|
-
return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
|
|
288
|
-
{
|
|
289
|
-
MYFLOAT4 x0,v0,a,xf,vf;
|
|
290
|
-
|
|
291
|
-
x0=(MYFLOAT4)clDataInX[gid];
|
|
292
|
-
v0=(MYFLOAT4)clDataInV[gid];
|
|
293
|
-
a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
|
|
294
|
-
|
|
295
|
-
for (private int i=0;i<get_global_size(0);i++)
|
|
296
|
-
{
|
|
297
|
-
if (gid != i)
|
|
298
|
-
a+=Interaction(x0,clDataInX[i]);
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
vf=v0+dt*a;
|
|
302
|
-
xf=x0+dt*v0;
|
|
303
|
-
|
|
304
|
-
return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX,
|
|
308
|
-
MYFLOAT diameter,uint seed_z,uint seed_w)
|
|
309
|
-
{
|
|
310
|
-
private int gid=get_global_id(0);
|
|
311
|
-
private uint zmwc=seed_z+gid;
|
|
312
|
-
private uint wmwc=seed_w+(gid+1)%2;
|
|
313
|
-
private MYFLOAT Heat;
|
|
314
|
-
|
|
315
|
-
for (int i=0;i<gid;i++)
|
|
316
|
-
{
|
|
317
|
-
Heat=MWCfp;
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
// More accurate distribution based on spherical coordonates
|
|
321
|
-
// Disactivated because of AMD Oland GPU crash on launch
|
|
322
|
-
// private MYFLOAT Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
|
|
323
|
-
// Radius=MWCfp*diameter/2.e0f;
|
|
324
|
-
// Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
|
|
325
|
-
// Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
|
|
326
|
-
// SinTheta=sin((float)Theta);
|
|
327
|
-
// PosX=cos((float)Phi)*Radius*SinTheta;
|
|
328
|
-
// PosY=sin((float)Phi)*Radius*SinTheta;
|
|
329
|
-
// PosZ=cos((float)Theta)*Radius;
|
|
330
|
-
// clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
|
|
331
|
-
|
|
332
|
-
private MYFLOAT Radius=diameter/2.e0f;
|
|
333
|
-
private MYFLOAT Length=diameter;
|
|
334
|
-
private MYFLOAT4 Position;
|
|
335
|
-
while (Length>Radius) {
|
|
336
|
-
Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f);
|
|
337
|
-
Length=(MYFLOAT)length((MYFLOAT4)Position);
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
clDataX[gid]=Position;
|
|
341
|
-
|
|
342
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
|
|
346
|
-
uint seed_z,uint seed_w)
|
|
347
|
-
{
|
|
348
|
-
int gid=get_global_id(0);
|
|
349
|
-
uint zmwc=seed_z+gid;
|
|
350
|
-
uint wmwc=seed_w-gid;
|
|
351
|
-
private MYFLOAT Heat;
|
|
352
|
-
|
|
353
|
-
for (int i=0;i<gid;i++)
|
|
354
|
-
{
|
|
355
|
-
Heat=MWCfp;
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
|
|
359
|
-
|
|
360
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
|
|
364
|
-
{
|
|
365
|
-
int gid = get_global_id(0);
|
|
366
|
-
MYFLOAT N = (MYFLOAT)get_global_size(0);
|
|
367
|
-
uint zmwc=seed_z+(uint)gid;
|
|
368
|
-
uint wmwc=seed_w-(uint)gid;
|
|
369
|
-
MYFLOAT4 CrossVector,SpeedVector,FromCoM;
|
|
370
|
-
MYFLOAT Heat,ThetaA,PhiA,ThetaB,PhiB,Length,tA,tB,Polar;
|
|
371
|
-
|
|
372
|
-
for (int i=0;i<gid;i++)
|
|
373
|
-
{
|
|
374
|
-
Heat=MWCfp;
|
|
375
|
-
}
|
|
376
|
-
|
|
377
|
-
// cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
|
|
378
|
-
// Implemention on AMD Oland are probably broken in float
|
|
379
|
-
|
|
380
|
-
FromCoM=(MYFLOAT4)(clDataX[gid]-clCoM[0]);
|
|
381
|
-
Length=length(FromCoM);
|
|
382
|
-
//Theta=acos(FromCoM.z/Length);
|
|
383
|
-
//Phi=atan(FromCoM.y/FromCoM.x);
|
|
384
|
-
// First tangential vector to sphere of length radius
|
|
385
|
-
ThetaA=acos(FromCoM.x/Length)+5.e-1f*PI;
|
|
386
|
-
PhiA=atan(FromCoM.y/FromCoM.z);
|
|
387
|
-
// Second tangential vector to sphere of length radius
|
|
388
|
-
ThetaB=acos((float)(FromCoM.x/Length));
|
|
389
|
-
PhiB=atan((float)(FromCoM.y/FromCoM.z))+5.e-1f*PI;
|
|
390
|
-
// (x,y) random coordonates to plane tangential to sphere
|
|
391
|
-
Polar=MWCfp*2.e0f*PI;
|
|
392
|
-
tA=cos((float)Polar);
|
|
393
|
-
tB=sin((float)Polar);
|
|
394
|
-
|
|
395
|
-
// Exception for 2 particules to ovoid shifting
|
|
396
|
-
if (get_global_size(0)==2) {
|
|
397
|
-
CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
|
|
398
|
-
} else {
|
|
399
|
-
CrossVector.s0=tA*cos((float)ThetaA)+tB*cos((float)ThetaB);
|
|
400
|
-
CrossVector.s1=tA*sin((float)ThetaA)*sin((float)PhiA)+tB*sin((float)ThetaB)*sin((float)PhiB);
|
|
401
|
-
CrossVector.s2=tA*sin((float)ThetaA)*cos((float)PhiA)+tB*sin((float)ThetaB)*cos((float)PhiB);
|
|
402
|
-
CrossVector.s3=0.e0f;
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
if (velocity<SMALL_NUM) {
|
|
406
|
-
SpeedVector=(MYFLOAT4)normalize(cross(FromCoM,CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
|
|
407
|
-
}
|
|
408
|
-
else
|
|
409
|
-
{
|
|
410
|
-
|
|
411
|
-
SpeedVector=(MYFLOAT4)((MWCfp-5e-1f)*velocity,(MWCfp-5e-1f)*velocity,
|
|
412
|
-
(MWCfp-5e-1f)*velocity,0.e0f);
|
|
413
|
-
}
|
|
414
|
-
clDataV[gid]=SpeedVector;
|
|
415
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
|
|
419
|
-
{
|
|
420
|
-
private int gid = get_global_id(0);
|
|
421
|
-
private MYFLOAT8 clDataGid;
|
|
422
|
-
|
|
423
|
-
clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
|
|
424
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
425
|
-
clDataX[gid]=clDataGid.s0123;
|
|
426
|
-
clDataV[gid]=clDataGid.s4567;
|
|
427
|
-
}
|
|
428
|
-
|
|
429
|
-
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
|
|
430
|
-
{
|
|
431
|
-
private int gid = get_global_id(0);
|
|
432
|
-
private MYFLOAT8 clDataGid;
|
|
433
|
-
|
|
434
|
-
clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
|
|
435
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
436
|
-
clDataX[gid]=clDataGid.s0123;
|
|
437
|
-
clDataV[gid]=clDataGid.s4567;
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
|
|
441
|
-
{
|
|
442
|
-
private int gid = get_global_id(0);
|
|
443
|
-
private MYFLOAT8 clDataGid;
|
|
444
|
-
|
|
445
|
-
clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
|
|
446
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
447
|
-
clDataX[gid]=clDataGid.s0123;
|
|
448
|
-
clDataV[gid]=clDataGid.s4567;
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
|
|
452
|
-
{
|
|
453
|
-
private int gid = get_global_id(0);
|
|
454
|
-
private MYFLOAT8 clDataGid;
|
|
455
|
-
|
|
456
|
-
clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
|
|
457
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
458
|
-
clDataX[gid]=clDataGid.s0123;
|
|
459
|
-
clDataV[gid]=clDataGid.s4567;
|
|
460
|
-
}
|
|
461
|
-
|
|
462
|
-
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
|
|
463
|
-
{
|
|
464
|
-
int gid = get_global_id(0);
|
|
465
|
-
|
|
466
|
-
clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
|
|
470
|
-
{
|
|
471
|
-
int gid = get_global_id(0);
|
|
472
|
-
|
|
473
|
-
MYFLOAT potential=(MYFLOAT)0.e0f;
|
|
474
|
-
MYFLOAT4 x=clDataX[gid];
|
|
475
|
-
|
|
476
|
-
for (int i=0;i<get_global_size(0);i++)
|
|
477
|
-
{
|
|
478
|
-
if (gid != i)
|
|
479
|
-
potential+=PairPotential(x,clDataX[i]);
|
|
480
|
-
}
|
|
481
|
-
|
|
482
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
483
|
-
clPotential[gid]=potential*(MYFLOAT)5.e-1f;
|
|
484
|
-
}
|
|
485
|
-
|
|
486
|
-
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
|
|
487
|
-
{
|
|
488
|
-
MYFLOAT4 CoM=clDataX[0];
|
|
489
|
-
|
|
490
|
-
for (int i=1;i<Size;i++)
|
|
491
|
-
{
|
|
492
|
-
CoM+=clDataX[i];
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
496
|
-
clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
|
|
497
|
-
}
|
|
498
|
-
|
|
499
|
-
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
|
|
500
|
-
{
|
|
501
|
-
int gid = get_global_id(0);
|
|
502
|
-
|
|
503
|
-
barrier(CLK_GLOBAL_MEM_FENCE);
|
|
504
|
-
MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
|
|
505
|
-
clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
|
|
506
|
-
}
|
|
507
|
-
|
|
508
|
-
"""
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
def MainOpenCL(clDataX, clDataV, Step, Method):
    """Run one integration step and return how long it took.

    The routine named by ``Method`` is looked up on the module-level
    ``MyRoutines`` object and called with the module-level ``queue``, a
    global size of ``(Number, 1)``, no local size, and the two buffers
    plus the step.  The call's result is waited on before the clock is
    read again.

    Args:
        clDataX: positions argument, passed through to the routine.
        clDataV: velocities argument, passed through to the routine.
        Step: step value, passed through to the routine.
        Method: ``"RungeKutta"``, ``"ExplicitEuler"`` or ``"Heun"``; any
            other value selects ``ImplicitEuler``.

    Returns:
        Elapsed wall-clock time in seconds (``time.time()`` difference)
        covering the launch and the wait.
    """
    started_at = time.time()

    # Anything that is not one of the three explicit names falls back to
    # the implicit Euler routine.
    if Method in ("RungeKutta", "ExplicitEuler", "Heun"):
        routine_name = Method
    else:
        routine_name = "ImplicitEuler"

    routine = getattr(MyRoutines, routine_name)
    launch_event = routine(queue, (Number, 1), None, clDataX, clDataV, Step)
    launch_event.wait()

    return time.time() - started_at
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
def display(*args):
    """Advance the simulation and draw the bodies as points.

    Clears the colour buffer, runs the integrator, copies either the
    velocities (when ``SpeedRendering`` is true) or the positions back to
    the host array, sets the 4th column of that array to 1, and draws
    ``Number`` points from it.  Appends one timing sample to
    ``Durations`` and increments ``Iterations`` by one.

    Args:
        *args: accepted and ignored.
    """
    # Only these two module-level names are rebound here; all the others
    # (buffers, Step, Method, Number, Verbose, SpeedRendering) are read.
    global Durations, Iterations

    gl.glClearColor(0.0, 0.0, 0.0, 0.0)
    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
    gl.glColor3f(1.0, 1.0, 1.0)

    # NOTE(review): the integrator is launched here and again further
    # down, so each frame advances two steps and only the second launch
    # is timed -- confirm this is intended.
    MainOpenCL(clDataX, clDataV, Step, Method)

    # Choose which quantity is plotted: velocities or positions.
    if SpeedRendering:
        host_array, device_buffer = MyDataV, clDataV
    else:
        host_array, device_buffer = MyDataX, clDataX

    cl.enqueue_copy(queue, host_array, device_buffer)
    host_array.reshape(Number, 4)[:, 3] = 1
    gl.glVertexPointerf(host_array.reshape(Number, 4))

    if not Verbose:
        # Quiet mode: one dot per frame as a progress indicator.
        sys.stdout.write(".")
        sys.stdout.flush()
    else:
        print("Positions for #%s iteration: %s" % (Iterations, MyDataX))

    step_duration = MainOpenCL(clDataX, clDataV, Step, Method)
    Durations = np.append(Durations, step_duration)

    gl.glEnableClientState(gl.GL_VERTEX_ARRAY)
    gl.glDrawArrays(gl.GL_POINTS, 0, Number)
    gl.glDisableClientState(gl.GL_VERTEX_ARRAY)
    gl.glFlush()

    Iterations += 1
    glut.glutSwapBuffers()
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
def halt():
    """Do nothing and return ``None``."""
    return None
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
def keyboard(k, x, y):
    """Handle an ordinary key press.

    Key bindings:
        ``z`` / ``Z``  add / subtract 1.0 to the global ``ViewRZ``
        ``+`` / ``-``  use a scale factor of 2.0 / 0.5 for this key press
        ``s``          toggle the global ``SpeedRendering`` flag
        Escape (27)    leave the GLUT main loop and return ``False``

    Any other key returns immediately.  After a handled key (other than
    Escape) the current matrix is rotated by ``ViewRZ`` about the z axis,
    scaled by the chosen factor, and a redisplay is requested.

    Args:
        k: the pressed key, compared against ``glut.as_8_bit(...)`` values.
        x: unused.
        y: unused.
    """
    global ViewRZ, SpeedRendering

    rotate_plus_key = glut.as_8_bit("z")
    rotate_minus_key = glut.as_8_bit("Z")
    zoom_in_key = glut.as_8_bit("+")
    zoom_out_key = glut.as_8_bit("-")
    toggle_key = glut.as_8_bit("s")

    # Scale factor applied below; stays at 1 unless a zoom key was hit.
    scale = 1

    if k == rotate_plus_key:
        ViewRZ += 1.0
    elif k == rotate_minus_key:
        ViewRZ -= 1.0
    elif k == zoom_in_key:
        scale = 2.0
    elif k == zoom_out_key:
        scale = 0.5
    elif k == toggle_key:
        SpeedRendering = not SpeedRendering
    elif ord(k) == 27:  # Escape
        glut.glutLeaveMainLoop()
        return False
    else:
        return

    gl.glRotatef(ViewRZ, 0.0, 0.0, 1.0)
    gl.glScalef(scale, scale, scale)
    glut.glutPostRedisplay()
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
def special(k, x, y):
    """Handle an arrow-key press.

    Up / Down add / subtract 1.0 to the global ``ViewRX``; Left / Right
    add / subtract 1.0 to the global ``ViewRY``.  Any other key returns
    immediately.  After a handled key the current matrix is rotated by
    ``ViewRX`` about the x axis and by ``ViewRY`` about the y axis, and a
    redisplay is requested.

    Args:
        k: GLUT special-key code.
        x: unused.
        y: unused.
    """
    global ViewRX, ViewRY

    # Amount added per key press (a local value, unrelated to the
    # simulation's module-level Step).
    increment = 1.0

    if k == glut.GLUT_KEY_UP:
        ViewRX += increment
    elif k == glut.GLUT_KEY_DOWN:
        ViewRX -= increment
    elif k == glut.GLUT_KEY_LEFT:
        ViewRY += increment
    elif k == glut.GLUT_KEY_RIGHT:
        ViewRY -= increment
    else:
        return

    gl.glRotatef(ViewRX, 1.0, 0.0, 0.0)
    gl.glRotatef(ViewRY, 0.0, 1.0, 0.0)
    glut.glutPostRedisplay()
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
def setup_viewport():
    """Reset the projection matrix to an orthographic box and redraw.

    Selects the projection matrix, loads the identity, sets an
    orthographic projection spanning ``-SizeOfBox`` to ``+SizeOfBox`` on
    all three axes (``SizeOfBox`` is read from module scope), and
    requests a redisplay.
    """
    gl.glMatrixMode(gl.GL_PROJECTION)
    gl.glLoadIdentity()

    extent = SizeOfBox
    gl.glOrtho(-extent, extent, -extent, extent, -extent, extent)

    glut.glutPostRedisplay()
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
def reshape(w, h):
    """Resize the GL viewport to ``w`` x ``h`` and rebuild the projection.

    Args:
        w: new viewport width, passed to ``glViewport``.
        h: new viewport height, passed to ``glViewport``.
    """
    # The viewport always starts at the lower-left corner (0, 0).
    origin_x, origin_y = 0, 0
    gl.glViewport(origin_x, origin_y, w, h)
    setup_viewport()
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
if __name__ == "__main__":
|
|
633
|
-
|
|
634
|
-
global Number, Step, clDataX, clDataV, MyDataX, MyDataV, Method, SizeOfBox, \
|
|
635
|
-
Iterations, Verbose, Durations
|
|
636
|
-
|
|
637
|
-
# ValueType
|
|
638
|
-
ValueType = "FP32"
|
|
639
|
-
|
|
640
|
-
class MyFloat(np.float32):
|
|
641
|
-
pass
|
|
642
|
-
|
|
643
|
-
# clType8=cl_array.vec.float8
|
|
644
|
-
# Set defaults values
|
|
645
|
-
np.set_printoptions(precision=2)
|
|
646
|
-
# Id of the OpenCL device to use (the -h listing numbers devices from 0)
|
|
647
|
-
Device = 0
|
|
648
|
-
# Number of bodies is integer
|
|
649
|
-
Number = 2
|
|
650
|
-
# Number of iterations (for standalone execution)
|
|
651
|
-
Iterations = 10
|
|
652
|
-
# Size of shape
|
|
653
|
-
SizeOfShape = MyFloat(1.0)
|
|
654
|
-
# Initial velocity of particles
|
|
655
|
-
Velocity = MyFloat(1.0)
|
|
656
|
-
# Step
|
|
657
|
-
Step = MyFloat(1.0 / 32)
|
|
658
|
-
# Method of integration
|
|
659
|
-
Method = "ImplicitEuler"
|
|
660
|
-
# InitialRandom
|
|
661
|
-
InitialRandom = False
|
|
662
|
-
# RNG Marsaglia Method
|
|
663
|
-
RNG = "MWC"
|
|
664
|
-
# Viriel Distribution of stress
|
|
665
|
-
VirielStress = True
|
|
666
|
-
# Verbose
|
|
667
|
-
Verbose = False
|
|
668
|
-
# OpenGL real time rendering
|
|
669
|
-
OpenGL = False
|
|
670
|
-
# Speed rendering
|
|
671
|
-
SpeedRendering = False
|
|
672
|
-
# Counter ArtEvasions Measures (artefact evasion)
|
|
673
|
-
CoArEv = "None"
|
|
674
|
-
# Shape to distribute
|
|
675
|
-
Shape = "Ball"
|
|
676
|
-
# Type of Interaction
|
|
677
|
-
InterType = "Force"
|
|
678
|
-
|
|
679
|
-
HowToUse = "%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>" # noqa: E501
|
|
680
|
-
|
|
681
|
-
try:
|
|
682
|
-
opts, args = getopt.getopt(
|
|
683
|
-
sys.argv[1:],
|
|
684
|
-
"rpgehod:n:i:z:v:s:m:t:b:x:",
|
|
685
|
-
[
|
|
686
|
-
"random",
|
|
687
|
-
"potential",
|
|
688
|
-
"coarev",
|
|
689
|
-
"opengl",
|
|
690
|
-
"viriel",
|
|
691
|
-
"verbose",
|
|
692
|
-
"device=",
|
|
693
|
-
"number=",
|
|
694
|
-
"iterations=",
|
|
695
|
-
"size=",
|
|
696
|
-
"velocity=",
|
|
697
|
-
"step=",
|
|
698
|
-
"method=",
|
|
699
|
-
"valuetype=",
|
|
700
|
-
"shape=",
|
|
701
|
-
],
|
|
702
|
-
)
|
|
703
|
-
except getopt.GetoptError:
|
|
704
|
-
print(HowToUse % sys.argv[0])
|
|
705
|
-
sys.exit(2)
|
|
706
|
-
|
|
707
|
-
for opt, arg in opts:
|
|
708
|
-
if opt == "-h":
|
|
709
|
-
print(HowToUse % sys.argv[0])
|
|
710
|
-
|
|
711
|
-
print("\nInformations about devices detected under OpenCL:")
|
|
712
|
-
try:
|
|
713
|
-
Id = 0
|
|
714
|
-
for platform in cl.get_platforms():
|
|
715
|
-
for device in platform.get_devices():
|
|
716
|
-
# Failed now because of POCL implementation
|
|
717
|
-
# deviceType=cl.device_type.to_string(device.type)
|
|
718
|
-
deviceType = "xPU"
|
|
719
|
-
print(
|
|
720
|
-
"Device #%i from %s of type %s : %s"
|
|
721
|
-
% (
|
|
722
|
-
Id,
|
|
723
|
-
platform.vendor.lstrip(),
|
|
724
|
-
deviceType,
|
|
725
|
-
device.name.lstrip(),
|
|
726
|
-
)
|
|
727
|
-
)
|
|
728
|
-
Id = Id + 1
|
|
729
|
-
sys.exit()
|
|
730
|
-
except ImportError:
|
|
731
|
-
print("Your platform does not seem to support OpenCL")
|
|
732
|
-
sys.exit()
|
|
733
|
-
|
|
734
|
-
elif opt in ("-t", "--valuetype"):
|
|
735
|
-
if arg == "FP64":
|
|
736
|
-
|
|
737
|
-
class MyFloat(np.float64):
|
|
738
|
-
pass
|
|
739
|
-
|
|
740
|
-
else:
|
|
741
|
-
|
|
742
|
-
class MyFloat(np.float32):
|
|
743
|
-
pass
|
|
744
|
-
|
|
745
|
-
ValueType = arg
|
|
746
|
-
elif opt in ("-d", "--device"):
|
|
747
|
-
Device = int(arg)
|
|
748
|
-
elif opt in ("-m", "--method"):
|
|
749
|
-
Method = arg
|
|
750
|
-
elif opt in ("-b", "--shape"):
|
|
751
|
-
Shape = arg
|
|
752
|
-
if Shape != "Ball" or Shape != "Box":
|
|
753
|
-
print("Wrong argument: set to Ball")
|
|
754
|
-
elif opt in ("-n", "--number"):
|
|
755
|
-
Number = int(arg)
|
|
756
|
-
elif opt in ("-i", "--iterations"):
|
|
757
|
-
Iterations = int(arg)
|
|
758
|
-
elif opt in ("-z", "--size"):
|
|
759
|
-
SizeOfShape = MyFloat(arg)
|
|
760
|
-
elif opt in ("-v", "--velocity"):
|
|
761
|
-
Velocity = MyFloat(arg)
|
|
762
|
-
VirielStress = False
|
|
763
|
-
elif opt in ("-s", "--step"):
|
|
764
|
-
Step = MyFloat(arg)
|
|
765
|
-
elif opt in ("-r", "--random"):
|
|
766
|
-
InitialRandom = True
|
|
767
|
-
elif opt in ("-c", "--check"):
|
|
768
|
-
CheckEnergies = True
|
|
769
|
-
elif opt in ("-e", "--viriel"):
|
|
770
|
-
VirielStress = True
|
|
771
|
-
elif opt in ("-g", "--opengl"):
|
|
772
|
-
OpenGL = True
|
|
773
|
-
elif opt in ("-p", "--potential"):
|
|
774
|
-
InterType = "Potential"
|
|
775
|
-
elif opt in ("-x", "--coarev"):
|
|
776
|
-
CoArEv = arg
|
|
777
|
-
elif opt in ("-o", "--verbose"):
|
|
778
|
-
Verbose = True
|
|
779
|
-
|
|
780
|
-
SizeOfShape = np.sqrt(MyFloat(SizeOfShape * Number))
|
|
781
|
-
Velocity = MyFloat(Velocity)
|
|
782
|
-
Step = MyFloat(Step)
|
|
783
|
-
|
|
784
|
-
print("Device choosed : %s" % Device)
|
|
785
|
-
print("Number of particules : %s" % Number)
|
|
786
|
-
print("Size of Shape : %s" % SizeOfShape)
|
|
787
|
-
print("Initial velocity : %s" % Velocity)
|
|
788
|
-
print("Step of iteration : %s" % Step)
|
|
789
|
-
print("Number of iterations : %s" % Iterations)
|
|
790
|
-
print("Method of resolution : %s" % Method)
|
|
791
|
-
print("Initial Random for RNG Seed : %s" % InitialRandom)
|
|
792
|
-
print("ValueType is : %s" % ValueType)
|
|
793
|
-
print("Viriel distribution of stress : %s" % VirielStress)
|
|
794
|
-
print("OpenGL real time rendering : %s" % OpenGL)
|
|
795
|
-
print("Speed rendering : %s" % SpeedRendering)
|
|
796
|
-
print("Interaction type : %s" % InterType)
|
|
797
|
-
print("Counter Artevasion type : %s" % CoArEv)
|
|
798
|
-
|
|
799
|
-
# Create Numpy array of CL vector with 8 FP32
|
|
800
|
-
MyCoM = np.zeros(4, dtype=MyFloat)
|
|
801
|
-
MyDataX = np.zeros(Number * 4, dtype=MyFloat)
|
|
802
|
-
MyDataV = np.zeros(Number * 4, dtype=MyFloat)
|
|
803
|
-
MyPotential = np.zeros(Number, dtype=MyFloat)
|
|
804
|
-
MyKinetic = np.zeros(Number, dtype=MyFloat)
|
|
805
|
-
|
|
806
|
-
Marsaglia, Computing, Interaction, Artevasion = DictionariesAPI()
|
|
807
|
-
|
|
808
|
-
# Scan the OpenCL arrays
|
|
809
|
-
Id = 0
|
|
810
|
-
HasXPU = False
|
|
811
|
-
for platform in cl.get_platforms():
|
|
812
|
-
for device in platform.get_devices():
|
|
813
|
-
if Id == Device:
|
|
814
|
-
PlatForm = platform
|
|
815
|
-
XPU = device
|
|
816
|
-
print("CPU/GPU selected: ", device.name.lstrip())
|
|
817
|
-
print("Platform selected: ", platform.name)
|
|
818
|
-
HasXPU = True
|
|
819
|
-
Id += 1
|
|
820
|
-
|
|
821
|
-
if not HasXPU:
|
|
822
|
-
print("No XPU #%i found in all of %i devices, sorry..." % (Device, Id - 1))
|
|
823
|
-
sys.exit()
|
|
824
|
-
|
|
825
|
-
# Create Context
|
|
826
|
-
try:
|
|
827
|
-
ctx = cl.Context([XPU])
|
|
828
|
-
queue = cl.CommandQueue(
|
|
829
|
-
ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
|
|
830
|
-
)
|
|
831
|
-
except Exception:
|
|
832
|
-
print("Crash during context creation")
|
|
833
|
-
|
|
834
|
-
# Build all routines used for the computing
|
|
835
|
-
|
|
836
|
-
# BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType]) # noqa: E501
|
|
837
|
-
BuildOptions = "-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % ( # noqa: E501
|
|
838
|
-
Marsaglia[RNG],
|
|
839
|
-
Computing[ValueType],
|
|
840
|
-
Interaction[InterType],
|
|
841
|
-
Artevasion[CoArEv],
|
|
842
|
-
)
|
|
843
|
-
|
|
844
|
-
if (
|
|
845
|
-
"Intel" in PlatForm.name
|
|
846
|
-
or "Experimental" in PlatForm.name
|
|
847
|
-
or "Clover" in PlatForm.name
|
|
848
|
-
or "Portable" in PlatForm.name
|
|
849
|
-
):
|
|
850
|
-
MyRoutines = cl.Program(ctx, BlobOpenCL).build(options=BuildOptions)
|
|
851
|
-
else:
|
|
852
|
-
MyRoutines = cl.Program(ctx, BlobOpenCL).build(
|
|
853
|
-
options=BuildOptions + " -cl-strict-aliasing"
|
|
854
|
-
)
|
|
855
|
-
|
|
856
|
-
mf = cl.mem_flags
|
|
857
|
-
# Read/Write approach for buffering
|
|
858
|
-
clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
|
|
859
|
-
clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
|
|
860
|
-
clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
|
|
861
|
-
clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
|
|
862
|
-
clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
|
|
863
|
-
|
|
864
|
-
# Write/HostPointer approach for buffering
|
|
865
|
-
# clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
|
|
866
|
-
# clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
|
|
867
|
-
# clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential) # noqa: E501
|
|
868
|
-
# clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
|
|
869
|
-
# clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
|
|
870
|
-
|
|
871
|
-
print("All particles superimposed.")
|
|
872
|
-
|
|
873
|
-
# Set particles to RNG points
|
|
874
|
-
if InitialRandom:
|
|
875
|
-
rng = np.random.default_rng()
|
|
876
|
-
seed_w = np.uint32(rng.integers(2 ** 32))
|
|
877
|
-
seed_z = np.uint32(rng.integers(2 ** 32))
|
|
878
|
-
else:
|
|
879
|
-
seed_w = np.uint32(19710211)
|
|
880
|
-
seed_z = np.uint32(20081010)
|
|
881
|
-
|
|
882
|
-
if Shape == "Ball":
|
|
883
|
-
MyRoutines.InBallSplutterPoints(
|
|
884
|
-
queue, (Number, 1), None, clDataX, SizeOfShape, seed_w, seed_z
|
|
885
|
-
)
|
|
886
|
-
else:
|
|
887
|
-
MyRoutines.InBoxSplutterPoints(
|
|
888
|
-
queue, (Number, 1), None, clDataX, SizeOfShape, seed_w, seed_z
|
|
889
|
-
)
|
|
890
|
-
|
|
891
|
-
print("All particules distributed")
|
|
892
|
-
|
|
893
|
-
CLLaunch = MyRoutines.CenterOfMass(
|
|
894
|
-
queue, (1, 1), None, clDataX, clCoM, np.int32(Number)
|
|
895
|
-
)
|
|
896
|
-
CLLaunch.wait()
|
|
897
|
-
cl.enqueue_copy(queue, MyCoM, clCoM)
|
|
898
|
-
print("Center Of Mass estimated: (%s,%s,%s)" % (MyCoM[0], MyCoM[1], MyCoM[2]))
|
|
899
|
-
|
|
900
|
-
if VirielStress:
|
|
901
|
-
CLLaunch = MyRoutines.SplutterStress(
|
|
902
|
-
queue,
|
|
903
|
-
(Number, 1),
|
|
904
|
-
None,
|
|
905
|
-
clDataX,
|
|
906
|
-
clDataV,
|
|
907
|
-
clCoM,
|
|
908
|
-
MyFloat(0.0),
|
|
909
|
-
np.uint32(110271),
|
|
910
|
-
np.uint32(250173),
|
|
911
|
-
)
|
|
912
|
-
else:
|
|
913
|
-
CLLaunch = MyRoutines.SplutterStress(
|
|
914
|
-
queue,
|
|
915
|
-
(Number, 1),
|
|
916
|
-
None,
|
|
917
|
-
clDataX,
|
|
918
|
-
clDataV,
|
|
919
|
-
clCoM,
|
|
920
|
-
Velocity,
|
|
921
|
-
np.uint32(110271),
|
|
922
|
-
np.uint32(250173),
|
|
923
|
-
)
|
|
924
|
-
CLLaunch.wait()
|
|
925
|
-
|
|
926
|
-
print("All particules stressed")
|
|
927
|
-
|
|
928
|
-
CLLaunch = MyRoutines.Potential(queue, (Number, 1), None, clDataX, clPotential)
|
|
929
|
-
CLLaunch = MyRoutines.Kinetic(queue, (Number, 1), None, clDataV, clKinetic)
|
|
930
|
-
CLLaunch.wait()
|
|
931
|
-
cl.enqueue_copy(queue, MyPotential, clPotential)
|
|
932
|
-
cl.enqueue_copy(queue, MyKinetic, clKinetic)
|
|
933
|
-
print(
|
|
934
|
-
"Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n"
|
|
935
|
-
% (
|
|
936
|
-
np.sum(MyPotential) + 2 * np.sum(MyKinetic),
|
|
937
|
-
np.sum(MyPotential),
|
|
938
|
-
np.sum(MyKinetic),
|
|
939
|
-
)
|
|
940
|
-
)
|
|
941
|
-
|
|
942
|
-
if SpeedRendering:
|
|
943
|
-
SizeOfBox = max(2 * MyKinetic)
|
|
944
|
-
else:
|
|
945
|
-
SizeOfBox = SizeOfShape
|
|
946
|
-
|
|
947
|
-
if OpenGL:
|
|
948
|
-
print("\tTiny documentation to interact OpenGL rendering:\n")
|
|
949
|
-
print("\t<Left|Right> Rotate around X axis")
|
|
950
|
-
print("\t <Up|Down> Rotate around Y axis")
|
|
951
|
-
print("\t <z|Z> Rotate around Z axis")
|
|
952
|
-
print("\t <-|+> Unzoom/Zoom")
|
|
953
|
-
print("\t <s> Toggle to display Positions or Velocities")
|
|
954
|
-
print("\t <Esc> Quit\n")
|
|
955
|
-
|
|
956
|
-
wall_time_start = time.time()
|
|
957
|
-
|
|
958
|
-
Durations = np.array([], dtype=MyFloat)
|
|
959
|
-
print("Starting!")
|
|
960
|
-
if OpenGL:
|
|
961
|
-
import OpenGL.GL as gl
|
|
962
|
-
import OpenGL.GLUT as glut
|
|
963
|
-
|
|
964
|
-
global ViewRX, ViewRY, ViewRZ
|
|
965
|
-
Iterations = 0
|
|
966
|
-
ViewRX, ViewRY, ViewRZ = 0.0, 0.0, 0.0
|
|
967
|
-
# Launch OpenGL Loop
|
|
968
|
-
glut.glutInit(sys.argv)
|
|
969
|
-
glut.glutInitDisplayMode(glut.GLUT_DOUBLE | glut.GLUT_RGB)
|
|
970
|
-
glut.glutSetOption(glut.GLUT_ACTION_ON_WINDOW_CLOSE,
|
|
971
|
-
glut.GLUT_ACTION_CONTINUE_EXECUTION)
|
|
972
|
-
glut.glutInitWindowSize(512, 512)
|
|
973
|
-
glut.glutCreateWindow(b"NBodyGL")
|
|
974
|
-
setup_viewport()
|
|
975
|
-
glut.glutReshapeFunc(reshape)
|
|
976
|
-
glut.glutDisplayFunc(display)
|
|
977
|
-
glut.glutIdleFunc(display)
|
|
978
|
-
# glutMouseFunc(mouse)
|
|
979
|
-
glut.glutSpecialFunc(special)
|
|
980
|
-
glut.glutKeyboardFunc(keyboard)
|
|
981
|
-
glut.glutMainLoop()
|
|
982
|
-
else:
|
|
983
|
-
for iteration in range(Iterations):
|
|
984
|
-
Elapsed = MainOpenCL(clDataX, clDataV, Step, Method)
|
|
985
|
-
if Verbose:
|
|
986
|
-
# print("Duration of #%s iteration: %s" % (iteration,Elapsed))
|
|
987
|
-
cl.enqueue_copy(queue, MyDataX, clDataX)
|
|
988
|
-
print("Positions for #%s iteration: %s" % (iteration, MyDataX))
|
|
989
|
-
else:
|
|
990
|
-
sys.stdout.write(".")
|
|
991
|
-
sys.stdout.flush()
|
|
992
|
-
Durations = np.append(Durations, Elapsed)
|
|
993
|
-
|
|
994
|
-
print("\nEnding!")
|
|
995
|
-
|
|
996
|
-
MyRoutines.CenterOfMass(queue, (1, 1), None, clDataX, clCoM, np.int32(Number))
|
|
997
|
-
CLLaunch = MyRoutines.Potential(queue, (Number, 1), None, clDataX, clPotential)
|
|
998
|
-
CLLaunch = MyRoutines.Kinetic(queue, (Number, 1), None, clDataV, clKinetic)
|
|
999
|
-
CLLaunch.wait()
|
|
1000
|
-
cl.enqueue_copy(queue, MyCoM, clCoM)
|
|
1001
|
-
cl.enqueue_copy(queue, MyPotential, clPotential)
|
|
1002
|
-
cl.enqueue_copy(queue, MyKinetic, clKinetic)
|
|
1003
|
-
print("\nCenter Of Mass estimated: (%s,%s,%s)" % (MyCoM[0], MyCoM[1], MyCoM[2]))
|
|
1004
|
-
print(
|
|
1005
|
-
"Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n"
|
|
1006
|
-
% (
|
|
1007
|
-
np.sum(MyPotential) + 2.0 * np.sum(MyKinetic),
|
|
1008
|
-
np.sum(MyPotential),
|
|
1009
|
-
np.sum(MyKinetic),
|
|
1010
|
-
)
|
|
1011
|
-
)
|
|
1012
|
-
|
|
1013
|
-
print(
|
|
1014
|
-
"Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" # noqa: E501
|
|
1015
|
-
% (
|
|
1016
|
-
Device,
|
|
1017
|
-
Iterations,
|
|
1018
|
-
np.mean(Durations),
|
|
1019
|
-
np.median(Durations),
|
|
1020
|
-
np.std(Durations),
|
|
1021
|
-
np.min(Durations),
|
|
1022
|
-
np.max(Durations),
|
|
1023
|
-
np.std(Durations) / np.median(Durations),
|
|
1024
|
-
)
|
|
1025
|
-
)
|
|
1026
|
-
|
|
1027
|
-
# FPS: 1/Elapsed
|
|
1028
|
-
FPS = np.ones(len(Durations))
|
|
1029
|
-
FPS /= Durations
|
|
1030
|
-
|
|
1031
|
-
print(
|
|
1032
|
-
"FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" # noqa: E501
|
|
1033
|
-
% (
|
|
1034
|
-
Device,
|
|
1035
|
-
Iterations,
|
|
1036
|
-
np.mean(FPS),
|
|
1037
|
-
np.median(FPS),
|
|
1038
|
-
np.std(FPS),
|
|
1039
|
-
np.min(FPS),
|
|
1040
|
-
np.max(FPS),
|
|
1041
|
-
)
|
|
1042
|
-
)
|
|
1043
|
-
|
|
1044
|
-
# Contraction of Square*Size*Hertz: Size*Size/Elapsed
|
|
1045
|
-
Squertz = np.ones(len(Durations))
|
|
1046
|
-
Squertz *= Number * Number
|
|
1047
|
-
Squertz /= Durations
|
|
1048
|
-
|
|
1049
|
-
print(
|
|
1050
|
-
"Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" # noqa: E501
|
|
1051
|
-
% (
|
|
1052
|
-
Device,
|
|
1053
|
-
Iterations,
|
|
1054
|
-
np.log10(np.mean(Squertz)),
|
|
1055
|
-
np.mean(Squertz),
|
|
1056
|
-
np.log10(np.median(Squertz)),
|
|
1057
|
-
np.median(Squertz),
|
|
1058
|
-
np.log10(np.std(Squertz)),
|
|
1059
|
-
np.std(Squertz),
|
|
1060
|
-
np.log10(np.min(Squertz)),
|
|
1061
|
-
np.min(Squertz),
|
|
1062
|
-
np.log10(np.max(Squertz)),
|
|
1063
|
-
np.max(Squertz),
|
|
1064
|
-
)
|
|
1065
|
-
)
|
|
1066
|
-
|
|
1067
|
-
clDataX.release()
|
|
1068
|
-
clDataV.release()
|
|
1069
|
-
clKinetic.release()
|
|
1070
|
-
clPotential.release()
|