pyopencl 2024.2.2__cp310-cp310-win_amd64.whl → 2024.2.5__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +16 -4
- pyopencl/_cl.cp310-win_amd64.pyd +0 -0
- pyopencl/algorithm.py +3 -1
- pyopencl/bitonic_sort.py +2 -0
- pyopencl/characterize/__init__.py +23 -0
- pyopencl/compyte/.git +1 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/ndarray/Makefile +31 -0
- pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
- pyopencl/compyte/ndarray/pygpu_language.h +207 -0
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
- pyopencl/tools.py +60 -56
- pyopencl/version.py +9 -3
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/METADATA +105 -105
- pyopencl-2024.2.5.dist-info/RECORD +56 -0
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/WHEEL +1 -1
- pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
- pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
- pyopencl-2024.2.2.data/data/Makefile.in +0 -21
- pyopencl-2024.2.2.data/data/README.rst +0 -70
- pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
- pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
- pyopencl-2024.2.2.data/data/configure.py +0 -6
- pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
- pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
- pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
- pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
- pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
- pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
- pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
- pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
- pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
- pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
- pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
- pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
- pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
- pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
- pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
- pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
- pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
- pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
- pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
- pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
- pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
- pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
- pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
- pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
- pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
- pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
- pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
- pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
- pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
- pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
- pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
- pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
- pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
- pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
- pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
- pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
- pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
- pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
- pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
- pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
- pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
- pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
- pyopencl-2024.2.2.data/data/pytest.ini +0 -3
- pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
- pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
- pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
- pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
- pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
- pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
- pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
- pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
- pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
- pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
- pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
- pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
- pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
- pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
- pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
- pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
- pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
- pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
- pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
- pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
- pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
- pyopencl-2024.2.2.dist-info/LICENSE +0 -282
- pyopencl-2024.2.2.dist-info/RECORD +0 -123
- pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
- {pyopencl-2024.2.2.data/data → pyopencl-2024.2.5.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
import pyopencl as cl
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def make_collector_dtype(device):
    """Build the ``minmax_collector`` struct dtype for *device*.

    The struct has three int32 fields (``cur_min``, ``cur_max``, ``pad``).
    Returns a ``(dtype, c_decl)`` pair: the dtype as returned by
    ``get_or_register_dtype`` and the C declaration string produced by
    ``match_dtype_to_c_struct``.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    struct_name = "minmax_collector"
    fields = [
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ]

    matched_dtype, c_decl = match_dtype_to_c_struct(
        device, struct_name, np.dtype(fields))
    registered_dtype = get_or_register_dtype(struct_name, matched_dtype)

    return registered_dtype, c_decl
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
# Create a context/queue and build the struct dtype matched to the first
# device of that context.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

mmc_dtype, mmc_c_decl = make_collector_dtype(ctx.devices[0])

# Device-side helpers for the reduction.  The reduced data is int32, so the
# scalar is taken as ``int`` (a ``float`` parameter would round values above
# 2**24) and the neutral element spans the full int range via the OpenCL C
# INT_MAX / INT_MIN macros.
preamble = mmc_c_decl + r"""//CL//

    minmax_collector mmc_neutral()
    {
        minmax_collector result;
        result.cur_min = INT_MAX;
        result.cur_max = INT_MIN;
        return result;
    }

    minmax_collector mmc_from_scalar(int x)
    {
        minmax_collector result;
        result.cur_min = x;
        result.cur_max = x;
        return result;
    }

    minmax_collector agg_mmc(minmax_collector a, minmax_collector b)
    {
        minmax_collector result = a;
        if (b.cur_min < result.cur_min)
            result.cur_min = b.cur_min;
        if (b.cur_max > result.cur_max)
            result.cur_max = b.cur_max;
        return result;
    }

    """

from pyopencl.clrandom import rand as clrand


# Random int32 test data on the device, plus a host copy for the reference.
a_gpu = clrand(queue, (20000,), dtype=np.int32, a=0, b=10**6)
a = a_gpu.get()

from pyopencl.reduction import ReductionKernel


red = ReductionKernel(ctx, mmc_dtype,
        neutral="mmc_neutral()",
        reduce_expr="agg_mmc(a, b)", map_expr="mmc_from_scalar(x[i])",
        arguments="__global int *x", preamble=preamble)

minmax = red(a_gpu).get()

# The collector fields are integers, so the comparison is exact.
assert minmax["cur_min"] == np.min(a)
assert minmax["cur_max"] == np.max(a)
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
|
|
5
|
-
import pyopencl as cl
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
# Host-side input vectors.
rng = np.random.default_rng()
a_np = rng.random(50000, dtype=np.float32)
b_np = rng.random(50000, dtype=np.float32)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Device buffers: two read-only inputs initialised from the host arrays and
# one write-only output of the same byte size.
mf = cl.mem_flags
input_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
a_g = cl.Buffer(ctx, input_flags, hostbuf=a_np)
b_g = cl.Buffer(ctx, input_flags, hostbuf=b_np)
res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)

kernel_source = """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
"""
prg = cl.Program(ctx, kernel_source).build()

knl = prg.sum  # Use this Kernel object for repeated calls
knl(queue, a_np.shape, None, a_g, b_g, res_g)

# Copy the device result back into a host array.
res_np = np.empty_like(a_np)
cl.enqueue_copy(queue, res_np, res_g)

# Check on CPU with Numpy:
expected_np = a_np + b_np
error_np = res_np - expected_np
print(f"Error:\n{error_np}")
print(f"Norm: {np.linalg.norm(error_np):.16e}")
assert np.allclose(res_np, expected_np)
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import numpy.linalg as la
|
|
3
|
-
|
|
4
|
-
import pyopencl as cl
|
|
5
|
-
import pyopencl.array as cl_array
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
# Two random float32 host vectors to be added on the device.
rng = np.random.default_rng()
a = rng.random(50000, dtype=np.float32)
b = rng.random(50000, dtype=np.float32)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Upload the inputs and allocate an output array shaped like the first one.
a_dev = cl_array.to_device(queue, a)
b_dev = cl_array.to_device(queue, b)
dest_dev = cl_array.empty_like(a_dev)

sum_source = """
    __kernel void sum(__global const float *a,
    __global const float *b, __global float *c)
    {
      int gid = get_global_id(0);
      c[gid] = a[gid] + b[gid];
    }
    """
prg = cl.Program(ctx, sum_source).build()

knl = prg.sum  # Use this Kernel object for repeated calls
knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data)

# Compare the hand-written kernel against the array-level addition.
reference_dev = a_dev + b_dev
print(la.norm((dest_dev - reference_dev).get()))
assert np.allclose(dest_dev.get(), reference_dev.get())
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
import pyopencl as cl
|
|
4
|
-
import pyopencl.array as cl_array
|
|
5
|
-
from pyopencl.tools import SVMAllocator, SVMPool
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
n = 50000

# Random float32 host inputs.
rng = np.random.default_rng()
a = rng.random(n, dtype=np.float32)
b = rng.random(n, dtype=np.float32)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Wrap an SVM allocator in a pool and use it for the device arrays.
alloc = SVMPool(SVMAllocator(ctx, alignment=0, queue=queue))

a_dev = cl_array.to_device(queue, a, allocator=alloc)
b_dev = cl_array.to_device(queue, b, allocator=alloc)
dest_dev = cl_array.empty_like(a_dev)

sum_source = """
    __kernel void sum(__global const float *a,
    __global const float *b, __global float *c)
    {
      int gid = get_global_id(0);
      c[gid] = a[gid] + b[gid];
    }
    """
prg = cl.Program(ctx, sum_source).build()

knl = prg.sum
knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data)

# Compare the kernel output against the array-level addition.
reference_dev = a_dev + b_dev
print(np.linalg.norm((dest_dev - reference_dev).get()))
assert np.allclose(dest_dev.get(), reference_dev.get())
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
import pyopencl as cl
|
|
4
|
-
import pyopencl.array
|
|
5
|
-
from pyopencl.elementwise import ElementwiseKernel
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
n = 10

# Random float32 host inputs.
rng = np.random.default_rng()
a_np = rng.random(n, dtype=np.float32)
b_np = rng.random(n, dtype=np.float32)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a_g = cl.array.to_device(queue, a_np)
b_g = cl.array.to_device(queue, b_np)

# Elementwise kernel computing res = k1*a + k2*b.
lin_comb = ElementwiseKernel(
    ctx,
    "float k1, float *a_g, float k2, float *b_g, float *res_g",
    "res_g[i] = k1 * a_g[i] + k2 * b_g[i]",
    "lin_comb",
)

res_g = cl.array.empty_like(a_g)
lin_comb(2, a_g, 3, b_g, res_g)

# Check on GPU with PyOpenCL Array:
print((res_g - (2 * a_g + 3 * b_g)).get())

# Check on CPU with Numpy:
res_np = res_g.get()
host_error = res_np - (2 * a_np + 3 * b_np)
print(host_error)
print(np.linalg.norm(host_error))
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import numpy.linalg as la
|
|
3
|
-
|
|
4
|
-
import pyopencl as cl
|
|
5
|
-
import pyopencl.array as cl_array
|
|
6
|
-
from pyopencl.elementwise import ElementwiseKernel
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

n = 10

rng = np.random.default_rng()

# Two random complex vectors built from float32 real and imaginary parts,
# drawn in the order a.real, a.imag, b.real, b.imag.
a_host = (rng.standard_normal(n, dtype=np.float32)
          + 1j*rng.standard_normal(n, dtype=np.float32))
a_gpu = cl_array.to_device(queue, a_host)
b_host = (rng.standard_normal(n, dtype=np.float32)
          + 1j*rng.standard_normal(n, dtype=np.float32))
b_gpu = cl_array.to_device(queue, b_host)

# All three kernels need the complex-number helpers from this header.
complex_preamble = "#include <pyopencl-complex.h>"

complex_prod = ElementwiseKernel(
    ctx,
    "float a, "
    "cfloat_t *x, "
    "cfloat_t *y, "
    "cfloat_t *z",
    "z[i] = cfloat_rmul(a, cfloat_mul(x[i], y[i]))",
    "complex_prod",
    preamble=complex_preamble)

complex_add = ElementwiseKernel(
    ctx,
    "cfloat_t *x, "
    "cfloat_t *y, "
    "cfloat_t *z",
    "z[i] = cfloat_add(x[i], y[i])",
    "complex_add",
    preamble=complex_preamble)

real_part = ElementwiseKernel(
    ctx,
    "cfloat_t *x, float *z",
    "z[i] = cfloat_real(x[i])",
    "real_part",
    preamble=complex_preamble)

# c = 5 * a * b on the device.
c_gpu = cl_array.empty_like(a_gpu)
complex_prod(5, a_gpu, b_gpu, c_gpu)

# Extract the real part on the device and compare with the host-side view.
c_gpu_real = cl_array.empty(queue, len(a_gpu), dtype=np.float32)
real_part(c_gpu, c_gpu_real)
print(c_gpu.get().real - c_gpu_real.get())

product_error = la.norm(c_gpu.get() - (5*a_gpu.get()*b_gpu.get()))
print(product_error)
assert product_error < 1e-5
|
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
# I found this example for PyCuda here:
|
|
2
|
-
# http://wiki.tiker.net/PyCuda/Examples/Mandelbrot
|
|
3
|
-
#
|
|
4
|
-
# An improved sequential/pure Python code was contributed
|
|
5
|
-
# by CRVSADER//KY <crusaderky@gmail.com>.
|
|
6
|
-
#
|
|
7
|
-
# I adapted it for PyOpenCL. Hopefully it is useful to someone.
|
|
8
|
-
# July 2010, HolgerRapp@gmx.net
|
|
9
|
-
#
|
|
10
|
-
# Original readme below these lines.
|
|
11
|
-
|
|
12
|
-
# Mandelbrot calculate using GPU, Serial numpy and faster numpy
|
|
13
|
-
# Use to show the speed difference between CPU and GPU calculations
|
|
14
|
-
# ian@ianozsvald.com March 2010
|
|
15
|
-
|
|
16
|
-
# Based on vegaseat's TKinter/numpy example code from 2006
|
|
17
|
-
# http://www.daniweb.com/code/snippet216851.html#
|
|
18
|
-
# with minor changes to move to numpy from the obsolete Numeric
|
|
19
|
-
|
|
20
|
-
import time
|
|
21
|
-
|
|
22
|
-
import numpy as np
|
|
23
|
-
from PIL import Image
|
|
24
|
-
|
|
25
|
-
import pyopencl as cl
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
# You can choose a calculation routine below (calc_fractal), uncomment
|
|
29
|
-
# one of the three lines to test the three variations
|
|
30
|
-
# Speed notes are listed in the same place
|
|
31
|
-
|
|
32
|
-
# set width and height of window, more pixels take longer to calculate
|
|
33
|
-
w = 2048
|
|
34
|
-
h = 2048
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def calc_fractal_opencl(q, maxiter):
    """Compute Mandelbrot escape iterations for *q* with an OpenCL kernel.

    A new context and queue are created on every call.  The kernel is
    launched with a global size of ``output.shape`` and writes, for each
    point, the iteration at which ``|z|**2`` first exceeded 4 (0 if it never
    did within *maxiter* iterations).  Returns a uint16 array shaped like *q*.

    NOTE(review): the kernel reads ``q`` as ``float2``, so *q* is presumably
    expected to be complex64 -- confirm against callers.
    """
    kernel_source = """
    #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
    __kernel void mandelbrot(__global float2 *q,
                     __global ushort *output, ushort const maxiter)
    {
        int gid = get_global_id(0);
        float nreal, real = 0;
        float imag = 0;

        output[gid] = 0;

        for(int curiter = 0; curiter < maxiter; curiter++) {
            nreal = real*real - imag*imag + q[gid].x;
            imag = 2* real*imag + q[gid].y;
            real = nreal;

            if (real*real + imag*imag > 4.0f) {
                 output[gid] = curiter;
                 break;
            }
        }
    }
    """

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    output = np.empty(q.shape, dtype=np.uint16)

    flags = cl.mem_flags
    q_buffer = cl.Buffer(ctx, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=q)
    output_buffer = cl.Buffer(ctx, flags.WRITE_ONLY, output.nbytes)

    program = cl.Program(ctx, kernel_source).build()
    program.mandelbrot(
        queue, output.shape, None, q_buffer, output_buffer, np.uint16(maxiter)
    )

    # Wait for the device-to-host copy before handing the array back.
    copy_event = cl.enqueue_copy(queue, output, output_buffer)
    copy_event.wait()

    return output
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def calc_fractal_serial(q, maxiter):
    """Compute Mandelbrot escape iterations one point at a time.

    Pure-Python loop over a 1-D array of complex points *q*.  For every point
    the recurrence ``z = z*z + c`` is iterated at most *maxiter* times; the
    index of the first iteration with ``abs(z) > 2.0`` is stored.  Points that
    never escape keep the value 0.

    Unlike the other implementations, the number of iterations per point is
    NOT constant: the inner loop stops as soon as a point escapes.

    Returns an integer array with the same shape as *q*.
    """
    z = np.zeros(q.shape, complex)
    output = np.zeros(q.shape, dtype=int)
    for i, c in enumerate(q):
        for step in range(maxiter):
            z[i] = z[i] * z[i] + c
            if abs(z[i]) > 2.0:
                output[i] = step
                break
    return output
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def calc_fractal_numpy(q, maxiter):
    """Compute Mandelbrot escape iterations with vectorised NumPy operations.

    This is the original routine from vegaseat's URL.  Every one of the
    *maxiter* iterations updates all points at once.  A point whose
    ``abs(z)`` exceeds 2.0 records the current iteration index and has both
    its ``z`` and its ``q`` entry reset to zero, so it cannot trigger again.
    Points that never escape keep the value 0.

    The caller's *q* is not modified; the name is only rebound locally.
    Returns an integer array with the same shape as *q*.
    """
    output = np.zeros(q.shape, dtype=int)
    z = np.zeros(q.shape, np.complex64)

    for it in range(maxiter):
        z = z * z + q
        done = np.abs(z) > 2.0
        # Zero out escaped points so later iterations leave them alone.
        q = np.where(done, 0 + 0j, q)
        z = np.where(done, 0 + 0j, z)
        output = np.where(done, it, output)
    return output
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
# choose your calculation routine here by uncommenting one of the options
|
|
124
|
-
calc_fractal = calc_fractal_opencl
|
|
125
|
-
# calc_fractal = calc_fractal_serial
|
|
126
|
-
# calc_fractal = calc_fractal_numpy
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
class Mandelbrot:
    """Render the Mandelbrot set to a PIL image, optionally shown in Tk.

    Uses the module-level ``w``, ``h`` and ``calc_fractal`` settings.
    ``create_label`` and ``run_tk`` additionally need ``tk`` and ``ImageTk``
    to be available at module scope (imported in the ``__main__`` guard).
    """

    def draw(self, x1, x2, y1, y2, maxiter=30):
        """Compute the fractal on a ``w`` x ``h`` grid and store it.

        The grid covers ``[x1, x2)`` horizontally and runs from ``y2`` down
        towards ``y1`` vertically.  The iteration counts returned by
        ``calc_fractal`` are scaled to 0..255 and stored as a uint8 array of
        shape ``(h, w)`` in ``self.mandel``.  The wall-clock time of the
        calculation is printed.
        """
        # linspace guarantees exactly w / h samples; arange with a float step
        # may produce one extra element and break the reshape below.
        xx = np.linspace(x1, x2, w, endpoint=False)
        yy = np.linspace(y2, y1, h, endpoint=False) * 1j
        q = np.ravel(xx + yy[:, np.newaxis]).astype(np.complex64)

        start_main = time.perf_counter()
        output = calc_fractal(q, maxiter)
        end_main = time.perf_counter()

        secs = end_main - start_main
        print("Main took", secs)

        # Avoid dividing by zero when no point escaped (all counts are 0).
        peak = float(output.max()) or 1.0
        self.mandel = (output.reshape((h, w)) / peak * 255.0).astype(np.uint8)

    def create_image(self):
        """Create the image from the array computed by draw()."""
        # you can experiment with these x and y ranges
        self.draw(-2.13, 0.77, -1.3, 1.3)
        self.im = Image.fromarray(self.mandel)
        # Red ramp with one entry per possible uint8 pixel value (0..255), so
        # the brightest pixels also have a palette colour.
        self.im.putpalette([i for rgb in ((j, 0, 0) for j in range(256))
                            for i in rgb])

    def create_label(self):
        """Put the image on a Tk label widget attached to ``self.root``."""
        self.image = ImageTk.PhotoImage(self.im)
        self.label = tk.Label(self.root, image=self.image)
        self.label.pack()

    def run_tk(self):
        """Open a Tk window, render the image into it and run the event loop."""
        self.root = tk.Tk()
        self.root.title("Mandelbrot Set")
        self.create_image()
        self.create_label()
        # start event loop
        self.root.mainloop()
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
if __name__ == "__main__":
    # Prefer an interactive Tk window; fall back to only computing the image
    # when tkinter is missing or Tk raises TclError.
    test = Mandelbrot()
    try:
        import tkinter as tk
    except ModuleNotFoundError:
        tk = None

    if tk is None:
        test.create_image()
    else:
        from PIL import ImageTk
        try:
            test.run_tk()
        except tk.TclError:
            test.create_image()
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import numpy.linalg as la
|
|
3
|
-
from cgen import (
|
|
4
|
-
POD, Assign, Block, Const, FunctionBody, FunctionDeclaration, Initializer,
|
|
5
|
-
Module, Pointer, Value)
|
|
6
|
-
from cgen.opencl import CLGlobal, CLKernel, CLRequiredWorkGroupSize
|
|
7
|
-
|
|
8
|
-
import pyopencl as cl
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
# Problem layout: each work-group of ``local_size`` work-items handles
# ``thread_strides`` consecutive chunks, and there are ``macroblock_count``
# work-groups in total.
local_size = 256
thread_strides = 32
macroblock_count = 33
dtype = np.float32
total_size = local_size*thread_strides*macroblock_count

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

rng = np.random.default_rng()
a = rng.standard_normal(total_size, dtype=dtype)
b = rng.standard_normal(total_size, dtype=dtype)

mf = cl.mem_flags
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)

# Kernel signature: void add(tgt, op1, op2) with a required work-group size.
kernel_decl = CLKernel(CLRequiredWorkGroupSize(
    (local_size,),
    FunctionDeclaration(
        Value("void", "add"),
        arg_decls=[CLGlobal(Pointer(Const(POD(dtype, name))))
                   for name in ["tgt", "op1", "op2"]])))

# Kernel body: compute the base index, then one unrolled add per stride.
index_init = Initializer(
    POD(np.int32, "idx"),
    "get_local_id(0) + %d * get_group_id(0)" % (local_size*thread_strides))
unrolled_adds = [
    Assign(
        "tgt[idx+%d]" % (o*local_size),
        "op1[idx+%d] + op2[idx+%d]" % (o*local_size, o*local_size))
    for o in range(thread_strides)]

mod = Module([FunctionBody(kernel_decl, Block([index_init] + unrolled_adds))])

knl = cl.Program(ctx, str(mod)).build().add

knl(queue, (local_size*macroblock_count,), (local_size,),
    c_buf, a_buf, b_buf)

c = np.empty_like(a)
cl.enqueue_copy(queue, c, c_buf).wait()

assert la.norm(c-(a+b)) == 0
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import numpy.linalg as la
|
|
3
|
-
from mako.template import Template
|
|
4
|
-
|
|
5
|
-
import pyopencl as cl
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
# Problem layout: each work-group of ``local_size`` work-items handles
# ``thread_strides`` consecutive chunks, and there are ``macroblock_count``
# work-groups in total.
local_size = 256
thread_strides = 32
macroblock_count = 33
dtype = np.float32
total_size = local_size*thread_strides*macroblock_count

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

rng = np.random.default_rng()
a = rng.standard_normal(total_size, dtype=dtype)
b = rng.standard_normal(total_size, dtype=dtype)

mf = cl.mem_flags
input_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
a_buf = cl.Buffer(ctx, input_flags, hostbuf=a)
b_buf = cl.Buffer(ctx, input_flags, hostbuf=b)
c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)

# Mako template that unrolls one addition per stride into the kernel body.
tpl = Template("""
    __kernel void add(
            __global ${ type_name } *tgt,
            __global const ${ type_name } *op1,
            __global const ${ type_name } *op2)
    {
      int idx = get_local_id(0)
        + ${ local_size } * ${ thread_strides }
        * get_group_id(0);

      % for i in range(thread_strides):
          <% offset = i*local_size %>
          tgt[idx + ${ offset }] =
            op1[idx + ${ offset }]
            + op2[idx + ${ offset } ];
      % endfor
    }""")

rendered_tpl = tpl.render(
    type_name="float",
    local_size=local_size,
    thread_strides=thread_strides)

knl = cl.Program(ctx, str(rendered_tpl)).build().add

global_size = (local_size*macroblock_count,)
knl(queue, global_size, (local_size,), c_buf, a_buf, b_buf)

c = np.empty_like(a)
cl.enqueue_copy(queue, c, c_buf).wait()

assert la.norm(c-(a+b)) == 0
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import pyopencl as cl
|
|
2
|
-
import pyopencl.characterize.performance as perf
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def main():
    """Print basic performance characteristics of an OpenCL context.

    Reports the command latency and profiling overhead, the empty-kernel
    time and the float32 add rate, followed by the latency and the bandwidth
    (for transfer sizes 2**6 .. 2**30 bytes, every second power of two) of
    each transfer type in ``pyopencl.characterize.performance``.
    """
    ctx = cl.create_some_context()

    prof_overhead, latency = perf.get_profiling_overhead(ctx)
    print(f"command latency: {latency:g} s")
    print(f"profiling overhead: {prof_overhead:g} s -> "
          f"{100*prof_overhead/latency:.1f} %")
    queue = cl.CommandQueue(
            ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

    print(f"empty kernel: {perf.get_empty_kernel_time(queue):g} s")
    print(f"float32 add: {perf.get_add_rate(queue)/1e9:g} GOps/s")

    for tx_type in [
            perf.HostToDeviceTransfer,
            perf.DeviceToHostTransfer,
            perf.DeviceToDeviceTransfer]:
        print("----------------------------------------")
        print(tx_type.__name__)
        print("----------------------------------------")

        print(f"latency: {perf.transfer_latency(queue, tx_type):g} s")
        for i in range(6, 31, 2):
            bs = 1 << i
            # Best effort: a failing size is reported by exception name and
            # the remaining sizes are still measured.
            try:
                bandwidth = perf.transfer_bandwidth(queue, tx_type, bs)
                result = f"{bandwidth/1e9:g} GB/s"
            except Exception as e:
                result = f"exception: {type(e).__name__}"
            print(f"bandwidth @ {bs} bytes: {result}")
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
if __name__ == "__main__":
|
|
38
|
-
main()
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
from optparse import OptionParser
|
|
2
|
-
|
|
3
|
-
import pyopencl as cl
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
parser = OptionParser()
|
|
7
|
-
parser.add_option("-s", "--short", action="store_true",
|
|
8
|
-
help="don't print all device properties")
|
|
9
|
-
|
|
10
|
-
(options, args) = parser.parse_args()
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def print_info(obj, info_cls):
    """Print ``name: value`` for every public constant of *info_cls*.

    Each public attribute of *info_cls* (except ``to_string``) is passed to
    ``obj.get_info``; a failing query is shown as ``<error>``.  The
    ``PARTITION_TYPES_EXT`` device property is printed as a list of
    partition-property names when its value is a list.
    """
    public_names = [
        attr for attr in sorted(dir(info_cls))
        if not attr.startswith("_") and attr != "to_string"]

    for info_name in public_names:
        info = getattr(info_cls, info_name)
        try:
            info_value = obj.get_info(info)
        except Exception:
            info_value = "<error>"

        is_partition_list = (
            info_cls == cl.device_info
            and info_name == "PARTITION_TYPES_EXT"
            and isinstance(info_value, list))
        if is_partition_list:
            partition_names = [
                cl.device_partition_property_ext.to_string(
                    v, "<unknown device partition property %d>")
                for v in info_value]
            print("{}: {}".format(info_name, partition_names))
            continue

        # Formatting the value itself may fail; report that instead.
        try:
            print(f"{info_name}: {info_value}")
        except Exception:
            print("%s: <error>" % info_name)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def str_chd_type(chdtype):
    """Return a compact label for an image channel data type."""
    result = cl.channel_type.to_string(chdtype,
            "<unknown channel data type %d>")

    # Order matters: UNSIGNED must be shortened before SIGNED, which is a
    # substring of it.
    for old, new in [
            ("_INT", ""),
            ("UNSIGNED", "U"),
            ("SIGNED", "S"),
            ("NORM", "N"),
            ("FLOAT", "F")]:
        result = result.replace(old, new)
    return result


# Walk every platform and device.  Unless --short was given, also dump all
# info properties and the supported read-only 2D/3D image formats.
for platform in cl.get_platforms():
    print(75*"=")
    print(platform)
    print(75*"=")
    if not options.short:
        print_info(platform, cl.platform_info)

    for device in platform.get_devices():
        if not options.short:
            print(75*"-")
        print(device)
        if not options.short:
            print(75*"-")
            print_info(device, cl.device_info)
            ctx = cl.Context([device])
            for mf in [
                    cl.mem_flags.READ_ONLY,
                    #cl.mem_flags.READ_WRITE,
                    #cl.mem_flags.WRITE_ONLY
                    ]:
                for itype in [
                        cl.mem_object_type.IMAGE2D,
                        cl.mem_object_type.IMAGE3D
                        ]:
                    # Best effort: a failing query is reported as "<error>".
                    try:
                        image_formats = cl.get_supported_image_formats(
                            ctx, mf, itype)
                    except Exception:
                        formats = "<error>"
                    else:
                        formats = ", ".join(
                            "{}-{}".format(
                                cl.channel_order.to_string(iform.channel_order,
                                    "<unknown channel order 0x%x>"),
                                str_chd_type(iform.channel_data_type))
                            for iform in image_formats)

                    print("{} {} FORMATS: {}\n".format(
                            cl.mem_object_type.to_string(itype),
                            cl.mem_flags.to_string(mf),
                            formats))
            del ctx
|