pyopencl 2025.2.5__cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (47) hide show
  1. pyopencl/.libs/libOpenCL-83a5a7fd.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +1995 -0
  3. pyopencl/_cl.cpython-313-x86_64-linux-gnu.so +0 -0
  4. pyopencl/_cl.pyi +2006 -0
  5. pyopencl/_cluda.py +57 -0
  6. pyopencl/_monkeypatch.py +1069 -0
  7. pyopencl/_mymako.py +17 -0
  8. pyopencl/algorithm.py +1454 -0
  9. pyopencl/array.py +3441 -0
  10. pyopencl/bitonic_sort.py +245 -0
  11. pyopencl/bitonic_sort_templates.py +597 -0
  12. pyopencl/cache.py +535 -0
  13. pyopencl/capture_call.py +200 -0
  14. pyopencl/characterize/__init__.py +463 -0
  15. pyopencl/characterize/performance.py +240 -0
  16. pyopencl/cl/pyopencl-airy.cl +324 -0
  17. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  18. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  19. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  20. pyopencl/cl/pyopencl-complex.h +303 -0
  21. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  22. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  23. pyopencl/cl/pyopencl-random123/array.h +325 -0
  24. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  25. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  26. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  27. pyopencl/clmath.py +282 -0
  28. pyopencl/clrandom.py +412 -0
  29. pyopencl/cltypes.py +202 -0
  30. pyopencl/compyte/.gitignore +21 -0
  31. pyopencl/compyte/__init__.py +0 -0
  32. pyopencl/compyte/array.py +241 -0
  33. pyopencl/compyte/dtypes.py +316 -0
  34. pyopencl/compyte/pyproject.toml +52 -0
  35. pyopencl/elementwise.py +1178 -0
  36. pyopencl/invoker.py +417 -0
  37. pyopencl/ipython_ext.py +70 -0
  38. pyopencl/py.typed +0 -0
  39. pyopencl/reduction.py +815 -0
  40. pyopencl/scan.py +1916 -0
  41. pyopencl/tools.py +1565 -0
  42. pyopencl/typing.py +61 -0
  43. pyopencl/version.py +11 -0
  44. pyopencl-2025.2.5.dist-info/METADATA +109 -0
  45. pyopencl-2025.2.5.dist-info/RECORD +47 -0
  46. pyopencl-2025.2.5.dist-info/WHEEL +6 -0
  47. pyopencl-2025.2.5.dist-info/licenses/LICENSE +104 -0
@@ -0,0 +1,303 @@
1
+ /*
2
+ * Copyright (c) 1999
3
+ * Silicon Graphics Computer Systems, Inc.
4
+ *
5
+ * Copyright (c) 1999
6
+ * Boris Fomitchev
7
+ *
8
+ * Copyright (c) 2012
9
+ * Andreas Kloeckner
10
+ *
11
+ * This material is provided "as is", with absolutely no warranty expressed
12
+ * or implied. Any use is at your own risk.
13
+ *
14
+ * Permission to use or copy this software for any purpose is hereby granted
15
+ * without fee, provided the above notices are retained on all copies.
16
+ * Permission to modify the code and to distribute modified code is granted,
17
+ * provided the above notices are retained, and a notice that the code was
18
+ * modified is included with the above copyright notice.
19
+ *
20
+ */
21
+
22
+ // This file is available for inclusion in pyopencl kernels and provides
23
+ // complex types 'cfloat_t' and 'cdouble_t', along with a number of special
24
+ // functions as visible below, e.g. cdouble_log(z).
25
+ //
26
+ // Under the hood, each complex type is a union of two structs, so that the
27
+ // components are accessible as (x, y) or, equivalently, as (real, imag).
28
+ // Native (operator-based) arithmetic such as a + b or a*b is not defined
29
+ // for these types; use the functions declared below instead.
30
+
31
+ #pragma once
32
+
33
+ #define PYOPENCL_DECLARE_COMPLEX_TYPE_INT(REAL_TP, REAL_3LTR, TPROOT, TP) \
34
+ /* Generates the TPROOT##_xxx inline functions for complex type TP with components of type REAL_TP; REAL_3LTR##_MAX (e.g. FLT_MAX) is used by _tan and _tanh. Only block comments are safe inside this macro. */ \
35
+ inline REAL_TP TPROOT##_real(TP a) { return a.real; } \
36
+ inline REAL_TP TPROOT##_imag(TP a) { return a.imag; } \
37
+ inline REAL_TP TPROOT##_abs(TP a) { return hypot(a.real, a.imag); } \
38
+ inline REAL_TP TPROOT##_abs_squared(TP a) { return a.real * a.real + a.imag * a.imag; } \
39
+ /* Constructor from a real and an imaginary part. */ \
40
+ inline TP TPROOT##_new(REAL_TP real, REAL_TP imag) \
41
+ { \
42
+ TP result; \
43
+ result.real = real; \
44
+ result.imag = imag; \
45
+ return result; \
46
+ } \
47
+ /* Real number as a complex value (imaginary part set to 0). */ \
48
+ inline TP TPROOT##_fromreal(REAL_TP real) \
49
+ { \
50
+ TP result; \
51
+ result.real = real; \
52
+ result.imag = 0; \
53
+ return result; \
54
+ } \
55
+ /* Negation and complex conjugate. */ \
56
+ \
57
+ inline TP TPROOT##_neg(TP a) { return TPROOT##_new(-a.real, -a.imag); } \
58
+ inline TP TPROOT##_conj(TP a) { return TPROOT##_new(a.real, -a.imag); } \
59
+ /* Addition: complex + complex (_add), complex + real (_addr), real + complex (_radd). */ \
60
+ inline TP TPROOT##_add(TP a, TP b) \
61
+ { \
62
+ return TPROOT##_new(a.real + b.real, a.imag + b.imag); \
63
+ ; \
64
+ } \
65
+ inline TP TPROOT##_addr(TP a, REAL_TP b) \
66
+ { \
67
+ return TPROOT##_new(b+a.real, a.imag); \
68
+ } \
69
+ inline TP TPROOT##_radd(REAL_TP a, TP b) \
70
+ { \
71
+ return TPROOT##_new(a+b.real, b.imag); \
72
+ } \
73
+ /* Subtraction: complex - complex. */ \
74
+ inline TP TPROOT##_sub(TP a, TP b) \
75
+ { \
76
+ return TPROOT##_new(a.real - b.real, a.imag - b.imag); \
77
+ ; \
78
+ } \
79
+ /* Complex multiply-add: returns a times b plus c, built on the real fma() built-in. */ \
80
+ inline TP TPROOT##_fma(TP a, TP b, TP c) \
81
+ { \
82
+ return TPROOT##_new( \
83
+ fma(a.real, b.real, c.real) - a.imag*b.imag, \
84
+ fma(a.imag, b.real, fma(a.real, b.imag, c.imag))); \
85
+ } \
86
+ /* Multiplication: complex times complex. */ \
87
+ inline TP TPROOT##_mul(TP a, TP b) \
88
+ { \
89
+ return TPROOT##_new( \
90
+ a.real*b.real - a.imag*b.imag, \
91
+ a.real*b.imag + a.imag*b.real); \
92
+ } \
93
+ /* Multiplication: complex times real. */ \
94
+ inline TP TPROOT##_mulr(TP a, REAL_TP b) \
95
+ { \
96
+ return TPROOT##_new(a.real*b, a.imag*b); \
97
+ } \
98
+ /* Multiplication: real times complex. */ \
99
+ inline TP TPROOT##_rmul(REAL_TP a, TP b) \
100
+ { \
101
+ return TPROOT##_new(a*b.real, a*b.imag); \
102
+ } \
103
+ /* Division: real by complex. The branch picks the larger-magnitude component of z2 as divisor, so |ratio| is at most 1. */ \
104
+ inline TP TPROOT##_rdivide(REAL_TP z1, TP z2) \
105
+ { \
106
+ if (fabs(z2.real) <= fabs(z2.imag)) { \
107
+ REAL_TP ratio = z2.real / z2.imag; \
108
+ REAL_TP denom = z2.imag * (1 + ratio * ratio); \
109
+ return TPROOT##_new((z1 * ratio) / denom, - z1 / denom); \
110
+ } \
111
+ else { \
112
+ REAL_TP ratio = z2.imag / z2.real; \
113
+ REAL_TP denom = z2.real * (1 + ratio * ratio); \
114
+ return TPROOT##_new(z1 / denom, - (z1 * ratio) / denom); \
115
+ } \
116
+ } \
117
+ /* Division: complex by complex, with the same choice of the larger-magnitude component of z2 as in _rdivide. */ \
118
+ inline TP TPROOT##_divide(TP z1, TP z2) \
119
+ { \
120
+ REAL_TP ratio, denom, a, b, c, d; \
121
+ \
122
+ if (fabs(z2.real) <= fabs(z2.imag)) { \
123
+ ratio = z2.real / z2.imag; \
124
+ denom = z2.imag; \
125
+ a = z1.imag; \
126
+ b = z1.real; \
127
+ c = -z1.real; \
128
+ d = z1.imag; \
129
+ } \
130
+ else { \
131
+ ratio = z2.imag / z2.real; \
132
+ denom = z2.real; \
133
+ a = z1.real; \
134
+ b = z1.imag; \
135
+ c = z1.imag; \
136
+ d = -z1.real; \
137
+ } \
138
+ denom *= (1 + ratio * ratio); \
139
+ return TPROOT##_new( \
140
+ (a + b * ratio) / denom, \
141
+ (c + d * ratio) / denom); \
142
+ } \
143
+ /* Division: complex by real. */ \
144
+ inline TP TPROOT##_divider(TP a, REAL_TP b) \
145
+ { \
146
+ return TPROOT##_new(a.real/b, a.imag/b); \
147
+ } \
148
+ /* Power with complex base and complex exponent, evaluated as exp(b log(a)) in polar form. */ \
149
+ inline TP TPROOT##_pow(TP a, TP b) \
150
+ { \
151
+ REAL_TP logr = log(hypot(a.real, a.imag)); \
152
+ REAL_TP logi = atan2(a.imag, a.real); \
153
+ REAL_TP x = exp(logr * b.real - logi * b.imag); \
154
+ REAL_TP y = logr * b.imag + logi * b.real; \
155
+ /* The sincos() built-in returns the sine and stores the cosine through its pointer argument. */ \
156
+ REAL_TP cosy; \
157
+ REAL_TP siny = sincos(y, &cosy); \
158
+ return TPROOT##_new(x*cosy, x*siny); \
159
+ } \
160
+ /* Power with complex base and real exponent. */ \
161
+ inline TP TPROOT##_powr(TP a, REAL_TP b) \
162
+ { \
163
+ REAL_TP logr = log(hypot(a.real, a.imag)); \
164
+ REAL_TP logi = atan2(a.imag, a.real); \
165
+ REAL_TP x = exp(logr * b); \
166
+ REAL_TP y = logi * b; \
167
+ \
168
+ REAL_TP cosy; \
169
+ REAL_TP siny = sincos(y, &cosy); \
170
+ \
171
+ return TPROOT##_new(x * cosy, x*siny); \
172
+ } \
173
+ /* Power with real base and complex exponent; log() is applied to the real base directly. */ \
174
+ inline TP TPROOT##_rpow(REAL_TP a, TP b) \
175
+ { \
176
+ REAL_TP logr = log(a); \
177
+ REAL_TP x = exp(logr * b.real); \
178
+ REAL_TP y = logr * b.imag; \
179
+ \
180
+ REAL_TP cosy; \
181
+ REAL_TP siny = sincos(y, &cosy); \
182
+ return TPROOT##_new(x * cosy, x * siny); \
183
+ } \
184
+ /* Square root. The branch on the sign of re means mag and |re| are always added, never subtracted. */ \
185
+ inline TP TPROOT##_sqrt(TP a) \
186
+ { \
187
+ REAL_TP re = a.real; \
188
+ REAL_TP im = a.imag; \
189
+ REAL_TP mag = hypot(re, im); \
190
+ TP result; \
191
+ \
192
+ if (mag == 0.f) { \
193
+ result.real = result.imag = 0.f; \
194
+ } else if (re > 0.f) { \
195
+ result.real = sqrt(0.5f * (mag + re)); \
196
+ result.imag = im/result.real/2.f; \
197
+ } else { \
198
+ result.imag = sqrt(0.5f * (mag - re)); \
199
+ if (im < 0.f) \
200
+ result.imag = - result.imag; \
201
+ result.real = im/result.imag/2.f; \
202
+ } \
203
+ return result; \
204
+ } \
205
+ /* Exponential: exp(a.real) times (cos(a.imag) + i sin(a.imag)). */ \
206
+ inline TP TPROOT##_exp(TP a) \
207
+ { \
208
+ REAL_TP expr = exp(a.real); \
209
+ REAL_TP cosi; \
210
+ REAL_TP sini = sincos(a.imag, &cosi); \
211
+ return TPROOT##_new(expr * cosi, expr * sini); \
212
+ } \
213
+ /* Logarithm: real part log|a|, imaginary part atan2(a.imag, a.real). */ \
214
+ inline TP TPROOT##_log(TP a) \
215
+ { return TPROOT##_new(log(hypot(a.real, a.imag)), atan2(a.imag, a.real)); } \
216
+ /* Sine: sin(re)cosh(im) + i cos(re)sinh(im). */ \
217
+ inline TP TPROOT##_sin(TP a) \
218
+ { \
219
+ REAL_TP cosr; \
220
+ REAL_TP sinr = sincos(a.real, &cosr); \
221
+ return TPROOT##_new(sinr*cosh(a.imag), cosr*sinh(a.imag)); \
222
+ } \
223
+ /* Cosine: cos(re)cosh(im) - i sin(re)sinh(im). */ \
224
+ inline TP TPROOT##_cos(TP a) \
225
+ { \
226
+ REAL_TP cosr; \
227
+ REAL_TP sinr = sincos(a.real, &cosr); \
228
+ return TPROOT##_new(cosr*cosh(a.imag), -sinr*sinh(a.imag)); \
229
+ } \
230
+ /* Tangent: (sin(2re) + i sinh(2im)) divided by (cos(2re) + cosh(2im)). */ \
231
+ inline TP TPROOT##_tan(TP a) \
232
+ { \
233
+ REAL_TP re2 = 2.f * a.real; \
234
+ REAL_TP im2 = 2.f * a.imag; \
235
+ \
236
+ const REAL_TP limit = log(REAL_3LTR##_MAX); \
237
+ /* For |im2| above the limit, return 0 +/- i directly (presumably to avoid overflow in cosh/sinh). */ \
238
+ if (fabs(im2) > limit) \
239
+ return TPROOT##_new(0.f, (im2 > 0 ? 1.f : -1.f)); \
240
+ else \
241
+ { \
242
+ REAL_TP den = cos(re2) + cosh(im2); \
243
+ return TPROOT##_new(sin(re2) / den, sinh(im2) / den); \
244
+ } \
245
+ } \
246
+ /* Hyperbolic sine: sinh(re)cos(im) + i cosh(re)sin(im). */ \
247
+ inline TP TPROOT##_sinh(TP a) \
248
+ { \
249
+ REAL_TP cosi; \
250
+ REAL_TP sini = sincos(a.imag, &cosi); \
251
+ return TPROOT##_new(sinh(a.real)*cosi, cosh(a.real)*sini); \
252
+ } \
253
+ /* Hyperbolic cosine: cosh(re)cos(im) + i sinh(re)sin(im). */ \
254
+ inline TP TPROOT##_cosh(TP a) \
255
+ { \
256
+ REAL_TP cosi; \
257
+ REAL_TP sini = sincos(a.imag, &cosi); \
258
+ return TPROOT##_new(cosh(a.real)*cosi, sinh(a.real)*sini); \
259
+ } \
260
+ /* Hyperbolic tangent: (sinh(2re) + i sin(2im)) divided by (cosh(2re) + cos(2im)). */ \
261
+ inline TP TPROOT##_tanh(TP a) \
262
+ { \
263
+ REAL_TP re2 = 2.f * a.real; \
264
+ REAL_TP im2 = 2.f * a.imag; \
265
+ \
266
+ const REAL_TP limit = log(REAL_3LTR##_MAX); \
267
+ /* For |re2| above the limit, return +/-1 directly (presumably to avoid overflow in cosh/sinh). */ \
268
+ if (fabs(re2) > limit) \
269
+ return TPROOT##_new((re2 > 0 ? 1.f : -1.f), 0.f); \
270
+ else \
271
+ { \
272
+ REAL_TP den = cosh(re2) + cos(im2); \
273
+ return TPROOT##_new(sinh(re2) / den, sin(im2) / den); \
274
+ } \
275
+ } \
276
+
277
+ // Undocumented, may disappear at any time. Alignment in bytes of a complex type over TYPE; the expansion is parenthesized so it is safe inside larger expressions.
278
+ #if PYOPENCL_COMPLEX_ENABLE_EXTENDED_ALIGNMENT
279
+ #define PYOPENCL_COMPLEX_ALIGNMENT(TYPE) (2*sizeof(TYPE))
280
+ #else
281
+ #define PYOPENCL_COMPLEX_ALIGNMENT(TYPE) (sizeof(TYPE))
282
+ #endif
283
+
284
+ #define PYOPENCL_DECLARE_COMPLEX_TYPE(BASE, BASE_3LTR) /* Defines the union type c##BASE##_t, whose two anonymous structs name the same pair of BASE components (x, y) and (real, imag). */ \
285
+ typedef union \
286
+ { \
287
+ struct { BASE x, y; } \
288
+ __attribute__ ((aligned (PYOPENCL_COMPLEX_ALIGNMENT(BASE)))); \
289
+ struct { BASE real, imag; } \
290
+ __attribute__ ((aligned (PYOPENCL_COMPLEX_ALIGNMENT(BASE)))); \
291
+ } c##BASE##_t; \
292
+ /* Then generate the c##BASE##_xxx function family for the new type. */ \
293
+ PYOPENCL_DECLARE_COMPLEX_TYPE_INT(BASE, BASE_3LTR, c##BASE, c##BASE##_t)
294
+
295
+ PYOPENCL_DECLARE_COMPLEX_TYPE(float, FLT); // declares cfloat_t and the cfloat_xxx functions
296
+ #define cfloat_cast(a) cfloat_new((a).real, (a).imag) // NOTE: evaluates its argument twice
297
+ // The double-precision variant is declared only when PYOPENCL_DEFINE_CDOUBLE is defined before this point.
298
+ #ifdef PYOPENCL_DEFINE_CDOUBLE
299
+ PYOPENCL_DECLARE_COMPLEX_TYPE(double, DBL); // declares cdouble_t and the cdouble_xxx functions
300
+ #define cdouble_cast(a) cdouble_new((a).real, (a).imag) // NOTE: evaluates its argument twice
301
+ #endif // PYOPENCL_DEFINE_CDOUBLE
302
+
303
+ #undef PYOPENCL_COMPLEX_ALIGNMENT // helper macro is not left defined for code after this point
@@ -0,0 +1,120 @@
1
+ // Pieced together from Boost C++ and Cephes by
2
+ // Andreas Kloeckner (C) 2012
3
+ //
4
+ // Pieces from:
5
+ //
6
+ // Copyright (c) 2006 Xiaogang Zhang, John Maddock
7
+ // Use, modification and distribution are subject to the
8
+ // Boost Software License, Version 1.0. (See
9
+ // http://www.boost.org/LICENSE_1_0.txt)
10
+ //
11
+ // Cephes Math Library Release 2.8: June, 2000
12
+ // Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
13
+ // What you see here may be used freely, but it comes with no support or
14
+ // guarantee.
15
+
16
+ #pragma once
17
+
18
+ typedef double special_func_scalar_type; // scalar type of the arguments, coefficients and results of the evaluators below
19
+
20
+ // {{{ cephes_polevl
21
+
22
+ /*
23
+ * DESCRIPTION:
24
+ *
25
+ * Evaluates polynomial of degree N:
26
+ *
27
+ *
28
+ * y = C_0 + C_1 x + C_2 x^2 + ... + C_N x^N
29
+ *
30
+ *
31
+ * Coefficients are stored in reverse order:
32
+ *
33
+ * coef[0] = C_N, ..., coef[N] = C_0.
34
+ *
35
+ *
36
+ * The function cephes_p1evl() assumes that the leading coefficient C_N = 1.0,
37
+ * which is omitted from the array. Its calling arguments are
38
+ * otherwise the same as cephes_polevl().
39
+ *
40
+ */
41
+
42
+ special_func_scalar_type cephes_polevl(special_func_scalar_type x, __constant const special_func_scalar_type *coef, int N)
43
+ {
44
+     // Evaluates at x the degree-N polynomial whose N+1 coefficients are
45
+     // stored in coef[0..N], highest power first, using Horner's scheme.
46
+     // For N <= 0 the result is coef[0]; the former do/while loop read past
47
+     // coef[N] and kept iterating with a negative counter in that case.
48
+
49
+     special_func_scalar_type ans = coef[0];
50
+
51
+     // A pre-tested loop performs exactly N Horner steps (none for N <= 0),
52
+     // in the same order as the original loop for N >= 1.
53
+     for (int i = 1; i <= N; ++i)
54
+         ans = ans * x + coef[i];
55
+
56
+     return ans;
57
+ }
58
+
59
+ // }}}
60
+
61
+ // {{{ cephes_p1evl
62
+
63
+ special_func_scalar_type cephes_p1evl( special_func_scalar_type x, __constant const special_func_scalar_type *coef, int N )
64
+ {
65
+     // Evaluates at x the degree-N polynomial with implicit leading coefficient
66
+     // 1.0: x^N + coef[0] x^(N-1) + ... + coef[N-1]. Requires N >= 1.
67
+     // For N == 1 the result is x + coef[0]; the former do/while loop read
68
+     // past coef[N-1] and kept iterating with a negative counter in that case.
69
+
70
+     special_func_scalar_type ans = x + coef[0];
71
+
72
+     // Remaining N-1 Horner steps; the pre-tested loop runs zero times for
73
+     // N == 1 and in the same order as the original loop for N >= 2.
74
+     for (int i = 1; i < N; ++i)
75
+         ans = ans * x + coef[i];
76
+
77
+     return ans;
78
+ }
79
+
80
+ // }}}
81
+
82
+ // {{{ boost_evaluate_rational
83
+
84
+ special_func_scalar_type boost_evaluate_rational_backend(__constant const special_func_scalar_type* num, __constant const special_func_scalar_type* denom, special_func_scalar_type z, int count)
85
+ {
86
+     // Returns num(z) / denom(z); index i of each array is the coefficient of z^i.
87
+     special_func_scalar_type top, bottom;
88
+     if (z <= 1) {
89
+         // Horner's scheme in z, from the highest index down.
90
+         int k = count - 1;
91
+         top = num[k];
92
+         bottom = denom[k];
93
+         while (--k >= 0) {
94
+             top *= z;
95
+             top += num[k];
96
+             bottom *= z;
97
+             bottom += denom[k];
98
+         }
99
+     } else {
100
+         // Same ratio via Horner's scheme in 1/z, from the lowest index up.
101
+         z = 1 / z;
102
+         top = num[0];
103
+         bottom = denom[0];
104
+         for (unsigned k = 1; k < count; ++k) {
105
+             top *= z;
106
+             top += num[k];
107
+             bottom *= z;
108
+             bottom += denom[k];
109
+         }
110
+     }
111
+
112
+     return top / bottom;
113
+ }
114
+
115
+ #define boost_evaluate_rational(num, denom, z) /* num must be an array, not a pointer: its element count is taken with sizeof, and denom is indexed with that same count */ \
116
+ boost_evaluate_rational_backend(num, denom, z, sizeof(num)/sizeof(special_func_scalar_type))
117
+
118
+ // }}}
119
+
120
+ // vim: fdm=marker