PyFiberModes 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. PyFiberModes/.DS_Store +0 -0
  2. PyFiberModes/VERSION +1 -0
  3. PyFiberModes/__init__.py +38 -0
  4. PyFiberModes/constants.py +32 -0
  5. PyFiberModes/fiber/.DS_Store +0 -0
  6. PyFiberModes/fiber/__init__.py +0 -0
  7. PyFiberModes/fiber/factory.py +448 -0
  8. PyFiberModes/fiber/fiber.py +317 -0
  9. PyFiberModes/fiber/geometry/__init__.py +10 -0
  10. PyFiberModes/fiber/geometry/geometry.py +33 -0
  11. PyFiberModes/fiber/geometry/stepindex.py +181 -0
  12. PyFiberModes/fiber/geometry/supergaussian.py +193 -0
  13. PyFiberModes/fiber/material/__init__.py +13 -0
  14. PyFiberModes/fiber/material/air.py +20 -0
  15. PyFiberModes/fiber/material/claussiusmossotti.py +40 -0
  16. PyFiberModes/fiber/material/compmaterial.py +60 -0
  17. PyFiberModes/fiber/material/fixed.py +36 -0
  18. PyFiberModes/fiber/material/germania.py +14 -0
  19. PyFiberModes/fiber/material/material.py +107 -0
  20. PyFiberModes/fiber/material/sellmeier.py +47 -0
  21. PyFiberModes/fiber/material/sellmeiercomp.py +42 -0
  22. PyFiberModes/fiber/material/silica.py +15 -0
  23. PyFiberModes/fiber/material/sio2f.py +34 -0
  24. PyFiberModes/fiber/material/sio2geo2.py +36 -0
  25. PyFiberModes/fiber/material/sio2geo2cm.py +43 -0
  26. PyFiberModes/fiber/solver/__init__.py +12 -0
  27. PyFiberModes/fiber/solver/cuda.py +124 -0
  28. PyFiberModes/fiber/solver/cudasrc/besseldiff.c +28 -0
  29. PyFiberModes/fiber/solver/cudasrc/chareq.c +314 -0
  30. PyFiberModes/fiber/solver/cudasrc/constf.c +20 -0
  31. PyFiberModes/fiber/solver/cudasrc/hypergf.c +264 -0
  32. PyFiberModes/fiber/solver/cudasrc/ivf.c +49 -0
  33. PyFiberModes/fiber/solver/cudasrc/knf.c +166 -0
  34. PyFiberModes/fiber/solver/mlsif.py +289 -0
  35. PyFiberModes/fiber/solver/solver.py +119 -0
  36. PyFiberModes/fiber/solver/ssif.py +275 -0
  37. PyFiberModes/fiber/solver/tlsif.py +266 -0
  38. PyFiberModes/field.py +405 -0
  39. PyFiberModes/functions.py +137 -0
  40. PyFiberModes/mode.py +186 -0
  41. PyFiberModes/simulator/.DS_Store +0 -0
  42. PyFiberModes/simulator/__init__.py +15 -0
  43. PyFiberModes/simulator/psimulator.py +41 -0
  44. PyFiberModes/simulator/simulator.py +288 -0
  45. PyFiberModes/slrc.py +291 -0
  46. PyFiberModes/tools/__init__.py +0 -0
  47. PyFiberModes/tools/directories.py +41 -0
  48. PyFiberModes/wavelength.py +109 -0
  49. PyFiberModes-0.2.1.dist-info/METADATA +158 -0
  50. PyFiberModes-0.2.1.dist-info/RECORD +52 -0
  51. PyFiberModes-0.2.1.dist-info/WHEEL +5 -0
  52. PyFiberModes-0.2.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,43 @@
1
+ # This file is part of FiberModes.
2
+ #
3
+ # FiberModes is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # FiberModes is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with FiberModes. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ from .claussiusmossotti import ClaussiusMossotti
17
+ import numpy
18
+
19
+
20
class SiO2GeO2(ClaussiusMossotti):
    """Germania-doped silica, Claussius-Mossotti flavour.

    The class only supplies data to its ``ClaussiusMossotti`` base: a
    display name, the number of parameters, validity ranges and three
    coefficient vectors ``A``, ``B`` and ``Z``.  How the base class
    combines those vectors is not visible from this file.
    """

    name = "Silica doped with Germania (Claussius-Mossotti version)"
    nparams = 1
    # presumably the valid wavelength window in metres (0.6-1.8 um,
    # matching the Sunak 1989 reference quoted in this file) - TODO confirm
    WLRANGE = (0.6e-6, 1.8e-6)
    # presumably the largest supported dopant fraction - TODO confirm
    XRANGE = 0.2

    # Three-term coefficient vectors consumed by the base class.
    A = numpy.array([
        0.2045154578,
        0.06451676258,
        0.1311583151,
    ])
    B = numpy.array([
        -0.1011783769,
        0.1778934999,
        -0.1064179581,
    ])
    Z = numpy.array([
        0.06130807320e-6,
        0.1108859848e-6,
        8.964441861e-6,
    ])

    @classmethod
    def info(cls):
        """Return a one-line human readable description of the material."""
        return "Silica doped with Germania."
38
+
39
+ # Article (Sunak1989)
40
+ # Sunak, H. & Bastien, S.
41
+ # Refractive index and material dispersion interpolation of doped silica
42
+ # in the 0.6-1.8 mu m wavelength region
43
+ # Photonics Technology Letters, IEEE, 1989, 1, 142-145
@@ -0,0 +1,12 @@
1
+ """A solver gives the function used to find cutoff and/or effective index
2
+ of a given :py:class:`~fibermodes.mode.Mode`
3
+ in a given :py:class:`fibermodes.fiber.fiber.Fiber`.
4
+
5
+ """
6
+
7
+ from . import ssif
8
+ from . import tlsif
9
+ from . import mlsif
10
+
11
+
12
+ __all__ = ['ssif', 'tlsif', 'mlsif']
@@ -0,0 +1,124 @@
1
+ # This file is part of FiberModes.
2
+ #
3
+ # FiberModes is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # FiberModes is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with FiberModes. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ """Solver using CUDA (Nvidia GPU)"""
17
+
18
+ from fibermodes.fiber.solver import mlsif
19
+ from fibermodes import Mode, Wavelength, ModeFamily
20
+ import numpy
21
+ import os
22
+ from itertools import cycle
23
+
24
+ import pycuda.driver as cuda
25
+ import pycuda.autoinit
26
+ from pycuda.compiler import SourceModule
27
+ assert pycuda.autoinit
28
+
29
+
30
class Neff(mlsif.Neff):
    """Effective-index solver that samples the characteristic equation on a GPU.

    For modes with ``nu == 0`` or of the LP family the call is delegated to
    the parent ``mlsif.Neff`` solver.  For the other modes the ``chareq``
    CUDA kernel is evaluated on ``NSOLVERS`` equally spaced effective-index
    samples and sign changes between neighbouring samples are recorded.
    """

    # CUDA source files, concatenated in this order into a single module.
    _CUDA_SOURCES = ("constf.c", "hypergf.c", "ivf.c",
                     "knf.c", "besseldiff.c", "chareq.c")

    # Thread-block shape used to launch ``chareq``.
    _BLOCK_SHAPE = (5, 4, 2)

    def __init__(self, fiber):
        """Create the solver for *fiber* and allocate the GPU buffers."""
        super().__init__(fiber)

        self.NSOLVERS = 1000  # Number of points for the solver
        self._init_gpu()

    def _init_gpu(self):
        """Compile the CUDA sources and allocate the device buffers."""
        src_dir = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "cudasrc")
        chunks = []
        for cfile in self._CUDA_SOURCES:
            with open(os.path.join(src_dir, cfile)) as f:
                chunks.append(f.read())
        cudamod = SourceModule("".join(chunks))
        self.chareq = cudamod.get_function("chareq")
        # One format character per kernel argument:
        # (float *neff, float k0, float *r, float *n,
        #  unsigned int N, unsigned int *nu, float *x)
        self.chareq.prepare("PfPPIPP")

        sof = numpy.dtype(numpy.float32).itemsize
        self.gpu_neff = cuda.mem_alloc(self.NSOLVERS * sof)
        r = numpy.array(self.fiber._r, dtype=numpy.float32)
        self.gpu_r = cuda.mem_alloc(r.nbytes)
        cuda.memcpy_htod(self.gpu_r, r)
        self.gpu_n = cuda.mem_alloc(len(self.fiber) * sof)
        soi = numpy.dtype(numpy.int32).itemsize
        self.gpu_nu = cuda.mem_alloc(soi)
        self.x = numpy.empty((1, self.NSOLVERS), dtype=numpy.float32)
        self.gpu_x = cuda.mem_alloc(self.x.nbytes)

    def __call__(self, wl, mode, delta, lowbound):
        """Return the cached effective index entry for *mode* at *wl*.

        ``delta`` and ``lowbound`` are only used when the call is delegated
        to the parent solver (``nu == 0`` or LP modes); the GPU path does
        not read them.

        Returns ``self.fiber.ne_cache[wl][mode]`` after the sweep, or
        ``nan`` when no entry was stored for *mode*.
        """
        if mode.nu == 0 or mode.family is ModeFamily.LP:
            return super().__call__(wl, mode, delta, lowbound)

        wl = Wavelength(wl)
        nmin = self.fiber.minIndex(-1, wl)
        nmax = max(layer.maxIndex(wl) for layer in self.fiber.layers)
        neff = numpy.linspace(nmin, nmax, self.NSOLVERS).astype(numpy.float32)
        cuda.memcpy_htod(self.gpu_neff, neff)

        indices = numpy.fromiter(
            (layer.minIndex(wl) for layer in self.fiber.layers),
            dtype=numpy.float32,
            count=len(self.fiber))
        cuda.memcpy_htod(self.gpu_n, indices)

        nu = numpy.array([mode.nu], dtype=numpy.int32)
        cuda.memcpy_htod(self.gpu_nu, nu)

        # The kernel indexes its dynamic shared array with one float per
        # thread of the block, hence threads * sizeof(float32) bytes.
        bx, by, bz = self._BLOCK_SHAPE
        shared_bytes = bx * by * bz * numpy.dtype(numpy.float32).itemsize

        self.chareq.prepared_call(
            (neff.size, nu.size), self._BLOCK_SHAPE,
            self.gpu_neff, numpy.float32(wl.k0), self.gpu_r,
            self.gpu_n, numpy.uint32(indices.size), self.gpu_nu,
            self.gpu_x,
            shared_size=shared_bytes)

        cuda.memcpy_dtoh(self.x, self.gpu_x)

        # Walk from the highest sample downwards and keep every interval on
        # which the residual strictly changes sign.  Intervals touching a
        # very large residual (> 1e5) are skipped.
        residual = self.x[0]
        sols = []
        for i in range(self.NSOLVERS-1, 0, -1):
            lo, hi = residual[i-1], residual[i]
            if (abs(hi) > 1e5) or (abs(lo) > 1e5):
                continue
            if (lo < 0 and hi > 0) or (lo > 0 and hi < 0):
                # NOTE(review): the bracketing pair itself is stored, not a
                # refined root.  The original source carried a commented-out
                # call to self._findBetween(self._heceq, neff[i-1], neff[i],
                # args=(wl, mode.nu)) at this point - confirm what
                # set_ne_cache is expected to receive.
                sols.append((neff[i-1], neff[i]))

        # Brackets are attributed alternately to HE and EH modes; the radial
        # order m advances after each EH entry.
        famc = cycle((ModeFamily.HE, ModeFamily.EH))
        m = 1
        for bracket in sols:
            fam = next(famc)
            self.fiber.set_ne_cache(wl, Mode(fam, mode.nu, m), bracket)
            if fam == ModeFamily.EH:
                m += 1

        try:
            return self.fiber.ne_cache[wl][mode]
        except KeyError:
            return float("nan")
111
+
112
if __name__ == '__main__':
    # Small demonstration: build a three-layer fiber that uses the CUDA
    # solver and print the effective index reported for each mode.
    from fibermodes import FiberFactory

    factory = FiberFactory()
    factory.setSolvers(neff=Neff)
    for layer_kwargs in ({"radius": 4e-6, "index": 1.4489},
                         {"radius": 10e-6, "index": 1.4474},
                         {"index": 1.4444}):
        factory.addLayer(**layer_kwargs)

    fiber = factory[0]
    wl = 1550e-9
    for mode in fiber.findVmodes(wl):
        print(mode, fiber.neff(mode, wl, delta=1e-5))
@@ -0,0 +1,28 @@
1
+
2
/*
 * First derivative of jnf(n, z) with respect to z, from the recurrences
 *   J0'(z) = -J1(z)
 *   Jn'(z) = (J(n-1)(z) - J(n+1)(z)) / 2
 */
__device__ float jnp(int n, float z) {
    return (n == 0) ? -jnf(1, z)
                    : (jnf(n - 1, z) - jnf(n + 1, z)) / 2.;
}
8
+
9
/*
 * First derivative of ynf(n, z) with respect to z, from the recurrences
 *   Y0'(z) = -Y1(z)
 *   Yn'(z) = (Y(n-1)(z) - Y(n+1)(z)) / 2
 */
__device__ float ynp(int n, float z) {
    return (n == 0) ? -ynf(1, z)
                    : (ynf(n - 1, z) - ynf(n + 1, z)) / 2.;
}
15
+
16
/*
 * First derivative of ivf(n, z) with respect to z, assuming ivf(n, z) is
 * the modified Bessel function of the first kind I_n(z) (defined outside
 * this file - confirm):
 *   I0'(z) = I1(z)
 *   In'(z) = (I(n-1)(z) + I(n+1)(z)) / 2
 */
__device__ float ivp(int n, float z) {
    return (n == 0) ? ivf(1, z)
                    : (ivf(n - 1, z) + ivf(n + 1, z)) / 2.;
}
22
+
23
/*
 * First derivative of knf(n, z) with respect to z, assuming knf(n, z) is
 * the modified Bessel function of the second kind K_n(z) (defined outside
 * this file - confirm):
 *   K0'(z) = -K1(z)
 *   Kn'(z) = -(K(n-1)(z) + K(n+1)(z)) / 2
 */
__device__ float knp(int n, float z) {
    return (n == 0) ? -knf(1, z)
                    : -(knf(n - 1, z) + knf(n + 1, z)) / 2.;
}
@@ -0,0 +1,314 @@
1
+
2
+ #include <math_constants.h>
3
+
4
/* presumably the impedance of free space, in ohms - TODO confirm */
#define ETA0 376.73031346177066f

/* Linear index of the calling thread inside its (x, y, z) block. */
#define THREADIDX ((blockDim.x * blockDim.y * threadIdx.z) + (blockDim.x * threadIdx.y) + threadIdx.x)
/* Linear index of the calling block inside the 2-D grid. */
#define BLOCKIDX ((gridDim.x * blockIdx.y) + blockIdx.x)
/* Shared-memory slot owned by the calling thread. */
#define IDX THREADIDX
// ((BLOCKIDX * blockDim.x * blockDim.y * blockDim.z) + THREADIDX)

/*
 * Slot (i, j) inside the calling thread's z-slice: i is multiplied by
 * blockDim.x, j is added as is.  Arguments are parenthesised so that
 * compound expressions such as IJ(a + 1, b) expand correctly.
 */
#define IJ(i, j) ((blockDim.x * blockDim.y * threadIdx.z) + (blockDim.x * (i)) + (j))
/* Entry i of the calling thread's z-slice in a blockDim.y-strided array. */
#define IZ(i) (blockDim.y * threadIdx.z + (i))

/* fct(nu, u*r/rho) normalised by fct(nu, u); equals 1 when r == rho. */
#define F(fct, nu, u, r, rho) (fct((nu), (u) * (r) / (rho)) / fct((nu), (u)))
/* Same normalisation, with fctp evaluated at u*r/rho in the numerator. */
#define FP(fct, fctp, nu, u, r, rho) (fctp((nu), (u) * (r) / (rho)) / fct((nu), (u)))

/* Normalised function values. */
#define J(nu, u, r, rho) F(jnf, nu, u, r, rho)
#define Y(nu, u, r, rho) F(ynf, nu, u, r, rho)
#define I(nu, u, r, rho) F(ivf, nu, u, r, rho)
#define K(nu, u, r, rho) F(knf, nu, u, r, rho)

/* Normalised derivative values (jnp, ynp, ivp, knp over the plain function). */
#define JP(nu, u, r, rho) FP(jnf, jnp, nu, u, r, rho)
#define YP(nu, u, r, rho) FP(ynf, ynp, nu, u, r, rho)
#define IP(nu, u, r, rho) FP(ivf, ivp, nu, u, r, rho)
#define KP(nu, u, r, rho) FP(knf, knp, nu, u, r, rho)
25
+
26
+
27
/*
 * In-place Gauss-Jordan elimination with row pivoting on the shared array
 * ab, followed by a copy of 8 floats from ab into x.
 *
 * Each thread owns the slot ab[IDX]; threadIdx.y selects the row,
 * threadIdx.x the column and threadIdx.z an independent slice (see IJ).
 * The function contains __syncthreads() barriers, so it is meant to be
 * reached by every thread of the block.
 *
 * x: output, written at indices 0..7 by every calling thread.
 */
+ __device__ void solve(float *x) {
28
+ extern __shared__ float ab[];
29
+
30
+ int i, j;
31
+ int pidx, rpidx, ridx;
32
+ float temp, temp2;
33
+
34
+ for (i=0; i<blockDim.y; ++i) {
35
+ /* find max pivot */
/* Only the threads of row i search column i, from row i downwards. */
/* NOTE(review): pidx stays unassigned when every candidate is zero (or NaN),
   yet it is read by the swap test below - confirm this cannot happen. */
36
+ if (threadIdx.y == i) {
37
+ temp = 0.;
38
+ for (j=i; j<blockDim.y; ++j) {
39
+ if ( (temp2 = fabs(ab[IJ(j,i)])) > temp) {
40
+ temp = temp2;
41
+ pidx = j;
42
+ }
43
+ }
44
+ }
45
+
46
+ /* swap rows */
47
+ __syncthreads();
/* Each thread of row i exchanges its own element with the one in row pidx. */
48
+ if (threadIdx.y == i && pidx != i) {
49
+ ridx = IJ(pidx,threadIdx.x);
50
+ temp = ab[IDX];
51
+ ab[IDX] = ab[ridx];
52
+ ab[ridx] = temp;
53
+ }
54
+
/* From here on pidx is reused as the slot of the pivot element (i, i);
   ridx is the pivot-row element in this thread's column and rpidx the
   pivot-column element in this thread's row. */
55
+ ridx = IJ(i,threadIdx.x);
56
+ pidx = IJ(i,i);
57
+ rpidx = IJ(threadIdx.y,i);
58
+
59
+ /* row operations */
60
+ __syncthreads();
/* NOTE(review): threads with threadIdx.x > i read ab[rpidx] while the
   thread with threadIdx.x == i of the same row stores 0 into that very
   slot, with no barrier in between - looks like a data race, confirm. */
61
+ if (threadIdx.y != i) {
62
+ if (threadIdx.x > i) {
63
+ ab[IDX] -= ab[ridx] * ab[rpidx] / ab[pidx];
64
+ }
65
+ else if (threadIdx.x == i) {
66
+ ab[IDX] = 0.;
67
+ }
68
+ }
69
+
70
+ /* normalize row */
71
+ __syncthreads();
/* NOTE(review): same pattern as above - ab[pidx] is read by the threads
   with threadIdx.x > i and overwritten with 1 by the thread with
   threadIdx.x == i in the same phase - confirm ordering. */
72
+ if (threadIdx.y == i) {
73
+ if (threadIdx.x > i) {
74
+ ab[IDX] /= ab[pidx];
75
+ }
76
+ else if (threadIdx.x == i) {
77
+ ab[IDX] = 1.;
78
+ }
79
+ }
80
+ }
81
+
82
+ /* copy from ab to x */
83
+ __syncthreads();
/* The trip count 8 and the divisor 4 are hard-coded. */
/* NOTE(review): the source slot is slice*(blockDim.x*blockDim.y)
   + blockDim.y + (i % 4), i.e. four consecutive slots, whereas IJ(row, col)
   strides rows by blockDim.x - confirm whether
   blockDim.x * (i % 4) + blockDim.y was intended. */
84
+ for (i=0; i<8; ++i) {
85
+ x[i] = ab[(i / 4) * (blockDim.x * blockDim.y) + blockDim.y + (i % 4)];
86
+ }
87
+ }
88
+
89
+
90
/*
 * Evaluate the characteristic-equation residual for one effective index.
 *
 * neff: effective index under test.
 * k0:   factor multiplying r * w to form u (presumably the free-space
 *       wavenumber - confirm against the caller).
 * r:    layer radii; r[i] is used as the reference radius of layer i.
 * n:    N layer indices.
 * N:    number of layers.
 * nu:   order passed to the Bessel-type functions.
 *
 * Returns CUDART_INF_F when |n[i]^2 - neff^2| is zero for some layer,
 * otherwise a 2x2 determinant-like combination of four entries of the
 * shared array ab (see the return statement).
 *
 * The function works on the dynamically sized shared array ab (one slot per
 * thread, indexed by IDX) and contains __syncthreads() barriers, so it is
 * meant to be reached by every thread of the block.
 */
+ __device__ float _chareq(float neff, float k0,
91
+ float *r, float *n, unsigned int N,
92
+ unsigned int nu) {
93
+ extern __shared__ float ab[];
94
+ unsigned int i;
95
+ float *w, *u;
96
+ float x[8];
97
+
/* Per-thread scratch arrays, one entry per layer. */
/* NOTE(review): the malloc results are not checked before use. */
98
+ w = (float *) malloc(N*sizeof(float));
99
+ u = (float *) malloc(N*sizeof(float));
100
+
/* w[i] = sqrt(|n[i]^2 - neff^2|), u[i] = k0 * radius * w[i]; the sign of
   w[i] is flipped afterwards when neff > n[i]. */
101
+ for (i=0; i<N; ++i) {
102
+ w[i] = sqrtf(fabsf(n[i]*n[i] - neff*neff));
/* Early exit happens before any barrier; it depends only on neff and n. */
103
+ if (0. == w[i]) {
104
+ free(w);
105
+ free(u);
106
+ return CUDART_INF_F;
107
+ }
/* The last layer (i + 1 == N) reuses the radius of the previous layer. */
/* NOTE(review): i is unsigned, so for N == 1 the index i-1 wraps around -
   confirm callers always pass N >= 2. */
108
+ u[i] = k0 * r[(i+1 == N)?(i-1):i] * w[i];
109
+ if (neff > n[i]) {
110
+ w[i] = -w[i];
111
+ }
112
+ }
113
+
114
+ /* First layer */
/* Only the threads with threadIdx.x == 4 write here; threadIdx.y picks one
   of four entries and threadIdx.z one of two slices.  Entries 0 and 1 are
   set to 1 where threadIdx.y == threadIdx.z and to 0 otherwise. */
115
+ if (4 == threadIdx.x) {
116
+ switch (threadIdx.y) {
117
+ case 0:
118
+ case 1:
119
+ ab[IDX] = (threadIdx.y == threadIdx.z)?1.:0.;
120
+ break;
121
+ case 2:
122
+ if (threadIdx.z == 0) {
123
+ ab[IDX] = neff * nu / (u[0] * w[0]);
124
+ }
125
+ else {
126
+ if (neff < n[0]) {
127
+ ab[IDX] = -JP(nu, u[0], 1, 1) * ETA0 / w[0];
128
+ }
129
+ else {
130
+ ab[IDX] = -IP(nu, u[0], 1, 1) * ETA0 / w[0];
131
+ }
132
+ }
133
+ break;
134
+ case 3:
135
+ if (threadIdx.z == 0) {
136
+ if (neff < n[0]) {
137
+ ab[IDX] = JP(nu, u[0], 1, 1) * n[0] * n[0] / (ETA0 * w[0]);
138
+ }
139
+ else {
140
+ ab[IDX] = IP(nu, u[0], 1, 1) * n[0] * n[0] / (ETA0 * w[0]);
141
+ }
142
+ }
143
+ else {
144
+ ab[IDX] = -neff * nu / (u[0] * w[0]);
145
+ }
146
+ break;
147
+ }
148
+ }
149
+
150
+ /* For each intermediate layer */
/* NOTE(review): N is unsigned, so N-1 wraps around when N == 0. */
151
+ for (i=1; i<N-1; ++i) {
152
+ /* Fill matrix */
/* Threads with threadIdx.x in 0..3 each write one coefficient; the J/Y
   variants are used when neff < n[i], the I/K variants otherwise.  The slots
   with threadIdx.x == 4 keep the values left by the previous step. */
/* NOTE(review): each threadIdx.x case lists the four terms of one relation
   across threadIdx.y, while solve() eliminates along threadIdx.y rows and
   threadIdx.x columns - the matrix may be transposed with respect to the
   threadIdx.x == 4 column, confirm the intended orientation. */
153
+ switch (threadIdx.x) {
154
+ case 0:
155
+ switch (threadIdx.y) {
156
+ case 0:
157
+ ab[IDX] = (neff < n[i]) ? J(nu, u[i], r[i-1], r[i]):
158
+ I(nu, u[i], r[i-1], r[i]);
159
+ break;
160
+ case 1:
161
+ ab[IDX] = (neff < n[i]) ? Y(nu, u[i], r[i-1], r[i]):
162
+ K(nu, u[i], r[i-1], r[i]);
163
+ break;
164
+ case 2:
165
+ case 3:
166
+ ab[IDX] = 0.;
167
+ break;
168
+ }
169
+ break;
170
+ case 1:
171
+ switch (threadIdx.y) {
172
+ case 0:
173
+ case 1:
174
+ ab[IDX] = 0.;
175
+ break;
176
+ case 2:
177
+ ab[IDX] = (neff < n[i]) ? J(nu, u[i], r[i-1], r[i]):
178
+ I(nu, u[i], r[i-1], r[i]);
179
+ break;
180
+ case 3:
181
+ ab[IDX] = (neff < n[i]) ? Y(nu, u[i], r[i-1], r[i]):
182
+ K(nu, u[i], r[i-1], r[i]);
183
+ break;
184
+ }
185
+ break;
186
+ case 2:
187
+ switch (threadIdx.y) {
188
+ case 0:
189
+ ab[IDX] = ((neff < n[i]) ? J(nu, u[i], r[i-1], r[i]):
190
+ I(nu, u[i], r[i-1], r[i])) *
191
+ neff * nu * r[i] / (u[i] * r[i-1] * w[i]);
192
+ break;
193
+ case 1:
194
+ ab[IDX] = ((neff < n[i]) ? Y(nu, u[i], r[i-1], r[i]):
195
+ K(nu, u[i], r[i-1], r[i])) *
196
+ neff * nu * r[i] / (u[i] * r[i-1] * w[i]);
197
+ break;
198
+ case 2:
199
+ ab[IDX] = -((neff < n[i]) ? JP(nu, u[i], r[i-1], r[i]):
200
+ IP(nu, u[i], r[i-1], r[i])) *
201
+ ETA0 / w[i];
202
+ break;
203
+ case 3:
204
+ ab[IDX] = -((neff < n[i]) ? YP(nu, u[i], r[i-1], r[i]):
205
+ KP(nu, u[i], r[i-1], r[i])) *
206
+ ETA0 / w[i];
207
+ break;
208
+ }
209
+ break;
210
+ case 3:
211
+ switch (threadIdx.y) {
212
+ case 0:
213
+ ab[IDX] = ((neff < n[i]) ? JP(nu, u[i], r[i-1], r[i]):
214
+ IP(nu, u[i], r[i-1], r[i])) *
215
+ n[i] * n[i] / (ETA0 * w[i]);
216
+ break;
217
+ case 1:
218
+ ab[IDX] = ((neff < n[i]) ? YP(nu, u[i], r[i-1], r[i]):
219
+ KP(nu, u[i], r[i-1], r[i])) *
220
+ n[i] * n[i] / (ETA0 * w[i]);
221
+ break;
222
+ case 2:
223
+ ab[IDX] = -((neff < n[i]) ? J(nu, u[i], r[i-1], r[i]):
224
+ I(nu, u[i], r[i-1], r[i])) *
225
+ neff * nu * r[i] / (u[i] * r[i-1] * w[i]);
226
+ break;
227
+ case 3:
228
+ ab[IDX] = -((neff < n[i]) ? Y(nu, u[i], r[i-1], r[i]):
229
+ K(nu, u[i], r[i-1], r[i])) *
230
+ neff * nu * r[i] / (u[i] * r[i-1] * w[i]);
231
+ break;
232
+ }
233
+ break;
234
+ }
235
+
236
+ /* Solve system */
237
+ __syncthreads();
238
+ solve(x);
239
+
240
+ /* Get E and H values */
/* The threads with threadIdx.x == 4 overwrite their slot with values built
   from the four entries x[IZ(0)]..x[IZ(3)] of their own z-slice; these
   slots are what the next iteration (or the last-layer step) starts from. */
241
+ if (4 == threadIdx.x) {
242
+ switch (threadIdx.y) {
243
+ case 0:
244
+ ab[IDX] = x[IZ(0)] + x[IZ(1)];
245
+ break;
246
+ case 1:
247
+ ab[IDX] = x[IZ(2)] + x[IZ(3)];
248
+ break;
249
+ case 2:
250
+ if (neff < n[i])
251
+ ab[IDX] = ((neff * nu / (u[i] * w[i])) * (x[IZ(0)] + x[IZ(1)])) -
252
+ ((ETA0 / w[i]) * (x[IZ(2)] * JP(nu, u[i], 1, 1) +
253
+ x[IZ(3)] * YP(nu, u[i], 1, 1)));
254
+ else
255
+ ab[IDX] = ((neff * nu / (u[i] * w[i])) * (x[IZ(0)] + x[IZ(1)])) -
256
+ ((ETA0 / w[i]) * (x[IZ(2)] * IP(nu, u[i], 1, 1) +
257
+ x[IZ(3)] * KP(nu, u[i], 1, 1)));
258
+ break;
259
+ case 3:
260
+ if (neff < n[i])
261
+ ab[IDX] = ((n[i] * n[i] / (ETA0 * w[i])) *
262
+ (x[IZ(0)] * JP(nu, u[i], 1, 1) +
263
+ x[IZ(1)] * YP(nu, u[i], 1, 1))) -
264
+ ((neff * nu / (u[i] * w[i])) * (x[IZ(2)] + x[IZ(3)]));
265
+ else
266
+ ab[IDX] = ((n[i] * n[i] / (ETA0 * w[i])) *
267
+ (x[IZ(0)] * IP(nu, u[i], 1, 1) +
268
+ x[IZ(1)] * KP(nu, u[i], 1, 1))) -
269
+ ((neff * nu / (u[i] * w[i])) * (x[IZ(2)] + x[IZ(3)]));
270
+ break;
271
+ }
272
+ }
273
+
274
+ }
275
+
276
+ __syncthreads();
277
+
278
+ /* Find values for last layer */
/* i keeps the value it had when the loop above ended: N-1 for N >= 2. */
/* NOTE(review): for N == 1 the loop leaves i == 1, so n[i] reads past the
   N-element array - confirm N >= 2 is guaranteed. */
279
+ if (neff < n[i]) {
280
+ /* Leaky mode */
/* Nothing is done in this branch: ab is left untouched. */
281
+ }
282
+ else {
283
+ /* Guided mode */
/* Entries 2 and 3 of the threadIdx.x == 4 column are reduced by terms built
   from entries 0 and 1 of the same z-slice (IJ(0,4), IJ(1,4)) and KP. */
284
+ if (4 == threadIdx.x) {
285
+ if (2 == threadIdx.y) {
286
+ ab[IDX] -= (neff * nu / (u[N-1] * w[N-1])) * ab[IJ(0,4)] -
287
+ ((ETA0 / w[N-1]) * ab[IJ(1,4)] * KP(nu, u[N-1], 1, 1));
288
+ }
289
+ else if (3 == threadIdx.y) {
290
+ ab[IDX] -= (n[N-1] * n[N-1] / (ETA0 * w[N-1])) * ab[IJ(0,4)] * KP(nu, u[N-1], 1, 1) -
291
+ (neff * nu / (u[N-1] * w[N-1])) * ab[IJ(1,4)];
292
+ }
293
+ }
294
+ }
295
+
296
+ free(w);
297
+ free(u);
298
+
299
+ __syncthreads();
300
+
/* Result: entry (2,4) of slice 0 times entry (3,4) of slice 1, minus
   entry (2,4) of slice 1 times entry (3,4) of slice 0.  Every thread reads
   the same four shared slots. */
301
+ return ab[((blockDim.x * 2) + 4)] * ab[((blockDim.x * blockDim.y) + (blockDim.x * 3) + 4)] -
302
+ ab[((blockDim.x * blockDim.y) + (blockDim.x * 2) + 4)] * ab[((blockDim.x * 3) + 4)];
303
+ }
304
+
305
/*
 * Kernel entry point: one block evaluates the characteristic equation for
 * one (neff, nu) pair and stores the residual in x.
 *
 * neff: candidate effective indices, indexed by blockIdx.x.
 * k0, r, n, N: forwarded unchanged to _chareq().
 * nu:   orders, indexed by blockIdx.y.
 * x:    output, one float per block at index BLOCKIDX.
 */
__global__ void chareq(float *neff, float k0,
                       float *r, float *n, unsigned int N, unsigned int *nu,
                       float *x) {
    float xx;

    /* Every thread of the block calls _chareq(): it contains
       __syncthreads() barriers. */
    xx = _chareq(neff[blockIdx.x], k0, r, n, N, nu[blockIdx.y]);

    /* Only thread (0, 0, 0) stores the result.  The previous test
       `0 == threadIdx.x == threadIdx.y == threadIdx.z` is parsed as
       ((0 == x) == y) == z and is also true for threads such as (1, 0, 1)
       and (0, 1, 1). */
    if (0 == threadIdx.x && 0 == threadIdx.y && 0 == threadIdx.z) {
        x[BLOCKIDX] = xx;
    }
}
@@ -0,0 +1,20 @@
1
+
2
/*
 * Single-precision limits and mathematical constants for the device code.
 * The literals carry no `f` suffix: they are double literals converted to
 * float when the variables are initialised.
 */
#ifdef DEC
/* MAXNUMF = 2^127 * (1 - 2^-24) */
__device__ float MAXNUMF = 1.7014117331926442990585209174225846272e38;
__device__ float MAXLOGF = 88.02969187150841;
__device__ float MINLOGF = -88.7228391116729996;    /* log(2^-128) */
#else
/* MAXNUMF = 2^128 * (1 - 2^-24) */
__device__ float MAXNUMF = 3.4028234663852885981170418348451692544e38;
__device__ float MAXLOGF = 88.72283905206835;
__device__ float MINLOGF = -103.278929903431851103; /* log(2^-149) */
#endif

__device__ float LOG2EF  = 1.44269504088896341;     /* log2(e) */
__device__ float LOGE2F  = 0.693147180559945309;    /* ln(2) */
__device__ float SQRTHF  = 0.707106781186547524;    /* sqrt(1/2) */
__device__ float PIF     = 3.141592653589793238;    /* pi */
__device__ float PIO2F   = 1.5707963267948966192;   /* pi / 2 */
__device__ float PIO4F   = 0.7853981633974483096;   /* pi / 4 */
__device__ float MACHEPF = 5.9604644775390625E-8;   /* 2^-24 */