PyFiberModes 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyFiberModes/.DS_Store +0 -0
- PyFiberModes/VERSION +1 -0
- PyFiberModes/__init__.py +38 -0
- PyFiberModes/constants.py +32 -0
- PyFiberModes/fiber/.DS_Store +0 -0
- PyFiberModes/fiber/__init__.py +0 -0
- PyFiberModes/fiber/factory.py +448 -0
- PyFiberModes/fiber/fiber.py +317 -0
- PyFiberModes/fiber/geometry/__init__.py +10 -0
- PyFiberModes/fiber/geometry/geometry.py +33 -0
- PyFiberModes/fiber/geometry/stepindex.py +181 -0
- PyFiberModes/fiber/geometry/supergaussian.py +193 -0
- PyFiberModes/fiber/material/__init__.py +13 -0
- PyFiberModes/fiber/material/air.py +20 -0
- PyFiberModes/fiber/material/claussiusmossotti.py +40 -0
- PyFiberModes/fiber/material/compmaterial.py +60 -0
- PyFiberModes/fiber/material/fixed.py +36 -0
- PyFiberModes/fiber/material/germania.py +14 -0
- PyFiberModes/fiber/material/material.py +107 -0
- PyFiberModes/fiber/material/sellmeier.py +47 -0
- PyFiberModes/fiber/material/sellmeiercomp.py +42 -0
- PyFiberModes/fiber/material/silica.py +15 -0
- PyFiberModes/fiber/material/sio2f.py +34 -0
- PyFiberModes/fiber/material/sio2geo2.py +36 -0
- PyFiberModes/fiber/material/sio2geo2cm.py +43 -0
- PyFiberModes/fiber/solver/__init__.py +12 -0
- PyFiberModes/fiber/solver/cuda.py +124 -0
- PyFiberModes/fiber/solver/cudasrc/besseldiff.c +28 -0
- PyFiberModes/fiber/solver/cudasrc/chareq.c +314 -0
- PyFiberModes/fiber/solver/cudasrc/constf.c +20 -0
- PyFiberModes/fiber/solver/cudasrc/hypergf.c +264 -0
- PyFiberModes/fiber/solver/cudasrc/ivf.c +49 -0
- PyFiberModes/fiber/solver/cudasrc/knf.c +166 -0
- PyFiberModes/fiber/solver/mlsif.py +289 -0
- PyFiberModes/fiber/solver/solver.py +119 -0
- PyFiberModes/fiber/solver/ssif.py +275 -0
- PyFiberModes/fiber/solver/tlsif.py +266 -0
- PyFiberModes/field.py +405 -0
- PyFiberModes/functions.py +137 -0
- PyFiberModes/mode.py +186 -0
- PyFiberModes/simulator/.DS_Store +0 -0
- PyFiberModes/simulator/__init__.py +15 -0
- PyFiberModes/simulator/psimulator.py +41 -0
- PyFiberModes/simulator/simulator.py +288 -0
- PyFiberModes/slrc.py +291 -0
- PyFiberModes/tools/__init__.py +0 -0
- PyFiberModes/tools/directories.py +41 -0
- PyFiberModes/wavelength.py +109 -0
- PyFiberModes-0.2.1.dist-info/METADATA +158 -0
- PyFiberModes-0.2.1.dist-info/RECORD +52 -0
- PyFiberModes-0.2.1.dist-info/WHEEL +5 -0
- PyFiberModes-0.2.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# This file is part of FiberModes.
|
|
2
|
+
#
|
|
3
|
+
# FiberModes is free software: you can redistribute it and/or modify
|
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
6
|
+
# (at your option) any later version.
|
|
7
|
+
#
|
|
8
|
+
# FiberModes is distributed in the hope that it will be useful,
|
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11
|
+
# GNU General Public License for more details.
|
|
12
|
+
#
|
|
13
|
+
# You should have received a copy of the GNU General Public License
|
|
14
|
+
# along with FiberModes. If not, see <http://www.gnu.org/licenses/>.
|
|
15
|
+
|
|
16
|
+
from .claussiusmossotti import ClaussiusMossotti
|
|
17
|
+
import numpy
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SiO2GeO2(ClaussiusMossotti):

    """Germania-doped silica, Claussius-Mossotti flavour.

    This class only supplies class-level data and :meth:`info`; no other
    method of :class:`ClaussiusMossotti` is overridden here.  The
    coefficients presumably come from Sunak & Bastien (1989), cited at
    the end of this module -- confirm against the paper before editing.
    """

    # Human-readable label of the material.
    name = "Silica doped with Germania (Claussius-Mossotti version)"
    # Number of parameters of the material (presumably the dopant
    # concentration -- TODO confirm against the base class).
    nparams = 1
    # Presumably the wavelength validity range in metres (0.6 to 1.8 um,
    # which matches the title of the cited article) -- TODO confirm.
    WLRANGE = (0.6e-6, 1.8e-6)
    # Presumably the upper bound of the dopant fraction -- TODO confirm.
    XRANGE = 0.2

    # Coefficient vectors read by the base class; their exact role is not
    # visible from this file.
    A = numpy.array((
        0.2045154578,
        0.06451676258,
        0.1311583151,
    ))
    B = numpy.array((
        -0.1011783769,
        0.1778934999,
        -0.1064179581,
    ))
    Z = numpy.array((
        0.06130807320e-6,
        0.1108859848e-6,
        8.964441861e-6,
    ))

    @classmethod
    def info(cls):
        """Return a one-line description of the material."""
        description = "Silica doped with Germania."
        return description
|
|
38
|
+
|
|
39
|
+
# Article (Sunak1989)
|
|
40
|
+
# Sunak, H. & Bastien, S.
|
|
41
|
+
# Refractive index and material dispersion interpolation of doped silica
|
|
42
|
+
# in the 0.6-1.8 mu m wavelength region
|
|
43
|
+
# Photonics Technology Letters, IEEE, 1989, 1, 142-145
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""A solver gives the function used to find cutoff and/or effective index
|
|
2
|
+
of a given :py:class:`~fibermodes.mode.Mode`
|
|
3
|
+
in a given :py:class:`fibermodes.fiber.fiber.Fiber`.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from . import ssif
|
|
8
|
+
from . import tlsif
|
|
9
|
+
from . import mlsif
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
__all__ = ['ssif', 'tlsif', 'mlsif']
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# This file is part of FiberModes.
|
|
2
|
+
#
|
|
3
|
+
# FiberModes is free software: you can redistribute it and/or modify
|
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
6
|
+
# (at your option) any later version.
|
|
7
|
+
#
|
|
8
|
+
# FiberModes is distributed in the hope that it will be useful,
|
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11
|
+
# GNU General Public License for more details.
|
|
12
|
+
#
|
|
13
|
+
# You should have received a copy of the GNU General Public License
|
|
14
|
+
# along with FiberModes. If not, see <http://www.gnu.org/licenses/>.
|
|
15
|
+
|
|
16
|
+
"""Solver using CUDA (Nvidia GPU)"""
|
|
17
|
+
|
|
18
|
+
from fibermodes.fiber.solver import mlsif
|
|
19
|
+
from fibermodes import Mode, Wavelength, ModeFamily
|
|
20
|
+
import numpy
|
|
21
|
+
import os
|
|
22
|
+
from itertools import cycle
|
|
23
|
+
|
|
24
|
+
import pycuda.driver as cuda
|
|
25
|
+
import pycuda.autoinit
|
|
26
|
+
from pycuda.compiler import SourceModule
|
|
27
|
+
assert pycuda.autoinit
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Neff(mlsif.Neff):

    """Effective-index solver that scans candidate indices on the GPU.

    The CUDA kernel ``chareq`` is compiled once per instance from the
    sources stored in the ``cudasrc`` directory next to this module.
    Modes with ``nu == 0`` and LP modes are delegated to the parent
    (CPU) solver.
    """

    def __init__(self, fiber):
        """Initialise the parent solver, then compile and set up the GPU."""
        super().__init__(fiber)

        self.NSOLVERS = 1000  # Number of points for the solver
        self._init_gpu()

    def _init_gpu(self):
        """Compile the CUDA module and allocate the device buffers."""
        source_dir = os.path.dirname(os.path.realpath(__file__)) + "/cudasrc/"
        # The files are concatenated in this order before compilation.
        source_files = ("constf.c", "hypergf.c", "ivf.c",
                        "knf.c", "besseldiff.c", "chareq.c")
        pieces = []
        for filename in source_files:
            with open(source_dir + filename) as handle:
                pieces.append(handle.read())
        module = SourceModule("".join(pieces))
        self.chareq = module.get_function("chareq")
        # Argument layout of the kernel: pointer, float, pointer, pointer,
        # unsigned int, pointer, pointer.
        self.chareq.prepare("PfPPIPP")

        float_bytes = numpy.dtype(numpy.float32).itemsize
        self.gpu_neff = cuda.mem_alloc(self.NSOLVERS * float_bytes)
        radii = numpy.array(self.fiber._r, dtype=numpy.float32)
        self.gpu_r = cuda.mem_alloc(radii.nbytes)
        cuda.memcpy_htod(self.gpu_r, radii)
        self.gpu_n = cuda.mem_alloc(len(self.fiber) * float_bytes)
        int_bytes = numpy.dtype(numpy.int32).itemsize
        self.gpu_nu = cuda.mem_alloc(int_bytes)
        # Host-side result buffer: one row of NSOLVERS kernel outputs.
        self.x = numpy.empty((1, self.NSOLVERS), dtype=numpy.float32)
        self.gpu_x = cuda.mem_alloc(self.x.nbytes)

    def __call__(self, wl, mode, delta, lowbound):
        """Look up the effective index of ``mode`` at wavelength ``wl``.

        ``delta`` and ``lowbound`` are only forwarded to the parent solver
        (used when ``mode.nu == 0`` or the mode family is LP).  Otherwise
        the kernel is evaluated on ``NSOLVERS`` evenly spaced candidate
        indices, every sign change between neighbouring samples is stored
        in the fiber cache, and the cache entry for ``mode`` is returned.
        ``float("nan")`` is returned when no entry exists for ``mode``.
        """
        if mode.nu == 0 or mode.family is ModeFamily.LP:
            return super().__call__(wl, mode, delta, lowbound)

        wl = Wavelength(wl)
        lowest = self.fiber.minIndex(-1, wl)
        highest = max(layer.maxIndex(wl) for layer in self.fiber.layers)
        neff = numpy.linspace(lowest, highest,
                              self.NSOLVERS).astype(numpy.float32)
        cuda.memcpy_htod(self.gpu_neff, neff)

        indices = numpy.fromiter(
            (layer.minIndex(wl) for layer in self.fiber.layers),
            dtype=numpy.float32,
            count=len(self.fiber))
        cuda.memcpy_htod(self.gpu_n, indices)

        nu = numpy.array([mode.nu], dtype=numpy.int32)
        cuda.memcpy_htod(self.gpu_nu, nu)

        grid_shape = (neff.size, nu.size)
        block_shape = (5, 4, 2)
        self.chareq.prepared_call(
            grid_shape, block_shape,
            self.gpu_neff, numpy.float32(wl.k0), self.gpu_r,
            self.gpu_n, numpy.uint32(indices.size), self.gpu_nu,
            self.gpu_x,
            shared_size=5*4*2*4)

        cuda.memcpy_dtoh(self.x, self.gpu_x)

        # Walk the samples from the highest index down and keep every
        # interval over which the kernel output changes sign.
        samples = self.x[0]
        brackets = []
        for upper in range(self.NSOLVERS - 1, 0, -1):
            lower = upper - 1
            left = samples[lower]
            right = samples[upper]
            # Skip intervals touching a very large value.
            if abs(right) > 1e5 or abs(left) > 1e5:
                continue
            if (left < 0 and right > 0) or (left > 0 and right < 0):
                # NOTE(review): the raw bracket is cached as-is; a
                # refinement through self._findBetween(self._heceq, ...)
                # was left disabled in the original code.
                brackets.append((neff[lower], neff[upper]))

        # Brackets are labelled HE, EH, HE, EH, ...; the radial order m
        # grows after each EH entry.
        families = cycle((ModeFamily.HE, ModeFamily.EH))
        m = 1
        for bracket, fam in zip(brackets, families):
            self.fiber.set_ne_cache(wl, Mode(fam, mode.nu, m), bracket)
            if fam == ModeFamily.EH:
                m += 1

        try:
            return self.fiber.ne_cache[wl][mode]
        except KeyError:
            return float("nan")
|
|
111
|
+
|
|
112
|
+
if __name__ == '__main__':
    # Small demonstration: build a three-layer fiber that uses the CUDA
    # solver and print the effective index of every mode found.
    from fibermodes import FiberFactory

    factory = FiberFactory()
    factory.setSolvers(neff=Neff)
    layer_specs = (
        {"radius": 4e-6, "index": 1.4489},
        {"radius": 10e-6, "index": 1.4474},
        {"index": 1.4444},
    )
    for spec in layer_specs:
        factory.addLayer(**spec)

    fiber = factory[0]
    wl = 1550e-9
    for mode in fiber.findVmodes(wl):
        print(mode, fiber.neff(mode, wl, delta=1e-5))
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
|
|
2
|
+
/*
 * Derivative of jnf(n, z) with respect to z, built from the recurrence
 * used below: -jnf(1, z) for order 0, otherwise half the difference of
 * the neighbouring orders.
 */
__device__ float jnp(int n, float z) {
    return (0 == n) ? -jnf(1, z)
                    : (jnf(n-1, z) - jnf(n+1, z)) / 2.;
}
|
|
8
|
+
|
|
9
|
+
/*
 * Derivative of ynf(n, z) with respect to z, built from the recurrence
 * used below: -ynf(1, z) for order 0, otherwise half the difference of
 * the neighbouring orders.
 */
__device__ float ynp(int n, float z) {
    return (0 == n) ? -ynf(1, z)
                    : (ynf(n-1, z) - ynf(n+1, z)) / 2.;
}
|
|
15
|
+
|
|
16
|
+
/*
 * Derivative of ivf(n, z) with respect to z, built from the recurrence
 * used below: ivf(1, z) for order 0, otherwise the mean of the
 * neighbouring orders.
 */
__device__ float ivp(int n, float z) {
    return (0 == n) ? ivf(1, z)
                    : (ivf(n-1, z) + ivf(n+1, z)) / 2.;
}
|
|
22
|
+
|
|
23
|
+
/*
 * Derivative of knf(n, z) with respect to z, built from the recurrence
 * used below: -knf(1, z) for order 0, otherwise minus the mean of the
 * neighbouring orders.
 */
__device__ float knp(int n, float z) {
    return (0 == n) ? -knf(1, z)
                    : -(knf(n-1, z) + knf(n+1, z)) / 2.;
}
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
|
|
2
|
+
#include <math_constants.h>
|
|
3
|
+
|
|
4
|
+
/* Presumably the impedance of free space in ohms -- confirm. */
#define ETA0 376.73031346177066f

/* Linear offset of the calling thread inside its own block. */
#define THREADIDX ((blockDim.x * blockDim.y * threadIdx.z) + (blockDim.x * threadIdx.y) + threadIdx.x)
/* Linear index of the calling block inside the (2-D) grid. */
#define BLOCKIDX ((gridDim.x * blockIdx.y) + blockIdx.x)
/*
 * IDX is deliberately block-local.  The grid-global alternative, left
 * disabled in the original code, was:
 * ((BLOCKIDX * blockDim.x * blockDim.y * blockDim.z) + THREADIDX)
 */
#define IDX THREADIDX
/* Offset of entry (row i, column j) in the calling thread's z slice. */
#define IJ(i, j) ((blockDim.x * blockDim.y * threadIdx.z) + (blockDim.x * (i)) + (j))
/* Offset of element i in the calling thread's z slice of a flat vector. */
#define IZ(i) (blockDim.y * threadIdx.z + (i))

/*
 * fct(nu, u * r / rho) normalised by fct(nu, u); FP does the same with
 * the derivative function fctp in the numerator.  Every macro argument
 * is parenthesised so that compound expressions expand as one operand.
 */
#define F(fct, nu, u, r, rho) (fct((nu), (u) * (r) / (rho)) / fct((nu), (u)))
#define FP(fct, fctp, nu, u, r, rho) (fctp((nu), (u) * (r) / (rho)) / fct((nu), (u)))

#define J(nu, u, r, rho) F(jnf, nu, u, r, rho)
#define Y(nu, u, r, rho) F(ynf, nu, u, r, rho)
#define I(nu, u, r, rho) F(ivf, nu, u, r, rho)
#define K(nu, u, r, rho) F(knf, nu, u, r, rho)

#define JP(nu, u, r, rho) FP(jnf, jnp, nu, u, r, rho)
#define YP(nu, u, r, rho) FP(ynf, ynp, nu, u, r, rho)
#define IP(nu, u, r, rho) FP(ivf, ivp, nu, u, r, rho)
#define KP(nu, u, r, rho) FP(knf, knp, nu, u, r, rho)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
/*
 * Gauss-Jordan elimination with partial pivoting on the augmented
 * matrices held in the dynamic shared array `ab`.
 *
 * Layout, as fixed by IDX / IJ: one matrix per threadIdx.z slice, row
 * index threadIdx.y, column index threadIdx.x, row-major with blockDim.x
 * columns.  Column number blockDim.y is the right-hand side.  One thread
 * owns one matrix entry.
 *
 * x: per-thread output of 8 floats; x[4*z + k] receives unknown k of
 *    slice z (the sizes 8 and 4 are hard-coded, i.e. 2 slices of 4
 *    unknowns).
 *
 * Must be called by every thread of the block: it contains
 * __syncthreads() barriers.
 */
__device__ void solve(float *x) {
    extern __shared__ float ab[];

    int i, j;
    int prow;              /* row holding the pivot of column i */
    int pidx, rpidx, ridx;
    float temp, temp2;
    float pivot, rowval, colval;

    for (i=0; i<blockDim.y; ++i) {
        /* find max pivot */
        /* Default to "no swap" so that prow is never read uninitialised
           (e.g. when the whole column is zero or NaN). */
        prow = i;
        if (threadIdx.y == i) {
            temp = 0.f;
            for (j=i; j<blockDim.y; ++j) {
                if ( (temp2 = fabsf(ab[IJ(j,i)])) > temp) {
                    temp = temp2;
                    prow = j;
                }
            }
        }

        /* swap rows */
        __syncthreads();
        if (threadIdx.y == i && prow != i) {
            ridx = IJ(prow,threadIdx.x);
            temp = ab[IDX];
            ab[IDX] = ab[ridx];
            ab[ridx] = temp;
        }

        ridx = IJ(i,threadIdx.x);
        pidx = IJ(i,i);
        rpidx = IJ(threadIdx.y,i);

        /* Snapshot the operands once the swap is visible.  ab[rpidx] and
           ab[pidx] are overwritten below by the threads of column i, so
           reading them in place would race with those writes. */
        __syncthreads();
        rowval = ab[ridx];
        colval = ab[rpidx];
        pivot = ab[pidx];

        /* row operations */
        __syncthreads();
        if (threadIdx.y != i) {
            if (threadIdx.x > i) {
                ab[IDX] -= rowval * colval / pivot;
            }
            else if (threadIdx.x == i) {
                ab[IDX] = 0.f;
            }
        }

        /* normalize row */
        __syncthreads();
        if (threadIdx.y == i) {
            if (threadIdx.x > i) {
                ab[IDX] /= pivot;
            }
            else if (threadIdx.x == i) {
                ab[IDX] = 1.f;
            }
        }
    }

    /* copy from ab to x */
    __syncthreads();
    for (i=0; i<8; ++i) {
        /* Entry (row i % 4, right-hand-side column) of slice i / 4. */
        x[i] = ab[(i / 4) * (blockDim.x * blockDim.y) + blockDim.x * (i % 4) + blockDim.y];
    }

    /* The caller overwrites the right-hand-side column right after this
       call; wait until every thread has finished reading it. */
    __syncthreads();
}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
/*
 * Evaluate the characteristic function for one candidate effective index.
 *
 * All threads of the block cooperate through the dynamic shared array
 * `ab`, addressed with IDX / IJ (row threadIdx.y, column threadIdx.x,
 * one slice per threadIdx.z).  The literal column 4 is used as the
 * right-hand-side column, so the code assumes blockDim.x == 5 and
 * blockDim.y == 4 (x[] holds 8 values, i.e. two slices).
 *
 * neff: candidate effective index.
 * k0:   multiplies r * w to form u (presumably the vacuum wavenumber --
 *       confirm with the caller).
 * r:    per-layer radii; r[i] is read for every layer but the last.
 * n:    per-layer refractive indices, N entries.
 * N:    number of layers.  NOTE(review): N < 2 makes r[i-1] and the
 *       `i < N-1` bound wrap around (unsigned) -- confirm callers
 *       guarantee N >= 2.
 * nu:   order passed to the Bessel-type helpers.
 *
 * Returns CUDART_INF_F when neff equals one of the n[i] (w[i] == 0),
 * otherwise a 2x2 determinant built from rows 2 and 3 of column 4 of
 * both slices.
 *
 * NOTE(review): the two malloc() results are not checked for NULL.
 */
__device__ float _chareq(float neff, float k0,
                         float *r, float *n, unsigned int N,
                         unsigned int nu) {
    extern __shared__ float ab[];
    unsigned int i;
    float *w, *u;
    float x[8];  /* per-thread copy of the solution written by solve() */

    w = (float *) malloc(N*sizeof(float));
    u = (float *) malloc(N*sizeof(float));

    for (i=0; i<N; ++i) {
        /* w[i] = sqrt(|n_i^2 - neff^2|) */
        w[i] = sqrtf(fabsf(n[i]*n[i] - neff*neff));
        if (0. == w[i]) {
            free(w);
            free(u);
            return CUDART_INF_F;
        }
        /* The last layer reuses the radius of the previous one. */
        u[i] = k0 * r[(i+1 == N)?(i-1):i] * w[i];
        /* After u is formed, w carries a minus sign when neff > n[i]. */
        if (neff > n[i]) {
            w[i] = -w[i];
        }
    }

    /* First layer */
    /* Only the threads of column 4 write here: rows 0..3 of each slice. */
    if (4 == threadIdx.x) {
        switch (threadIdx.y) {
        case 0:
        case 1:
            /* 1 in (row 0, slice 0) and (row 1, slice 1), 0 otherwise. */
            ab[IDX] = (threadIdx.y == threadIdx.z)?1.:0.;
            break;
        case 2:
            if (threadIdx.z == 0) {
                ab[IDX] = neff * nu / (u[0] * w[0]);
            }
            else {
                if (neff < n[0]) {
                    ab[IDX] = -JP(nu, u[0], 1, 1) * ETA0 / w[0];
                }
                else {
                    ab[IDX] = -IP(nu, u[0], 1, 1) * ETA0 / w[0];
                }
            }
            break;
        case 3:
            if (threadIdx.z == 0) {
                if (neff < n[0]) {
                    ab[IDX] = JP(nu, u[0], 1, 1) * n[0] * n[0] / (ETA0 * w[0]);
                }
                else {
                    ab[IDX] = IP(nu, u[0], 1, 1) * n[0] * n[0] / (ETA0 * w[0]);
                }
            }
            else {
                ab[IDX] = -neff * nu / (u[0] * w[0]);
            }
            break;
        }
    }

    /* For each intermediate layer */
    for (i=1; i<N-1; ++i) {
        /* Fill matrix */
        /* Each thread with threadIdx.x in 0..3 writes its own entry;
           column 4 keeps the values of the previous layer.  The J/Y
           family is used when neff < n[i], the I/K family otherwise.
           NOTE(review): threadIdx.x appears to select the equation here
           while solve() pivots with threadIdx.y as the row index --
           confirm the intended orientation. */
        switch (threadIdx.x) {
        case 0:
            switch (threadIdx.y) {
            case 0:
                ab[IDX] = (neff < n[i]) ? J(nu, u[i], r[i-1], r[i]):
                                          I(nu, u[i], r[i-1], r[i]);
                break;
            case 1:
                ab[IDX] = (neff < n[i]) ? Y(nu, u[i], r[i-1], r[i]):
                                          K(nu, u[i], r[i-1], r[i]);
                break;
            case 2:
            case 3:
                ab[IDX] = 0.;
                break;
            }
            break;
        case 1:
            switch (threadIdx.y) {
            case 0:
            case 1:
                ab[IDX] = 0.;
                break;
            case 2:
                ab[IDX] = (neff < n[i]) ? J(nu, u[i], r[i-1], r[i]):
                                          I(nu, u[i], r[i-1], r[i]);
                break;
            case 3:
                ab[IDX] = (neff < n[i]) ? Y(nu, u[i], r[i-1], r[i]):
                                          K(nu, u[i], r[i-1], r[i]);
                break;
            }
            break;
        case 2:
            switch (threadIdx.y) {
            case 0:
                ab[IDX] = ((neff < n[i]) ? J(nu, u[i], r[i-1], r[i]):
                                           I(nu, u[i], r[i-1], r[i])) *
                          neff * nu * r[i] / (u[i] * r[i-1] * w[i]);
                break;
            case 1:
                ab[IDX] = ((neff < n[i]) ? Y(nu, u[i], r[i-1], r[i]):
                                           K(nu, u[i], r[i-1], r[i])) *
                          neff * nu * r[i] / (u[i] * r[i-1] * w[i]);
                break;
            case 2:
                ab[IDX] = -((neff < n[i]) ? JP(nu, u[i], r[i-1], r[i]):
                                            IP(nu, u[i], r[i-1], r[i])) *
                          ETA0 / w[i];
                break;
            case 3:
                ab[IDX] = -((neff < n[i]) ? YP(nu, u[i], r[i-1], r[i]):
                                            KP(nu, u[i], r[i-1], r[i])) *
                          ETA0 / w[i];
                break;
            }
            break;
        case 3:
            switch (threadIdx.y) {
            case 0:
                ab[IDX] = ((neff < n[i]) ? JP(nu, u[i], r[i-1], r[i]):
                                           IP(nu, u[i], r[i-1], r[i])) *
                          n[i] * n[i] / (ETA0 * w[i]);
                break;
            case 1:
                ab[IDX] = ((neff < n[i]) ? YP(nu, u[i], r[i-1], r[i]):
                                           KP(nu, u[i], r[i-1], r[i])) *
                          n[i] * n[i] / (ETA0 * w[i]);
                break;
            case 2:
                ab[IDX] = -((neff < n[i]) ? J(nu, u[i], r[i-1], r[i]):
                                            I(nu, u[i], r[i-1], r[i])) *
                          neff * nu * r[i] / (u[i] * r[i-1] * w[i]);
                break;
            case 3:
                ab[IDX] = -((neff < n[i]) ? Y(nu, u[i], r[i-1], r[i]):
                                            K(nu, u[i], r[i-1], r[i])) *
                          neff * nu * r[i] / (u[i] * r[i-1] * w[i]);
                break;
            }
            break;
        }

        /* Solve system */
        __syncthreads();
        solve(x);

        /* Get E and H values */
        /* Column 4 is rewritten from the solution; x[IZ(k)] is unknown k
           of the calling thread's slice. */
        if (4 == threadIdx.x) {
            switch (threadIdx.y) {
            case 0:
                ab[IDX] = x[IZ(0)] + x[IZ(1)];
                break;
            case 1:
                ab[IDX] = x[IZ(2)] + x[IZ(3)];
                break;
            case 2:
                if (neff < n[i])
                    ab[IDX] = ((neff * nu / (u[i] * w[i])) * (x[IZ(0)] + x[IZ(1)])) -
                              ((ETA0 / w[i]) * (x[IZ(2)] * JP(nu, u[i], 1, 1) +
                                                x[IZ(3)] * YP(nu, u[i], 1, 1)));
                else
                    ab[IDX] = ((neff * nu / (u[i] * w[i])) * (x[IZ(0)] + x[IZ(1)])) -
                              ((ETA0 / w[i]) * (x[IZ(2)] * IP(nu, u[i], 1, 1) +
                                                x[IZ(3)] * KP(nu, u[i], 1, 1)));
                break;
            case 3:
                if (neff < n[i])
                    ab[IDX] = ((n[i] * n[i] / (ETA0 * w[i])) *
                               (x[IZ(0)] * JP(nu, u[i], 1, 1) +
                                x[IZ(1)] * YP(nu, u[i], 1, 1))) -
                              ((neff * nu / (u[i] * w[i])) * (x[IZ(2)] + x[IZ(3)]));
                else
                    ab[IDX] = ((n[i] * n[i] / (ETA0 * w[i])) *
                               (x[IZ(0)] * IP(nu, u[i], 1, 1) +
                                x[IZ(1)] * KP(nu, u[i], 1, 1))) -
                              ((neff * nu / (u[i] * w[i])) * (x[IZ(2)] + x[IZ(3)]));
                break;
            }
        }

    }

    __syncthreads();

    /* Find values for last layer */
    /* When N >= 2, i equals N-1 here (the loop above has finished). */
    if (neff < n[i]) {
        /* Leaky mode */
        /* NOTE(review): nothing is computed for this case. */
    }
    else {
        /* Guided mode */
        /* Rows 2 and 3 of column 4 are corrected using rows 0 and 1 of
           the same column and slice. */
        if (4 == threadIdx.x) {
            if (2 == threadIdx.y) {
                ab[IDX] -= (neff * nu / (u[N-1] * w[N-1])) * ab[IJ(0,4)] -
                           ((ETA0 / w[N-1]) * ab[IJ(1,4)] * KP(nu, u[N-1], 1, 1));
            }
            else if (3 == threadIdx.y) {
                ab[IDX] -= (n[N-1] * n[N-1] / (ETA0 * w[N-1])) * ab[IJ(0,4)] * KP(nu, u[N-1], 1, 1) -
                           (neff * nu / (u[N-1] * w[N-1])) * ab[IJ(1,4)];
            }
        }
    }

    free(w);
    free(u);

    __syncthreads();

    /* (row 2, slice 0) * (row 3, slice 1) - (row 2, slice 1) * (row 3, slice 0),
       all taken in column 4. */
    return ab[((blockDim.x * 2) + 4)] * ab[((blockDim.x * blockDim.y) + (blockDim.x * 3) + 4)] -
           ab[((blockDim.x * blockDim.y) + (blockDim.x * 2) + 4)] * ab[((blockDim.x * 3) + 4)];
}
|
|
304
|
+
|
|
305
|
+
/*
 * Kernel entry point: one block evaluates _chareq() for one
 * (neff, nu) pair and stores the result.
 *
 * neff: candidate effective indices, indexed by blockIdx.x.
 * k0, r, n, N: forwarded unchanged to _chareq().
 * nu:   orders, indexed by blockIdx.y.
 * x:    output, one value per block at x[gridDim.x * blockIdx.y + blockIdx.x].
 */
__global__ void chareq(float *neff, float k0,
                       float *r, float *n, unsigned int N, unsigned int *nu,
                       float *x) {
    float xx;

    xx = _chareq(neff[blockIdx.x], k0, r, n, N, nu[blockIdx.y]);
    /* Only thread (0, 0, 0) stores the block's result.  A chained
       `0 == a == b == c` is evaluated left to right on the boolean
       results and also let threads such as (0, 1, 1) through. */
    if (0 == threadIdx.x && 0 == threadIdx.y && 0 == threadIdx.z) {
        x[BLOCKIDX] = xx;
    }
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
|
|
2
|
+
/* Single-precision limits; the DEC branch uses a narrower exponent range. */
#ifdef DEC
/* MAXNUMF = 2^127 * (1 - 2^-24) */
__device__ float MAXNUMF = 1.7014117331926442990585209174225846272e38;
__device__ float MAXLOGF = 88.02969187150841; /* log(MAXNUMF) */
__device__ float MINLOGF = -88.7228391116729996; /* log(2^-128) */
#else
/* MAXNUMF = 2^128 * (1 - 2^-24) */
__device__ float MAXNUMF = 3.4028234663852885981170418348451692544e38;
__device__ float MAXLOGF = 88.72283905206835; /* log(MAXNUMF) */
__device__ float MINLOGF = -103.278929903431851103; /* log(2^-149) */
#endif

/* Mathematical constants, stored as device globals. */
__device__ float LOG2EF = 1.44269504088896341; /* log2(e) */
__device__ float LOGE2F = 0.693147180559945309; /* log(2) */
__device__ float SQRTHF = 0.707106781186547524; /* sqrt(1/2) */
__device__ float PIF = 3.141592653589793238; /* pi */
__device__ float PIO2F = 1.5707963267948966192; /* pi / 2 */
__device__ float PIO4F = 0.7853981633974483096; /* pi / 4 */
__device__ float MACHEPF = 5.9604644775390625E-8; /* 2^-24 */
|