acoular 24.3-py3-none-any.whl → 24.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. acoular/__init__.py +119 -54
  2. acoular/calib.py +29 -38
  3. acoular/configuration.py +132 -82
  4. acoular/demo/__init__.py +10 -4
  5. acoular/demo/acoular_demo.py +73 -55
  6. acoular/environments.py +270 -264
  7. acoular/fastFuncs.py +366 -196
  8. acoular/fbeamform.py +1797 -1934
  9. acoular/grids.py +504 -548
  10. acoular/h5cache.py +74 -83
  11. acoular/h5files.py +159 -142
  12. acoular/internal.py +13 -14
  13. acoular/microphones.py +57 -53
  14. acoular/sdinput.py +57 -53
  15. acoular/signals.py +180 -178
  16. acoular/sources.py +920 -724
  17. acoular/spectra.py +353 -363
  18. acoular/tbeamform.py +416 -416
  19. acoular/tfastfuncs.py +180 -104
  20. acoular/tools/__init__.py +25 -0
  21. acoular/tools/aiaa.py +185 -0
  22. acoular/tools/helpers.py +189 -0
  23. acoular/tools/metrics.py +165 -0
  24. acoular/tprocess.py +1240 -1182
  25. acoular/traitsviews.py +513 -501
  26. acoular/trajectory.py +50 -52
  27. acoular/version.py +5 -6
  28. acoular/xml/minidsp_uma-16.xml +20 -0
  29. acoular/xml/{minidsp_uma16.xml → minidsp_uma-16_mirrored.xml} +3 -0
  30. {acoular-24.3.dist-info → acoular-24.7.dist-info}/METADATA +58 -39
  31. acoular-24.7.dist-info/RECORD +50 -0
  32. {acoular-24.3.dist-info → acoular-24.7.dist-info}/WHEEL +1 -1
  33. acoular-24.7.dist-info/licenses/LICENSE +28 -0
  34. acoular/fileimport.py +0 -380
  35. acoular/nidaqimport.py +0 -273
  36. acoular/tests/reference_data/BeamformerBase.npy +0 -0
  37. acoular/tests/reference_data/BeamformerBaseFalse1.npy +0 -0
  38. acoular/tests/reference_data/BeamformerBaseFalse2.npy +0 -0
  39. acoular/tests/reference_data/BeamformerBaseFalse3.npy +0 -0
  40. acoular/tests/reference_data/BeamformerBaseFalse4.npy +0 -0
  41. acoular/tests/reference_data/BeamformerBaseTrue1.npy +0 -0
  42. acoular/tests/reference_data/BeamformerBaseTrue2.npy +0 -0
  43. acoular/tests/reference_data/BeamformerBaseTrue3.npy +0 -0
  44. acoular/tests/reference_data/BeamformerBaseTrue4.npy +0 -0
  45. acoular/tests/reference_data/BeamformerCMFLassoLarsBIC.npy +0 -0
  46. acoular/tests/reference_data/BeamformerCMFNNLS.npy +0 -0
  47. acoular/tests/reference_data/BeamformerCapon.npy +0 -0
  48. acoular/tests/reference_data/BeamformerClean.npy +0 -0
  49. acoular/tests/reference_data/BeamformerCleansc.npy +0 -0
  50. acoular/tests/reference_data/BeamformerCleant.npy +0 -0
  51. acoular/tests/reference_data/BeamformerCleantSq.npy +0 -0
  52. acoular/tests/reference_data/BeamformerCleantSqTraj.npy +0 -0
  53. acoular/tests/reference_data/BeamformerCleantTraj.npy +0 -0
  54. acoular/tests/reference_data/BeamformerDamas.npy +0 -0
  55. acoular/tests/reference_data/BeamformerDamasPlus.npy +0 -0
  56. acoular/tests/reference_data/BeamformerEig.npy +0 -0
  57. acoular/tests/reference_data/BeamformerEigFalse1.npy +0 -0
  58. acoular/tests/reference_data/BeamformerEigFalse2.npy +0 -0
  59. acoular/tests/reference_data/BeamformerEigFalse3.npy +0 -0
  60. acoular/tests/reference_data/BeamformerEigFalse4.npy +0 -0
  61. acoular/tests/reference_data/BeamformerEigTrue1.npy +0 -0
  62. acoular/tests/reference_data/BeamformerEigTrue2.npy +0 -0
  63. acoular/tests/reference_data/BeamformerEigTrue3.npy +0 -0
  64. acoular/tests/reference_data/BeamformerEigTrue4.npy +0 -0
  65. acoular/tests/reference_data/BeamformerFunctional.npy +0 -0
  66. acoular/tests/reference_data/BeamformerGIB.npy +0 -0
  67. acoular/tests/reference_data/BeamformerGridlessOrth.npy +0 -0
  68. acoular/tests/reference_data/BeamformerMusic.npy +0 -0
  69. acoular/tests/reference_data/BeamformerOrth.npy +0 -0
  70. acoular/tests/reference_data/BeamformerSODIX.npy +0 -0
  71. acoular/tests/reference_data/BeamformerTime.npy +0 -0
  72. acoular/tests/reference_data/BeamformerTimeSq.npy +0 -0
  73. acoular/tests/reference_data/BeamformerTimeSqTraj.npy +0 -0
  74. acoular/tests/reference_data/BeamformerTimeTraj.npy +0 -0
  75. acoular/tests/reference_data/Environment.npy +0 -0
  76. acoular/tests/reference_data/Example1_numerical_values_testsum.h5 +0 -0
  77. acoular/tests/reference_data/FiltFiltOctave__.npy +0 -0
  78. acoular/tests/reference_data/FiltFiltOctave_band_100_0_fraction_Thirdoctave_.npy +0 -0
  79. acoular/tests/reference_data/FiltFreqWeight_weight_A_.npy +0 -0
  80. acoular/tests/reference_data/FiltFreqWeight_weight_C_.npy +0 -0
  81. acoular/tests/reference_data/FiltFreqWeight_weight_Z_.npy +0 -0
  82. acoular/tests/reference_data/FiltOctave__.npy +0 -0
  83. acoular/tests/reference_data/FiltOctave_band_100_0_fraction_Thirdoctave_.npy +0 -0
  84. acoular/tests/reference_data/Filter__.npy +0 -0
  85. acoular/tests/reference_data/GeneralFlowEnvironment.npy +0 -0
  86. acoular/tests/reference_data/OctaveFilterBank__.npy +0 -0
  87. acoular/tests/reference_data/OpenJet.npy +0 -0
  88. acoular/tests/reference_data/PointSource.npy +0 -0
  89. acoular/tests/reference_data/PowerSpectra_csm.npy +0 -0
  90. acoular/tests/reference_data/PowerSpectra_ev.npy +0 -0
  91. acoular/tests/reference_data/RotatingFlow.npy +0 -0
  92. acoular/tests/reference_data/SlotJet.npy +0 -0
  93. acoular/tests/reference_data/TimeAverage__.npy +0 -0
  94. acoular/tests/reference_data/TimeCumAverage__.npy +0 -0
  95. acoular/tests/reference_data/TimeExpAverage_weight_F_.npy +0 -0
  96. acoular/tests/reference_data/TimeExpAverage_weight_I_.npy +0 -0
  97. acoular/tests/reference_data/TimeExpAverage_weight_S_.npy +0 -0
  98. acoular/tests/reference_data/TimeInOut__.npy +0 -0
  99. acoular/tests/reference_data/TimePower__.npy +0 -0
  100. acoular/tests/reference_data/TimeReverse__.npy +0 -0
  101. acoular/tests/reference_data/UniformFlowEnvironment.npy +0 -0
  102. acoular/tests/reference_data/beamformer_traj_time_data.h5 +0 -0
  103. acoular/tests/run_tests.sh +0 -18
  104. acoular/tests/run_tests_osx.sh +0 -16
  105. acoular/tests/test.npy +0 -0
  106. acoular/tests/test_beamformer_results.py +0 -213
  107. acoular/tests/test_classes.py +0 -60
  108. acoular/tests/test_digest.py +0 -125
  109. acoular/tests/test_environments.py +0 -73
  110. acoular/tests/test_example1.py +0 -124
  111. acoular/tests/test_grid.py +0 -92
  112. acoular/tests/test_integrate.py +0 -102
  113. acoular/tests/test_signals.py +0 -60
  114. acoular/tests/test_sources.py +0 -65
  115. acoular/tests/test_spectra.py +0 -38
  116. acoular/tests/test_timecache.py +0 -35
  117. acoular/tests/test_tprocess.py +0 -90
  118. acoular/tests/test_traj_beamformer_results.py +0 -164
  119. acoular/tests/unsupported/SpeedComparison/OvernightTestcasesBeamformer_nMics32_nGridPoints100_nFreqs4_nTrials10.png +0 -0
  120. acoular/tests/unsupported/SpeedComparison/cythonBeamformer.pyx +0 -237
  121. acoular/tests/unsupported/SpeedComparison/mainForCython.py +0 -103
  122. acoular/tests/unsupported/SpeedComparison/mainForParallelJit.py +0 -143
  123. acoular/tests/unsupported/SpeedComparison/setupCythonOpenMP.py +0 -63
  124. acoular/tests/unsupported/SpeedComparison/sharedFunctions.py +0 -153
  125. acoular/tests/unsupported/SpeedComparison/timeOverNMics_AllImportantMethods.png +0 -0
  126. acoular/tests/unsupported/SpeedComparison/timeOverNMics_faverage.png +0 -0
  127. acoular/tests/unsupported/SpeedComparison/vglOptimierungFAverage.py +0 -204
  128. acoular/tests/unsupported/SpeedComparison/vglOptimierungGaussSeidel.py +0 -182
  129. acoular/tests/unsupported/SpeedComparison/vglOptimierungR_BEAMFULL_INVERSE.py +0 -764
  130. acoular/tests/unsupported/SpeedComparison/vglOptimierungR_BEAM_OS.py +0 -231
  131. acoular/tests/unsupported/SpeedComparison/whatsFastestWayFor_absASquared.py +0 -48
  132. acoular/tests/unsupported/functionalBeamformer.py +0 -123
  133. acoular/tests/unsupported/precisionTest.py +0 -153
  134. acoular/tests/unsupported/validationOfBeamformerFuncsPOSTAcoularIntegration.py +0 -254
  135. acoular/tests/unsupported/validationOfBeamformerFuncsPREeAcoularIntegration.py +0 -531
  136. acoular/tools.py +0 -422
  137. acoular-24.3.dist-info/RECORD +0 -148
  138. acoular-24.3.dist-info/licenses/LICENSE +0 -29
  139. {acoular-24.3.dist-info → acoular-24.7.dist-info}/licenses/AUTHORS.rst +0 -0
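
Of the changed files, only the removed benchmark script acoular/tests/unsupported/SpeedComparison/vglOptimierungR_BEAMFULL_INVERSE.py is shown in full below. Its module docstring documents the two tricks used throughout: down-casting the phase argument of the steering vector to 32-bit float (point 3.a) and replacing exp() with an explicit cos() - 1j*sin() (point 3.b), with the fastest variant being a numba.guvectorize kernel that parallelizes over the grid points. For orientation only, here is a minimal, self-contained Python 3 sketch of such a kernel. It is not code from acoular; the name beamformer_kernel_sketch and all variable names are invented for illustration, and the normalization simply mirrors the removed script.

import numpy as np
from numba import guvectorize, complex128, float64

@guvectorize([(complex128[:, :], float64[:], float64, float64, float64[:])],
             '(m,m),(m),(),()->()', nopython=True, target='parallel')
def beamformer_kernel_sketch(csm, rm, r0, k, out):
    # csm : cross spectral matrix for one frequency, shape (nMics, nMics)
    # rm  : distances from this grid point to the nMics microphones
    # r0  : distance from this grid point to the array reference point
    # k   : wavenumber for this frequency (scalar)
    nmics = rm.shape[0]
    steer = np.empty(nmics, dtype=np.complex128)
    for m in range(nmics):
        # point 3.a of the removed script: down-cast the phase to float32
        phase = np.float32(k * (rm[m] - r0))
        # point 3.b: cos - 1j*sin instead of exp(), since the argument is pure phase
        steer[m] = (np.cos(phase) - 1j * np.sin(phase)) * rm[m]
    acc = 0.0
    for m in range(nmics):
        row = 0.0 + 0.0j
        for m2 in range(m):
            row += csm[m2, m] * steer[m2]
        acc += 2.0 * (row * steer[m].conjugate()).real
        acc += (csm[m, m] * steer[m].conjugate() * steer[m]).real
    out[0] = acc / r0 ** 2

# Usage: the kernel describes a single grid point; passing arrays with one extra
# leading grid axis makes numba loop over that axis on all available cores.
nmics, ngrid = 8, 16
rng = np.random.default_rng(1)
csm = rng.random((nmics, nmics)) + 1j * rng.random((nmics, nmics))
csm = csm + csm.conj().T                  # make the CSM hermitian
rm = rng.random((ngrid, nmics)) + 1.0     # grid-to-mic distances
r0 = rng.random(ngrid) + 1.0              # grid-to-reference distances
result = beamformer_kernel_sketch(csm, rm, r0, 40.0)   # shape (ngrid,)
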
@@ -1,764 +0,0 @@
- #!/usr/bin/env python2
- # -*- coding: utf-8 -*-
- """
- Comparison of different optimization approaches to the 'r_beamfull_inverse' method.
- Compared are: Numpy (matrix-vector calculations), Numba and Cython.
-
- 1. Background:
- Currently (python=2) performance-critical acoular methods (e.g.
- faverage, all frequency-domain beamformers, ...) are optimized via Scipy.weave,
- which translates code to c++, including compiling. Those executables can then
- be imported in python.
- Scipy.weave is no longer supported in python=3. Furthermore, executables of
- code already compiled with Scipy.weave (python=2) cannot be imported in python=3.
-
- 2. Structure of comparison:
- The benchmark in both errors and time consumption is always the Scipy.weave-built,
- OpenMP-optimized, c++ compiled code. Especially the relative and absolute inf-norm
- errors in the plots refer to the outputs of this function.
-
- 3. Remarks on the code:
- In the various functions below there are recurring patterns which may need some explanation:
- a. There is a cast from 64-bit double precision to 32-bit precision of 'temp3',
- which is the argument to the exp() when calculating the steering vectors.
- -> This down-cast shortens the series expansion of exp() drastically, which
- leads to faster calculations while having acceptable errors. In fact, if
- there is no down-cast, the relative error between otherwise identical
- methods is about 10^-8.
- b. The exp() (when calculating the steering vector, see a.) is mostly replaced
- by a direct calculation of 'cos() - 1j*sin()', which can be done because
- the input 'temp3' of exp(temp3) is pure imaginary. Because of this the
- calculation of exp(0)=1 in 'exp(0 - 1j*a) = exp(0) * (cos(a) - 1j*sin(a))'
- can be spared. This leads to further speed improvements.
-
- 4. Remark on the use of Cython:
- See file 'cythonBeamformer.pyx' for remarks on Cython. It showed that at the
- moment Cython doesn't work too well for the beamformer case.
-
- 5. Others:
-
-
- Versions used in this script:
- numba=0.34.0
- python=2.7.13
-
-
-
- # multiplication by nMics happens outside, otherwise it would be done here
-
-
-
- """
- import time as tm
- import threading
- import gc
-
- import numpy as np
- from numba import jit, guvectorize, complex128, complex64, float64, float32, void, uint64, njit, prange
-
- import sharedFunctions as shFncs
- from cythonBeamformer import beamformerCython, beamformerCythonNOTparallel # created with cython
- from beamformer import r_beamfull_inverse # The benchmark (created with scipy.weave)
- from beamformer_withoutMP import r_beamfull_inverse_OhneMP # also created with scipy.weave, but WITHOUT using multiple cores via OpenMP
-
- #%% Formulate the Beamformer as VECTOR * MATRIX * VECTOR product
- def vectorized(csm, e, h, r0, rm, kj):
-     """ Uses Numpy's fast array operations, distributed via the mkl package.
-     Those operations are already optimized and use all available physical cores.
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.complex128)
-     for cntFreqs in xrange(nFreqs):
-         for cntGrid in xrange(nGridPoints):
-             steeringVector = rm[cntGrid, :] / r0[cntGrid] \
-                 * np.exp(-1j * kj[cntFreqs].imag * (rm[cntGrid, :] - r0[cntGrid]))
-             beamformOutput[cntFreqs, cntGrid] = np.inner(np.inner(steeringVector.T.conj(), csm[cntFreqs, :, :]), steeringVector)
-     return beamformOutput.real
-
- def vectorizedOptimized(csm, e, h, r0, rm, kj):
-     """ Same as 'vectorized' but including both 3.a. & 3.b. of the documentation
-     string at the beginning of this file. In contrast to the numba-optimized
-     methods below, the use of 'cos() - 1j*sin()' instead of 'exp()' (see 3.a.)
-     doesn't seem to have any speed improvement here.
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.complex128)
-     for cntFreqs in xrange(nFreqs):
-         kjj = kj[cntFreqs].imag
-         for cntGrid in xrange(nGridPoints):
-             temp3 = np.float32(kjj * (rm[cntGrid, :] - r0[cntGrid]))
-             steeringVector = rm[cntGrid, :] / r0[cntGrid] * (np.cos(temp3) - 1j * np.sin(temp3))
-             beamformOutput[cntFreqs, cntGrid] = np.vdot(steeringVector, np.dot(steeringVector, csm[cntFreqs, :, :]))
-     return beamformOutput.real
-
- @jit
- def vectorized_NumbaJitOnly(csm, e, h, r0, rm, kj):
-     """ Identical code to 'vectorized'. Just decorated with the most basic
-     jit-optimization routine. If jit is able to translate all variables into
-     primitive datatypes (NOT the native python objects) it will do that. If not,
-     jit will fall back into 'Object mode' (native python objects) which will
-     mostly be much slower.
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.complex128)
-     for cntFreqs in xrange(nFreqs):
-         for cntGrid in xrange(nGridPoints):
-             steeringVector = rm[cntGrid, :] / r0[cntGrid] \
-                 * np.exp(-1j * kj[cntFreqs].imag * (rm[cntGrid, :] - r0[cntGrid]))
-             beamformOutput[cntFreqs, cntGrid] = np.inner(np.inner(steeringVector.T.conj(), csm[cntFreqs, :, :]), steeringVector)
-     return beamformOutput.real
-
- @jit(nopython=True) # same as directly calling @njit
- def vectorized_NumbaJit_nopythonTrue(csm, e, h, r0, rm, kj):
-     """ In addition to 'vectorized_NumbaJitOnly' the nopython=True (or simply
-     @njit for numba>=0.34) makes sure that if jit cannot translate the code
-     into primitive datatypes, it will NOT fall back into object mode but
-     instead returns an error.
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.complex128)
-     for cntFreqs in xrange(nFreqs):
-         for cntGrid in xrange(nGridPoints):
-             steeringVector = rm[cntGrid, :] / r0[cntGrid] * np.exp(-1j * kj[cntFreqs].imag * (rm[cntGrid, :] - r0[cntGrid]))
-
-             #==============================================================================
-             # #Not all numpy functions are supported in jit. It took some
-             # #tries to figure out how to implement the easy np.inner used in
-             # #'vectorized' into jit-supported functions.
-             #
-
-             # peer = np.inner(np.inner(steeringVector.T.conjugate(), csm[cntFreqs, :, :]), steeringVector)
-             # peer2 = np.vdot(steeringVector, np.dot(steeringVector, csm[cntFreqs, :, :])) # This one seems to work
-             # peer3 = np.vdot(steeringVector, np.dot(csm[cntFreqs, :, :], steeringVector))
-             #
-             # versuch1 = np.inner(steeringVector, csm[cntFreqs, :, :])
-             # versuch12 = np.inner(steeringVector.conjugate(), csm[cntFreqs, :, :])
-             # versuch2 = np.dot(steeringVector, csm[cntFreqs, :, :]) # complex conjugate of versuch12
-             # versuch3 = np.dot(csm[cntFreqs, :, :], steeringVector) # same as versuch1
-             # versuch4 = np.dot(steeringVector.conjugate(), csm[cntFreqs, :, :]) # is the complex conjugate of versuch1, versuch3
-             # versuch5 = np.dot(csm[cntFreqs, :, :], steeringVector.conjugate()) # is the same as versuch12
-             # ##--> Apparently the syntax for x^H * A is dot(A, x.conj)
-             beamformOutput[cntFreqs, cntGrid] = np.vdot(steeringVector, np.dot(steeringVector, csm[cntFreqs, :, :])) # This works
-             #==============================================================================
-     return beamformOutput.real
-
- @njit(float64[:,:](complex128[:,:,:], complex128[:], float64[:,:], float64[:], float64[:,:], complex128[:]))
- def vectorized_NumbaJit_nopythonTrue_DeclareInput(csm, e, h, r0, rm, kj):
-     """ In addition to 'vectorized_NumbaJit_nopythonTrue' the in-/output of the
-     method are declared in the decorator, which normally leads to speed
-     improvements (even though they're very little in this particular case).
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.float64)
-     for cntFreqs in xrange(nFreqs):
-         for cntGrid in xrange(nGridPoints):
-             steeringVector = rm[cntGrid, :] / r0[cntGrid] * np.exp(-1j * kj[cntFreqs].imag * (rm[cntGrid, :] - r0[cntGrid]))
-             beamformOutput[cntFreqs, cntGrid] = np.vdot(steeringVector, np.dot(steeringVector, csm[cntFreqs, :, :])).real
-     return beamformOutput
-
- @njit(float64[:,:](complex128[:,:,:], float64[:], float64[:,:], complex128[:]), parallel=True)
- def vectorizedOptimized_NumbaJit_Parallel(csm, r0, rm, kj):
-     """ The parallel=True flag turns on an automated parallelization process.
-     When one wants to manually parallelize a certain loop one can do so by
-     using prange instead of xrange/range. BUT in this method the prange
-     produced errors. Maybe that's because the numpy package performs
-     parallelization itself, which is then in conflict with prange.
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.float64)
-     for cntFreqs in xrange(nFreqs):
-         kjj = kj[cntFreqs].imag
-         for cntGrid in xrange(nGridPoints): # error when trying with prange
-             temp3 = (kjj * (rm[cntGrid, :] - r0[cntGrid]))
-             steeringVector = rm[cntGrid, :] / r0[cntGrid] * (np.cos(temp3) - 1j * np.sin(temp3))
-             beamformOutput[cntFreqs, cntGrid] = (np.vdot(steeringVector, np.dot(steeringVector, csm[cntFreqs, :, :]))).real
-     return beamformOutput
-
- #%% Formulate the Beamformer as LOOPS
- def loops_exactCopyOfCPP(csm, e, h, r0, rm, kj):
-     """ A python copy of the current benchmark function, created with scipy.weave
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     nMics = csm.shape[1]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.complex128)
-     steerVec = np.zeros((nMics), np.complex128)
-
-     for cntFreqs in xrange(nFreqs):
-         kjj = kj[cntFreqs].imag
-         for cntGrid in xrange(nGridPoints):
-             rs = 0
-             r01 = r0[cntGrid]
-
-             # Calculation of the steering vectors
-             for cntMics in xrange(nMics):
-                 rm1 = rm[cntGrid, cntMics]
-                 rs += 1.0 / (rm1**2)
-                 temp3 = np.float32(kjj * (rm1 - r01))
-                 steerVec[cntMics] = (np.cos(temp3) - 1j * np.sin(temp3)) * rm1
-             rs = r01 ** 2
-
-             # Calculation of the matrix-vector multiplication
-             temp1 = 0.0
-             for cntMics in xrange(nMics):
-                 temp2 = 0.0
-                 for cntMics2 in xrange(cntMics):
-                     temp2 += csm[cntFreqs, cntMics2, cntMics] * steerVec[cntMics2]
-                 temp1 += 2 * (temp2 * steerVec[cntMics].conjugate()).real
-                 temp1 += (csm[cntFreqs, cntMics, cntMics] * np.conjugate(steerVec[cntMics]) * steerVec[cntMics]).real
-             beamformOutput[cntFreqs, cntGrid] = temp1 / rs
-     return beamformOutput
-
- @njit(float64[:,:](complex128[:,:,:], complex128[:], float64[:,:], float64[:], float64[:,:], complex128[:]))
- def loops_NumbaJit_nopythonTrue_exactCopyOfCPP(csm, e, h, r0, rm, kj):
-     """ See 'vectorized_NumbaJit_nopythonTrue_DeclareInput' for an explanation of
-     the numba decorator.
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     nMics = csm.shape[1]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.float64)
-     steerVec = np.zeros((nMics), np.complex128)
-
-     for cntFreqs in xrange(nFreqs):
-         kjj = kj[cntFreqs].imag
-         for cntGrid in xrange(nGridPoints):
-             rs = 0
-             r01 = r0[cntGrid]
-             for cntMics in xrange(nMics):
-                 rm1 = rm[cntGrid, cntMics]
-                 rs += 1.0 / (rm1**2)
-                 temp3 = np.float32(kjj * (rm1 - r01))
-                 steerVec[cntMics] = (np.cos(temp3) - 1j * np.sin(temp3)) * rm1
-             rs = r01 ** 2
-
-             temp1 = 0.0
-             for cntMics in xrange(nMics):
-                 temp2 = 0.0
-                 for cntMics2 in xrange(cntMics):
-                     temp2 += csm[cntFreqs, cntMics2, cntMics] * steerVec[cntMics2]
-                 temp1 += 2 * (temp2 * steerVec[cntMics].conjugate()).real
-                 temp1 += (csm[cntFreqs, cntMics, cntMics] * np.conjugate(steerVec[cntMics]) * steerVec[cntMics]).real
-             beamformOutput[cntFreqs, cntGrid] = temp1 / rs
-     return beamformOutput
-
-
- @njit(float64[:,:](complex128[:,:,:], complex128[:], float64[:,:], float64[:], float64[:,:], complex128[:]), parallel=True)
- def loops_NumbaJit_parallel_FirstWritingOfSteer(csm, e, h, r0, rm, kj):
-     """ This method implements the parallelized loop (prange) over the
-     gridpoints, which is a direct implementation of the currently used
-     c++ method created with scipy.weave.
-
-     Very strange: Just like with Cython, this implementation (prange over gridpoints)
-     produces wrong results. If one doesn't parallelize -> everything is good
-     (just like with Cython). Maybe Cython and Numba.jit use the same interpreter
-     to generate OpenMP-parallelizable code.
-
-     BUT: If one uncomments the 'steerVec' declaration in the prange-loop over the
-     gridpoints an error occurs. After commenting the line again and executing
-     the script once more, THE BEAMFORMER-RESULTS ARE CORRECT (for repeated tries).
-     Funny enough the method is now twice as slow in comparison to the
-     'wrong version' (before invoking the error).
-
-     A workaround is given by 'loops_NumbaJit_parallel', which is much slower,
-     because the steering vector is calculated redundantly.
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     nMics = csm.shape[1]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.float64)
-     steerVec = np.zeros((nMics), np.complex128)
-
-     for cntFreqs in xrange(nFreqs):
-         kjj = kj[cntFreqs].imag
-         for cntGrid in prange(nGridPoints):
-             # steerVec = np.zeros((nMics), np.complex128) # This is the line that has to be uncommented (see this method's documentation comment)
-             rs = 0
-             r01 = r0[cntGrid]
-
-             for cntMics in xrange(nMics):
-                 rm1 = rm[cntGrid, cntMics]
-                 rs += 1.0 / (rm1**2)
-                 temp3 = np.float32(kjj * (rm1 - r01))
-                 steerVec[cntMics] = (np.cos(temp3) - 1j * np.sin(temp3)) * rm1
-             rs = r01 ** 2
-
-             temp1 = 0.0
-             for cntMics in xrange(nMics):
-                 temp2 = 0.0
-                 for cntMics2 in xrange(cntMics):
-                     temp2 = temp2 + csm[cntFreqs, cntMics2, cntMics] * steerVec[cntMics2]
-                 temp1 = temp1 + 2 * (temp2 * steerVec[cntMics].conjugate()).real
-                 temp1 = temp1 + (csm[cntFreqs, cntMics, cntMics] * np.conjugate(steerVec[cntMics]) * steerVec[cntMics]).real
-
-             beamformOutput[cntFreqs, cntGrid] = temp1 / rs
-     return beamformOutput
-
- @njit(float64[:,:](complex128[:,:,:], complex128[:], float64[:,:], float64[:], float64[:,:], complex128[:]), parallel=True)
- def loops_NumbaJit_parallel(csm, e, h, r0, rm, kj):
-     """ Workaround for the prange error in jit. See the documentation comment of
-     'loops_NumbaJit_parallel_FirstWritingOfSteer'.
-     For info on the numba decorator see 'vectorizedOptimized_NumbaJit_Parallel'.
-     """
-     nFreqs = csm.shape[0]
-     nGridPoints = r0.shape[0]
-     nMics = csm.shape[1]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.float64)
-
-     for cntFreqs in xrange(nFreqs):
-         kjj = kj[cntFreqs].imag
-         for cntGrid in prange(nGridPoints):
-             r01 = r0[cntGrid]
-             rs = r01 ** 2
-
-             temp1 = 0.0
-             for cntMics in xrange(nMics):
-                 temp2 = 0.0
-                 rm1 = rm[cntGrid, cntMics]
-                 temp3 = np.float32(kjj * (rm1 - r01))
-                 steerVec = (np.cos(temp3) - 1j * np.sin(temp3)) * rm1
-
-                 for cntMics2 in xrange(cntMics):
-                     rm1 = rm[cntGrid, cntMics2]
-                     temp3 = np.float32(kjj * (rm1 - r01))
-                     steerVec1 = (np.cos(temp3) - 1j * np.sin(temp3)) * rm1 # Steering vec is calculated redundantly --> very slow
-                     temp2 += csm[cntFreqs, cntMics2, cntMics] * steerVec1
-                 temp1 += 2 * (temp2 * steerVec.conjugate()).real
-                 temp1 += (csm[cntFreqs, cntMics, cntMics] * steerVec.conjugate() * steerVec).real
-             beamformOutput[cntFreqs, cntGrid] = temp1 / rs
-     return beamformOutput
-
- #%% Multithreading
-
- #Due to python's global interpreter lock (GIL) only one thread can run at a time.
- #This means that if one wants to make use of multiple cores, one has to release
- #the GIL for concurrently running threads. Numba's jit can release the GIL, if
- #all datatypes are primitive (nopython=True), via nogil=True.
- #This doesn't have to be done with all the numba.guvectorize stuff, as the
- #multithreading happens automatically there.
- #I tested
-
- # TODO: finish this
-
-
-
-
-
- nThreadsGlobal = 2 # try once with 2 and once with 4 threads.. guess: the stuff that
- # numpy parallelizes (without jit) runs on both cores anyway -> more threads won't help then
-
- def vectorized_multiThreading(csm, e, h, r0, rm, kj):
-     """ Prepares the multithreading of 'vectorized_multiThreading_CoreFunction'.
-     This method does not free the GIL. As described above (beginning of the
-     Multithreading section) it therefore shouldn't run concurrently (on multiple
-     cores). BUT as numpy's mkl package organizes concurrency itself (see 'vectorized'),
-     this
-     """
-     nThreads = nThreadsGlobal
-     dataSizePerThread = nGridPoints / nThreads
-     startingIndexPerThread = [cnt * dataSizePerThread for cnt in range(nThreads + 1)]
-     startingIndexPerThread[-1] = nGridPoints
-     threads = [threading.Thread(target=vectorized_multiThreading_CoreFunction, args=(csm, e, h, r0, rm, kj, startingIndexPerThread[cnt], startingIndexPerThread[cnt+1])) for cnt in range(nThreads)]
-     for thread in threads:
-         thread.start()
-     for thread in threads:
-         thread.join()
-     return h
- def vectorized_multiThreading_CoreFunction(csm, e, h, r0, rm, kj, startPoint, endPoint):
-     nFreqs = csm.shape[0]
-     for cntFreqs in xrange(nFreqs):
-         for cntGrid in xrange(startPoint, endPoint):
-             steeringVector = rm[cntGrid, :] / r0[cntGrid] * np.exp(-1j * kj[cntFreqs].imag * (rm[cntGrid, :] - r0[cntGrid]))
-             h[cntFreqs, cntGrid] = (np.vdot(steeringVector, np.dot(steeringVector, csm[cntFreqs, :, :]))).real
-
-
- def vectorized_NumbaJit_multiThreading(csm, e, h, r0, rm, kj):
-     """ Prepares the multithreading of 'vectorized_NumbaJit_multiThreading_CoreFunction'
-     """
-     nThreads = nThreadsGlobal
-     dataSizePerThread = nGridPoints / nThreads
-     startingIndexPerThread = [cnt * dataSizePerThread for cnt in range(nThreads + 1)]
-     startingIndexPerThread[-1] = nGridPoints
-     threads = [threading.Thread(target=vectorized_NumbaJit_multiThreading_CoreFunction, args=(csm, e, h, r0, rm, kj, startingIndexPerThread[cnt], startingIndexPerThread[cnt+1])) for cnt in range(nThreads)]
-     for thread in threads:
-         thread.start()
-     for thread in threads:
-         thread.join()
-     return h
- @njit(void(complex128[:,:,:], complex128[:], float64[:,:], float64[:], float64[:,:], complex128[:], uint64, uint64), nogil=True)
- def vectorized_NumbaJit_multiThreading_CoreFunction(csm, e, h, r0, rm, kj, startPoint, endPoint):
-     nFreqs = csm.shape[0]
-     for cntFreqs in xrange(nFreqs):
-         for cntGrid in xrange(startPoint, endPoint):
-             steeringVector = rm[cntGrid, :] / r0[cntGrid] * np.exp(-1j * kj[cntFreqs].imag * (rm[cntGrid, :] - r0[cntGrid]))
-             h[cntFreqs, cntGrid] = (np.vdot(steeringVector, np.dot(steeringVector, csm[cntFreqs, :, :]))).real
-
-
- def loops_NumbaJit_multiThreading(csm, e, h, r0, rm, kj):
-     """ Prepares the multithreading of 'loops_NumbaJit_multiThreading_CoreFunction'.
-     Here the cores are used as they should be, which means:
-     You spawn 2 threads -> cpu uses 2 cores,
-     You spawn 3 threads -> cpu uses 3 cores...
-     """
-     nThreads = nThreadsGlobal
-     dataSizePerThread = nGridPoints / nThreads
-     startingIndexPerThread = [cnt * dataSizePerThread for cnt in range(nThreads + 1)]
-     startingIndexPerThread[-1] = nGridPoints
-     threads = [threading.Thread(target=loops_NumbaJit_multiThreading_CoreFunction, args=(csm, e, h, r0, rm, kj, startingIndexPerThread[cnt], startingIndexPerThread[cnt+1])) for cnt in range(nThreads)]
-     for thread in threads:
-         thread.start()
-     for thread in threads:
-         thread.join()
-     return h
- @njit(void(complex128[:,:,:], complex128[:], float64[:,:], float64[:], float64[:,:], complex128[:], uint64, uint64), nogil=True)
- def loops_NumbaJit_multiThreading_CoreFunction(csm, e, h, r0, rm, kj, startPoint, endPoint):
-     nFreqs = csm.shape[0]
-     nMics = csm.shape[1]
-     steerVec = np.zeros((nMics), np.complex128)
-
-     for cntFreqs in xrange(nFreqs):
-         kjj = kj[cntFreqs].imag
-         for cntGrid in xrange(startPoint, endPoint):
-             rs = 0
-             r01 = r0[cntGrid]
-             for cntMics in xrange(nMics):
-                 rm1 = rm[cntGrid, cntMics]
-                 rs += 1.0 / (rm1**2)
-                 temp3 = np.float32(kjj * (rm1 - r01))
-                 steerVec[cntMics] = (np.cos(temp3) - 1j * np.sin(temp3)) * rm1
-             rs = r01 ** 2
-
-             temp1 = 0.0
-             for cntMics in xrange(nMics):
-                 temp2 = 0.0
-                 for cntMics2 in xrange(cntMics):
-                     temp2 += csm[cntFreqs, cntMics2, cntMics] * steerVec[cntMics2]
-                 temp1 += 2 * (temp2 * steerVec[cntMics].conjugate()).real
-                 temp1 += (csm[cntFreqs, cntMics, cntMics] * np.conjugate(steerVec[cntMics]) * steerVec[cntMics]).real
-             h[cntFreqs, cntGrid] = temp1 / rs
-
- #%% NUMBA - GUVECTORIZE
-
- @guvectorize([void(complex128[:,:], float64[:], float64[:,:], complex128[:], float64[:])], '(m,m),(g),(g,m),()->(g)', nopython=True, target='parallel')
- def loops_NumbaGuvectorize(csm, r0, rm, kj, h):
-     """ Creating a Numpy-Ufunc: Define it for an input which is n-dimensional.
-     Then call it with an input which is n+1-dimensional. Python takes care of
-     the parallelization over the available cores itself.
-     In this case python parallelizes over the frequencies.
-     Numba's guvectorize doesn't return a value but has to overwrite a result
-     vector passed to the method (in this case 'h') as the last input.
-
-     Short description of the guvectorize decorator:
-     1. Input-Argument: Declaration of output/input datatypes just like
-     with jit, but with an obligatory [] around it
-     2. '(m,m),(g)...': A symbolic explanation of the input dimensions. In this
-     case 'loops_NumbaGuvectorize' is defined for the following input dims
-     (csm[nMics x nMics], r0[nGridpoints], rm[nGridpoints x nMics], kj (a scalar), h[nGridpoints])
-     , where 'h' contains the calculated results (identified by '->').
-     When you then give an input whose tensor order is exactly one order
-     higher than the definition made here (e.g. csm[!nFreqs! x nMics x nMics]),
-     numba automatically distributes the new tensor order onto the
-     multiple cores (in our case every core computes the beamformer map
-     for a single frequency independently of the others)
-     3. target: one can compute only on one core (target='CPU'), all available
-     cores (target='parallel') or even on graphics cards (target='cuda') (if drivers are installed)
-     4. nopython: See jit-decorator, used above
-
-     See also the man page "http://numba.pydata.org/".
-
-     REMARK: Strangely this seemed only to work if the added order of the CSM was its
-     first dimension. E.g. csm[nMics x nMics x nFreqs] didn't seem to work.
-     """
-     nGridPoints = r0.shape[0]
-     nMics = csm.shape[0]
-     steerVec = np.zeros((nMics), np.complex128)
-
-     kjj = kj[0].imag # If the input is scalar, it has to be dereferenced using the 'variable[0]' syntax
-     for cntGrid in xrange(nGridPoints):
-         rs = 0.0
-         r01 = r0[cntGrid]
-
-         for cntMics in xrange(nMics):
-             rm1 = rm[cntGrid, cntMics]
-             rs += 1.0 / (rm1**2)
-             temp3 = np.float32(kjj * (rm1 - r01))
-             steerVec[cntMics] = (np.cos(temp3) - 1j * np.sin(temp3)) * rm1
-         rs = r01 ** 2
-
-         temp1 = 0.0
-         for cntMics in xrange(nMics):
-             temp2 = 0.0
-             for cntMics2 in xrange(cntMics):
-                 temp2 += csm[cntMics2, cntMics] * steerVec[cntMics2]
-             temp1 += 2 * (temp2 * steerVec[cntMics].conjugate()).real
-             temp1 += (csm[cntMics, cntMics] * np.conjugate(steerVec[cntMics]) * steerVec[cntMics]).real
-         h[cntGrid] = temp1.real / rs
-
- #@njit(float64[:,:](complex128[:,:,:], float64[:], float64[:,:], complex128[:])) # right now it doesn't seem to be supported for jit-optimized methods to call guvectorized subroutines. Maybe this will be changed in the future
- def loops_NumbaGuvectorizeOverGrid(csm, r0, rm, kj):
-     """ Similar to 'loops_NumbaGuvectorize', but in this case the UFunc parallelizes
-     over the gridpoints (as it is done in the scipy.weave version). This leads
-     to significant speed improvements.
-     Thoughts on the speed improvements: I can't see why the pipelining should
-     work any more effectively in comparison to 'loops_NumbaGuvectorize' (where
-     the parallelization is done over the frequency loop), as in both cases the
-     most time is spent in the loop over the gridpoints, so the chain of
-     instructions should essentially be the same.
-     BUT in 'loops_NumbaGuvectorize' the slice of every currently calculated
-     frequency of the CSM is loaded into the shared cache (e.g. with 4 cores a
-     '4 x nMics x nMics' tensor is loaded into the shared cache), whereas with
-     'loops_NumbaGuvectorizeOverGrid' only a '1 x nMics x nMics' tensor is
-     loaded into the shared cache. This may lead to better management of resources.
-     """
-     nGridPoints = r0.shape[0]
-     nFreqs = csm.shape[0]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.float64)
-     for cntFreqs in xrange(nFreqs):
-         result = np.zeros(nGridPoints, np.float64)
-         loops_NumbaGuvectorizeOverGrid_CoreFunction(csm[cntFreqs, :, :], r0, rm, kj[cntFreqs], result)
-         beamformOutput[cntFreqs, :] = result
-     return beamformOutput
-
- @guvectorize([(complex128[:,:], float64[:], float64[:], complex128[:], float64[:])],
-              '(m,m),(),(m),()->()', nopython=True, target='parallel')
- def loops_NumbaGuvectorizeOverGrid_CoreFunction(csm, r0, rm, kj, h):
-     """ CoreFunction of 'loops_NumbaGuvectorizeOverGrid', which does the
-     parallelization over the gridpoints.
-     """
-     nMics = csm.shape[0]
-     steerVec = np.zeros((nMics), np.complex128)
-     kjj = kj[0].imag
-
-     rs = 0.0
-     r01 = r0[0]
-     for cntMics in xrange(nMics):
-         rm1 = rm[cntMics]
-         rs += 1.0 / (rm1**2)
-         temp3 = np.float32(kjj * (rm1 - r01))
-         #==============================================================================
-         steerVec[cntMics] = (np.cos(temp3) - 1j * np.sin(temp3)) * rm1
-         # steerVec[cntMics] = np.exp(-1j * temp3) * rm1 # is analytically the same as the previous line
-         #
-         # With exp(), instead of cos + 1j*sin, the function is noticeably slower
-         # AND the relative error is ca. 10^-8 (as with those implementations which
-         # don't perform the down-cast from double to 32-bit precision)
-         # -> Maybe the exp() implicitly performs a cast back to double if its
-         # input is imaginary?!
-         #==============================================================================
-     rs = r01 ** 2
-
-     temp1 = 0.0
-     for cntMics in xrange(nMics):
-         temp2 = 0.0
-         for cntMics2 in xrange(cntMics):
-             temp2 += csm[cntMics2, cntMics] * steerVec[cntMics2]
-         temp1 += 2 * (temp2 * steerVec[cntMics].conjugate()).real
-         temp1 += (csm[cntMics, cntMics] * np.conjugate(steerVec[cntMics]) * steerVec[cntMics]).real
-     h[0] = temp1 / rs
-
-
- def loops_NumbaGuvectorizeOverGridNoCast(csm, r0, rm, kj):
-     nGridPoints = r0.shape[0]
-     nFreqs = csm.shape[0]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.float64)
-     for cntFreqs in xrange(nFreqs):
-         result = np.zeros(nGridPoints, np.float64)
-         loops_NumbaGuvectorizeOverGridNoCast_CoreFunction(csm[cntFreqs, :, :], r0, rm, kj[cntFreqs], result)
-         beamformOutput[cntFreqs, :] = result
-     return beamformOutput
-
- @guvectorize([(complex128[:,:], float64[:], float64[:], complex128[:], float64[:])],
-              '(m,m),(),(m),()->()', nopython=True, target='parallel')
- def loops_NumbaGuvectorizeOverGridNoCast_CoreFunction(csm, r0, rm, kj, h):
-     nMics = csm.shape[0]
-     steerVec = np.zeros((nMics), np.complex128)
-     kjj = kj[0].imag
-
-     rs = 0.0
-     r01 = r0[0]
-     for cntMics in xrange(nMics):
-         rm1 = rm[cntMics]
-         rs += 1.0 / (rm1**2)
-         temp3 = kjj * (rm1 - r01)
-         steerVec[cntMics] = (np.cos(temp3) - 1j * np.sin(temp3)) * rm1
-     rs = r01 ** 2
-
-     temp1 = 0.0
-     for cntMics in xrange(nMics):
-         temp2 = 0.0
-         for cntMics2 in xrange(cntMics):
-             temp2 += csm[cntMics2, cntMics] * steerVec[cntMics2]
-         temp1 += 2 * (temp2 * steerVec[cntMics].conjugate()).real
-         temp1 += (csm[cntMics, cntMics] * np.conjugate(steerVec[cntMics]) * steerVec[cntMics]).real
-     h[0] = temp1 / rs
-
-
- def loops_NumbaGuvectorizeOverGridAllCalcsIn32Bit(csm, r0, rm, kj):
-     nGridPoints = r0.shape[0]
-     nFreqs = csm.shape[0]
-     beamformOutput = np.zeros((nFreqs, nGridPoints), np.float64)
-     for cntFreqs in xrange(nFreqs):
-         result = np.zeros(nGridPoints, np.float64)
-         # loops_NumbaGuvectorizeOverGridAllCalcsIn32Bit_CoreFunction(csm[cntFreqs, :, :], r0, rm, kj[cntFreqs], result)
-         loops_NumbaGuvectorizeOverGridAllCalcsIn32Bit_CoreFunction(np.complex64(csm[cntFreqs, :, :]), np.float32(r0), np.float32(rm), np.complex64(kj[cntFreqs]), np.float32(result))
-         beamformOutput[cntFreqs, :] = result
-     return beamformOutput
-
- @guvectorize([(complex64[:,:], float32[:], float32[:], complex64[:], float32[:])], '(m,m),(),(m),()->()', nopython=True, target='parallel')
- #@guvectorize([(complex128[:,:], float64[:], float64[:], complex128[:], float64[:])], '(m,m),(),(m),()->()', nopython=True, target='parallel')
- def loops_NumbaGuvectorizeOverGridAllCalcsIn32Bit_CoreFunction(csm, r0, rm, kj, h):
-     nMics = csm.shape[0]
-     steerVec = np.zeros((nMics), np.complex64)
-     kjj = np.float32(kj[0].imag)
-
-     r01 = np.float32(r0[0])
-     for cntMics in xrange(nMics):
-         rm1 = np.float32(rm[cntMics])
-         temp3 = np.float32(kjj * (rm[cntMics] - r01))
-         steerVec[cntMics] = np.complex64((np.cos(temp3) - 1j * np.sin(temp3)) * rm1)
-     rs = r01 * r01
-
-     temp1 = np.float32(0.0)
-     # temp1 = np.float64(0.0)
-     for cntMics in xrange(nMics):
-         temp2 = np.complex64(0.0 + 0.0j)
-         # temp2 = np.complex128(0.0 + 0.0j)
-         for cntMics2 in xrange(cntMics):
-             temp2 += csm[cntMics2, cntMics] * steerVec[cntMics2]
-         temp1 += 2 * (temp2 * steerVec[cntMics].conjugate()).real
-         temp1 += (csm[cntMics, cntMics] * np.conjugate(steerVec[cntMics]) * steerVec[cntMics]).real
-     h[0] = temp1 / rs
- #%% MAIN
- listOfMics = [64] #[64, 100, 250, 500, 700, 1000]
- listGridPoints = [5] # [500, 5000, 10000] # Standard value: 12000 # The number of gridpoints doesn't seem to have too great of an influence
- nTrials = 10
- listOfNFreqs = [20]
-
- #==============================================================================
- # The benchmark function 'r_beamfull_inverse' and also other implementations of
- # the beamformer create a lot of overhead, which influences the computational
- # effort of the succeeding function. This is mostly the case if concurrent
- # calculations are done (multiple cores). So often the first trial of a new
- # function takes somewhat longer than the other trials.
- #==============================================================================
-
- #funcsToTrial = [vectorized, vectorizedOptimized, vectorized_NumbaJitOnly, \
- # vectorized_NumbaJit_nopythonTrue, vectorized_NumbaJit_nopythonTrue_DeclareInput, \
- # vectorizedOptimized_NumbaJit_Parallel, \
- # loops_exactCopyOfCPP, loops_NumbaJit_nopythonTrue_exactCopyOfCPP, \
- # loops_NumbaJit_parallel_FirstWritingOfSteer, loops_NumbaJit_parallel, \
- # vectorized_multiThreading, vectorized_NumbaJit_multiThreading, loops_NumbaJit_multiThreading, \
- # loops_NumbaGuvectorize, loops_NumbaGuvectorizeOverGrid, \
- # r_beamfull_inverse_OhneMP, r_beamfull_inverse]
-
- #funcsToTrial = [vectorized, vectorizedOptimized, beamformerCythonNOTparallel, loops_NumbaJit_parallel_FirstWritingOfSteer, \
- # vectorized_multiThreading, vectorized_NumbaJit_multiThreading, loops_NumbaJit_multiThreading, \
- # loops_NumbaGuvectorize, loops_NumbaGuvectorizeOverGrid, \
- # r_beamfull_inverse_OhneMP, r_beamfull_inverse]
-
- funcsToTrial = [loops_NumbaGuvectorize, loops_NumbaGuvectorizeOverGrid, loops_NumbaGuvectorizeOverGridNoCast, loops_NumbaGuvectorizeOverGridAllCalcsIn32Bit, r_beamfull_inverse]
-
- for nMics in listOfMics:
-     for nGridPoints in listGridPoints:
-         for nFreqs in listOfNFreqs:
-             # Init
-             print(10*'-' + 'New Test configuration: nMics=%s, nGridpoints=%s, nFreqs=%s' %(nMics, nGridPoints, nFreqs) + 10*'-')
-             print(10*'-' + 'Creation of inputInputs' + 10*'-')
-
-             # Inputs for the beamformer methods:
-             # At the moment the beamformer methods are called once per
-             # frequency (CSM is a matrix, no 3rd-order tensor).
-             # For easier comparability we build the CSM as a 3rd-order tensor instead
-             csm = np.random.rand(nFreqs, nMics, nMics) + 1j*np.random.rand(nFreqs, nMics, nMics) # cross spectral matrix
-             for cntFreqs in range(nFreqs):
-                 csm[cntFreqs, :, :] += csm[cntFreqs, :, :].T.conj() # make CSM hermitian
-             e = np.random.rand(nMics) + 1j*np.random.rand(nMics) # has no usage
-             h = np.zeros((nFreqs, nGridPoints)) # results are stored here, if function has no return value
-             r0 = np.random.rand(nGridPoints) # distance between gridpoints and middle of array
-             rm = np.random.rand(nGridPoints, nMics) # distance between gridpoints and all mics in the array
-             kj = np.zeros(nFreqs) + 1j*np.random.rand(nFreqs) # complex wavenumber
-
-             nameOfFuncsToTrial = map(lambda x: x.__name__, funcsToTrial)
-             nameOfFuncsForError = [funcName for funcName in nameOfFuncsToTrial if funcName != 'r_beamfull_inverse']
-             maxRelativeDeviation = np.zeros((len(funcsToTrial), nTrials))
-             maxAbsoluteDeviation = np.zeros((len(funcsToTrial), nTrials))
-             timeConsumption = [[] for _ in range(len(funcsToTrial))]
-             indOfBaselineFnc = nameOfFuncsToTrial.index('r_beamfull_inverse')
-
-             print(10*'-' + 'Onetime calculation of "r_beamfull_inverse" for error reference' + 10*'-')
-             r_beamfull_inverse(csm, e, h, r0, rm, kj)
-             resultReference = h # For relative/absolute error
-             gc.collect()
-
-             # Testing
-             print(10*'-' + 'Testing of functions' + 10*'-')
-             cntFunc = 0
-             for func in funcsToTrial:
-                 print(func.__name__)
-                 for cntTrials in xrange(nTrials):
-                     h = np.zeros((nFreqs, nGridPoints))
-                     if func.__name__ == 'r_beamfull_inverse' or func.__name__ == 'r_beamfull_inverse_OhneMP':
-                         t0 = tm.time()
-                         func(csm, e, h, r0, rm, kj)
-                         t1 = tm.time()
-                         result = h
-                         # gc.collect()
-                     elif func.__name__ == 'loops_NumbaGuvectorize':
-                         t0 = tm.time()
-                         func(csm, r0, rm, kj, h)
-                         t1 = tm.time()
-                         result = h
-                     elif func.__name__ == 'loops_NumbaGuvectorizeOverGrid' or func.__name__ == 'vectorizedOptimized_NumbaJit_Parallel' or func.__name__ == 'loops_NumbaGuvectorizeOverGridNoCast' or func.__name__ == 'loops_NumbaGuvectorizeOverGridAllCalcsIn32Bit':
-                         t0 = tm.time()
-                         output = func(csm, r0, rm, kj)
-                         t1 = tm.time()
-                         result = output
-                     elif func.__name__ == 'beamformerCython' or func.__name__ == 'beamformerCythonNOTparallel':
-                         t0 = tm.time()
-                         output = func(csm, r0, rm, kj)
-                         t1 = tm.time()
-                         result = np.array(output)
-                     else:
-                         t0 = tm.time()
-                         output = func(csm, e, h, r0, rm, kj)
-                         t1 = tm.time()
-                         result = output
-                     timeConsumption[cntFunc].append(t1 - t0)
-                     relativeDiffBetweenNewCodeAndRef = (result - resultReference) / (result + resultReference) * 2 # error in relation to the resulting value
-                     maxRelativeDeviation[cntFunc, cntTrials] = np.amax(np.amax(abs(relativeDiffBetweenNewCodeAndRef), axis=1), axis=0) # relative error in inf-norm
-                     maxAbsoluteDeviation[cntFunc, cntTrials] = np.amax(np.amax(abs(result - resultReference), axis=1), axis=0) # absolute error in inf-norm
-                 cntFunc += 1
-             factorTimeConsump = [np.mean(timeConsumption[cnt]) for cnt in range(0, len(funcsToTrial))] \
-                 / np.mean(timeConsumption[indOfBaselineFnc])
-
-             # Save the current test-config as .sav
-             helpString = 'The order of the variables is: \n nameOfFuncsToTrial \n maxRelativeDeviation'\
-                 '\n timeConsumption [nFuncs, nTrials] \n nMics \n nGridPoints \n nFreqs '\
-                 '\n Factor of time consumption (in relation to the original .cpp) \n maxAbsoluteDeviation \n nThreadsGlobal'
-             saveTupel = (helpString, nameOfFuncsToTrial, maxRelativeDeviation, timeConsumption,
-                          nMics, nGridPoints, nFreqs, factorTimeConsump, maxAbsoluteDeviation, nThreadsGlobal)
-             stringParameters = 'OvernightTestcasesBeamformer_nMics%s_nGridPoints%s_nFreqs%s_nTrials%s' %(nMics, nGridPoints, nFreqs, nTrials)
-
-             stringSaveName = 'Peter'
-             # stringSaveName = 'Sicherung_DurchgelaufeneTests/Beamformer/AllImportantMethods/' + stringParameters
-             # stringSaveName = 'Sicherung_DurchgelaufeneTests/Beamformer/EinflussGridpoints/AMDFX6100/' + stringParameters
-             # stringSaveName = 'Sicherung_DurchgelaufeneTests/Beamformer/JitPrange/' + stringParameters
-             # stringSaveName = 'Sicherung_DurchgelaufeneTests/Beamformer/Multithreading_02Threads/' + stringParameters
-
-             shFncs.savingTimeConsumption(stringSaveName, saveTupel) # saving as "stringSaveName.sav"
-
-             shFncs.plottingOfOvernightTestcasesBeamformer(stringSaveName + '.sav') # plot of the current test-config
-
- #==============================================================================
- #The following use of the numba decorators could lead to less code (as a function
- #body could be used more often) but is also slower, which is why it wasn't used
- #in this comparison.
- # signature = complex128[:,:](complex128[:,:,:], float64[:], float64[:,:])
- # numbaOptimizedFunction= jit(signature, nopython=True)(plainPythonFunction.py_func)
- #==============================================================================