nmatrix-lapacke 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/ext/nmatrix/data/complex.h +364 -0
  3. data/ext/nmatrix/data/data.h +638 -0
  4. data/ext/nmatrix/data/meta.h +64 -0
  5. data/ext/nmatrix/data/ruby_object.h +389 -0
  6. data/ext/nmatrix/math/asum.h +120 -0
  7. data/ext/nmatrix/math/cblas_enums.h +36 -0
  8. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  9. data/ext/nmatrix/math/gemm.h +241 -0
  10. data/ext/nmatrix/math/gemv.h +178 -0
  11. data/ext/nmatrix/math/getrf.h +255 -0
  12. data/ext/nmatrix/math/getrs.h +121 -0
  13. data/ext/nmatrix/math/imax.h +79 -0
  14. data/ext/nmatrix/math/laswp.h +165 -0
  15. data/ext/nmatrix/math/long_dtype.h +49 -0
  16. data/ext/nmatrix/math/math.h +744 -0
  17. data/ext/nmatrix/math/nrm2.h +160 -0
  18. data/ext/nmatrix/math/rot.h +117 -0
  19. data/ext/nmatrix/math/rotg.h +106 -0
  20. data/ext/nmatrix/math/scal.h +71 -0
  21. data/ext/nmatrix/math/trsm.h +332 -0
  22. data/ext/nmatrix/math/util.h +148 -0
  23. data/ext/nmatrix/nm_memory.h +60 -0
  24. data/ext/nmatrix/nmatrix.h +408 -0
  25. data/ext/nmatrix/ruby_constants.h +106 -0
  26. data/ext/nmatrix/storage/common.h +176 -0
  27. data/ext/nmatrix/storage/dense/dense.h +128 -0
  28. data/ext/nmatrix/storage/list/list.h +137 -0
  29. data/ext/nmatrix/storage/storage.h +98 -0
  30. data/ext/nmatrix/storage/yale/class.h +1139 -0
  31. data/ext/nmatrix/storage/yale/iterators/base.h +142 -0
  32. data/ext/nmatrix/storage/yale/iterators/iterator.h +130 -0
  33. data/ext/nmatrix/storage/yale/iterators/row.h +449 -0
  34. data/ext/nmatrix/storage/yale/iterators/row_stored.h +139 -0
  35. data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +168 -0
  36. data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +123 -0
  37. data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
  38. data/ext/nmatrix/storage/yale/yale.h +202 -0
  39. data/ext/nmatrix/types.h +54 -0
  40. data/ext/nmatrix/util/io.h +115 -0
  41. data/ext/nmatrix/util/sl_list.h +143 -0
  42. data/ext/nmatrix/util/util.h +78 -0
  43. data/ext/nmatrix_lapacke/extconf.rb +200 -0
  44. data/ext/nmatrix_lapacke/lapacke.cpp +100 -0
  45. data/ext/nmatrix_lapacke/lapacke/include/lapacke.h +16445 -0
  46. data/ext/nmatrix_lapacke/lapacke/include/lapacke_config.h +119 -0
  47. data/ext/nmatrix_lapacke/lapacke/include/lapacke_mangling.h +17 -0
  48. data/ext/nmatrix_lapacke/lapacke/include/lapacke_mangling_with_flags.h +17 -0
  49. data/ext/nmatrix_lapacke/lapacke/include/lapacke_utils.h +579 -0
  50. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev.c +89 -0
  51. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev_work.c +141 -0
  52. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd.c +106 -0
  53. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd_work.c +158 -0
  54. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesvd.c +94 -0
  55. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesvd_work.c +149 -0
  56. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrf.c +51 -0
  57. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrf_work.c +83 -0
  58. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetri.c +77 -0
  59. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetri_work.c +89 -0
  60. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrs.c +56 -0
  61. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrs_work.c +102 -0
  62. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrf.c +50 -0
  63. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrf_work.c +82 -0
  64. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotri.c +50 -0
  65. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotri_work.c +82 -0
  66. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrs.c +55 -0
  67. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrs_work.c +101 -0
  68. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgeev.c +78 -0
  69. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgeev_work.c +136 -0
  70. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesdd.c +88 -0
  71. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesdd_work.c +153 -0
  72. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesvd.c +83 -0
  73. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesvd_work.c +144 -0
  74. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrf.c +50 -0
  75. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrf_work.c +81 -0
  76. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetri.c +75 -0
  77. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetri_work.c +87 -0
  78. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrs.c +55 -0
  79. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrs_work.c +99 -0
  80. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrf.c +50 -0
  81. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrf_work.c +81 -0
  82. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotri.c +50 -0
  83. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotri_work.c +81 -0
  84. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrs.c +54 -0
  85. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrs_work.c +97 -0
  86. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgeev.c +78 -0
  87. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgeev_work.c +134 -0
  88. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesdd.c +88 -0
  89. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesdd_work.c +152 -0
  90. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesvd.c +83 -0
  91. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesvd_work.c +143 -0
  92. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrf.c +50 -0
  93. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrf_work.c +81 -0
  94. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetri.c +75 -0
  95. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetri_work.c +87 -0
  96. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrs.c +55 -0
  97. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrs_work.c +99 -0
  98. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrf.c +50 -0
  99. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrf_work.c +81 -0
  100. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotri.c +50 -0
  101. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotri_work.c +81 -0
  102. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrs.c +54 -0
  103. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrs_work.c +97 -0
  104. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgeev.c +89 -0
  105. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgeev_work.c +141 -0
  106. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesdd.c +106 -0
  107. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesdd_work.c +158 -0
  108. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesvd.c +94 -0
  109. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesvd_work.c +149 -0
  110. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrf.c +51 -0
  111. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrf_work.c +83 -0
  112. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetri.c +77 -0
  113. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetri_work.c +89 -0
  114. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrs.c +56 -0
  115. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrs_work.c +102 -0
  116. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrf.c +50 -0
  117. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrf_work.c +82 -0
  118. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotri.c +50 -0
  119. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotri_work.c +82 -0
  120. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrs.c +55 -0
  121. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrs_work.c +101 -0
  122. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_nancheck.c +62 -0
  123. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_trans.c +65 -0
  124. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_nancheck.c +43 -0
  125. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_trans.c +45 -0
  126. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_nancheck.c +85 -0
  127. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_trans.c +85 -0
  128. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_nancheck.c +62 -0
  129. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_trans.c +65 -0
  130. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_nancheck.c +43 -0
  131. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_trans.c +45 -0
  132. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_nancheck.c +85 -0
  133. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_trans.c +85 -0
  134. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_lsame.c +41 -0
  135. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_nancheck.c +62 -0
  136. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_trans.c +65 -0
  137. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_nancheck.c +43 -0
  138. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_trans.c +45 -0
  139. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_str_nancheck.c +85 -0
  140. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_str_trans.c +85 -0
  141. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_xerbla.c +46 -0
  142. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_nancheck.c +62 -0
  143. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_trans.c +65 -0
  144. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zpo_nancheck.c +43 -0
  145. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zpo_trans.c +45 -0
  146. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ztr_nancheck.c +85 -0
  147. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ztr_trans.c +85 -0
  148. data/ext/nmatrix_lapacke/lapacke_nmatrix.h +16 -0
  149. data/ext/nmatrix_lapacke/make_lapacke_cpp.rb +9 -0
  150. data/ext/nmatrix_lapacke/math_lapacke.cpp +967 -0
  151. data/ext/nmatrix_lapacke/math_lapacke/cblas_local.h +576 -0
  152. data/ext/nmatrix_lapacke/math_lapacke/cblas_templates_lapacke.h +51 -0
  153. data/ext/nmatrix_lapacke/math_lapacke/lapacke_templates.h +356 -0
  154. data/ext/nmatrix_lapacke/nmatrix_lapacke.cpp +42 -0
  155. data/lib/nmatrix/lapack_ext_common.rb +69 -0
  156. data/lib/nmatrix/lapacke.rb +213 -0
  157. data/spec/00_nmatrix_spec.rb +730 -0
  158. data/spec/01_enum_spec.rb +190 -0
  159. data/spec/02_slice_spec.rb +389 -0
  160. data/spec/03_nmatrix_monkeys_spec.rb +78 -0
  161. data/spec/2x2_dense_double.mat +0 -0
  162. data/spec/4x4_sparse.mat +0 -0
  163. data/spec/4x5_dense.mat +0 -0
  164. data/spec/blas_spec.rb +193 -0
  165. data/spec/elementwise_spec.rb +303 -0
  166. data/spec/homogeneous_spec.rb +99 -0
  167. data/spec/io/fortran_format_spec.rb +88 -0
  168. data/spec/io/harwell_boeing_spec.rb +98 -0
  169. data/spec/io/test.rua +9 -0
  170. data/spec/io_spec.rb +149 -0
  171. data/spec/lapack_core_spec.rb +482 -0
  172. data/spec/leakcheck.rb +16 -0
  173. data/spec/math_spec.rb +730 -0
  174. data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
  175. data/spec/nmatrix_yale_spec.rb +286 -0
  176. data/spec/plugins/lapacke/lapacke_spec.rb +303 -0
  177. data/spec/rspec_monkeys.rb +56 -0
  178. data/spec/rspec_spec.rb +34 -0
  179. data/spec/shortcuts_spec.rb +310 -0
  180. data/spec/slice_set_spec.rb +157 -0
  181. data/spec/spec_helper.rb +140 -0
  182. data/spec/stat_spec.rb +203 -0
  183. data/spec/test.pcd +20 -0
  184. data/spec/utm5940.mtx +83844 -0
  185. metadata +262 -0
@@ -0,0 +1,43 @@
1
+ /*****************************************************************************
2
+ Copyright (c) 2010, Intel Corp.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * Redistributions of source code must retain the above copyright notice,
9
+ this list of conditions and the following disclaimer.
10
+ * Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+ * Neither the name of Intel Corporation nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27
+ THE POSSIBILITY OF SUCH DAMAGE.
28
+ ******************************************************************************
29
+ * Contents: Native C interface to LAPACK utility function
30
+ * Author: Intel Corporation
31
+ * Created in February, 2010
32
+ *****************************************************************************/
33
+ #include "lapacke_utils.h"
34
+
35
+ /* Check a matrix for NaN entries. */
36
+
37
+ lapack_logical LAPACKE_zpo_nancheck( int matrix_order, char uplo,
38
+ lapack_int n,
39
+ const lapack_complex_double *a,
40
+ lapack_int lda )
41
+ {
42
+ return LAPACKE_ztr_nancheck( matrix_order, uplo, 'n', n, a, lda );
43
+ }
@@ -0,0 +1,45 @@
1
+ /*****************************************************************************
2
+ Copyright (c) 2010, Intel Corp.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * Redistributions of source code must retain the above copyright notice,
9
+ this list of conditions and the following disclaimer.
10
+ * Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+ * Neither the name of Intel Corporation nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27
+ THE POSSIBILITY OF SUCH DAMAGE.
28
+ ******************************************************************************
29
+ * Contents: Native C interface to LAPACK utility function
30
+ * Author: Intel Corporation
31
+ * Created in February, 2010
32
+ *****************************************************************************/
33
+
34
+ #include "lapacke_utils.h"
35
+
36
+ /* Converts input symmetric matrix from row-major(C) to column-major(Fortran)
37
+ * layout or vice versa.
38
+ */
39
+
40
+ void LAPACKE_zpo_trans( int matrix_order, char uplo, lapack_int n,
41
+ const lapack_complex_double *in, lapack_int ldin,
42
+ lapack_complex_double *out, lapack_int ldout )
43
+ {
44
+ LAPACKE_ztr_trans( matrix_order, uplo, 'n', n, in, ldin, out, ldout );
45
+ }
@@ -0,0 +1,85 @@
1
+ /*****************************************************************************
2
+ Copyright (c) 2010, Intel Corp.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * Redistributions of source code must retain the above copyright notice,
9
+ this list of conditions and the following disclaimer.
10
+ * Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+ * Neither the name of Intel Corporation nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27
+ THE POSSIBILITY OF SUCH DAMAGE.
28
+ ******************************************************************************
29
+ * Contents: Native C interface to LAPACK utility function
30
+ * Author: Intel Corporation
31
+ * Created in February, 2010
32
+ *****************************************************************************/
33
+ #include "lapacke_utils.h"
34
+
35
+ /* Check a matrix for NaN entries. */
36
+
37
+ lapack_logical LAPACKE_ztr_nancheck( int matrix_order, char uplo, char diag,
38
+ lapack_int n,
39
+ const lapack_complex_double *a,
40
+ lapack_int lda )
41
+ {
42
+ lapack_int i, j, st;
43
+ lapack_logical colmaj, lower, unit;
44
+
45
+ if( a == NULL ) return (lapack_logical) 0;
46
+
47
+ colmaj = ( matrix_order == LAPACK_COL_MAJOR );
48
+ lower = LAPACKE_lsame( uplo, 'l' );
49
+ unit = LAPACKE_lsame( diag, 'u' );
50
+
51
+ if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) ||
52
+ ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) ||
53
+ ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) {
54
+ /* Just exit if any of input parameters are wrong */
55
+ return (lapack_logical) 0;
56
+ }
57
+ if( unit ) {
58
+ /* If unit, then don't touch diagonal, start from 1st column or row */
59
+ st = 1;
60
+ } else {
61
+ /* If non-unit, then check diagonal also, starting from [0,0] */
62
+ st = 0;
63
+ }
64
+
65
+ /* Since col_major upper and row_major lower are equal,
66
+ * and col_major lower and row_major upper are equals too -
67
+ * using one code for equal cases. XOR( colmaj, upper )
68
+ */
69
+ if( ( colmaj || lower ) && !( colmaj && lower ) ) {
70
+ for( j = st; j < n; j++ ) {
71
+ for( i = 0; i < MIN( j+1-st, lda ); i++ ) {
72
+ if( LAPACK_ZISNAN( a[i+j*lda] ) )
73
+ return (lapack_logical) 1;
74
+ }
75
+ }
76
+ } else {
77
+ for( j = 0; j < n-st; j++ ) {
78
+ for( i = j+st; i < MIN( n, lda ); i++ ) {
79
+ if( LAPACK_ZISNAN( a[i+j*lda] ) )
80
+ return (lapack_logical) 1;
81
+ }
82
+ }
83
+ }
84
+ return (lapack_logical) 0;
85
+ }
@@ -0,0 +1,85 @@
1
+ /*****************************************************************************
2
+ Copyright (c) 2010, Intel Corp.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * Redistributions of source code must retain the above copyright notice,
9
+ this list of conditions and the following disclaimer.
10
+ * Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+ * Neither the name of Intel Corporation nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27
+ THE POSSIBILITY OF SUCH DAMAGE.
28
+ ******************************************************************************
29
+ * Contents: Native C interface to LAPACK utility function
30
+ * Author: Intel Corporation
31
+ * Created in February, 2010
32
+ *****************************************************************************/
33
+
34
+ #include "lapacke_utils.h"
35
+
36
+ /* Converts input triangular matrix from row-major(C) to column-major(Fortran)
37
+ * layout or vice versa.
38
+ */
39
+
40
+ void LAPACKE_ztr_trans( int matrix_order, char uplo, char diag, lapack_int n,
41
+ const lapack_complex_double *in, lapack_int ldin,
42
+ lapack_complex_double *out, lapack_int ldout )
43
+ {
44
+ lapack_int i, j, st;
45
+ lapack_logical colmaj, lower, unit;
46
+
47
+ if( in == NULL || out == NULL ) return ;
48
+
49
+ colmaj = ( matrix_order == LAPACK_COL_MAJOR );
50
+ lower = LAPACKE_lsame( uplo, 'l' );
51
+ unit = LAPACKE_lsame( diag, 'u' );
52
+
53
+ if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) ||
54
+ ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) ||
55
+ ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) {
56
+ /* Just exit if any of input parameters are wrong */
57
+ return;
58
+ }
59
+ if( unit ) {
60
+ /* If unit, then don't touch diagonal, start from 1st column or row */
61
+ st = 1;
62
+ } else {
63
+ /* If non-unit, then check diagonal also, starting from [0,0] */
64
+ st = 0;
65
+ }
66
+
67
+ /* Perform conversion:
68
+ * Since col_major upper and row_major lower are equal,
69
+ * and col_major lower and row_major upper are equals too -
70
+ * using one code for equal cases. XOR( colmaj, upper )
71
+ */
72
+ if( ( colmaj || lower ) && !( colmaj && lower ) ) {
73
+ for( j = st; j < MIN( n, ldout ); j++ ) {
74
+ for( i = 0; i < MIN( j+1-st, ldin ); i++ ) {
75
+ out[ j+i*ldout ] = in[ i+j*ldin ];
76
+ }
77
+ }
78
+ } else {
79
+ for( j = 0; j < MIN( n-st, ldout ); j++ ) {
80
+ for( i = j+st; i < MIN( n, ldin ); i++ ) {
81
+ out[ j+i*ldout ] = in[ i+j*ldin ];
82
+ }
83
+ }
84
+ }
85
+ }
@@ -0,0 +1,16 @@
1
+ //need to define a few things before including the real lapacke.h
2
+
3
+ #include "data/data.h" //needed because this is where our complex types are defined
4
+
5
+ //tell LAPACKE to use our complex types
6
+ #define LAPACK_COMPLEX_CUSTOM
7
+ #define lapack_complex_float nm::Complex64
8
+ #define lapack_complex_double nm::Complex128
9
+
10
+ //define name-mangling scheme for FORTRAN functions
11
+ //ADD_ means that the symbol dgemm_ is associated with the fortran
12
+ //function DGEMM
13
+ #define ADD_
14
+
15
+ //now we can include the real lapacke.h
16
+ #include "lapacke.h"
@@ -0,0 +1,9 @@
1
+ #We want this to be a C++ file since our complex types require C++.
2
+
3
+ File.open("lapacke.cpp","w") do |file|
4
+ file.puts "//This file is auto-generated by make_lapacke_cpp.rb"
5
+ file.puts "//It includes all source files in the lapacke/ subdirectory"
6
+ Dir["lapacke/**/*.c"].each do |file2|
7
+ file.puts "#include \"#{file2}\""
8
+ end
9
+ end
@@ -0,0 +1,967 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math_lapacke.cpp
25
+ //
26
+ // Ruby-exposed CBLAS and LAPACK functions that call BLAS
27
+ // and LAPACKE functions.
28
+ //
29
+
30
+ #include "data/data.h"
31
+
32
+ #include "lapacke_nmatrix.h"
33
+
34
+ #include "math_lapacke/cblas_local.h"
35
+
36
+ #include "math/util.h"
37
+
38
+ #include "math_lapacke/cblas_templates_lapacke.h"
39
+
40
+ #include "math_lapacke/lapacke_templates.h"
41
+
42
+
43
+ /*
44
+ * Forward Declarations
45
+ */
46
+
47
+ extern "C" {
48
+ /* BLAS Level 1. */
49
+ static VALUE nm_lapacke_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
50
+ static VALUE nm_lapacke_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
51
+ static VALUE nm_lapacke_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
52
+ static VALUE nm_lapacke_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
53
+ static VALUE nm_lapacke_cblas_rotg(VALUE self, VALUE ab);
54
+ static VALUE nm_lapacke_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
55
+
56
+ /* BLAS Level 2. */
57
+ static VALUE nm_lapacke_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
58
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
59
+
60
+ /* BLAS Level 3. */
61
+ static VALUE nm_lapacke_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
62
+ VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
63
+ static VALUE nm_lapacke_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
64
+ VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
65
+ static VALUE nm_lapacke_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
66
+ VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
67
+ static VALUE nm_lapacke_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
68
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
69
+ static VALUE nm_lapacke_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
70
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
71
+
72
+ /* LAPACK. */
73
+ static VALUE nm_lapacke_lapacke_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
74
+ static VALUE nm_lapacke_lapacke_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
75
+ static VALUE nm_lapacke_lapacke_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
76
+ static VALUE nm_lapacke_lapacke_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
77
+ static VALUE nm_lapacke_lapacke_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
78
+ static VALUE nm_lapacke_lapacke_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
79
+
80
+ static VALUE nm_lapacke_lapacke_gesvd(VALUE self, VALUE order, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE superb);
81
+ static VALUE nm_lapacke_lapacke_gesdd(VALUE self, VALUE order, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt);
82
+ static VALUE nm_lapacke_lapacke_geev(VALUE self, VALUE order, VALUE jobvl, VALUE jobvr, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr);
83
+ }
84
+
85
+ extern "C" {
86
+
87
+ ///////////////////
88
+ // Ruby Bindings //
89
+ ///////////////////
90
+
91
+ void nm_math_init_lapack() {
92
+
93
+ VALUE cNMatrix_LAPACKE = rb_define_module_under(cNMatrix, "LAPACKE");
94
+
95
+ VALUE cNMatrix_LAPACKE_LAPACK = rb_define_module_under(cNMatrix_LAPACKE, "LAPACK");
96
+ VALUE cNMatrix_LAPACKE_BLAS = rb_define_module_under(cNMatrix_LAPACKE, "BLAS");
97
+
98
+ //BLAS Level 1
99
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_scal", (METHOD)nm_lapacke_cblas_scal, 4);
100
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_nrm2", (METHOD)nm_lapacke_cblas_nrm2, 3);
101
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_asum", (METHOD)nm_lapacke_cblas_asum, 3);
102
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_rot", (METHOD)nm_lapacke_cblas_rot, 7);
103
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_rotg", (METHOD)nm_lapacke_cblas_rotg, 1);
104
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_imax", (METHOD)nm_lapacke_cblas_imax, 3);
105
+
106
+ //BLAS Level 2
107
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_gemv", (METHOD)nm_lapacke_cblas_gemv, 11);
108
+
109
+ //BLAS Level 3
110
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_gemm", (METHOD)nm_lapacke_cblas_gemm, 14);
111
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_trsm", (METHOD)nm_lapacke_cblas_trsm, 12);
112
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_trmm", (METHOD)nm_lapacke_cblas_trmm, 12);
113
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_syrk", (METHOD)nm_lapacke_cblas_syrk, 11);
114
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_herk", (METHOD)nm_lapacke_cblas_herk, 11);
115
+
116
+ /* LAPACK Functions */
117
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_getrf", (METHOD)nm_lapacke_lapacke_getrf, 5);
118
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_getrs", (METHOD)nm_lapacke_lapacke_getrs, 9);
119
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_getri", (METHOD)nm_lapacke_lapacke_getri, 5);
120
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_potrf", (METHOD)nm_lapacke_lapacke_potrf, 5);
121
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_potrs", (METHOD)nm_lapacke_lapacke_potrs, 8);
122
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_potri", (METHOD)nm_lapacke_lapacke_potri, 5);
123
+
124
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_gesvd", (METHOD)nm_lapacke_lapacke_gesvd, 13);
125
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_gesdd", (METHOD)nm_lapacke_lapacke_gesdd, 11);
126
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_geev", (METHOD)nm_lapacke_lapacke_geev, 12);
127
+ }
128
+
129
+ /*
130
+ * call-seq:
131
+ * NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix
132
+ *
133
+ * BLAS level 1 function +scal+. Works with all dtypes.
134
+ *
135
+ * Scale +vector+ in-place by +alpha+ and also return it. The operation is as
136
+ * follows:
137
+ * x <- alpha * x
138
+ *
139
+ * - +n+ -> Number of elements of +vector+.
140
+ * - +alpha+ -> Scalar value used in the operation.
141
+ * - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place.
142
+ * - +inc+ -> Increment used in the scaling function. Should generally be 1.
143
+ */
144
+ static VALUE nm_lapacke_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) {
145
+ nm::dtype_t dtype = NM_DTYPE(vector);
146
+
147
+ void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
148
+ rubyval_to_cval(alpha, dtype, scalar);
149
+
150
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_scal, void, const int n,
151
+ const void* scalar, void* x, const int incx);
152
+
153
+ ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements,
154
+ FIX2INT(incx));
155
+
156
+ return vector;
157
+ }
158
+
159
+ /*
160
+ * Call any of the cblas_xrotg functions as directly as possible.
161
+ *
162
+ * xROTG computes the elements of a Givens plane rotation matrix such that:
163
+ *
164
+ * | c s | | a | | r |
165
+ * | -s c | * | b | = | 0 |
166
+ *
167
+ * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
168
+ *
169
+ * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
170
+ *
171
+ * This function differs from most of the other raw BLAS accessors. Instead of
172
+ * providing a, b, c, s as arguments, you should only provide a and b (the
173
+ * inputs), and you should provide them as the first two elements of any dense
174
+ * NMatrix type.
175
+ *
176
+ * The outputs [c,s] will be returned in a Ruby Array at the end; the input
177
+ * NMatrix will also be modified in-place.
178
+ *
179
+ * This function, like the other cblas_ functions, does minimal type-checking.
180
+ */
181
+ static VALUE nm_lapacke_cblas_rotg(VALUE self, VALUE ab) {
182
+ static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
183
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
184
+ nm::math::lapacke::cblas_rotg<float>,
185
+ nm::math::lapacke::cblas_rotg<double>,
186
+ nm::math::lapacke::cblas_rotg<nm::Complex64>,
187
+ nm::math::lapacke::cblas_rotg<nm::Complex128>,
188
+ NULL //nm::math::lapacke::cblas_rotg<nm::RubyObject>
189
+ };
190
+
191
+ nm::dtype_t dtype = NM_DTYPE(ab);
192
+
193
+ if (!ttable[dtype]) {
194
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
195
+ return Qnil;
196
+
197
+ } else {
198
+ NM_CONSERVATIVE(nm_register_value(&self));
199
+ NM_CONSERVATIVE(nm_register_value(&ab));
200
+ void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
201
+ *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
202
+
203
+ // extract A and B from the NVector (first two elements)
204
+ void* pA = NM_STORAGE_DENSE(ab)->elements;
205
+ void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
206
+ // c and s are output
207
+
208
+ ttable[dtype](pA, pB, pC, pS);
209
+
210
+ VALUE result = rb_ary_new2(2);
211
+
212
+ if (dtype == nm::RUBYOBJ) {
213
+ rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
214
+ rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
215
+ } else {
216
+ rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
217
+ rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
218
+ }
219
+ NM_CONSERVATIVE(nm_unregister_value(&ab));
220
+ NM_CONSERVATIVE(nm_unregister_value(&self));
221
+ return result;
222
+ }
223
+ }
224
+
225
+
226
+ /*
227
+ * Call any of the cblas_xrot functions as directly as possible.
228
+ *
229
+ * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
230
+ *
231
+ * It's tough to find documentation on xROT. Here are what we think the arguments are for:
232
+ * * n :: number of elements to consider in x and y
233
+ * * x :: a vector (expects an NVector)
234
+ * * incx :: stride of x
235
+ * * y :: a vector (expects an NVector)
236
+ * * incy :: stride of y
237
+ * * c :: cosine of the angle of rotation
238
+ * * s :: sine of the angle of rotation
239
+ *
240
+ * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
241
+ * will be float for Complex64 or double for Complex128.
242
+ *
243
+ * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
244
+ * with its arguments?
245
+ *
246
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
247
+ * handling, so you can easily crash Ruby!
248
+ */
249
+ static VALUE nm_lapacke_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
250
+ static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
251
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
252
+ nm::math::lapacke::cblas_rot<float,float>,
253
+ nm::math::lapacke::cblas_rot<double,double>,
254
+ nm::math::lapacke::cblas_rot<nm::Complex64,float>,
255
+ nm::math::lapacke::cblas_rot<nm::Complex128,double>,
256
+ nm::math::lapacke::cblas_rot<nm::RubyObject,nm::RubyObject>
257
+ };
258
+
259
+ nm::dtype_t dtype = NM_DTYPE(x);
260
+
261
+
262
+ if (!ttable[dtype]) {
263
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
264
+ return Qfalse;
265
+ } else {
266
+ void *pC, *pS;
267
+
268
+ // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
269
+ if (dtype == nm::COMPLEX64) {
270
+ pC = NM_ALLOCA_N(float,1);
271
+ pS = NM_ALLOCA_N(float,1);
272
+ rubyval_to_cval(c, nm::FLOAT32, pC);
273
+ rubyval_to_cval(s, nm::FLOAT32, pS);
274
+ } else if (dtype == nm::COMPLEX128) {
275
+ pC = NM_ALLOCA_N(double,1);
276
+ pS = NM_ALLOCA_N(double,1);
277
+ rubyval_to_cval(c, nm::FLOAT64, pC);
278
+ rubyval_to_cval(s, nm::FLOAT64, pS);
279
+ } else {
280
+ pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
281
+ pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
282
+ rubyval_to_cval(c, dtype, pC);
283
+ rubyval_to_cval(s, dtype, pS);
284
+ }
285
+
286
+
287
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
288
+
289
+ return Qtrue;
290
+ }
291
+ }
292
+
293
+
294
+ /*
295
+ * Call any of the cblas_xnrm2 functions as directly as possible.
296
+ *
297
+ * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
298
+ *
299
+ * Arguments:
300
+ * * n :: length of x, must be at least 0
301
+ * * x :: pointer to first entry of input vector
302
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
303
+ *
304
+ * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
305
+ * with its arguments?
306
+ *
307
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
308
+ * handling, so you can easily crash Ruby!
309
+ */
310
+ static VALUE nm_lapacke_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
311
+
312
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
313
+ NULL, NULL, NULL, NULL, NULL, // no help for integers
314
+ nm::math::lapacke::cblas_nrm2<float32_t,float32_t>,
315
+ nm::math::lapacke::cblas_nrm2<float64_t,float64_t>,
316
+ nm::math::lapacke::cblas_nrm2<float32_t,nm::Complex64>,
317
+ nm::math::lapacke::cblas_nrm2<float64_t,nm::Complex128>,
318
+ nm::math::lapacke::cblas_nrm2<nm::RubyObject,nm::RubyObject>
319
+ };
320
+
321
+ nm::dtype_t dtype = NM_DTYPE(x);
322
+
323
+ if (!ttable[dtype]) {
324
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
325
+ return Qnil;
326
+
327
+ } else {
328
+ // Determine the return dtype and allocate it
329
+ nm::dtype_t rdtype = dtype;
330
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
331
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
332
+
333
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
334
+
335
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
336
+
337
+ return rubyobj_from_cval(Result, rdtype).rval;
338
+ }
339
+ }
340
+
341
+
342
+
343
+ /*
344
+ * Call any of the cblas_xasum functions as directly as possible.
345
+ *
346
+ * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
347
+ * of a vector x.
348
+ *
349
+ * Arguments:
350
+ * * n :: length of x, must be at least 0
351
+ * * x :: pointer to first entry of input vector
352
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
353
+ *
354
+ * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
355
+ * with its arguments?
356
+ *
357
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
358
+ * handling, so you can easily crash Ruby!
359
+ */
360
+ static VALUE nm_lapacke_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
361
+
362
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
363
+ nm::math::lapacke::cblas_asum<uint8_t,uint8_t>,
364
+ nm::math::lapacke::cblas_asum<int8_t,int8_t>,
365
+ nm::math::lapacke::cblas_asum<int16_t,int16_t>,
366
+ nm::math::lapacke::cblas_asum<int32_t,int32_t>,
367
+ nm::math::lapacke::cblas_asum<int64_t,int64_t>,
368
+ nm::math::lapacke::cblas_asum<float32_t,float32_t>,
369
+ nm::math::lapacke::cblas_asum<float64_t,float64_t>,
370
+ nm::math::lapacke::cblas_asum<float32_t,nm::Complex64>,
371
+ nm::math::lapacke::cblas_asum<float64_t,nm::Complex128>,
372
+ nm::math::lapacke::cblas_asum<nm::RubyObject,nm::RubyObject>
373
+ };
374
+
375
+ nm::dtype_t dtype = NM_DTYPE(x);
376
+
377
+ // Determine the return dtype and allocate it
378
+ nm::dtype_t rdtype = dtype;
379
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
380
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
381
+
382
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
383
+
384
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
385
+
386
+ return rubyobj_from_cval(Result, rdtype).rval;
387
+ }
388
+
389
+ /*
390
+ * call-seq:
391
+ * NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum
392
+ *
393
+ * BLAS level 1 routine.
394
+ *
395
+ * Return the index of the largest element of +vector+.
396
+ *
397
+ * - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols.
398
+ * - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype.
399
+ * - +inc+ -> It's the increment used when searching. Use 1 except if you know
400
+ * what you're doing.
401
+ */
402
+ static VALUE nm_lapacke_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) {
403
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_imax, int, const int n, const void* x, const int incx);
404
+
405
+ nm::dtype_t dtype = NM_DTYPE(x);
406
+
407
+ int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx));
408
+
409
+ // Convert to Ruby's Int value.
410
+ return INT2FIX(index);
411
+ }
412
+
413
+ /* Call any of the cblas_xgemv functions as directly as possible.
414
+ *
415
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
416
+ *
417
+ * y = alpha*op(A)*x + beta*y
418
+ *
419
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
420
+ *
421
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
422
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
423
+ * expose the ultra-optimized ATLAS versions.
424
+ *
425
+ * == Arguments
426
+ * See: http://www.netlib.org/blas/dgemm.f
427
+ *
428
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
429
+ * with its arguments?
430
+ *
431
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
432
+ * handling, so you can easily crash Ruby!
433
+ */
434
+ static VALUE nm_lapacke_cblas_gemv(VALUE self,
435
+ VALUE trans_a,
436
+ VALUE m, VALUE n,
437
+ VALUE alpha,
438
+ VALUE a, VALUE lda,
439
+ VALUE x, VALUE incx,
440
+ VALUE beta,
441
+ VALUE y, VALUE incy)
442
+ {
443
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)
444
+
445
+ nm::dtype_t dtype = NM_DTYPE(a);
446
+
447
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
448
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
449
+ rubyval_to_cval(alpha, dtype, pAlpha);
450
+ rubyval_to_cval(beta, dtype, pBeta);
451
+
452
+ return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
453
+ }
454
+
455
+ /* Call any of the cblas_xgemm functions as directly as possible.
456
+ *
457
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
458
+ *
459
+ * C = alpha*op(A)*op(B) + beta*C
460
+ *
461
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
462
+ *
463
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
464
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
465
+ * expose the ultra-optimized ATLAS versions.
466
+ *
467
+ * == Arguments
468
+ * See: http://www.netlib.org/blas/dgemm.f
469
+ *
470
+ * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
471
+ * with its arguments?
472
+ *
473
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
474
+ * handling, so you can easily crash Ruby!
475
+ */
476
+ static VALUE nm_lapacke_cblas_gemm(VALUE self,
477
+ VALUE order,
478
+ VALUE trans_a, VALUE trans_b,
479
+ VALUE m, VALUE n, VALUE k,
480
+ VALUE alpha,
481
+ VALUE a, VALUE lda,
482
+ VALUE b, VALUE ldb,
483
+ VALUE beta,
484
+ VALUE c, VALUE ldc)
485
+ {
486
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
487
+
488
+ nm::dtype_t dtype = NM_DTYPE(a);
489
+
490
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
491
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
492
+ rubyval_to_cval(alpha, dtype, pAlpha);
493
+ rubyval_to_cval(beta, dtype, pBeta);
494
+
495
+ ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
496
+
497
+ return c;
498
+ }
499
+
500
+
501
+ static VALUE nm_lapacke_cblas_trsm(VALUE self,
502
+ VALUE order,
503
+ VALUE side, VALUE uplo,
504
+ VALUE trans_a, VALUE diag,
505
+ VALUE m, VALUE n,
506
+ VALUE alpha,
507
+ VALUE a, VALUE lda,
508
+ VALUE b, VALUE ldb)
509
+ {
510
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
511
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
512
+ const int m, const int n, const void* alpha, const void* a,
513
+ const int lda, void* b, const int ldb) = {
514
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
515
+ nm::math::lapacke::cblas_trsm<float>,
516
+ nm::math::lapacke::cblas_trsm<double>,
517
+ cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
518
+ nm::math::lapacke::cblas_trsm<nm::RubyObject>
519
+ };
520
+
521
+ nm::dtype_t dtype = NM_DTYPE(a);
522
+
523
+ if (!ttable[dtype]) {
524
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
525
+ } else {
526
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
527
+ rubyval_to_cval(alpha, dtype, pAlpha);
528
+
529
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
530
+ }
531
+
532
+ return Qtrue;
533
+ }
534
+
535
+ static VALUE nm_lapacke_cblas_trmm(VALUE self,
536
+ VALUE order,
537
+ VALUE side, VALUE uplo,
538
+ VALUE trans_a, VALUE diag,
539
+ VALUE m, VALUE n,
540
+ VALUE alpha,
541
+ VALUE a, VALUE lda,
542
+ VALUE b, VALUE ldb)
543
+ {
544
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
545
+ const enum CBLAS_SIDE, const enum CBLAS_UPLO,
546
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
547
+ const int m, const int n, const void* alpha, const void* a,
548
+ const int lda, void* b, const int ldb) = {
549
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
550
+ nm::math::lapacke::cblas_trmm<float>,
551
+ nm::math::lapacke::cblas_trmm<double>,
552
+ cblas_ctrmm, cblas_ztrmm, // call directly, same function signature!
553
+ NULL
554
+ };
555
+
556
+ nm::dtype_t dtype = NM_DTYPE(a);
557
+
558
+ if (!ttable[dtype]) {
559
+ rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
560
+ } else {
561
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
562
+ rubyval_to_cval(alpha, dtype, pAlpha);
563
+
564
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
565
+ }
566
+
567
+ return b;
568
+ }
569
+
570
+ static VALUE nm_lapacke_cblas_syrk(VALUE self,
571
+ VALUE order,
572
+ VALUE uplo,
573
+ VALUE trans,
574
+ VALUE n, VALUE k,
575
+ VALUE alpha,
576
+ VALUE a, VALUE lda,
577
+ VALUE beta,
578
+ VALUE c, VALUE ldc)
579
+ {
580
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
581
+ const int n, const int k, const void* alpha, const void* a,
582
+ const int lda, const void* beta, void* c, const int ldc) = {
583
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
584
+ nm::math::lapacke::cblas_syrk<float>,
585
+ nm::math::lapacke::cblas_syrk<double>,
586
+ cblas_csyrk, cblas_zsyrk, // call directly, same function signature!
587
+ NULL
588
+ };
589
+
590
+ nm::dtype_t dtype = NM_DTYPE(a);
591
+
592
+ if (!ttable[dtype]) {
593
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
594
+ } else {
595
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
596
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
597
+ rubyval_to_cval(alpha, dtype, pAlpha);
598
+ rubyval_to_cval(beta, dtype, pBeta);
599
+
600
+ ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
601
+ }
602
+
603
+ return Qtrue;
604
+ }
605
+
606
+ static VALUE nm_lapacke_cblas_herk(VALUE self,
607
+ VALUE order,
608
+ VALUE uplo,
609
+ VALUE trans,
610
+ VALUE n, VALUE k,
611
+ VALUE alpha,
612
+ VALUE a, VALUE lda,
613
+ VALUE beta,
614
+ VALUE c, VALUE ldc)
615
+ {
616
+
617
+ nm::dtype_t dtype = NM_DTYPE(a);
618
+
619
+ if (dtype == nm::COMPLEX64) {
620
+ cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
621
+ } else if (dtype == nm::COMPLEX128) {
622
+ cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
623
+ } else
624
+ rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
625
+ return Qtrue;
626
+ }
627
+
628
+ /* Call any of the lapacke_xgetri functions as directly as possible.
629
+ *
630
+ * This version (the LAPACKE version) differs from the CLAPACK version in terms of the
631
+ * input it expects (which is the output of getrf). See getrf for details.
632
+ *
633
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
634
+ * handling, so you can easily crash Ruby!
635
+ *
636
+ * Returns an array giving the pivot indices (normally these are argument #5).
637
+ */
638
+ static VALUE nm_lapacke_lapacke_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
639
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
640
+ NULL, NULL, NULL, NULL, NULL,
641
+ nm::math::lapacke::lapacke_getri<float>,
642
+ nm::math::lapacke::lapacke_getri<double>,
643
+ nm::math::lapacke::lapacke_getri<nm::Complex64>,
644
+ nm::math::lapacke::lapacke_getri<nm::Complex128>,
645
+ NULL
646
+ };
647
+
648
+ // Allocate the C version of the pivot index array
649
+ int* ipiv_;
650
+ if (TYPE(ipiv) != T_ARRAY) {
651
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
652
+ } else {
653
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
654
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
655
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
656
+ }
657
+ }
658
+
659
+ if (!ttable[NM_DTYPE(a)]) {
660
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
661
+ } else {
662
+ ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
663
+ }
664
+
665
+ return a;
666
+ }
667
+
668
+ /* Call any of the lapacke_xgetrf functions as directly as possible.
669
+ *
670
+ * The lapacke_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
671
+ * matrix A using partial pivoting with row interchanges.
672
+ *
673
+ * The factorization has the form:
674
+ * A = P * L * U
675
+ * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
676
+ * and U is upper triangular (upper trapezoidal if m < n).
677
+ *
678
+ * This version of getrf (the LAPACKE one) differs from the CLAPACK version. The CLAPACK has
679
+ * different behavior for row-major matrices (the upper matrix has unit diagonals instead of
680
+ * the lower and it uses column permutations instead of rows).
681
+ *
682
+ * This is the right-looking level 3 BLAS version of the algorithm.
683
+ *
684
+ * == Arguments
685
+ * See: http://www.netlib.org/lapack/double/dgetrf.f
686
+ * (You don't need argument 5; this is the value returned by this function.)
687
+ *
688
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
689
+ * handling, so you can easily crash Ruby!
690
+ *
691
+ * Returns an array giving the pivot indices (normally these are argument #5).
692
+ */
693
+ static VALUE nm_lapacke_lapacke_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
694
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
695
+ NULL, NULL, NULL, NULL, NULL,
696
+ nm::math::lapacke::lapacke_getrf<float>,
697
+ nm::math::lapacke::lapacke_getrf<double>,
698
+ nm::math::lapacke::lapacke_getrf<nm::Complex64>,
699
+ nm::math::lapacke::lapacke_getrf<nm::Complex128>,
700
+ NULL
701
+ };
702
+
703
+ int M = FIX2INT(m),
704
+ N = FIX2INT(n);
705
+
706
+ // Allocate the pivot index array, which is of size MIN(M, N).
707
+ size_t ipiv_size = std::min(M,N);
708
+ int* ipiv = NM_ALLOCA_N(int, ipiv_size);
709
+
710
+ if (!ttable[NM_DTYPE(a)]) {
711
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
712
+ } else {
713
+ ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
714
+ }
715
+
716
+ // Result will be stored in a. We return ipiv as an array.
717
+ VALUE ipiv_array = rb_ary_new2(ipiv_size);
718
+ for (size_t i = 0; i < ipiv_size; ++i) {
719
+ rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
720
+ }
721
+
722
+ return ipiv_array;
723
+ }
724
+
725
+ /*
726
+ * Call any of the lapacke_xgetrs functions as directly as possible.
727
+ */
728
+ static VALUE nm_lapacke_lapacke_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
729
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, char Trans, const int N,
730
+ const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
731
+ const int ldb) = {
732
+ NULL, NULL, NULL, NULL, NULL,
733
+ nm::math::lapacke::lapacke_getrs<float>,
734
+ nm::math::lapacke::lapacke_getrs<double>,
735
+ nm::math::lapacke::lapacke_getrs<nm::Complex64>,
736
+ nm::math::lapacke::lapacke_getrs<nm::Complex128>,
737
+ NULL
738
+ };
739
+
740
+ // Allocate the C version of the pivot index array
741
+ int* ipiv_;
742
+ if (TYPE(ipiv) != T_ARRAY) {
743
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
744
+ } else {
745
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
746
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
747
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
748
+ }
749
+ }
750
+
751
+ if (!ttable[NM_DTYPE(a)]) {
752
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
753
+ } else {
754
+ ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
755
+ ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
756
+ }
757
+
758
+ // b is both returned and modified directly in the argument list.
759
+ return b;
760
+ }
761
+
762
+ /* Call any of the LAPACKE_xpotrf functions as directly as possible.
763
+ *
764
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
765
+ * handling, so you can easily crash Ruby!
766
+ */
767
+ static VALUE nm_lapacke_lapacke_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
768
+
769
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, char, const int n, void* a, const int lda) = {
770
+ NULL, NULL, NULL, NULL, NULL,
771
+ nm::math::lapacke::lapacke_potrf<float>,
772
+ nm::math::lapacke::lapacke_potrf<double>,
773
+ nm::math::lapacke::lapacke_potrf<nm::Complex64>,
774
+ nm::math::lapacke::lapacke_potrf<nm::Complex128>,
775
+ NULL
776
+ };
777
+
778
+ if (!ttable[NM_DTYPE(a)]) {
779
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
780
+ } else {
781
+ ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
782
+ }
783
+
784
+ return a;
785
+ }
786
+
787
+ /*
788
+ * Call any of the LAPACKE_xpotrs functions as directly as possible.
789
+ */
790
+ static VALUE nm_lapacke_lapacke_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
791
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, char Uplo, const int N,
792
+ const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
793
+ NULL, NULL, NULL, NULL, NULL,
794
+ nm::math::lapacke::lapacke_potrs<float>,
795
+ nm::math::lapacke::lapacke_potrs<double>,
796
+ nm::math::lapacke::lapacke_potrs<nm::Complex64>,
797
+ nm::math::lapacke::lapacke_potrs<nm::Complex128>,
798
+ NULL
799
+ };
800
+
801
+
802
+ if (!ttable[NM_DTYPE(a)]) {
803
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
804
+ } else {
805
+
806
+ ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
807
+ NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
808
+ }
809
+
810
+ // b is both returned and modified directly in the argument list.
811
+ return b;
812
+ }
813
+
814
+ /* Call any of the lapacke_xpotri functions as directly as possible.
815
+ *
816
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
817
+ * handling, so you can easily crash Ruby!
818
+ */
819
+ static VALUE nm_lapacke_lapacke_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
820
+
821
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, char, const int n, void* a, const int lda) = {
822
+ NULL, NULL, NULL, NULL, NULL,
823
+ nm::math::lapacke::lapacke_potri<float>,
824
+ nm::math::lapacke::lapacke_potri<double>,
825
+ nm::math::lapacke::lapacke_potri<nm::Complex64>,
826
+ nm::math::lapacke::lapacke_potri<nm::Complex128>,
827
+ NULL
828
+ };
829
+
830
+ if (!ttable[NM_DTYPE(a)]) {
831
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
832
+ } else {
833
+ ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
834
+ }
835
+
836
+ return a;
837
+ }
838
+
839
+ //badly need docs for gesvd, gesdd because of the real/complex mixing
840
+
841
+ /*
842
+ * xGESVD computes the singular value decomposition (SVD) of a real
843
+ * M-by-N matrix A, optionally computing the left and/or right singular
844
+ * vectors. The SVD is written
845
+ *
846
+ * A = U * SIGMA * transpose(V)
847
+ *
848
+ * where SIGMA is an M-by-N matrix which is zero except for its
849
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
850
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
851
+ * are the singular values of A; they are real and non-negative, and
852
+ * are returned in descending order. The first min(m,n) columns of
853
+ * U and V are the left and right singular vectors of A.
854
+ *
855
+ * Note that the routine returns V**T, not V.
856
+ */
857
+ static VALUE nm_lapacke_lapacke_gesvd(VALUE self, VALUE order, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE superb) {
858
+ static int (*gesvd_table[nm::NUM_DTYPES])(int, char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* superb) = {
859
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
860
+ nm::math::lapacke::lapacke_gesvd<float,float>,
861
+ nm::math::lapacke::lapacke_gesvd<double,double>,
862
+ nm::math::lapacke::lapacke_gesvd<nm::Complex64,float>,
863
+ nm::math::lapacke::lapacke_gesvd<nm::Complex128,double>,
864
+ NULL // no Ruby objects
865
+ };
866
+
867
+ nm::dtype_t dtype = NM_DTYPE(a);
868
+
869
+
870
+ if (!gesvd_table[dtype]) {
871
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
872
+ return Qfalse;
873
+ } else {
874
+ int M = FIX2INT(m),
875
+ N = FIX2INT(n);
876
+
877
+ char JOBU = lapack_svd_job_sym(jobu),
878
+ JOBVT = lapack_svd_job_sym(jobvt);
879
+
880
+ int info = gesvd_table[dtype](blas_order_sym(order),JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
881
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
882
+ NM_STORAGE_DENSE(superb)->elements);
883
+ return INT2FIX(info);
884
+ }
885
+ }
886
+
887
+ static VALUE nm_lapacke_lapacke_gesdd(VALUE self, VALUE order, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt) {
888
+ static int (*gesdd_table[nm::NUM_DTYPES])(int, char, int, int, void* a, int, void* s, void* u, int, void* vt, int) = {
889
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
890
+ nm::math::lapacke::lapacke_gesdd<float,float>,
891
+ nm::math::lapacke::lapacke_gesdd<double,double>,
892
+ nm::math::lapacke::lapacke_gesdd<nm::Complex64,float>,
893
+ nm::math::lapacke::lapacke_gesdd<nm::Complex128,double>,
894
+ NULL // no Ruby objects
895
+ };
896
+
897
+ nm::dtype_t dtype = NM_DTYPE(a);
898
+
899
+
900
+ if (!gesdd_table[dtype]) {
901
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
902
+ return Qfalse;
903
+ } else {
904
+ int M = FIX2INT(m),
905
+ N = FIX2INT(n);
906
+
907
+ char JOBZ = lapack_svd_job_sym(jobz);
908
+
909
+ int info = gesdd_table[dtype](blas_order_sym(order),JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
910
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt));
911
+ return INT2FIX(info);
912
+ }
913
+ }
914
+
915
+ /*
916
+ * GEEV computes for an N-by-N real nonsymmetric matrix A, the
917
+ * eigenvalues and, optionally, the left and/or right eigenvectors.
918
+ *
919
+ * The right eigenvector v(j) of A satisfies
920
+ * A * v(j) = lambda(j) * v(j)
921
+ * where lambda(j) is its eigenvalue.
922
+ *
923
+ * The left eigenvector u(j) of A satisfies
924
+ * u(j)**H * A = lambda(j) * u(j)**H
925
+ * where u(j)**H denotes the conjugate transpose of u(j).
926
+ *
927
+ * The computed eigenvectors are normalized to have Euclidean norm
928
+ * equal to 1 and largest component real.
929
+ */
930
+ //note on wi
931
+ static VALUE nm_lapacke_lapacke_geev(VALUE self, VALUE order, VALUE jobvl, VALUE jobvr, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr) {
932
+ static int (*geev_table[nm::NUM_DTYPES])(int, char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int) = {
933
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
934
+ nm::math::lapacke::lapacke_geev<float>,
935
+ nm::math::lapacke::lapacke_geev<double>,
936
+ nm::math::lapacke::lapacke_geev<nm::Complex64>,
937
+ nm::math::lapacke::lapacke_geev<nm::Complex128>,
938
+ NULL // no Ruby objects
939
+ };
940
+
941
+ nm::dtype_t dtype = NM_DTYPE(a);
942
+
943
+
944
+ if (!geev_table[dtype]) {
945
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
946
+ return Qfalse;
947
+ } else {
948
+ int N = FIX2INT(n);
949
+
950
+ char JOBVL = lapack_evd_job_sym(jobvl),
951
+ JOBVR = lapack_evd_job_sym(jobvr);
952
+
953
+ void* A = NM_STORAGE_DENSE(a)->elements;
954
+ void* W = NM_STORAGE_DENSE(w)->elements;
955
+ void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements; //For complex, wi should be nil
956
+ void* VL = JOBVL == 'V' ? NM_STORAGE_DENSE(vl)->elements : NULL;
957
+ void* VR = JOBVR == 'V' ? NM_STORAGE_DENSE(vr)->elements : NULL;
958
+
959
+ // Perform the actual calculation.
960
+ int info = geev_table[dtype](blas_order_sym(order), JOBVL, JOBVR, N, A, FIX2INT(lda), W, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr));
961
+
962
+ return INT2FIX(info);
963
+ }
964
+ }
965
+
966
+
967
+ }