nmatrix-lapacke 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/ext/nmatrix/data/complex.h +364 -0
  3. data/ext/nmatrix/data/data.h +638 -0
  4. data/ext/nmatrix/data/meta.h +64 -0
  5. data/ext/nmatrix/data/ruby_object.h +389 -0
  6. data/ext/nmatrix/math/asum.h +120 -0
  7. data/ext/nmatrix/math/cblas_enums.h +36 -0
  8. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  9. data/ext/nmatrix/math/gemm.h +241 -0
  10. data/ext/nmatrix/math/gemv.h +178 -0
  11. data/ext/nmatrix/math/getrf.h +255 -0
  12. data/ext/nmatrix/math/getrs.h +121 -0
  13. data/ext/nmatrix/math/imax.h +79 -0
  14. data/ext/nmatrix/math/laswp.h +165 -0
  15. data/ext/nmatrix/math/long_dtype.h +49 -0
  16. data/ext/nmatrix/math/math.h +744 -0
  17. data/ext/nmatrix/math/nrm2.h +160 -0
  18. data/ext/nmatrix/math/rot.h +117 -0
  19. data/ext/nmatrix/math/rotg.h +106 -0
  20. data/ext/nmatrix/math/scal.h +71 -0
  21. data/ext/nmatrix/math/trsm.h +332 -0
  22. data/ext/nmatrix/math/util.h +148 -0
  23. data/ext/nmatrix/nm_memory.h +60 -0
  24. data/ext/nmatrix/nmatrix.h +408 -0
  25. data/ext/nmatrix/ruby_constants.h +106 -0
  26. data/ext/nmatrix/storage/common.h +176 -0
  27. data/ext/nmatrix/storage/dense/dense.h +128 -0
  28. data/ext/nmatrix/storage/list/list.h +137 -0
  29. data/ext/nmatrix/storage/storage.h +98 -0
  30. data/ext/nmatrix/storage/yale/class.h +1139 -0
  31. data/ext/nmatrix/storage/yale/iterators/base.h +142 -0
  32. data/ext/nmatrix/storage/yale/iterators/iterator.h +130 -0
  33. data/ext/nmatrix/storage/yale/iterators/row.h +449 -0
  34. data/ext/nmatrix/storage/yale/iterators/row_stored.h +139 -0
  35. data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +168 -0
  36. data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +123 -0
  37. data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
  38. data/ext/nmatrix/storage/yale/yale.h +202 -0
  39. data/ext/nmatrix/types.h +54 -0
  40. data/ext/nmatrix/util/io.h +115 -0
  41. data/ext/nmatrix/util/sl_list.h +143 -0
  42. data/ext/nmatrix/util/util.h +78 -0
  43. data/ext/nmatrix_lapacke/extconf.rb +200 -0
  44. data/ext/nmatrix_lapacke/lapacke.cpp +100 -0
  45. data/ext/nmatrix_lapacke/lapacke/include/lapacke.h +16445 -0
  46. data/ext/nmatrix_lapacke/lapacke/include/lapacke_config.h +119 -0
  47. data/ext/nmatrix_lapacke/lapacke/include/lapacke_mangling.h +17 -0
  48. data/ext/nmatrix_lapacke/lapacke/include/lapacke_mangling_with_flags.h +17 -0
  49. data/ext/nmatrix_lapacke/lapacke/include/lapacke_utils.h +579 -0
  50. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev.c +89 -0
  51. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev_work.c +141 -0
  52. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd.c +106 -0
  53. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd_work.c +158 -0
  54. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesvd.c +94 -0
  55. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesvd_work.c +149 -0
  56. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrf.c +51 -0
  57. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrf_work.c +83 -0
  58. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetri.c +77 -0
  59. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetri_work.c +89 -0
  60. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrs.c +56 -0
  61. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrs_work.c +102 -0
  62. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrf.c +50 -0
  63. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrf_work.c +82 -0
  64. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotri.c +50 -0
  65. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotri_work.c +82 -0
  66. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrs.c +55 -0
  67. data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrs_work.c +101 -0
  68. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgeev.c +78 -0
  69. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgeev_work.c +136 -0
  70. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesdd.c +88 -0
  71. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesdd_work.c +153 -0
  72. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesvd.c +83 -0
  73. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesvd_work.c +144 -0
  74. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrf.c +50 -0
  75. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrf_work.c +81 -0
  76. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetri.c +75 -0
  77. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetri_work.c +87 -0
  78. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrs.c +55 -0
  79. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrs_work.c +99 -0
  80. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrf.c +50 -0
  81. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrf_work.c +81 -0
  82. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotri.c +50 -0
  83. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotri_work.c +81 -0
  84. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrs.c +54 -0
  85. data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrs_work.c +97 -0
  86. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgeev.c +78 -0
  87. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgeev_work.c +134 -0
  88. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesdd.c +88 -0
  89. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesdd_work.c +152 -0
  90. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesvd.c +83 -0
  91. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesvd_work.c +143 -0
  92. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrf.c +50 -0
  93. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrf_work.c +81 -0
  94. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetri.c +75 -0
  95. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetri_work.c +87 -0
  96. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrs.c +55 -0
  97. data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrs_work.c +99 -0
  98. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrf.c +50 -0
  99. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrf_work.c +81 -0
  100. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotri.c +50 -0
  101. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotri_work.c +81 -0
  102. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrs.c +54 -0
  103. data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrs_work.c +97 -0
  104. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgeev.c +89 -0
  105. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgeev_work.c +141 -0
  106. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesdd.c +106 -0
  107. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesdd_work.c +158 -0
  108. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesvd.c +94 -0
  109. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesvd_work.c +149 -0
  110. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrf.c +51 -0
  111. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrf_work.c +83 -0
  112. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetri.c +77 -0
  113. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetri_work.c +89 -0
  114. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrs.c +56 -0
  115. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrs_work.c +102 -0
  116. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrf.c +50 -0
  117. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrf_work.c +82 -0
  118. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotri.c +50 -0
  119. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotri_work.c +82 -0
  120. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrs.c +55 -0
  121. data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrs_work.c +101 -0
  122. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_nancheck.c +62 -0
  123. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_trans.c +65 -0
  124. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_nancheck.c +43 -0
  125. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_trans.c +45 -0
  126. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_nancheck.c +85 -0
  127. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_trans.c +85 -0
  128. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_nancheck.c +62 -0
  129. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_trans.c +65 -0
  130. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_nancheck.c +43 -0
  131. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_trans.c +45 -0
  132. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_nancheck.c +85 -0
  133. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_trans.c +85 -0
  134. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_lsame.c +41 -0
  135. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_nancheck.c +62 -0
  136. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_trans.c +65 -0
  137. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_nancheck.c +43 -0
  138. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_trans.c +45 -0
  139. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_str_nancheck.c +85 -0
  140. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_str_trans.c +85 -0
  141. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_xerbla.c +46 -0
  142. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_nancheck.c +62 -0
  143. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_trans.c +65 -0
  144. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zpo_nancheck.c +43 -0
  145. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zpo_trans.c +45 -0
  146. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ztr_nancheck.c +85 -0
  147. data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ztr_trans.c +85 -0
  148. data/ext/nmatrix_lapacke/lapacke_nmatrix.h +16 -0
  149. data/ext/nmatrix_lapacke/make_lapacke_cpp.rb +9 -0
  150. data/ext/nmatrix_lapacke/math_lapacke.cpp +967 -0
  151. data/ext/nmatrix_lapacke/math_lapacke/cblas_local.h +576 -0
  152. data/ext/nmatrix_lapacke/math_lapacke/cblas_templates_lapacke.h +51 -0
  153. data/ext/nmatrix_lapacke/math_lapacke/lapacke_templates.h +356 -0
  154. data/ext/nmatrix_lapacke/nmatrix_lapacke.cpp +42 -0
  155. data/lib/nmatrix/lapack_ext_common.rb +69 -0
  156. data/lib/nmatrix/lapacke.rb +213 -0
  157. data/spec/00_nmatrix_spec.rb +730 -0
  158. data/spec/01_enum_spec.rb +190 -0
  159. data/spec/02_slice_spec.rb +389 -0
  160. data/spec/03_nmatrix_monkeys_spec.rb +78 -0
  161. data/spec/2x2_dense_double.mat +0 -0
  162. data/spec/4x4_sparse.mat +0 -0
  163. data/spec/4x5_dense.mat +0 -0
  164. data/spec/blas_spec.rb +193 -0
  165. data/spec/elementwise_spec.rb +303 -0
  166. data/spec/homogeneous_spec.rb +99 -0
  167. data/spec/io/fortran_format_spec.rb +88 -0
  168. data/spec/io/harwell_boeing_spec.rb +98 -0
  169. data/spec/io/test.rua +9 -0
  170. data/spec/io_spec.rb +149 -0
  171. data/spec/lapack_core_spec.rb +482 -0
  172. data/spec/leakcheck.rb +16 -0
  173. data/spec/math_spec.rb +730 -0
  174. data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
  175. data/spec/nmatrix_yale_spec.rb +286 -0
  176. data/spec/plugins/lapacke/lapacke_spec.rb +303 -0
  177. data/spec/rspec_monkeys.rb +56 -0
  178. data/spec/rspec_spec.rb +34 -0
  179. data/spec/shortcuts_spec.rb +310 -0
  180. data/spec/slice_set_spec.rb +157 -0
  181. data/spec/spec_helper.rb +140 -0
  182. data/spec/stat_spec.rb +203 -0
  183. data/spec/test.pcd +20 -0
  184. data/spec/utm5940.mtx +83844 -0
  185. metadata +262 -0
@@ -0,0 +1,43 @@
1
+ /*****************************************************************************
2
+ Copyright (c) 2010, Intel Corp.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * Redistributions of source code must retain the above copyright notice,
9
+ this list of conditions and the following disclaimer.
10
+ * Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+ * Neither the name of Intel Corporation nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27
+ THE POSSIBILITY OF SUCH DAMAGE.
28
+ ******************************************************************************
29
+ * Contents: Native C interface to LAPACK utility function
30
+ * Author: Intel Corporation
31
+ * Created in February, 2010
32
+ *****************************************************************************/
33
+ #include "lapacke_utils.h"
34
+
35
+ /* Check a matrix for NaN entries. */
36
+
37
+ lapack_logical LAPACKE_zpo_nancheck( int matrix_order, char uplo,
38
+ lapack_int n,
39
+ const lapack_complex_double *a,
40
+ lapack_int lda )
41
+ {
42
+ return LAPACKE_ztr_nancheck( matrix_order, uplo, 'n', n, a, lda );
43
+ }
@@ -0,0 +1,45 @@
1
+ /*****************************************************************************
2
+ Copyright (c) 2010, Intel Corp.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * Redistributions of source code must retain the above copyright notice,
9
+ this list of conditions and the following disclaimer.
10
+ * Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+ * Neither the name of Intel Corporation nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27
+ THE POSSIBILITY OF SUCH DAMAGE.
28
+ ******************************************************************************
29
+ * Contents: Native C interface to LAPACK utility function
30
+ * Author: Intel Corporation
31
+ * Created in February, 2010
32
+ *****************************************************************************/
33
+
34
+ #include "lapacke_utils.h"
35
+
36
+ /* Converts input symmetric matrix from row-major(C) to column-major(Fortran)
37
+ * layout or vice versa.
38
+ */
39
+
40
+ void LAPACKE_zpo_trans( int matrix_order, char uplo, lapack_int n,
41
+ const lapack_complex_double *in, lapack_int ldin,
42
+ lapack_complex_double *out, lapack_int ldout )
43
+ {
44
+ LAPACKE_ztr_trans( matrix_order, uplo, 'n', n, in, ldin, out, ldout );
45
+ }
@@ -0,0 +1,85 @@
1
+ /*****************************************************************************
2
+ Copyright (c) 2010, Intel Corp.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * Redistributions of source code must retain the above copyright notice,
9
+ this list of conditions and the following disclaimer.
10
+ * Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+ * Neither the name of Intel Corporation nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27
+ THE POSSIBILITY OF SUCH DAMAGE.
28
+ ******************************************************************************
29
+ * Contents: Native C interface to LAPACK utility function
30
+ * Author: Intel Corporation
31
+ * Created in February, 2010
32
+ *****************************************************************************/
33
+ #include "lapacke_utils.h"
34
+
35
+ /* Check a matrix for NaN entries. */
36
+
37
+ lapack_logical LAPACKE_ztr_nancheck( int matrix_order, char uplo, char diag,
38
+ lapack_int n,
39
+ const lapack_complex_double *a,
40
+ lapack_int lda )
41
+ {
42
+ lapack_int i, j, st;
43
+ lapack_logical colmaj, lower, unit;
44
+
45
+ if( a == NULL ) return (lapack_logical) 0;
46
+
47
+ colmaj = ( matrix_order == LAPACK_COL_MAJOR );
48
+ lower = LAPACKE_lsame( uplo, 'l' );
49
+ unit = LAPACKE_lsame( diag, 'u' );
50
+
51
+ if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) ||
52
+ ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) ||
53
+ ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) {
54
+ /* Just exit if any of input parameters are wrong */
55
+ return (lapack_logical) 0;
56
+ }
57
+ if( unit ) {
58
+ /* If unit, then don't touch diagonal, start from 1st column or row */
59
+ st = 1;
60
+ } else {
61
+ /* If non-unit, then check diagonal also, starting from [0,0] */
62
+ st = 0;
63
+ }
64
+
65
+ /* Since col_major upper and row_major lower are equal,
66
+ * and col_major lower and row_major upper are equals too -
67
+ * using one code for equal cases. XOR( colmaj, upper )
68
+ */
69
+ if( ( colmaj || lower ) && !( colmaj && lower ) ) {
70
+ for( j = st; j < n; j++ ) {
71
+ for( i = 0; i < MIN( j+1-st, lda ); i++ ) {
72
+ if( LAPACK_ZISNAN( a[i+j*lda] ) )
73
+ return (lapack_logical) 1;
74
+ }
75
+ }
76
+ } else {
77
+ for( j = 0; j < n-st; j++ ) {
78
+ for( i = j+st; i < MIN( n, lda ); i++ ) {
79
+ if( LAPACK_ZISNAN( a[i+j*lda] ) )
80
+ return (lapack_logical) 1;
81
+ }
82
+ }
83
+ }
84
+ return (lapack_logical) 0;
85
+ }
@@ -0,0 +1,85 @@
1
+ /*****************************************************************************
2
+ Copyright (c) 2010, Intel Corp.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * Redistributions of source code must retain the above copyright notice,
9
+ this list of conditions and the following disclaimer.
10
+ * Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+ * Neither the name of Intel Corporation nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27
+ THE POSSIBILITY OF SUCH DAMAGE.
28
+ ******************************************************************************
29
+ * Contents: Native C interface to LAPACK utility function
30
+ * Author: Intel Corporation
31
+ * Created in February, 2010
32
+ *****************************************************************************/
33
+
34
+ #include "lapacke_utils.h"
35
+
36
+ /* Converts input triangular matrix from row-major(C) to column-major(Fortran)
37
+ * layout or vice versa.
38
+ */
39
+
40
+ void LAPACKE_ztr_trans( int matrix_order, char uplo, char diag, lapack_int n,
41
+ const lapack_complex_double *in, lapack_int ldin,
42
+ lapack_complex_double *out, lapack_int ldout )
43
+ {
44
+ lapack_int i, j, st;
45
+ lapack_logical colmaj, lower, unit;
46
+
47
+ if( in == NULL || out == NULL ) return ;
48
+
49
+ colmaj = ( matrix_order == LAPACK_COL_MAJOR );
50
+ lower = LAPACKE_lsame( uplo, 'l' );
51
+ unit = LAPACKE_lsame( diag, 'u' );
52
+
53
+ if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) ||
54
+ ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) ||
55
+ ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) {
56
+ /* Just exit if any of input parameters are wrong */
57
+ return;
58
+ }
59
+ if( unit ) {
60
+ /* If unit, then don't touch diagonal, start from 1st column or row */
61
+ st = 1;
62
+ } else {
63
+ /* If non-unit, then check diagonal also, starting from [0,0] */
64
+ st = 0;
65
+ }
66
+
67
+ /* Perform conversion:
68
+ * Since col_major upper and row_major lower are equal,
69
+ * and col_major lower and row_major upper are equals too -
70
+ * using one code for equal cases. XOR( colmaj, upper )
71
+ */
72
+ if( ( colmaj || lower ) && !( colmaj && lower ) ) {
73
+ for( j = st; j < MIN( n, ldout ); j++ ) {
74
+ for( i = 0; i < MIN( j+1-st, ldin ); i++ ) {
75
+ out[ j+i*ldout ] = in[ i+j*ldin ];
76
+ }
77
+ }
78
+ } else {
79
+ for( j = 0; j < MIN( n-st, ldout ); j++ ) {
80
+ for( i = j+st; i < MIN( n, ldin ); i++ ) {
81
+ out[ j+i*ldout ] = in[ i+j*ldin ];
82
+ }
83
+ }
84
+ }
85
+ }
@@ -0,0 +1,16 @@
1
+ //need to define a few things before including the real lapacke.h
2
+
3
+ #include "data/data.h" //needed because this is where our complex types are defined
4
+
5
+ //tell LAPACKE to use our complex types
6
+ #define LAPACK_COMPLEX_CUSTOM
7
+ #define lapack_complex_float nm::Complex64
8
+ #define lapack_complex_double nm::Complex128
9
+
10
+ //define name-mangling scheme for FORTRAN functions
11
+ //ADD_ means that the symbol dgemm_ is associated with the fortran
12
+ //function DGEMM
13
+ #define ADD_
14
+
15
+ //now we can include the real lapacke.h
16
+ #include "lapacke.h"
@@ -0,0 +1,9 @@
1
+ #We want this to be a C++ file since our complex types require C++.
2
+
3
+ File.open("lapacke.cpp","w") do |file|
4
+ file.puts "//This file is auto-generated by make_lapacke_cpp.rb"
5
+ file.puts "//It includes all source files in the lapacke/ subdirectory"
6
+ Dir["lapacke/**/*.c"].each do |file2|
7
+ file.puts "#include \"#{file2}\""
8
+ end
9
+ end
@@ -0,0 +1,967 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math_lapacke.cpp
25
+ //
26
+ // Ruby-exposed CBLAS and LAPACK functions that call BLAS
27
+ // and LAPACKE functions.
28
+ //
29
+
30
+ #include "data/data.h"
31
+
32
+ #include "lapacke_nmatrix.h"
33
+
34
+ #include "math_lapacke/cblas_local.h"
35
+
36
+ #include "math/util.h"
37
+
38
+ #include "math_lapacke/cblas_templates_lapacke.h"
39
+
40
+ #include "math_lapacke/lapacke_templates.h"
41
+
42
+
43
+ /*
44
+ * Forward Declarations
45
+ */
46
+
47
+ extern "C" {
48
+ /* BLAS Level 1. */
49
+ static VALUE nm_lapacke_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
50
+ static VALUE nm_lapacke_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
51
+ static VALUE nm_lapacke_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
52
+ static VALUE nm_lapacke_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
53
+ static VALUE nm_lapacke_cblas_rotg(VALUE self, VALUE ab);
54
+ static VALUE nm_lapacke_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
55
+
56
+ /* BLAS Level 2. */
57
+ static VALUE nm_lapacke_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
58
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
59
+
60
+ /* BLAS Level 3. */
61
+ static VALUE nm_lapacke_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
62
+ VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
63
+ static VALUE nm_lapacke_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
64
+ VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
65
+ static VALUE nm_lapacke_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
66
+ VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
67
+ static VALUE nm_lapacke_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
68
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
69
+ static VALUE nm_lapacke_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
70
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
71
+
72
+ /* LAPACK. */
73
+ static VALUE nm_lapacke_lapacke_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
74
+ static VALUE nm_lapacke_lapacke_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
75
+ static VALUE nm_lapacke_lapacke_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
76
+ static VALUE nm_lapacke_lapacke_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
77
+ static VALUE nm_lapacke_lapacke_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
78
+ static VALUE nm_lapacke_lapacke_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
79
+
80
+ static VALUE nm_lapacke_lapacke_gesvd(VALUE self, VALUE order, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE superb);
81
+ static VALUE nm_lapacke_lapacke_gesdd(VALUE self, VALUE order, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt);
82
+ static VALUE nm_lapacke_lapacke_geev(VALUE self, VALUE order, VALUE jobvl, VALUE jobvr, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr);
83
+ }
84
+
85
+ extern "C" {
86
+
87
+ ///////////////////
88
+ // Ruby Bindings //
89
+ ///////////////////
90
+
91
+ void nm_math_init_lapack() {
92
+
93
+ VALUE cNMatrix_LAPACKE = rb_define_module_under(cNMatrix, "LAPACKE");
94
+
95
+ VALUE cNMatrix_LAPACKE_LAPACK = rb_define_module_under(cNMatrix_LAPACKE, "LAPACK");
96
+ VALUE cNMatrix_LAPACKE_BLAS = rb_define_module_under(cNMatrix_LAPACKE, "BLAS");
97
+
98
+ //BLAS Level 1
99
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_scal", (METHOD)nm_lapacke_cblas_scal, 4);
100
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_nrm2", (METHOD)nm_lapacke_cblas_nrm2, 3);
101
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_asum", (METHOD)nm_lapacke_cblas_asum, 3);
102
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_rot", (METHOD)nm_lapacke_cblas_rot, 7);
103
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_rotg", (METHOD)nm_lapacke_cblas_rotg, 1);
104
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_imax", (METHOD)nm_lapacke_cblas_imax, 3);
105
+
106
+ //BLAS Level 2
107
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_gemv", (METHOD)nm_lapacke_cblas_gemv, 11);
108
+
109
+ //BLAS Level 3
110
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_gemm", (METHOD)nm_lapacke_cblas_gemm, 14);
111
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_trsm", (METHOD)nm_lapacke_cblas_trsm, 12);
112
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_trmm", (METHOD)nm_lapacke_cblas_trmm, 12);
113
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_syrk", (METHOD)nm_lapacke_cblas_syrk, 11);
114
+ rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_herk", (METHOD)nm_lapacke_cblas_herk, 11);
115
+
116
+ /* LAPACK Functions */
117
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_getrf", (METHOD)nm_lapacke_lapacke_getrf, 5);
118
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_getrs", (METHOD)nm_lapacke_lapacke_getrs, 9);
119
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_getri", (METHOD)nm_lapacke_lapacke_getri, 5);
120
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_potrf", (METHOD)nm_lapacke_lapacke_potrf, 5);
121
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_potrs", (METHOD)nm_lapacke_lapacke_potrs, 8);
122
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_potri", (METHOD)nm_lapacke_lapacke_potri, 5);
123
+
124
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_gesvd", (METHOD)nm_lapacke_lapacke_gesvd, 13);
125
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_gesdd", (METHOD)nm_lapacke_lapacke_gesdd, 11);
126
+ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_geev", (METHOD)nm_lapacke_lapacke_geev, 12);
127
+ }
128
+
129
+ /*
130
+ * call-seq:
131
+ * NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix
132
+ *
133
+ * BLAS level 1 function +scal+. Works with all dtypes.
134
+ *
135
+ * Scale +vector+ in-place by +alpha+ and also return it. The operation is as
136
+ * follows:
137
+ * x <- alpha * x
138
+ *
139
+ * - +n+ -> Number of elements of +vector+.
140
+ * - +alpha+ -> Scalar value used in the operation.
141
+ * - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place.
142
+ * - +inc+ -> Increment used in the scaling function. Should generally be 1.
143
+ */
144
+ static VALUE nm_lapacke_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) {
145
+ nm::dtype_t dtype = NM_DTYPE(vector);
146
+
147
+ void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
148
+ rubyval_to_cval(alpha, dtype, scalar);
149
+
150
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_scal, void, const int n,
151
+ const void* scalar, void* x, const int incx);
152
+
153
+ ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements,
154
+ FIX2INT(incx));
155
+
156
+ return vector;
157
+ }
158
+
159
+ /*
160
+ * Call any of the cblas_xrotg functions as directly as possible.
161
+ *
162
+ * xROTG computes the elements of a Givens plane rotation matrix such that:
163
+ *
164
+ * | c s | | a | | r |
165
+ * | -s c | * | b | = | 0 |
166
+ *
167
+ * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
168
+ *
169
+ * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
170
+ *
171
+ * This function differs from most of the other raw BLAS accessors. Instead of
172
+ * providing a, b, c, s as arguments, you should only provide a and b (the
173
+ * inputs), and you should provide them as the first two elements of any dense
174
+ * NMatrix type.
175
+ *
176
+ * The outputs [c,s] will be returned in a Ruby Array at the end; the input
177
+ * NMatrix will also be modified in-place.
178
+ *
179
+ * This function, like the other cblas_ functions, does minimal type-checking.
180
+ */
181
+ static VALUE nm_lapacke_cblas_rotg(VALUE self, VALUE ab) {
182
+ static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
183
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
184
+ nm::math::lapacke::cblas_rotg<float>,
185
+ nm::math::lapacke::cblas_rotg<double>,
186
+ nm::math::lapacke::cblas_rotg<nm::Complex64>,
187
+ nm::math::lapacke::cblas_rotg<nm::Complex128>,
188
+ NULL //nm::math::lapacke::cblas_rotg<nm::RubyObject>
189
+ };
190
+
191
+ nm::dtype_t dtype = NM_DTYPE(ab);
192
+
193
+ if (!ttable[dtype]) {
194
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
195
+ return Qnil;
196
+
197
+ } else {
198
+ NM_CONSERVATIVE(nm_register_value(&self));
199
+ NM_CONSERVATIVE(nm_register_value(&ab));
200
+ void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
201
+ *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
202
+
203
+ // extract A and B from the NVector (first two elements)
204
+ void* pA = NM_STORAGE_DENSE(ab)->elements;
205
+ void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
206
+ // c and s are output
207
+
208
+ ttable[dtype](pA, pB, pC, pS);
209
+
210
+ VALUE result = rb_ary_new2(2);
211
+
212
+ if (dtype == nm::RUBYOBJ) {
213
+ rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
214
+ rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
215
+ } else {
216
+ rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
217
+ rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
218
+ }
219
+ NM_CONSERVATIVE(nm_unregister_value(&ab));
220
+ NM_CONSERVATIVE(nm_unregister_value(&self));
221
+ return result;
222
+ }
223
+ }
224
+
225
+
226
+ /*
227
+ * Call any of the cblas_xrot functions as directly as possible.
228
+ *
229
+ * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
230
+ *
231
+ * It's tough to find documentation on xROT. Here are what we think the arguments are for:
232
+ * * n :: number of elements to consider in x and y
233
+ * * x :: a vector (expects an NVector)
234
+ * * incx :: stride of x
235
+ * * y :: a vector (expects an NVector)
236
+ * * incy :: stride of y
237
+ * * c :: cosine of the angle of rotation
238
+ * * s :: sine of the angle of rotation
239
+ *
240
+ * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
241
+ * will be float for Complex64 or double for Complex128.
242
+ *
243
+ * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
244
+ * with its arguments?
245
+ *
246
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
247
+ * handling, so you can easily crash Ruby!
248
+ */
249
+ static VALUE nm_lapacke_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
250
+ static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
251
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
252
+ nm::math::lapacke::cblas_rot<float,float>,
253
+ nm::math::lapacke::cblas_rot<double,double>,
254
+ nm::math::lapacke::cblas_rot<nm::Complex64,float>,
255
+ nm::math::lapacke::cblas_rot<nm::Complex128,double>,
256
+ nm::math::lapacke::cblas_rot<nm::RubyObject,nm::RubyObject>
257
+ };
258
+
259
+ nm::dtype_t dtype = NM_DTYPE(x);
260
+
261
+
262
+ if (!ttable[dtype]) {
263
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
264
+ return Qfalse;
265
+ } else {
266
+ void *pC, *pS;
267
+
268
+ // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
269
+ if (dtype == nm::COMPLEX64) {
270
+ pC = NM_ALLOCA_N(float,1);
271
+ pS = NM_ALLOCA_N(float,1);
272
+ rubyval_to_cval(c, nm::FLOAT32, pC);
273
+ rubyval_to_cval(s, nm::FLOAT32, pS);
274
+ } else if (dtype == nm::COMPLEX128) {
275
+ pC = NM_ALLOCA_N(double,1);
276
+ pS = NM_ALLOCA_N(double,1);
277
+ rubyval_to_cval(c, nm::FLOAT64, pC);
278
+ rubyval_to_cval(s, nm::FLOAT64, pS);
279
+ } else {
280
+ pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
281
+ pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
282
+ rubyval_to_cval(c, dtype, pC);
283
+ rubyval_to_cval(s, dtype, pS);
284
+ }
285
+
286
+
287
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
288
+
289
+ return Qtrue;
290
+ }
291
+ }
292
+
293
+
294
+ /*
295
+ * Call any of the cblas_xnrm2 functions as directly as possible.
296
+ *
297
+ * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
298
+ *
299
+ * Arguments:
300
+ * * n :: length of x, must be at least 0
301
+ * * x :: pointer to first entry of input vector
302
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
303
+ *
304
+ * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
305
+ * with its arguments?
306
+ *
307
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
308
+ * handling, so you can easily crash Ruby!
309
+ */
310
+ static VALUE nm_lapacke_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
311
+
312
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
313
+ NULL, NULL, NULL, NULL, NULL, // no help for integers
314
+ nm::math::lapacke::cblas_nrm2<float32_t,float32_t>,
315
+ nm::math::lapacke::cblas_nrm2<float64_t,float64_t>,
316
+ nm::math::lapacke::cblas_nrm2<float32_t,nm::Complex64>,
317
+ nm::math::lapacke::cblas_nrm2<float64_t,nm::Complex128>,
318
+ nm::math::lapacke::cblas_nrm2<nm::RubyObject,nm::RubyObject>
319
+ };
320
+
321
+ nm::dtype_t dtype = NM_DTYPE(x);
322
+
323
+ if (!ttable[dtype]) {
324
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
325
+ return Qnil;
326
+
327
+ } else {
328
+ // Determine the return dtype and allocate it
329
+ nm::dtype_t rdtype = dtype;
330
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
331
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
332
+
333
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
334
+
335
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
336
+
337
+ return rubyobj_from_cval(Result, rdtype).rval;
338
+ }
339
+ }
340
+
341
+
342
+
343
+ /*
344
+ * Call any of the cblas_xasum functions as directly as possible.
345
+ *
346
+ * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
347
+ * of a vector x.
348
+ *
349
+ * Arguments:
350
+ * * n :: length of x, must be at least 0
351
+ * * x :: pointer to first entry of input vector
352
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
353
+ *
354
+ * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
355
+ * with its arguments?
356
+ *
357
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
358
+ * handling, so you can easily crash Ruby!
359
+ */
360
+ static VALUE nm_lapacke_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
361
+
362
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
363
+ nm::math::lapacke::cblas_asum<uint8_t,uint8_t>,
364
+ nm::math::lapacke::cblas_asum<int8_t,int8_t>,
365
+ nm::math::lapacke::cblas_asum<int16_t,int16_t>,
366
+ nm::math::lapacke::cblas_asum<int32_t,int32_t>,
367
+ nm::math::lapacke::cblas_asum<int64_t,int64_t>,
368
+ nm::math::lapacke::cblas_asum<float32_t,float32_t>,
369
+ nm::math::lapacke::cblas_asum<float64_t,float64_t>,
370
+ nm::math::lapacke::cblas_asum<float32_t,nm::Complex64>,
371
+ nm::math::lapacke::cblas_asum<float64_t,nm::Complex128>,
372
+ nm::math::lapacke::cblas_asum<nm::RubyObject,nm::RubyObject>
373
+ };
374
+
375
+ nm::dtype_t dtype = NM_DTYPE(x);
376
+
377
+ // Determine the return dtype and allocate it
378
+ nm::dtype_t rdtype = dtype;
379
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
380
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
381
+
382
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
383
+
384
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
385
+
386
+ return rubyobj_from_cval(Result, rdtype).rval;
387
+ }
388
+
389
+ /*
390
+ * call-seq:
391
+ * NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum
392
+ *
393
+ * BLAS level 1 routine.
394
+ *
395
+ * Return the index of the largest element of +vector+.
396
+ *
397
+ * - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols.
398
+ * - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype.
399
+ * - +inc+ -> It's the increment used when searching. Use 1 except if you know
400
+ * what you're doing.
401
+ */
402
+ static VALUE nm_lapacke_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) {
403
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_imax, int, const int n, const void* x, const int incx);
404
+
405
+ nm::dtype_t dtype = NM_DTYPE(x);
406
+
407
+ int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx));
408
+
409
+ // Convert to Ruby's Int value.
410
+ return INT2FIX(index);
411
+ }
412
+
413
+ /* Call any of the cblas_xgemv functions as directly as possible.
414
+ *
415
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
416
+ *
417
+ * y = alpha*op(A)*x + beta*y
418
+ *
419
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
420
+ *
421
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
422
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
423
+ * expose the ultra-optimized ATLAS versions.
424
+ *
425
+ * == Arguments
426
+ * See: http://www.netlib.org/blas/dgemm.f
427
+ *
428
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
429
+ * with its arguments?
430
+ *
431
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
432
+ * handling, so you can easily crash Ruby!
433
+ */
434
+ static VALUE nm_lapacke_cblas_gemv(VALUE self,
435
+ VALUE trans_a,
436
+ VALUE m, VALUE n,
437
+ VALUE alpha,
438
+ VALUE a, VALUE lda,
439
+ VALUE x, VALUE incx,
440
+ VALUE beta,
441
+ VALUE y, VALUE incy)
442
+ {
443
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)
444
+
445
+ nm::dtype_t dtype = NM_DTYPE(a);
446
+
447
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
448
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
449
+ rubyval_to_cval(alpha, dtype, pAlpha);
450
+ rubyval_to_cval(beta, dtype, pBeta);
451
+
452
+ return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
453
+ }
454
+
455
+ /* Call any of the cblas_xgemm functions as directly as possible.
456
+ *
457
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
458
+ *
459
+ * C = alpha*op(A)*op(B) + beta*C
460
+ *
461
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
462
+ *
463
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
464
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
465
+ * expose the ultra-optimized ATLAS versions.
466
+ *
467
+ * == Arguments
468
+ * See: http://www.netlib.org/blas/dgemm.f
469
+ *
470
+ * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
471
+ * with its arguments?
472
+ *
473
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
474
+ * handling, so you can easily crash Ruby!
475
+ */
476
+ static VALUE nm_lapacke_cblas_gemm(VALUE self,
477
+ VALUE order,
478
+ VALUE trans_a, VALUE trans_b,
479
+ VALUE m, VALUE n, VALUE k,
480
+ VALUE alpha,
481
+ VALUE a, VALUE lda,
482
+ VALUE b, VALUE ldb,
483
+ VALUE beta,
484
+ VALUE c, VALUE ldc)
485
+ {
486
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
487
+
488
+ nm::dtype_t dtype = NM_DTYPE(a);
489
+
490
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
491
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
492
+ rubyval_to_cval(alpha, dtype, pAlpha);
493
+ rubyval_to_cval(beta, dtype, pBeta);
494
+
495
+ ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
496
+
497
+ return c;
498
+ }
499
+
500
+
501
+ static VALUE nm_lapacke_cblas_trsm(VALUE self,
502
+ VALUE order,
503
+ VALUE side, VALUE uplo,
504
+ VALUE trans_a, VALUE diag,
505
+ VALUE m, VALUE n,
506
+ VALUE alpha,
507
+ VALUE a, VALUE lda,
508
+ VALUE b, VALUE ldb)
509
+ {
510
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
511
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
512
+ const int m, const int n, const void* alpha, const void* a,
513
+ const int lda, void* b, const int ldb) = {
514
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
515
+ nm::math::lapacke::cblas_trsm<float>,
516
+ nm::math::lapacke::cblas_trsm<double>,
517
+ cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
518
+ nm::math::lapacke::cblas_trsm<nm::RubyObject>
519
+ };
520
+
521
+ nm::dtype_t dtype = NM_DTYPE(a);
522
+
523
+ if (!ttable[dtype]) {
524
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
525
+ } else {
526
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
527
+ rubyval_to_cval(alpha, dtype, pAlpha);
528
+
529
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
530
+ }
531
+
532
+ return Qtrue;
533
+ }
534
+
535
+ static VALUE nm_lapacke_cblas_trmm(VALUE self,
536
+ VALUE order,
537
+ VALUE side, VALUE uplo,
538
+ VALUE trans_a, VALUE diag,
539
+ VALUE m, VALUE n,
540
+ VALUE alpha,
541
+ VALUE a, VALUE lda,
542
+ VALUE b, VALUE ldb)
543
+ {
544
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
545
+ const enum CBLAS_SIDE, const enum CBLAS_UPLO,
546
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
547
+ const int m, const int n, const void* alpha, const void* a,
548
+ const int lda, void* b, const int ldb) = {
549
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
550
+ nm::math::lapacke::cblas_trmm<float>,
551
+ nm::math::lapacke::cblas_trmm<double>,
552
+ cblas_ctrmm, cblas_ztrmm, // call directly, same function signature!
553
+ NULL
554
+ };
555
+
556
+ nm::dtype_t dtype = NM_DTYPE(a);
557
+
558
+ if (!ttable[dtype]) {
559
+ rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
560
+ } else {
561
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
562
+ rubyval_to_cval(alpha, dtype, pAlpha);
563
+
564
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
565
+ }
566
+
567
+ return b;
568
+ }
569
+
570
+ static VALUE nm_lapacke_cblas_syrk(VALUE self,
571
+ VALUE order,
572
+ VALUE uplo,
573
+ VALUE trans,
574
+ VALUE n, VALUE k,
575
+ VALUE alpha,
576
+ VALUE a, VALUE lda,
577
+ VALUE beta,
578
+ VALUE c, VALUE ldc)
579
+ {
580
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
581
+ const int n, const int k, const void* alpha, const void* a,
582
+ const int lda, const void* beta, void* c, const int ldc) = {
583
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
584
+ nm::math::lapacke::cblas_syrk<float>,
585
+ nm::math::lapacke::cblas_syrk<double>,
586
+ cblas_csyrk, cblas_zsyrk, // call directly, same function signature!
587
+ NULL
588
+ };
589
+
590
+ nm::dtype_t dtype = NM_DTYPE(a);
591
+
592
+ if (!ttable[dtype]) {
593
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
594
+ } else {
595
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
596
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
597
+ rubyval_to_cval(alpha, dtype, pAlpha);
598
+ rubyval_to_cval(beta, dtype, pBeta);
599
+
600
+ ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
601
+ }
602
+
603
+ return Qtrue;
604
+ }
605
+
606
+ static VALUE nm_lapacke_cblas_herk(VALUE self,
607
+ VALUE order,
608
+ VALUE uplo,
609
+ VALUE trans,
610
+ VALUE n, VALUE k,
611
+ VALUE alpha,
612
+ VALUE a, VALUE lda,
613
+ VALUE beta,
614
+ VALUE c, VALUE ldc)
615
+ {
616
+
617
+ nm::dtype_t dtype = NM_DTYPE(a);
618
+
619
+ if (dtype == nm::COMPLEX64) {
620
+ cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
621
+ } else if (dtype == nm::COMPLEX128) {
622
+ cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
623
+ } else
624
+ rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
625
+ return Qtrue;
626
+ }
627
+
628
+ /* Call any of the lapacke_xgetri functions as directly as possible.
629
+ *
630
+ * This version (the LAPACKE version) differs from the CLAPACK version in terms of the
631
+ * input it expects (which is the output of getrf). See getrf for details.
632
+ *
633
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
634
+ * handling, so you can easily crash Ruby!
635
+ *
636
+ * Returns an array giving the pivot indices (normally these are argument #5).
637
+ */
638
+ static VALUE nm_lapacke_lapacke_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
639
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
640
+ NULL, NULL, NULL, NULL, NULL,
641
+ nm::math::lapacke::lapacke_getri<float>,
642
+ nm::math::lapacke::lapacke_getri<double>,
643
+ nm::math::lapacke::lapacke_getri<nm::Complex64>,
644
+ nm::math::lapacke::lapacke_getri<nm::Complex128>,
645
+ NULL
646
+ };
647
+
648
+ // Allocate the C version of the pivot index array
649
+ int* ipiv_;
650
+ if (TYPE(ipiv) != T_ARRAY) {
651
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
652
+ } else {
653
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
654
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
655
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
656
+ }
657
+ }
658
+
659
+ if (!ttable[NM_DTYPE(a)]) {
660
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
661
+ } else {
662
+ ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
663
+ }
664
+
665
+ return a;
666
+ }
667
+
668
+ /* Call any of the lapacke_xgetrf functions as directly as possible.
669
+ *
670
+ * The lapacke_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
671
+ * matrix A using partial pivoting with row interchanges.
672
+ *
673
+ * The factorization has the form:
674
+ * A = P * L * U
675
+ * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
676
+ * and U is upper triangular (upper trapezoidal if m < n).
677
+ *
678
+ * This version of getrf (the LAPACKE one) differs from the CLAPACK version. The CLAPACK has
679
+ * different behavior for row-major matrices (the upper matrix has unit diagonals instead of
680
+ * the lower and it uses column permutations instead of rows).
681
+ *
682
+ * This is the right-looking level 3 BLAS version of the algorithm.
683
+ *
684
+ * == Arguments
685
+ * See: http://www.netlib.org/lapack/double/dgetrf.f
686
+ * (You don't need argument 5; this is the value returned by this function.)
687
+ *
688
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
689
+ * handling, so you can easily crash Ruby!
690
+ *
691
+ * Returns an array giving the pivot indices (normally these are argument #5).
692
+ */
693
+ static VALUE nm_lapacke_lapacke_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
694
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
695
+ NULL, NULL, NULL, NULL, NULL,
696
+ nm::math::lapacke::lapacke_getrf<float>,
697
+ nm::math::lapacke::lapacke_getrf<double>,
698
+ nm::math::lapacke::lapacke_getrf<nm::Complex64>,
699
+ nm::math::lapacke::lapacke_getrf<nm::Complex128>,
700
+ NULL
701
+ };
702
+
703
+ int M = FIX2INT(m),
704
+ N = FIX2INT(n);
705
+
706
+ // Allocate the pivot index array, which is of size MIN(M, N).
707
+ size_t ipiv_size = std::min(M,N);
708
+ int* ipiv = NM_ALLOCA_N(int, ipiv_size);
709
+
710
+ if (!ttable[NM_DTYPE(a)]) {
711
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
712
+ } else {
713
+ ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
714
+ }
715
+
716
+ // Result will be stored in a. We return ipiv as an array.
717
+ VALUE ipiv_array = rb_ary_new2(ipiv_size);
718
+ for (size_t i = 0; i < ipiv_size; ++i) {
719
+ rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
720
+ }
721
+
722
+ return ipiv_array;
723
+ }
724
+
725
+ /*
726
+ * Call any of the lapacke_xgetrs functions as directly as possible.
727
+ */
728
+ static VALUE nm_lapacke_lapacke_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
729
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, char Trans, const int N,
730
+ const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
731
+ const int ldb) = {
732
+ NULL, NULL, NULL, NULL, NULL,
733
+ nm::math::lapacke::lapacke_getrs<float>,
734
+ nm::math::lapacke::lapacke_getrs<double>,
735
+ nm::math::lapacke::lapacke_getrs<nm::Complex64>,
736
+ nm::math::lapacke::lapacke_getrs<nm::Complex128>,
737
+ NULL
738
+ };
739
+
740
+ // Allocate the C version of the pivot index array
741
+ int* ipiv_;
742
+ if (TYPE(ipiv) != T_ARRAY) {
743
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
744
+ } else {
745
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
746
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
747
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
748
+ }
749
+ }
750
+
751
+ if (!ttable[NM_DTYPE(a)]) {
752
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
753
+ } else {
754
+ ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
755
+ ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
756
+ }
757
+
758
+ // b is both returned and modified directly in the argument list.
759
+ return b;
760
+ }
761
+
762
+ /* Call any of the LAPACKE_xpotrf functions as directly as possible.
763
+ *
764
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
765
+ * handling, so you can easily crash Ruby!
766
+ */
767
+ static VALUE nm_lapacke_lapacke_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
768
+
769
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, char, const int n, void* a, const int lda) = {
770
+ NULL, NULL, NULL, NULL, NULL,
771
+ nm::math::lapacke::lapacke_potrf<float>,
772
+ nm::math::lapacke::lapacke_potrf<double>,
773
+ nm::math::lapacke::lapacke_potrf<nm::Complex64>,
774
+ nm::math::lapacke::lapacke_potrf<nm::Complex128>,
775
+ NULL
776
+ };
777
+
778
+ if (!ttable[NM_DTYPE(a)]) {
779
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
780
+ } else {
781
+ ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
782
+ }
783
+
784
+ return a;
785
+ }
786
+
787
+ /*
788
+ * Call any of the LAPACKE_xpotrs functions as directly as possible.
789
+ */
790
+ static VALUE nm_lapacke_lapacke_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
791
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, char Uplo, const int N,
792
+ const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
793
+ NULL, NULL, NULL, NULL, NULL,
794
+ nm::math::lapacke::lapacke_potrs<float>,
795
+ nm::math::lapacke::lapacke_potrs<double>,
796
+ nm::math::lapacke::lapacke_potrs<nm::Complex64>,
797
+ nm::math::lapacke::lapacke_potrs<nm::Complex128>,
798
+ NULL
799
+ };
800
+
801
+
802
+ if (!ttable[NM_DTYPE(a)]) {
803
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
804
+ } else {
805
+
806
+ ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
807
+ NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
808
+ }
809
+
810
+ // b is both returned and modified directly in the argument list.
811
+ return b;
812
+ }
813
+
814
+ /* Call any of the lapacke_xpotri functions as directly as possible.
815
+ *
816
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
817
+ * handling, so you can easily crash Ruby!
818
+ */
819
+ static VALUE nm_lapacke_lapacke_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
820
+
821
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, char, const int n, void* a, const int lda) = {
822
+ NULL, NULL, NULL, NULL, NULL,
823
+ nm::math::lapacke::lapacke_potri<float>,
824
+ nm::math::lapacke::lapacke_potri<double>,
825
+ nm::math::lapacke::lapacke_potri<nm::Complex64>,
826
+ nm::math::lapacke::lapacke_potri<nm::Complex128>,
827
+ NULL
828
+ };
829
+
830
+ if (!ttable[NM_DTYPE(a)]) {
831
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
832
+ } else {
833
+ ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
834
+ }
835
+
836
+ return a;
837
+ }
838
+
839
+ //badly need docs for gesvd, gesdd because of the real/complex mixing
840
+
841
+ /*
842
+ * xGESVD computes the singular value decomposition (SVD) of a real
843
+ * M-by-N matrix A, optionally computing the left and/or right singular
844
+ * vectors. The SVD is written
845
+ *
846
+ * A = U * SIGMA * transpose(V)
847
+ *
848
+ * where SIGMA is an M-by-N matrix which is zero except for its
849
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
850
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
851
+ * are the singular values of A; they are real and non-negative, and
852
+ * are returned in descending order. The first min(m,n) columns of
853
+ * U and V are the left and right singular vectors of A.
854
+ *
855
+ * Note that the routine returns V**T, not V.
856
+ */
857
+ static VALUE nm_lapacke_lapacke_gesvd(VALUE self, VALUE order, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE superb) {
858
+ static int (*gesvd_table[nm::NUM_DTYPES])(int, char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* superb) = {
859
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
860
+ nm::math::lapacke::lapacke_gesvd<float,float>,
861
+ nm::math::lapacke::lapacke_gesvd<double,double>,
862
+ nm::math::lapacke::lapacke_gesvd<nm::Complex64,float>,
863
+ nm::math::lapacke::lapacke_gesvd<nm::Complex128,double>,
864
+ NULL // no Ruby objects
865
+ };
866
+
867
+ nm::dtype_t dtype = NM_DTYPE(a);
868
+
869
+
870
+ if (!gesvd_table[dtype]) {
871
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
872
+ return Qfalse;
873
+ } else {
874
+ int M = FIX2INT(m),
875
+ N = FIX2INT(n);
876
+
877
+ char JOBU = lapack_svd_job_sym(jobu),
878
+ JOBVT = lapack_svd_job_sym(jobvt);
879
+
880
+ int info = gesvd_table[dtype](blas_order_sym(order),JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
881
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
882
+ NM_STORAGE_DENSE(superb)->elements);
883
+ return INT2FIX(info);
884
+ }
885
+ }
886
+
887
+ static VALUE nm_lapacke_lapacke_gesdd(VALUE self, VALUE order, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt) {
888
+ static int (*gesdd_table[nm::NUM_DTYPES])(int, char, int, int, void* a, int, void* s, void* u, int, void* vt, int) = {
889
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
890
+ nm::math::lapacke::lapacke_gesdd<float,float>,
891
+ nm::math::lapacke::lapacke_gesdd<double,double>,
892
+ nm::math::lapacke::lapacke_gesdd<nm::Complex64,float>,
893
+ nm::math::lapacke::lapacke_gesdd<nm::Complex128,double>,
894
+ NULL // no Ruby objects
895
+ };
896
+
897
+ nm::dtype_t dtype = NM_DTYPE(a);
898
+
899
+
900
+ if (!gesdd_table[dtype]) {
901
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
902
+ return Qfalse;
903
+ } else {
904
+ int M = FIX2INT(m),
905
+ N = FIX2INT(n);
906
+
907
+ char JOBZ = lapack_svd_job_sym(jobz);
908
+
909
+ int info = gesdd_table[dtype](blas_order_sym(order),JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
910
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt));
911
+ return INT2FIX(info);
912
+ }
913
+ }
914
+
915
+ /*
916
+ * GEEV computes for an N-by-N real nonsymmetric matrix A, the
917
+ * eigenvalues and, optionally, the left and/or right eigenvectors.
918
+ *
919
+ * The right eigenvector v(j) of A satisfies
920
+ * A * v(j) = lambda(j) * v(j)
921
+ * where lambda(j) is its eigenvalue.
922
+ *
923
+ * The left eigenvector u(j) of A satisfies
924
+ * u(j)**H * A = lambda(j) * u(j)**H
925
+ * where u(j)**H denotes the conjugate transpose of u(j).
926
+ *
927
+ * The computed eigenvectors are normalized to have Euclidean norm
928
+ * equal to 1 and largest component real.
929
+ */
930
+ //note on wi
931
+ static VALUE nm_lapacke_lapacke_geev(VALUE self, VALUE order, VALUE jobvl, VALUE jobvr, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr) {
932
+ static int (*geev_table[nm::NUM_DTYPES])(int, char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int) = {
933
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
934
+ nm::math::lapacke::lapacke_geev<float>,
935
+ nm::math::lapacke::lapacke_geev<double>,
936
+ nm::math::lapacke::lapacke_geev<nm::Complex64>,
937
+ nm::math::lapacke::lapacke_geev<nm::Complex128>,
938
+ NULL // no Ruby objects
939
+ };
940
+
941
+ nm::dtype_t dtype = NM_DTYPE(a);
942
+
943
+
944
+ if (!geev_table[dtype]) {
945
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
946
+ return Qfalse;
947
+ } else {
948
+ int N = FIX2INT(n);
949
+
950
+ char JOBVL = lapack_evd_job_sym(jobvl),
951
+ JOBVR = lapack_evd_job_sym(jobvr);
952
+
953
+ void* A = NM_STORAGE_DENSE(a)->elements;
954
+ void* W = NM_STORAGE_DENSE(w)->elements;
955
+ void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements; //For complex, wi should be nil
956
+ void* VL = JOBVL == 'V' ? NM_STORAGE_DENSE(vl)->elements : NULL;
957
+ void* VR = JOBVR == 'V' ? NM_STORAGE_DENSE(vr)->elements : NULL;
958
+
959
+ // Perform the actual calculation.
960
+ int info = geev_table[dtype](blas_order_sym(order), JOBVL, JOBVR, N, A, FIX2INT(lda), W, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr));
961
+
962
+ return INT2FIX(info);
963
+ }
964
+ }
965
+
966
+
967
+ }