nmatrix 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +27 -0
- data/.rspec +2 -0
- data/Gemfile +3 -5
- data/Guardfile +6 -0
- data/History.txt +33 -0
- data/Manifest.txt +41 -38
- data/README.rdoc +88 -11
- data/Rakefile +35 -53
- data/ext/nmatrix/data/complex.h +372 -0
- data/ext/nmatrix/data/data.cpp +275 -0
- data/ext/nmatrix/data/data.h +707 -0
- data/ext/nmatrix/data/rational.h +421 -0
- data/ext/nmatrix/data/ruby_object.h +446 -0
- data/ext/nmatrix/extconf.rb +101 -51
- data/ext/nmatrix/new_extconf.rb +56 -0
- data/ext/nmatrix/nmatrix.cpp +1609 -0
- data/ext/nmatrix/nmatrix.h +265 -849
- data/ext/nmatrix/ruby_constants.cpp +134 -0
- data/ext/nmatrix/ruby_constants.h +103 -0
- data/ext/nmatrix/storage/common.cpp +70 -0
- data/ext/nmatrix/storage/common.h +170 -0
- data/ext/nmatrix/storage/dense.cpp +665 -0
- data/ext/nmatrix/storage/dense.h +116 -0
- data/ext/nmatrix/storage/list.cpp +1088 -0
- data/ext/nmatrix/storage/list.h +129 -0
- data/ext/nmatrix/storage/storage.cpp +658 -0
- data/ext/nmatrix/storage/storage.h +99 -0
- data/ext/nmatrix/storage/yale.cpp +1601 -0
- data/ext/nmatrix/storage/yale.h +208 -0
- data/ext/nmatrix/ttable_helper.rb +126 -0
- data/ext/nmatrix/{yale/smmp1_header.template.c → types.h} +36 -9
- data/ext/nmatrix/util/io.cpp +295 -0
- data/ext/nmatrix/util/io.h +117 -0
- data/ext/nmatrix/util/lapack.h +1175 -0
- data/ext/nmatrix/util/math.cpp +557 -0
- data/ext/nmatrix/util/math.h +1363 -0
- data/ext/nmatrix/util/sl_list.cpp +475 -0
- data/ext/nmatrix/util/sl_list.h +255 -0
- data/ext/nmatrix/util/util.h +78 -0
- data/lib/nmatrix/blas.rb +70 -0
- data/lib/nmatrix/io/mat5_reader.rb +567 -0
- data/lib/nmatrix/io/mat_reader.rb +162 -0
- data/lib/{string.rb → nmatrix/monkeys.rb} +49 -2
- data/lib/nmatrix/nmatrix.rb +199 -0
- data/lib/nmatrix/nvector.rb +103 -0
- data/lib/nmatrix/version.rb +27 -0
- data/lib/nmatrix.rb +22 -230
- data/nmatrix.gemspec +59 -0
- data/scripts/mac-brew-gcc.sh +47 -0
- data/spec/4x4_sparse.mat +0 -0
- data/spec/4x5_dense.mat +0 -0
- data/spec/blas_spec.rb +47 -0
- data/spec/elementwise_spec.rb +164 -0
- data/spec/io_spec.rb +60 -0
- data/spec/lapack_spec.rb +52 -0
- data/spec/math_spec.rb +96 -0
- data/spec/nmatrix_spec.rb +93 -89
- data/spec/nmatrix_yale_spec.rb +52 -36
- data/spec/nvector_spec.rb +1 -1
- data/spec/slice_spec.rb +257 -0
- data/spec/spec_helper.rb +51 -0
- data/spec/utm5940.mtx +83844 -0
- metadata +113 -71
- data/.autotest +0 -23
- data/.gemtest +0 -0
- data/ext/nmatrix/cblas.c +0 -150
- data/ext/nmatrix/dense/blas_header.template.c +0 -52
- data/ext/nmatrix/dense/elementwise.template.c +0 -107
- data/ext/nmatrix/dense/gemm.template.c +0 -159
- data/ext/nmatrix/dense/gemv.template.c +0 -130
- data/ext/nmatrix/dense/rationalmath.template.c +0 -68
- data/ext/nmatrix/dense.c +0 -307
- data/ext/nmatrix/depend +0 -18
- data/ext/nmatrix/generator/syntax_tree.rb +0 -481
- data/ext/nmatrix/generator.rb +0 -594
- data/ext/nmatrix/list.c +0 -774
- data/ext/nmatrix/nmatrix.c +0 -1977
- data/ext/nmatrix/rational.c +0 -98
- data/ext/nmatrix/yale/complexmath.template.c +0 -71
- data/ext/nmatrix/yale/elementwise.template.c +0 -46
- data/ext/nmatrix/yale/elementwise_op.template.c +0 -73
- data/ext/nmatrix/yale/numbmm.template.c +0 -94
- data/ext/nmatrix/yale/smmp1.template.c +0 -21
- data/ext/nmatrix/yale/smmp2.template.c +0 -43
- data/ext/nmatrix/yale/smmp2_header.template.c +0 -46
- data/ext/nmatrix/yale/sort_columns.template.c +0 -56
- data/ext/nmatrix/yale/symbmm.template.c +0 -54
- data/ext/nmatrix/yale/transp.template.c +0 -68
- data/ext/nmatrix/yale.c +0 -726
- data/lib/array.rb +0 -67
- data/spec/syntax_tree_spec.rb +0 -46
@@ -1,159 +0,0 @@
|
|
1
|
-
|
2
|
-
// General matrix-matrix multiply template: C := alpha*op(A)*op(B) + beta*C, where op(X) is either X
// or its transpose, as selected by TransA / TransB.
//
// Every operand is addressed as i + j*ld (first index varies fastest), so op(A) is M x K, op(B) is
// K x N and C is M x N, with leading dimensions lda, ldb and ldc respectively.
//
// CblasConjTrans passes the argument check but takes the same code path as CblasTrans; no
// conjugation is applied here.
//
// Returns 0 in every case: after the product has been formed, on a quick return, and also after an
// argument error (which is only reported on stderr).
int %%TYPE_ABBREV%%gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB,
  const int M, const int N, const int K, const %%TYPE%% alpha,
  const %%TYPE%%* A, const int lda,
  const %%TYPE%%* B, const int ldb, const %%TYPE%% beta,
  %%TYPE%%* C, const int ldc)
{
  int num_rows_a, num_rows_b; // nrowa, nrowb

  // use longest possible type for intermediate value storage:
  %%TYPE_LONG%% temp;
  %%= if [:rational,:complex,:value].include?(dtype.type); "#{dtype.long_dtype.sizeof} temp1, temp2;"; end%%
  int i, j, l;

  // Number of rows of A and B as stored (before any transposition).
  if (TransA == CblasNoTrans) num_rows_a = M;
  else                        num_rows_a = K;

  if (TransB == CblasNoTrans) num_rows_b = K;
  else                        num_rows_b = N;

  // Test the input parameters. The transpose flags are range-checked against the first and last
  // enumerators instead of their raw numeric values.
  if (TransA < CblasNoTrans || TransA > CblasConjTrans) {
    fprintf(stderr, "GEMM: TransA must be CblasNoTrans, CblasTrans, or CblasConjTrans\n");
    return 0;
  } else if (TransB < CblasNoTrans || TransB > CblasConjTrans) {
    fprintf(stderr, "GEMM: TransB must be CblasNoTrans, CblasTrans, or CblasConjTrans\n");
    return 0;
  } else if (M < 0) {
    fprintf(stderr, "GEMM: Expected M >= 0\n");
    return 0;
  } else if (N < 0) {
    fprintf(stderr, "GEMM: Expected N >= 0\n");
    return 0;
  } else if (K < 0) {
    fprintf(stderr, "GEMM: Expected K >= 0\n");
    return 0;
  } else if (lda < NM_MAX(1, num_rows_a)) {
    fprintf(stderr, "GEMM: Expected lda >= max(1, num_rows_a), with num_rows_a = %d; got lda=%d\n", num_rows_a, lda);
    return 0;
  } else if (ldb < NM_MAX(1, num_rows_b)) {
    fprintf(stderr, "GEMM: Expected ldb >= max(1, num_rows_b), with num_rows_b = %d; got ldb=%d\n", num_rows_b, ldb);
    return 0;
  } else if (ldc < NM_MAX(1,M)) {
    fprintf(stderr, "GEMM: Expected ldc >= max(1,M) with M=%d; got ldc=%d\n", M, ldc);
    return 0;
  }

  // Quick return if possible: empty result, or the product term vanishes while beta leaves C as is.
  if (!M || !N || (%%TYPE alpha == 0%% || !K) && %%TYPE beta == 1%%) return 0;

  // For alpha = 0 only the beta*C term remains.
  if (%%TYPE alpha == 0%%) {
    if (%%TYPE beta == 0%%) {
      for (j = 0; j < N; ++j)
        for (i = 0; i < M; ++i) {
          %%TYPE C[i+j*ldc] = 0%%
        }
    } else {
      for (j = 0; j < N; ++j)
        for (i = 0; i < M; ++i) {
          %%TYPE C[i+j*ldc] *= beta%%
        }
    }
    return 0;
  }

  // Start the operations
  if (TransB == CblasNoTrans) {
    if (TransA == CblasNoTrans) {

      // C = alpha*A*B+beta*C
      for (j = 0; j < N; ++j) {
        // Scale column j of C by beta first, then accumulate into it.
        if (%%TYPE beta == 0%%) {
          for (i = 0; i < M; ++i) {
            %%TYPE C[i+j*ldc] = 0%%
          }
        } else if (%%TYPE beta != 1%%) {
          for (i = 0; i < M; ++i) {
            %%TYPE C[i+j*ldc] *= beta%%
          }
        }

        for (l = 0; l < K; ++l) {
          // Zero entries of B contribute nothing; skip the inner loop for them.
          if (%%TYPE B[l+j*ldb] != 0%%) {
            %%TYPE_LONG temp = alpha * B[l+j*ldb]%%
            for (i = 0; i < M; ++i) {
              %%TYPE C[i+j*ldc] += A[i+l*lda] * temp%%
            }
          }
        }
      }

    } else {

      // C = alpha*A**T*B + beta*C
      for (j = 0; j < N; ++j) {
        for (i = 0; i < M; ++i) {
          %%TYPE temp = 0%%
          for (l = 0; l < K; ++l) {
            %%TYPE_LONG temp += A[l+i*lda] * B[l+j*ldb]%%
          }

          if (%%TYPE beta == 0%%) {
            %%TYPE C[i+j*ldc] = alpha*temp%%
          } else {
            %%TYPE C[i+j*ldc] = alpha*temp + beta*C[i+j*ldc]%%
          }
        }
      }

    }

  } else if (TransA == CblasNoTrans) {

    // C = alpha*A*B**T + beta*C
    for (j = 0; j < N; ++j) {
      if (%%TYPE beta == 0%%) {
        for (i = 0; i < M; ++i) {
          %%TYPE C[i+j*ldc] = 0%%
        }
      } else if (%%TYPE beta != 1%%) {
        for (i = 0; i < M; ++i) {
          %%TYPE C[i+j*ldc] *= beta%%
        }
      }

      for (l = 0; l < K; ++l) {
        if (%%TYPE B[j+l*ldb] != 0%%) {
          %%TYPE_LONG temp = alpha * B[j+l*ldb]%%
          for (i = 0; i < M; ++i) {
            %%TYPE C[i+j*ldc] += A[i+l*lda] * temp%%
          }
        }
      }

    }

  } else {

    // C = alpha*A**T*B**T + beta*C
    for (j = 0; j < N; ++j) {
      for (i = 0; i < M; ++i) {
        %%TYPE temp = 0%%
        for (l = 0; l < K; ++l) {
          %%TYPE_LONG temp += A[l+i*lda] * B[j+l*ldb]%%
        }

        if (%%TYPE beta == 0%%) {
          %%TYPE C[i+j*ldc] = alpha*temp%%
        } else {
          %%TYPE C[i+j*ldc] = alpha*temp + beta*C[i+j*ldc]%%
        }
      }
    }

  }

  return 0;
}
|
159
|
-
|
@@ -1,130 +0,0 @@
|
|
1
|
-
|
2
|
-
// General matrix-vector multiply template:
//   y := alpha*A*x    + beta*y   when Trans == CblasNoTrans
//   y := alpha*A**T*x + beta*y   otherwise
//
// A is read as A[j + i*lda] with i in [0, M) and j in [0, N). X and Y are walked with strides incX
// and incY; for a negative stride the walk starts from the far end of the vector (see kx / ky).
//
// CblasConjTrans passes the argument check but takes the same code path as CblasTrans.
//
// Returns 0 in every case, including after an argument error (which is only reported on stderr).
int %%TYPE_ABBREV%%gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const %%TYPE%% alpha,
  const %%TYPE%%* A, const size_t lda, const %%TYPE%%* X, const int incX, const %%TYPE%% beta, %%TYPE%%* Y, const int incY)
{
  size_t lenX, lenY, i, j;
  int kx, ky, iy, jx, jy, ix;
  %%TYPE_LONG%% temp;
  %%= if [:rational,:complex,:value].include?(dtype.type); "#{dtype.long_dtype.sizeof} temp1;"; end%%

  // Test the input parameters
  if (Trans < CblasNoTrans || Trans > CblasConjTrans) {
    fprintf(stderr, "IGEMV: TransA must be CblasNoTrans, CblasTrans, or CblasConjTrans\n");
    return 0;
  } else if (lda < NM_MAX(1, N)) {
    // N and lda are size_t: print them through unsigned long rather than %d.
    fprintf(stderr, "IGEMV: Expected lda >= max(1, N), with N = %lu; got lda=%lu\n", (unsigned long)N, (unsigned long)lda);
    return 0;
  } else if (incX == 0) {
    fprintf(stderr, "IGEMV: Expected incX != 0\n");
    return 0;
  } else if (incY == 0) {
    fprintf(stderr, "IGEMV: Expected incY != 0\n");
    return 0;
  }

  // Quick return if possible
  if (!M || !N || %%TYPE alpha == 0%% && %%TYPE beta == 1%%) return 0;

  // Logical lengths of x and y depend on whether A is used transposed.
  if (Trans == CblasNoTrans) {
    lenX = N;
    lenY = M;
  } else {
    lenX = M;
    lenY = N;
  }

  // Starting offsets: 0 for a forward walk, the last element for a backward walk.
  if (incX > 0) kx = 0;
  else          kx = (lenX - 1) * -incX;

  if (incY > 0) ky = 0;
  else          ky = (lenY - 1) * -incY;

  // Start the operations. In this version, the elements of A are accessed sequentially with one pass through A.
  // First form y := beta*y.
  if (%%TYPE beta != 1%%) {
    if (incY == 1) {
      if (%%TYPE beta == 0%%) {
        for (i = 0; i < lenY; ++i) {
          %%TYPE Y[i] = 0%%
        }
      } else {
        for (i = 0; i < lenY; ++i) {
          %%TYPE Y[i] *= beta%%
        }
      }
    } else {
      iy = ky;
      if (%%TYPE beta == 0%%) {
        for (i = 0; i < lenY; ++i) {
          %%TYPE Y[iy] = 0%%
          iy += incY;
        }
      } else {
        for (i = 0; i < lenY; ++i) {
          %%TYPE Y[iy] *= beta%%
          iy += incY;
        }
      }
    }
  }

  if (%%TYPE alpha == 0%%) return 0;

  if (Trans == CblasNoTrans) {

    // Form y := alpha*A*x + y.
    jx = kx;
    if (incY == 1) {
      for (j = 0; j < N; ++j) {
        // Zero entries of x contribute nothing; skip the inner loop for them.
        if (%%TYPE X[jx] != 0%%) {
          %%TYPE_LONG temp = alpha * X[jx]%%
          for (i = 0; i < M; ++i) {
            %%TYPE Y[i] += A[j+i*lda] * temp%%
          }
        }
        jx += incX;
      }
    } else {
      for (j = 0; j < N; ++j) {
        if (%%TYPE X[jx] != 0%%) {
          %%TYPE_LONG temp = alpha * X[jx]%%
          iy = ky;
          for (i = 0; i < M; ++i) {
            %%TYPE Y[iy] += A[j+i*lda] * temp%%
            iy += incY;
          }
        }
        jx += incX;
      }
    }

  } else { // TODO: Check that indices are correct! They're switched for C.

    // Form y := alpha*A**T*x + y.
    jy = ky;

    if (incX == 1) {
      for (j = 0; j < N; ++j) {
        %%TYPE temp = 0%%
        for (i = 0; i < M; ++i) {
          // x is indexed by i here, matching the strided branch below where ix advances with i.
          %%TYPE_LONG temp += A[j+i*lda]*X[i]%%
        }
        %%TYPE Y[jy] += alpha * temp%%
        jy += incY;
      }
    } else {
      for (j = 0; j < N; ++j) {
        %%TYPE temp = 0%%
        ix = kx;
        for (i = 0; i < M; ++i) {
          %%TYPE_LONG temp += A[j+i*lda] * X[ix]%%
          ix += incX;
        }

        %%TYPE Y[jy] += alpha * temp%%
        jy += incY;
      }
    }
  }

  return 0;
} // end of GEMV
|
@@ -1,68 +0,0 @@
|
|
1
|
-
|
2
|
-
// Wrap a boolean in the templated rational type: the numerator receives the truth value and the
// denominator is fixed at 1.
inline %%TYPE%% BOOL2%%= dtype.id.to_s.upcase%%(bool expr) {
  %%TYPE%% as_rational;

  as_rational.d = 1;
  as_rational.n = expr;

  return as_rational;
}
|
8
|
-
|
9
|
-
// Logical NOT of a rational given as numerator n and denominator d. The result is the rational
// (!n)/1; the denominator argument does not influence it.
inline %%TYPE%% %%TYPE_ABBREV%%_bang(%%= dtype.sym == :rational128 ? "int64_t n, int64_t d" : (dtype.sym == :rational64 ? "int32_t n, int32_t d" : "int16_t n, int16_t d")%%)
{
  const int numerator_is_zero = !n;
  %%TYPE%% negated = {numerator_is_zero, 1};

  return negated;
}
|
14
|
-
|
15
|
-
// Arithmetic negation of the rational n/d.
//
// Only the numerator changes sign. Flipping the sign of both parts (as this function used to do)
// yields (-n)/(-d), which is the same value as n/d rather than its negation.
inline %%TYPE%% %%TYPE_ABBREV%%_negate(%%= dtype.sym == :rational128 ? "int64_t n, int64_t d" : (dtype.sym == :rational64 ? "int32_t n, int32_t d" : "int16_t n, int16_t d")%%)
{
  %%TYPE%% result = {-n, d};
  return result;
}
|
20
|
-
|
21
|
-
// Multiply (k != '/') or divide (k == '/') the rationals anum/aden and bnum/bden.
//
// Division is carried out as multiplication by the reciprocal of b. Cross-reducing each numerator
// against the opposite denominator before multiplying keeps the intermediate products small.
inline %%TYPE%% %%TYPE_ABBREV%%_muldiv(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k) {
  %%TYPE%% result;
  int64_t gcd_an_bd, gcd_ad_bn;

  if (k == '/') {
    int64_t swap;

    // Move a negative sign from b's numerator onto a, so the value that becomes the new
    // denominator is not negative.
    if (bnum < 0) {
      anum = -anum;
      bnum = -bnum;
    }

    // Take the reciprocal of b.
    swap = bden;
    bden = bnum;
    bnum = swap;
  }

  gcd_an_bd = nmrb_gcd(anum, bden);
  gcd_ad_bn = nmrb_gcd(aden, bnum);

  result.n = (anum / gcd_an_bd) * (bnum / gcd_ad_bn);
  result.d = (aden / gcd_ad_bn) * (bden / gcd_an_bd);

  return result;
}
|
43
|
-
|
44
|
-
// Add (k == '+') or subtract (any other k) the rationals anum/aden and bnum/bden.
//
// Both operands are brought onto the common denominator aden*bden/gcd(aden, bden); the combined
// numerator c is then reduced against that gcd before the result is assembled.
inline %%TYPE%% %%TYPE_ABBREV%%_addsub(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k) {
  %%TYPE%% result;

  int64_t ig = nmrb_gcd(aden, bden);
  int64_t a  = anum * (bden / ig);
  int64_t b  = bnum * (aden / ig);
  int64_t c  = (k == '+') ? a + b : a - b;

  b  = aden / ig;

  // Reduce against the combined numerator c. Using aden here would simply give back ig (it already
  // divides aden), and c / ig would then truncate whenever ig does not divide c -- for example
  // 1/2 + 1/4 would come out as 1/2 instead of 3/4.
  ig = nmrb_gcd(c, ig);

  result.n = c / ig;
  a        = bden / ig;
  result.d = a * b;

  return result;
}
|
63
|
-
|
64
|
-
// Remainder of the rational a = anum/aden with respect to b = bnum/bden, computed as
// a - (b * int(a/b)), where int() is C integer division of the cross products.
//
// The parameter types can be as narrow as int16_t or int32_t, so every product is formed in
// int64_t; multiplying first and widening afterwards can overflow for the 32-bit variant.
inline %%TYPE%% %%TYPE_ABBREV%%_mod(%%= dtype.sym == :rational128 ? "int64_t anum, int64_t aden, int64_t bnum, int64_t bden" : (dtype.sym == :rational64 ? "int32_t anum, int32_t aden, int32_t bnum, int32_t bden" : "int16_t anum, int16_t aden, int16_t bnum, int16_t bden")%%)
{
  // int(a/b) == (anum * bden) / (aden * bnum)
  const int64_t whole_quotient = ((int64_t)anum * (int64_t)bden) / ((int64_t)aden * (int64_t)bnum);

  return %%TYPE_ABBREV%%_addsub(anum, aden, (int64_t)bnum * whole_quotient, bden, '-');
}
|
data/ext/nmatrix/dense.c
DELETED
@@ -1,307 +0,0 @@
|
|
1
|
-
/////////////////////////////////////////////////////////////////////
|
2
|
-
// = NMatrix
|
3
|
-
//
|
4
|
-
// A linear algebra library for scientific computation in Ruby.
|
5
|
-
// NMatrix is part of SciRuby.
|
6
|
-
//
|
7
|
-
// NMatrix was originally inspired by and derived from NArray, by
|
8
|
-
// Masahiro Tanaka: http://narray.rubyforge.org
|
9
|
-
//
|
10
|
-
// == Copyright Information
|
11
|
-
//
|
12
|
-
// SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
|
13
|
-
// NMatrix is Copyright (c) 2012, Ruby Science Foundation
|
14
|
-
//
|
15
|
-
// Please see LICENSE.txt for additional copyright notices.
|
16
|
-
//
|
17
|
-
// == Contributing
|
18
|
-
//
|
19
|
-
// By contributing source code to SciRuby, you agree to be bound by
|
20
|
-
// our Contributor Agreement:
|
21
|
-
//
|
22
|
-
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
23
|
-
//
|
24
|
-
// == dense.c
|
25
|
-
//
|
26
|
-
// Dense n-dimensional matrix storage.
|
27
|
-
|
28
|
-
#ifndef DENSE_C
|
29
|
-
#define DENSE_C
|
30
|
-
|
31
|
-
#include <ruby.h>
|
32
|
-
|
33
|
-
#include "nmatrix.h"
|
34
|
-
|
35
|
-
|
36
|
-
/* Calculate the number of elements in the dense storage structure, based on shape and rank */
|
37
|
-
size_t count_dense_storage_elements(const DENSE_STORAGE* s) {
|
38
|
-
size_t i;
|
39
|
-
size_t count = 1;
|
40
|
-
for (i = 0; i < s->rank; ++i) count *= s->shape[i];
|
41
|
-
return count;
|
42
|
-
}
|
43
|
-
|
44
|
-
|
45
|
-
// Do these two dense matrices of the same dtype have exactly the same contents?
|
46
|
-
bool dense_storage_eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
|
47
|
-
return !memcmp(left->elements, right->elements, count_dense_storage_elements(left) / nm_sizeof[left->dtype]);
|
48
|
-
}
|
49
|
-
|
50
|
-
|
51
|
-
// Flatten an n-dimensional coordinate into an element index into s->elements.
//
// The index is sum over k of coords[k] * (product of shape[l] for l > k), i.e. the last dimension
// varies fastest. It is evaluated by Horner's rule in a single pass over the dimensions, which
// gives the same value as expanding each product separately.
size_t dense_storage_pos(DENSE_STORAGE* s, size_t* coords) {
  size_t k;
  size_t pos = 0;

  for (k = 0; k < s->rank; ++k) {
    pos = pos * s->shape[k] + coords[k];
  }

  return pos;
}
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
// Return a pointer to the element stored at `coords`. The pointer aims into the storage's own
// element buffer; nothing is copied.
void* dense_storage_get(DENSE_STORAGE* s, size_t* coords) {
  char*  base        = (char*)(s->elements);
  size_t byte_offset = dense_storage_pos(s, coords) * nm_sizeof[s->dtype];

  return base + byte_offset;
}
|
69
|
-
|
70
|
-
|
71
|
-
/* Does not free passed-in value! Different from list_storage_insert. */
|
72
|
-
void dense_storage_set(DENSE_STORAGE* s, size_t* coords, void* val) {
|
73
|
-
memcpy((char*)(s->elements) + dense_storage_pos(s, coords) * nm_sizeof[s->dtype], val, nm_sizeof[s->dtype]);
|
74
|
-
}
|
75
|
-
|
76
|
-
|
77
|
-
// Deep-copy a dense storage: a fresh shape array and a fresh element buffer with the same dtype,
// rank and contents as `rhs`. Returns NULL if the shape array could not be allocated.
DENSE_STORAGE* copy_dense_storage(DENSE_STORAGE* rhs) {
  DENSE_STORAGE* duplicate;
  size_t  n_elements = count_dense_storage_elements(rhs);
  size_t  dim;
  size_t* shape_copy = ALLOC_N(size_t, rhs->rank);

  if (shape_copy == NULL) return NULL;

  // The new storage keeps the shape pointer it is given, so it needs its own copy.
  for (dim = rhs->rank; dim-- > 0; ) {
    shape_copy[dim] = rhs->shape[dim];
  }

  duplicate = create_dense_storage(rhs->dtype, shape_copy, rhs->rank, NULL, 0);

  // Copy the raw element bytes only if there is a destination and something to copy.
  if (duplicate != NULL && n_elements != 0) {
    memcpy(duplicate->elements, rhs->elements, nm_sizeof[rhs->dtype] * n_elements);
  }

  return duplicate;
}
|
94
|
-
|
95
|
-
|
96
|
-
// Copy a dense storage while converting its elements to `new_dtype`.
//
// When the dtype is unchanged the element bytes are copied directly; otherwise the conversion is
// delegated to the SetFuncs entry for the (destination, source) dtype pair. Returns NULL if the
// shape array could not be allocated.
DENSE_STORAGE* cast_copy_dense_storage(DENSE_STORAGE* rhs, int8_t new_dtype) {
  DENSE_STORAGE* converted;
  size_t  n_elements = count_dense_storage_elements(rhs);
  size_t  dim;
  size_t* shape_copy = ALLOC_N(size_t, rhs->rank);

  if (shape_copy == NULL) return NULL;

  // The new storage keeps the shape pointer it is given, so it needs its own copy.
  for (dim = rhs->rank; dim-- > 0; ) {
    shape_copy[dim] = rhs->shape[dim];
  }

  converted = create_dense_storage(new_dtype, shape_copy, rhs->rank, NULL, 0);

  // Nothing to transfer without a destination or without elements.
  if (converted == NULL || n_elements == 0) return converted;

  if (converted->dtype != rhs->dtype) {
    SetFuncs[converted->dtype][rhs->dtype](n_elements, converted->elements, nm_sizeof[converted->dtype], rhs->elements, nm_sizeof[rhs->dtype]);
  } else {
    memcpy(converted->elements, rhs->elements, nm_sizeof[rhs->dtype] * n_elements);
  }

  return converted;
}
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
// Copy a set of default values into dense
|
120
|
-
static inline void cast_copy_dense_list_default(void* lhs, void* default_val, int8_t l_dtype, int8_t r_dtype, size_t* pos, const size_t* shape, size_t rank, size_t max_elements, size_t recursions) {
|
121
|
-
size_t i;
|
122
|
-
|
123
|
-
for (i = 0; i < shape[rank-1-recursions]; ++i, ++(*pos)) {
|
124
|
-
//fprintf(stderr, "default: pos = %u, dim = %u\t", *pos, shape[rank-1-recursions]);
|
125
|
-
|
126
|
-
if (recursions == 0) { cast_copy_value_single((char*)lhs + (*pos)*nm_sizeof[l_dtype], default_val, l_dtype, r_dtype); fprintf(stderr, "zero\n"); }
|
127
|
-
else { cast_copy_dense_list_default(lhs, default_val, l_dtype, r_dtype, pos, shape, rank, max_elements, recursions-1); fprintf(stderr, "column of zeros\n"); }
|
128
|
-
}
|
129
|
-
--(*pos);
|
130
|
-
}
|
131
|
-
|
132
|
-
|
133
|
-
// Copy list contents into dense recursively
|
134
|
-
static void cast_copy_dense_list_contents(void* lhs, const LIST* rhs, void* default_val, int8_t l_dtype, int8_t r_dtype, size_t* pos, const size_t* shape, size_t rank, size_t max_elements, size_t recursions) {
|
135
|
-
NODE *curr = rhs->first;
|
136
|
-
int last_key = -1;
|
137
|
-
size_t i = 0;
|
138
|
-
|
139
|
-
for (i = 0; i < shape[rank-1-recursions]; ++i, ++(*pos)) {
|
140
|
-
|
141
|
-
if (!curr || (curr->key > (size_t)(last_key+1))) {
|
142
|
-
//fprintf(stderr, "pos = %u, dim = %u, curr->key XX, last_key+1 = %d\t", *pos, shape[rank-1-recursions], last_key+1);
|
143
|
-
if (recursions == 0) cast_copy_value_single((char*)lhs + (*pos)*nm_sizeof[l_dtype], default_val, l_dtype, r_dtype); //fprintf(stderr, "zero\n"); }
|
144
|
-
else cast_copy_dense_list_default(lhs, default_val, l_dtype, r_dtype, pos, shape, rank, max_elements, recursions-1); //fprintf(stderr, "column of zeros\n"); }
|
145
|
-
|
146
|
-
++last_key;
|
147
|
-
} else {
|
148
|
-
//fprintf(stderr, "pos = %u, dim = %u, curr->key = %u, last_key+1 = %d\t", *pos, shape[rank-1-recursions], curr->key, last_key+1);
|
149
|
-
if (recursions == 0) cast_copy_value_single((char*)lhs + (*pos)*nm_sizeof[l_dtype], curr->val, l_dtype, r_dtype); //fprintf(stderr, "value\n"); }
|
150
|
-
else cast_copy_dense_list_contents(lhs, curr->val, default_val, l_dtype, r_dtype, pos, shape, rank, max_elements, recursions-1); //fprintf(stderr, "column of values\n"); }
|
151
|
-
|
152
|
-
last_key = curr->key;
|
153
|
-
curr = curr->next;
|
154
|
-
}
|
155
|
-
}
|
156
|
-
--(*pos);
|
157
|
-
}
|
158
|
-
|
159
|
-
|
160
|
-
// Convert (by creating a copy) from list storage to dense storage.
|
161
|
-
DENSE_STORAGE* scast_copy_dense_list(const LIST_STORAGE* rhs, int8_t l_dtype) {
|
162
|
-
DENSE_STORAGE* lhs;
|
163
|
-
size_t pos = 0; // position in lhs->elements
|
164
|
-
|
165
|
-
// allocate and copy shape
|
166
|
-
size_t* shape = ALLOC_N(size_t, rhs->rank);
|
167
|
-
memcpy(shape, rhs->shape, rhs->rank * sizeof(size_t));
|
168
|
-
|
169
|
-
lhs = create_dense_storage(l_dtype, shape, rhs->rank, NULL, 0);
|
170
|
-
|
171
|
-
// recursively copy the contents
|
172
|
-
cast_copy_dense_list_contents(lhs->elements, rhs->rows, rhs->default_val, l_dtype, rhs->dtype, &pos, shape, lhs->rank, count_storage_max_elements((STORAGE*)rhs), rhs->rank-1);
|
173
|
-
|
174
|
-
return lhs;
|
175
|
-
}
|
176
|
-
|
177
|
-
|
178
|
-
DENSE_STORAGE* scast_copy_dense_yale(const YALE_STORAGE* rhs, int8_t l_dtype) {
|
179
|
-
DENSE_STORAGE* lhs;
|
180
|
-
y_size_t i, j, // position in lhs->elements
|
181
|
-
ija, ija_next, jj; // position in rhs->elements
|
182
|
-
y_size_t pos = 0; // position in dense to write to
|
183
|
-
void* R_ZERO = (char*)(rhs->a) + rhs->shape[0] * nm_sizeof[rhs->dtype]; // determine zero representation
|
184
|
-
|
185
|
-
// allocate and set shape
|
186
|
-
size_t* shape = ALLOC_N(size_t, rhs->rank);
|
187
|
-
memcpy(shape, rhs->shape, rhs->rank * sizeof(size_t));
|
188
|
-
|
189
|
-
lhs = create_dense_storage(l_dtype, shape, rhs->rank, NULL, 0);
|
190
|
-
|
191
|
-
// Walk through rows. For each entry we set in dense, increment pos.
|
192
|
-
for (i = 0; i < rhs->shape[0]; ++i) {
|
193
|
-
|
194
|
-
// get boundaries of this row, store in ija and ija_next
|
195
|
-
YaleGetIJA(ija, rhs, i);
|
196
|
-
YaleGetIJA(ija_next, rhs, i+1);
|
197
|
-
|
198
|
-
if (ija == ija_next) { // row is empty?
|
199
|
-
|
200
|
-
for (j = 0; j < rhs->shape[1]; ++j) { // write zeros in each column
|
201
|
-
|
202
|
-
// Fill in zeros (except for diagonal)
|
203
|
-
if (i == j) cast_copy_value_single((char*)(lhs->elements) + pos*nm_sizeof[l_dtype], (char*)(rhs->a) + i*nm_sizeof[rhs->dtype], l_dtype, rhs->dtype);
|
204
|
-
else cast_copy_value_single((char*)(lhs->elements) + pos*nm_sizeof[l_dtype], R_ZERO, l_dtype, rhs->dtype);
|
205
|
-
|
206
|
-
++pos; // move to next dense position
|
207
|
-
}
|
208
|
-
|
209
|
-
} else {
|
210
|
-
// row contains entries: write those in each column, interspersed with zeros
|
211
|
-
YaleGetIJA(jj, rhs, ija);
|
212
|
-
|
213
|
-
for (j = 0; j < rhs->shape[1]; ++j) {
|
214
|
-
if (i == j) {
|
215
|
-
|
216
|
-
cast_copy_value_single((char*)(lhs->elements) + pos*nm_sizeof[l_dtype], (char*)(rhs->a) + i*nm_sizeof[rhs->dtype], l_dtype, rhs->dtype);
|
217
|
-
|
218
|
-
} else if (j == jj) {
|
219
|
-
|
220
|
-
// copy from rhs
|
221
|
-
cast_copy_value_single((char*)(lhs->elements) + pos*nm_sizeof[l_dtype], (char*)(rhs->a) + ija*nm_sizeof[rhs->dtype], l_dtype, rhs->dtype);
|
222
|
-
|
223
|
-
// get next
|
224
|
-
++ija;
|
225
|
-
|
226
|
-
// increment to next column ID (or go off the end)
|
227
|
-
if (ija < ija_next) YaleGetIJA(jj, rhs, ija);
|
228
|
-
else jj = rhs->shape[1];
|
229
|
-
|
230
|
-
} else { // j < jj
|
231
|
-
|
232
|
-
// insert zero
|
233
|
-
cast_copy_value_single((char*)(lhs->elements) + pos*nm_sizeof[l_dtype], R_ZERO, l_dtype, rhs->dtype);
|
234
|
-
}
|
235
|
-
++pos; // move to next dense position
|
236
|
-
}
|
237
|
-
}
|
238
|
-
}
|
239
|
-
|
240
|
-
return lhs;
|
241
|
-
}
|
242
|
-
|
243
|
-
|
244
|
-
// Note that elements and elements_length are for initial value(s) passed in. If they are the correct length, they will
|
245
|
-
// be used directly. If not, they will be concatenated over and over again into a new elements array. If elements is NULL,
|
246
|
-
// the new elements array will not be initialized.
|
247
|
-
DENSE_STORAGE* create_dense_storage(int8_t dtype, size_t* shape, size_t rank, void* elements, size_t elements_length) {
|
248
|
-
DENSE_STORAGE* s;
|
249
|
-
size_t count, i, copy_length = elements_length;
|
250
|
-
|
251
|
-
s = ALLOC( DENSE_STORAGE );
|
252
|
-
//if (!(s = malloc(sizeof(DENSE_STORAGE)))) return NULL;
|
253
|
-
|
254
|
-
s->rank = rank;
|
255
|
-
s->shape = shape;
|
256
|
-
s->dtype = dtype;
|
257
|
-
|
258
|
-
//fprintf(stderr, "create_dense_storage: %p\n", s);
|
259
|
-
|
260
|
-
count = count_dense_storage_elements(s);
|
261
|
-
//fprintf(stderr, "count_dense_storage_elements: %d\n", count);
|
262
|
-
|
263
|
-
if (elements_length == count) s->elements = elements;
|
264
|
-
else {
|
265
|
-
s->elements = ALLOC_N(char, nm_sizeof[dtype]*count);
|
266
|
-
|
267
|
-
if (elements_length > 0) {
|
268
|
-
// repeat elements over and over again until the end of the matrix
|
269
|
-
for (i = 0; i < count; i += elements_length) {
|
270
|
-
if (i + elements_length > count) copy_length = count - i;
|
271
|
-
memcpy((char*)(s->elements)+i*nm_sizeof[dtype], (char*)(elements)+(i % elements_length)*nm_sizeof[dtype], copy_length*nm_sizeof[dtype]);
|
272
|
-
}
|
273
|
-
|
274
|
-
// get rid of the init_val
|
275
|
-
free(elements);
|
276
|
-
}
|
277
|
-
}
|
278
|
-
|
279
|
-
return s;
|
280
|
-
}
|
281
|
-
|
282
|
-
|
283
|
-
// Release a dense storage together with its shape array and element buffer. A NULL storage is
// ignored (sometimes Ruby passes in NULL storage, probably on copy construction failure).
void delete_dense_storage(DENSE_STORAGE* s) {
  if (!s) return;

  free(s->shape);
  free(s->elements);
  free(s);
}
|
290
|
-
|
291
|
-
|
292
|
-
void mark_dense_storage(void* m) {
|
293
|
-
size_t i;
|
294
|
-
DENSE_STORAGE* storage;
|
295
|
-
|
296
|
-
if (m) {
|
297
|
-
storage = (DENSE_STORAGE*)(((NMATRIX*)m)->storage);
|
298
|
-
//fprintf(stderr, "mark_dense_storage\n");
|
299
|
-
if (storage && storage->dtype == NM_ROBJ)
|
300
|
-
for (i = 0; i < count_dense_storage_elements(storage); ++i)
|
301
|
-
rb_gc_mark(*((VALUE*)((char*)(storage->elements) + i*nm_sizeof[NM_ROBJ])));
|
302
|
-
}
|
303
|
-
}
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
#endif
|
data/ext/nmatrix/depend
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
dtypes.h dtypes.c smmp1.c smmp2.c blas.c: generator.rb
|
2
|
-
$(RUBY) -I$(srcdir) $(srcdir)/generator.rb
|
3
|
-
|
4
|
-
blas.o: blas.c
|
5
|
-
|
6
|
-
smmp2.o: smmp1.c smmp2.c nmatrix.h yale/smmp2_header.template.c yale/symbmm.template.c yale/transp.template.c yale/numbmm.template.c
|
7
|
-
|
8
|
-
smmp1.o: smmp1.c nmatrix_config.h nmatrix.h yale/smmp1_header.template.c
|
9
|
-
|
10
|
-
dfuncs.o: dfuncs.c $(hdrdir)/ruby.h
|
11
|
-
|
12
|
-
$(DLLIB): dtypes.h dtypes.c dfuncs.c $(OBJS) Makefile
|
13
|
-
# $(ECHO) linking shared-object $(DLLIB)
|
14
|
-
@-$(RM) $(@)
|
15
|
-
$(Q) $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
16
|
-
|
17
|
-
soclean:
|
18
|
-
@-$(RM) *.so *.o *.bundle
|