nmatrix 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/.autotest +23 -0
  2. data/.gemtest +0 -0
  3. data/Gemfile +7 -0
  4. data/History.txt +6 -0
  5. data/LICENSE.txt +21 -0
  6. data/Manifest.txt +51 -0
  7. data/README.rdoc +63 -0
  8. data/Rakefile +154 -0
  9. data/ext/nmatrix/cblas.c +150 -0
  10. data/ext/nmatrix/dense.c +307 -0
  11. data/ext/nmatrix/dense/blas_header.template.c +52 -0
  12. data/ext/nmatrix/dense/elementwise.template.c +107 -0
  13. data/ext/nmatrix/dense/gemm.template.c +159 -0
  14. data/ext/nmatrix/dense/gemv.template.c +130 -0
  15. data/ext/nmatrix/dense/rationalmath.template.c +68 -0
  16. data/ext/nmatrix/depend +18 -0
  17. data/ext/nmatrix/extconf.rb +143 -0
  18. data/ext/nmatrix/generator.rb +594 -0
  19. data/ext/nmatrix/generator/syntax_tree.rb +481 -0
  20. data/ext/nmatrix/list.c +774 -0
  21. data/ext/nmatrix/nmatrix.c +1977 -0
  22. data/ext/nmatrix/nmatrix.h +912 -0
  23. data/ext/nmatrix/rational.c +98 -0
  24. data/ext/nmatrix/yale.c +726 -0
  25. data/ext/nmatrix/yale/complexmath.template.c +71 -0
  26. data/ext/nmatrix/yale/elementwise.template.c +46 -0
  27. data/ext/nmatrix/yale/elementwise_op.template.c +73 -0
  28. data/ext/nmatrix/yale/numbmm.template.c +94 -0
  29. data/ext/nmatrix/yale/smmp1.template.c +21 -0
  30. data/ext/nmatrix/yale/smmp1_header.template.c +38 -0
  31. data/ext/nmatrix/yale/smmp2.template.c +43 -0
  32. data/ext/nmatrix/yale/smmp2_header.template.c +46 -0
  33. data/ext/nmatrix/yale/sort_columns.template.c +56 -0
  34. data/ext/nmatrix/yale/symbmm.template.c +54 -0
  35. data/ext/nmatrix/yale/transp.template.c +68 -0
  36. data/lib/array.rb +67 -0
  37. data/lib/nmatrix.rb +263 -0
  38. data/lib/string.rb +65 -0
  39. data/spec/nmatrix_spec.rb +395 -0
  40. data/spec/nmatrix_yale_spec.rb +239 -0
  41. data/spec/nvector_spec.rb +43 -0
  42. data/spec/syntax_tree_spec.rb +46 -0
  43. metadata +150 -0
@@ -0,0 +1,1977 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == nmatrix.c
25
+ //
26
#ifndef NMATRIX_C
# define NMATRIX_C

#include <ruby.h>

#include "nmatrix.h"
//#include "types.h"

// Ruby classes registered by this extension.
VALUE cNMatrix, cNVector;
// Exception classes raised on bad dtype / storage-type arguments.
VALUE nm_eDataTypeError, nm_eStorageTypeError;

// Interned method-name IDs, cached once so dispatch avoids repeated rb_intern.
ID nm_id_real, nm_id_imag;     // complex number accessors
ID nm_id_numer, nm_id_denom;   // rational number accessors
ID nm_id_transpose, nm_id_no_transpose, nm_id_complex_conjugate; // cblas
ID nm_id_list, nm_id_dense;
ID nm_id_mult, nm_id_multeq;
ID nm_id_add;

#include "dtypes.c"
45
+
46
#ifdef BENCHMARK
// Wall-clock time in seconds (microsecond resolution), used only for benchmarks.
// NOTE(review): relies on gettimeofday(); <sys/time.h> is presumably included
// via nmatrix.h -- confirm.
double get_time() {
  struct timeval t;
  struct timezone tzp;
  gettimeofday(&t, &tzp);
  return t.tv_sec + t.tv_usec*1e-6;
}
#endif
54
+
55
+
56
// Storage-type names, indexed by stype (dense, list, yale); the final entry
// corresponds to the S_TYPES sentinel value.
const char *nm_stypestring[] = {
  "dense",
  "list",
  "yale",
  "stypes"
};
62
+
63
+
64
// Storage destructors, indexed by stype: dense, list, yale.
nm_delete_t DeleteFuncs = {
  delete_dense_storage,
  delete_list_storage,
  delete_yale_storage
};
69
+
70
+
71
// GC mark callbacks, indexed by stype: dense, list, yale.
nm_mark_t MarkFuncs = {
  mark_dense_storage,
  mark_list_storage,
  mark_yale_storage
};
76
+
77
+
78
// Matrix-vector multiply kernels, indexed by dtype (NM_TYPES order).
// NM_NONE has no kernel (NULL).
nm_gemv_t GemvFuncs = {
  NULL,
  cblas_bgemv_,
  cblas_i8gemv_,
  cblas_i16gemv_,
  cblas_i32gemv_,
  cblas_i64gemv_,
  cblas_sgemv_,
  cblas_dgemv_,
  cblas_cgemv_,
  cblas_zgemv_,
  cblas_r32gemv_,
  cblas_r64gemv_,
  cblas_r128gemv_,
  NULL  // NOTE(review): no NM_ROBJ gemv, unlike GemmFuncs' cblas_vgemm_ -- confirm intentional
};
94
+
95
+
96
// Matrix-matrix multiply kernels, indexed by dtype (NM_TYPES order).
// NM_NONE has no kernel (NULL).
nm_gemm_t GemmFuncs = { // by NM_TYPES
  NULL,
  cblas_bgemm_,
  cblas_i8gemm_,
  cblas_i16gemm_,
  cblas_i32gemm_,
  cblas_i64gemm_,
  cblas_sgemm_,
  cblas_dgemm_,
  cblas_cgemm_,
  cblas_zgemm_,
  cblas_r32gemm_,
  cblas_r64gemm_,
  cblas_r128gemm_,
  cblas_vgemm_
};
112
+
113
// Placeholder entry for (dtype, index-dtype) pairs with no transpose kernel;
// always raises NMatrix::DataTypeError.
static void TransposeTypeErr(y_size_t n, y_size_t m, YALE_PARAM A, YALE_PARAM B, bool move) {
  rb_raise(nm_eDataTypeError, "illegal operation with this matrix type");
}
116
+
117
+
118
+ // First dimension is dtype, second dimension is index dtype (so lots of nulls)
119
// Yale (SMMP) transpose kernels. First index is dtype, second is index dtype;
// the first two columns are error stubs because index dtypes start at i8.
nm_smmp_transpose_t SparseTransposeFuncs = {
  {TransposeTypeErr, TransposeTypeErr, TransposeTypeErr, TransposeTypeErr, TransposeTypeErr, TransposeTypeErr}, // NM_NONE
  {TransposeTypeErr, TransposeTypeErr, i8_b_transp_, i16_b_transp_, i32_b_transp_, i64_b_transp_}, // NM_BYTE
  {TransposeTypeErr, TransposeTypeErr, i8_i8_transp_, i16_i8_transp_, i32_i8_transp_, i64_i8_transp_}, // NM_INT8
  {TransposeTypeErr, TransposeTypeErr, i8_i16_transp_, i16_i16_transp_, i32_i16_transp_, i64_i16_transp_}, // NM_INT16
  {TransposeTypeErr, TransposeTypeErr, i8_i32_transp_, i16_i32_transp_, i32_i32_transp_, i64_i32_transp_}, // NM_INT32
  {TransposeTypeErr, TransposeTypeErr, i8_i64_transp_, i16_i64_transp_, i32_i64_transp_, i64_i64_transp_}, // NM_INT64
  {TransposeTypeErr, TransposeTypeErr, i8_f32_transp_, i16_f32_transp_, i32_f32_transp_, i64_f32_transp_}, // NM_FLOAT32
  {TransposeTypeErr, TransposeTypeErr, i8_f64_transp_, i16_f64_transp_, i32_f64_transp_, i64_f64_transp_}, // NM_FLOAT64
  {TransposeTypeErr, TransposeTypeErr, i8_c64_transp_, i16_c64_transp_, i32_c64_transp_, i64_c64_transp_}, // NM_COMPLEX64
  {TransposeTypeErr, TransposeTypeErr, i8_c128_transp_, i16_c128_transp_, i32_c128_transp_, i64_c128_transp_}, // NM_COMPLEX128
  {TransposeTypeErr, TransposeTypeErr, i8_r32_transp_, i16_r32_transp_, i32_r32_transp_, i64_r32_transp_}, // NM_RATIONAL32
  {TransposeTypeErr, TransposeTypeErr, i8_r64_transp_, i16_r64_transp_, i32_r64_transp_, i64_r64_transp_}, // NM_RATIONAL64
  {TransposeTypeErr, TransposeTypeErr, i8_r128_transp_, i16_r128_transp_, i32_r128_transp_, i64_r128_transp_}, // NM_RATIONAL128
  {TransposeTypeErr, TransposeTypeErr, i8_v_transp_, i16_v_transp_, i32_v_transp_, i64_v_transp_} // NM_ROBJ
};
135
+
136
+ /*
137
+ // Currently commented out because dense_transpose_generic is about the same speed. Let's resurrect this when we write
138
+ // an in-place transpose (e.g., transpose!).
139
+
140
+ static void DenseTransTypeErr(int M, int N, void* A, int lda, void* B, int ldb, bool move) {
141
+ rb_raise(nm_eDataTypeError, "illegal operation with this matrix type");
142
+ }
143
+
144
+ nm_dense_transpose_t DenseTransposeFuncs = {
145
+ DenseTransTypeErr,
146
+ btransp,
147
+ i8transp,
148
+ i16transp,
149
+ i32transp,
150
+ i64transp,
151
+ f32transp,
152
+ f64transp,
153
+ c64transp,
154
+ c128transp,
155
+ r32transp,
156
+ r64transp,
157
+ r128transp,
158
+ vtransp
159
+ }; */
160
+
161
+
162
// Placeholder entry for (dtype, index-dtype) pairs with no sparse-multiply
// kernel; always raises NMatrix::DataTypeError.
static void SmmpTypeErr(y_size_t n, y_size_t m, YALE_PARAM A, YALE_PARAM B, YALE_PARAM C) {
  rb_raise(nm_eDataTypeError, "illegal operation with this matrix type");
}
165
+
166
+ // First dimension is dtype, second dimension is index dtype (so lots of nulls)
167
// SMMP sparse matrix multiply kernels. First index is dtype, second is index
// dtype; the first two columns are error stubs (index dtypes start at i8).
nm_smmp_t SmmpFuncs = {
  {SmmpTypeErr, SmmpTypeErr, SmmpTypeErr, SmmpTypeErr, SmmpTypeErr, SmmpTypeErr}, // NM_NONE
  {SmmpTypeErr, SmmpTypeErr, i8_b_smmp, i16_b_smmp, i32_b_smmp, i64_b_smmp}, // NM_BYTE
  {SmmpTypeErr, SmmpTypeErr, i8_i8_smmp, i16_i8_smmp, i32_i8_smmp, i64_i8_smmp}, // NM_INT8
  {SmmpTypeErr, SmmpTypeErr, i8_i16_smmp, i16_i16_smmp, i32_i16_smmp, i64_i16_smmp}, // NM_INT16
  {SmmpTypeErr, SmmpTypeErr, i8_i32_smmp, i16_i32_smmp, i32_i32_smmp, i64_i32_smmp}, // NM_INT32
  {SmmpTypeErr, SmmpTypeErr, i8_i64_smmp, i16_i64_smmp, i32_i64_smmp, i64_i64_smmp}, // NM_INT64
  {SmmpTypeErr, SmmpTypeErr, i8_f32_smmp, i16_f32_smmp, i32_f32_smmp, i64_f32_smmp}, // NM_FLOAT32
  {SmmpTypeErr, SmmpTypeErr, i8_f64_smmp, i16_f64_smmp, i32_f64_smmp, i64_f64_smmp}, // NM_FLOAT64
  {SmmpTypeErr, SmmpTypeErr, i8_c64_smmp, i16_c64_smmp, i32_c64_smmp, i64_c64_smmp}, // NM_COMPLEX64
  {SmmpTypeErr, SmmpTypeErr, i8_c128_smmp, i16_c128_smmp, i32_c128_smmp, i64_c128_smmp}, // NM_COMPLEX128
  {SmmpTypeErr, SmmpTypeErr, i8_r32_smmp, i16_r32_smmp, i32_r32_smmp, i64_r32_smmp}, // NM_RATIONAL32
  {SmmpTypeErr, SmmpTypeErr, i8_r64_smmp, i16_r64_smmp, i32_r64_smmp, i64_r64_smmp}, // NM_RATIONAL64
  {SmmpTypeErr, SmmpTypeErr, i8_r128_smmp, i16_r128_smmp, i32_r128_smmp, i64_r128_smmp}, // NM_RATIONAL128
  {SmmpTypeErr, SmmpTypeErr, i8_v_smmp, i16_v_smmp, i32_v_smmp, i64_v_smmp} // NM_ROBJ
};
183
+
184
+
185
// Builds the DENSE_PARAM argument block for a gemm/gemv call on dense storage.
// Pre-condition: left, right, and result all share the same dtype (guaranteed
// by the caller's cast). When vector is true, parameters are laid out for
// gemv (B is the vector x, C is y, ldb/ldc become increments); otherwise for
// gemm. alpha is set to 1 and beta to 0 in the matching dtype representation,
// so the kernels compute C = A*B with no accumulation into C.
static inline DENSE_PARAM cblas_params_for_multiply(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const DENSE_STORAGE* result, bool vector) {
  DENSE_PARAM p;

  p.A = left->elements;
  p.B = right->elements; // for vector, this is actually x
  p.C = result->elements; // vector Y

  p.M = left->shape[0];
  p.lda = left->shape[1]; // row-major: leading dimension is the column count

  if (vector) {
    p.N = left->shape[1];

    p.ldb = 1; // incX
    p.ldc = 1; // incY
  } else {
    p.N = right->shape[1];
    p.K = left->shape[1];

    p.ldb = right->shape[1];
    p.ldc = result->shape[1];
  }

  // Fill alpha = 1, beta = 0 in whichever union member matches the dtype.
  switch(left->dtype) {
  case NM_FLOAT32:
  case NM_FLOAT64:
    p.alpha.d[0] = 1.0;
    p.beta.d[0] = 0.0;
    break;

  case NM_COMPLEX64:
    p.alpha.c[0].r = 1.0;
    p.alpha.c[0].i = 0.0;
    p.beta.c[0].r = 0.0;
    p.beta.c[0].i = 0.0;
    break;

  case NM_COMPLEX128:
    p.alpha.z.r = 1.0;
    p.alpha.z.i = 0.0;
    p.beta.z.r = 0.0;
    p.beta.z.i = 0.0;
    break;

  case NM_BYTE:
    p.alpha.b[0] = 1;
    p.beta.b[0] = 0;
    break;

  case NM_INT8:
  case NM_INT16:
  case NM_INT32:
  case NM_INT64:
    p.alpha.i[0] = 1;
    p.beta.i[0] = 0;
    break;

  case NM_RATIONAL32:
    // Rationals: 1 = 1/1, 0 = 0/1.
    p.alpha.r[0].n = 1;
    p.alpha.r[0].d = 1;
    p.beta.r[0].n = 0;
    p.beta.r[0].d = 1;
    break;

  case NM_RATIONAL64:
    p.alpha.ra[0].n = 1;
    p.alpha.ra[0].d = 1;
    p.beta.ra[0].n = 0;
    p.beta.ra[0].d = 1;
    break;

  case NM_RATIONAL128:
    p.alpha.rat.n = 1;
    p.alpha.rat.d = 1;
    p.beta.rat.n = 0;
    p.beta.rat.d = 1;
    break;

  case NM_ROBJ:
    // Ruby-object matrices use Fixnum 1 and a zero constant.
    p.alpha.v[0] = INT2FIX(1);
    p.beta.v[0] = RUBY_ZERO;
    break;

  default:
    rb_raise(nm_eDataTypeError, "unexpected dtype");

  }

  return p;
}
275
+
276
+
277
// Multiply two dense matrices (or matrix-by-vector when vector is true) that
// have already been cast to a common dtype. Allocates new dense storage for
// the product and wraps it in an NMATRIX.
static NMATRIX* multiply_matrix_dense_casted(STORAGE_PAIR casted_storage, size_t* resulting_shape, bool vector) {
  DENSE_STORAGE *left  = (DENSE_STORAGE*)(casted_storage.left),
                *right = (DENSE_STORAGE*)(casted_storage.right),
                *result;

  // We can safely get dtype from the casted matrices; post-condition of binary_storage_cast_alloc is that dtype is the
  // same for left and right.
  int8_t dtype = left->dtype;

  // Create result storage.
  result = create_dense_storage(dtype, resulting_shape, 2, NULL, 0);

  // Do the multiplication: gemv for vectors, gemm otherwise, dispatched by dtype.
  if (vector) GemvFuncs[dtype](CblasRowMajor, CblasNoTrans, cblas_params_for_multiply(left, right, result, true));
  else        GemmFuncs[dtype](CblasRowMajor, CblasNoTrans, CblasNoTrans, cblas_params_for_multiply(left, right, result, false));

  return nm_create(S_DENSE, result);
}
295
+
296
+
297
// Multiply two yale (compressed sparse) matrices already cast to a common
// dtype, using the SMMP kernels. The result is allocated with capacity equal
// to the sum of the operands' capacities. The vector flag is unused here.
static NMATRIX* multiply_matrix_yale_casted(STORAGE_PAIR casted_storage, size_t* resulting_shape, bool vector) {
  YALE_STORAGE *left  = (YALE_STORAGE*)(casted_storage.left),
               *right = (YALE_STORAGE*)(casted_storage.right),
               *result;
  YALE_PARAM A, B, C;

  // We can safely get dtype from the casted matrices; post-condition of binary_storage_cast_alloc is that dtype is the
  // same for left and right.
  int8_t dtype = left->dtype;

  // Create result storage.
  result = create_yale_storage(dtype, resulting_shape, 2, left->capacity + right->capacity);
  init_yale_storage(result);

  // Set multiplication parameters. Yale stores IA and JA in a single ija
  // array, so both point at the same buffer.
  A.ia = A.ja = left->ija;
  A.a  = left->a;
  B.ia = B.ja = right->ija;
  B.a  = right->a;
  C.ia = C.ja = result->ija;
  C.a  = result->a;

  A.diag = B.diag = C.diag = true; // "new yale": diagonal stored separately

  // Do the multiplication, dispatched on dtype and index dtype.
  SmmpFuncs[dtype][left->index_dtype](result->shape[0], result->shape[1], A, B, C);

  return nm_create(S_YALE, result);
}
326
+
327
+
328
+ static NMATRIX* multiply_matrix_list_casted(STORAGE_PAIR casted_storage, size_t* resulting_shape) {
329
+ rb_raise(rb_eNotImpError, "multiplication not implemented for list-of-list matrices");
330
+ free(resulting_shape);
331
+ return NULL;
332
+ }
333
+
334
+
335
// Multiplication entry points, indexed by stype: dense, list, yale.
nm_matrix_multiply_op_t CastedMultiplyFuncs = {
  multiply_matrix_dense_casted,
  multiply_matrix_list_casted,
  multiply_matrix_yale_casted
};
340
+
341
+
342
// Dense elementwise binary-op kernels, indexed by dtype (NM_TYPES order).
// NM_NONE and the trailing sentinel have no kernel (NULL).
nm_d_elementwise_binary_op_t DenseElementwiseFuncs = { // only for dense!
  NULL,
  nm_d_b_elementwise,
  nm_d_i8_elementwise,
  nm_d_i16_elementwise,
  nm_d_i32_elementwise,
  nm_d_i64_elementwise,
  nm_d_f32_elementwise,
  nm_d_f64_elementwise,
  nm_d_c64_elementwise,
  nm_d_c128_elementwise,
  nm_d_r32_elementwise,
  nm_d_r64_elementwise,
  nm_d_r128_elementwise,
  nm_d_v_elementwise,
  NULL
};
359
+
360
// Placeholder entry for (dtype, index-dtype) pairs with no yale elementwise
// kernel; always raises NMatrix::DataTypeError.
static void EwTypeErr(y_size_t n, enum NMatrix_Ops op, void* ija, void* ijb, void* ijc, void* a, void* b, void* c) {
  rb_raise(nm_eDataTypeError, "illegal operation with this matrix type");
}
363
+
364
+ // First dimension is dtype, second dimension is index dtype (so lots of nulls)
365
// Yale elementwise binary-op kernels. First index is dtype, second is index
// dtype. The first two ROWS (NM_NONE and NM_BYTE) are entirely error stubs --
// NOTE(review): unlike the dense table, byte has no yale elementwise kernel;
// confirm that is intentional.
nm_y_elementwise_binary_op_t YaleElementwiseFuncs = { // only for yale!
  {EwTypeErr, EwTypeErr, EwTypeErr, EwTypeErr, EwTypeErr, EwTypeErr},
  {EwTypeErr, EwTypeErr, EwTypeErr, EwTypeErr, EwTypeErr, EwTypeErr},
  {EwTypeErr, EwTypeErr, i8_i8_ew, i16_i8_ew, i32_i8_ew, i64_i8_ew},
  {EwTypeErr, EwTypeErr, i8_i16_ew, i16_i16_ew, i32_i16_ew, i64_i16_ew},
  {EwTypeErr, EwTypeErr, i8_i32_ew, i16_i32_ew, i32_i32_ew, i64_i32_ew},
  {EwTypeErr, EwTypeErr, i8_i64_ew, i16_i64_ew, i32_i64_ew, i64_i64_ew},
  {EwTypeErr, EwTypeErr, i8_f32_ew, i16_f32_ew, i32_f32_ew, i64_f32_ew},
  {EwTypeErr, EwTypeErr, i8_f64_ew, i16_f64_ew, i32_f64_ew, i64_f64_ew},
  {EwTypeErr, EwTypeErr, i8_c64_ew, i16_c64_ew, i32_c64_ew, i64_c64_ew},
  {EwTypeErr, EwTypeErr, i8_c128_ew,i16_c128_ew,i32_c128_ew,i64_c128_ew},
  {EwTypeErr, EwTypeErr, i8_r32_ew, i16_r32_ew, i32_r32_ew, i64_r32_ew},
  {EwTypeErr, EwTypeErr, i8_r64_ew, i16_r64_ew, i32_r64_ew, i64_r64_ew},
  {EwTypeErr, EwTypeErr, i8_r128_ew,i16_r128_ew,i32_r128_ew,i64_r128_ew},
  {EwTypeErr, EwTypeErr, i8_v_ew, i16_v_ew, i32_v_ew, i64_v_ew}
};
381
+
382
+
383
// Elementwise binary operation (op) on two dense matrices already cast to a
// common dtype. Allocates result storage with the left operand's shape.
// NOTE(review): shapes of left and right are assumed equal here -- presumably
// verified by the caller; confirm.
static NMATRIX* elementwise_dense_casted(STORAGE_PAIR casted_storage, char op) {
  DENSE_STORAGE *left  = (DENSE_STORAGE*)(casted_storage.left),
                *right = (DENSE_STORAGE*)(casted_storage.right),
                *result;

  // We can safely get dtype from the casted matrices; post-condition of binary_storage_cast_alloc is that dtype is the
  // same for left and right.
  size_t i;
  int8_t dtype = left->dtype;

  // Setup matrix shape for result: copy the left operand's shape.
  size_t* shape = ALLOC_N(size_t, left->rank);
  for (i = 0; i < left->rank; ++i) shape[i] = left->shape[i];

  // Create result storage.
  result = create_dense_storage(dtype, shape, left->rank, NULL, 0);

  // Do the operation over the flat element arrays.
  DenseElementwiseFuncs[dtype](left->elements, right->elements, result->elements, count_dense_storage_elements(result), op);

  return nm_create(S_DENSE, result);
}
405
+
406
+
407
// Stub: elementwise operations are not implemented for list-of-list storage.
// Always raises NotImplementedError; the return is never reached.
static NMATRIX* elementwise_list_casted(STORAGE_PAIR casted_storage, char op) {
  rb_raise(rb_eNotImpError, "elementwise operations not implemented for list-of-list matrices");
  return NULL;
}
411
+
412
+
413
+ static NMATRIX* elementwise_yale_casted(STORAGE_PAIR casted_storage, char op) {
414
+ YALE_STORAGE *left = (YALE_STORAGE*)(casted_storage.left),
415
+ *right = (YALE_STORAGE*)(casted_storage.right);
416
+ YALE_STORAGE *result = create_merged_yale_storage(left, right);
417
+
418
+ fprintf(stderr, "result: %d, %d\n", result->dtype, result->index_dtype);
419
+
420
+ //fprintf(stderr, "Remember to fix elementwise for yale!\n");
421
+ YaleElementwiseFuncs[result->dtype][result->index_dtype](result->shape[0], result->shape[1], op, left->ija, right->ija, result->ija, left->a, right->a, result->a);
422
+
423
+ return nm_create(S_YALE, result);
424
+ }
425
+
426
+
427
// Elementwise entry points, indexed by stype: dense, list, yale.
nm_elementwise_binary_op_casted_t CastedElementwiseFuncs = {
  elementwise_dense_casted,
  elementwise_list_casted,
  elementwise_yale_casted
};
432
+
433
+
434
// Equality comparators, indexed by stype: dense, list, yale.
nm_compare_t EqEqFuncs = {
  dense_storage_eqeq,
  list_storage_eqeq,
  yale_storage_eqeq
};
439
+
440
+
441
// GC free callback: dispatches to the stype-specific storage destructor.
// NOTE(review): if mat->stype is still the S_TYPES sentinel assigned by
// nm_alloc (i.e., the object was collected before nm_init ran), this indexes
// past the three real entries -- confirm DeleteFuncs' declared size.
static void nm_delete(NMATRIX* mat) {
  DeleteFuncs[mat->stype](mat->storage);
}
444
+
445
+
446
// Constructor adapter for dense storage: forwards shape/rank/dtype and the
// initial value(s) to create_dense_storage. self is unused here.
static STORAGE* nm_dense_new(size_t* shape, size_t rank, int8_t dtype, void* init_val, size_t init_val_len, VALUE self) {
  return (STORAGE*)(create_dense_storage(dtype, shape, rank, init_val, init_val_len));
}
449
+
450
+ static STORAGE* nm_list_new(size_t* shape, size_t rank, int8_t dtype, void* init_val, size_t init_val_len, VALUE self) {
451
+ if (init_val_len > 1) {
452
+ rb_raise(rb_eArgError, "list storage needs initial size, not initial value\n");
453
+ return NULL;
454
+ }
455
+ return (STORAGE*)(create_list_storage(dtype, shape, rank, init_val));
456
+ }
457
+
458
+
459
+ static STORAGE* nm_yale_new(size_t* shape, size_t rank, int8_t dtype, void* init_val, size_t init_val_len, VALUE self) {
460
+ YALE_STORAGE* s;
461
+
462
+ if (init_val_len > 1) {
463
+ rb_raise(rb_eArgError, "list storage needs initial size, not initial value\n");
464
+ return NULL;
465
+ }
466
+
467
+ s = create_yale_storage(dtype, shape, rank, *(size_t*)init_val);
468
+ init_yale_storage(s);
469
+ free(init_val);
470
+
471
+ if (!s) rb_raise(rb_eNoMemError, "Yale allocation failed");
472
+
473
+ return (STORAGE*)(s);
474
+ //return Data_Wrap_Struct(self, NULL, nm_delete, matrix);
475
+ }
476
+
477
+
478
// Storage constructors, indexed by stype: dense, list, yale.
nm_create_storage_t CreateFuncs = {
  nm_dense_new,
  nm_list_new,
  nm_yale_new
};
483
+
484
+
485
// Same-stype copy constructors (optionally converting dtype), indexed by
// stype: dense, list, yale.
nm_cast_copy_storage_t CastCopyFuncs = {
  cast_copy_dense_storage,
  cast_copy_list_storage,
  cast_copy_yale_storage
};
490
+
491
+
492
+
493
// Cross-stype copy constructors. Indexed [destination stype][source stype]
// (see nm_scast_copy); the diagonal is the plain same-stype cast-copy.
nm_scast_copy_storage_t ScastCopyFuncs = {
  {cast_copy_dense_storage, scast_copy_dense_list, scast_copy_dense_yale},
  {scast_copy_list_dense, cast_copy_list_storage, scast_copy_list_yale},
  {scast_copy_yale_dense, scast_copy_yale_list, cast_copy_yale_storage}
};
498
+
499
+
500
// Element-read accessors, indexed by stype: dense, list, yale.
nm_stype_ref_t RefFuncs = {
  dense_storage_get,
  list_storage_get,
  yale_storage_ref
};
505
+
506
+
507
// Set a single element of dense storage at coords. The Ruby VALUE is first
// converted to the storage dtype in a stack temporary (alloca), then copied
// into the matrix by dense_storage_set. Returns val for chaining.
VALUE nm_dense_set(STORAGE* s, size_t* coords, VALUE val) {
  void* v = ALLOCA_N(char, nm_sizeof[s->dtype]);
  SetFuncs[s->dtype][NM_ROBJ](1, v, 0, &val, 0); // convert VALUE -> dtype
  dense_storage_set( (DENSE_STORAGE*)s, coords, v );
  return val;
}
513
+
514
+
515
+ // Should work exactly the same as nm_dense_set.
516
+ VALUE nm_yale_set(STORAGE* s, size_t* coords, VALUE val) {
517
+ void* v = ALLOCA_N(char, nm_sizeof[s->dtype]);
518
+ SetFuncs[s->dtype][NM_ROBJ](1, v, 0, &val, 0);
519
+ yale_storage_set( (YALE_STORAGE*)s, coords, v );
520
+ return val;
521
+ }
522
+
523
+
524
+ // TODO: Why can't you be more like your brothers, nm_dense_set and nm_yale_set?
525
+ VALUE nm_list_set(STORAGE* s, size_t* coords, VALUE val) {
526
+ void *v = ALLOC_N(char, nm_sizeof[s->dtype]), *rm;
527
+ LIST_STORAGE* ls = (LIST_STORAGE*)s;
528
+
529
+ //fprintf(stderr, " create_val: %p\n", v);
530
+
531
+ SetFuncs[s->dtype][NM_ROBJ](1, v, 0, &val, 0);
532
+
533
+ if (!memcmp(ls->default_val, v, nm_sizeof[s->dtype])) {
534
+ // User asked to insert default_value, which is actually node *removal*.
535
+ // So let's do that instead.
536
+
537
+ rm = list_storage_remove( ls, coords );
538
+
539
+ //if (rm) fprintf(stderr, " remove_val: %p\n", rm);
540
+
541
+ if (rm) free(rm);
542
+ return val;
543
+
544
+ } else if (list_storage_insert( ls, coords, v )) return val;
545
+ return Qnil;
546
+ // No need to free; the list keeps v.
547
+ }
548
+
549
+
550
+
551
// Element-write accessors, indexed by stype: dense, list, yale.
nm_stype_ins_t InsFuncs = {
  nm_dense_set,
  nm_list_set,
  nm_yale_set,
};
556
+
557
+
558
+
559
+ // Converts a typestring to a typecode for storage. Only looks at the first three characters.
560
+ int8_t nm_stypestring_to_stype(VALUE str) {
561
+ int8_t i;
562
+ for (i = 0; i < S_TYPES; ++i)
563
+ if ( !strncmp(RSTRING_PTR(str), nm_stypestring[i], 3) ) return i;
564
+ return S_DENSE;
565
+ }
566
+
567
+ int8_t nm_stypesymbol_to_stype(VALUE sym) {
568
+ int8_t i;
569
+ for (i = 0; i < S_TYPES; ++i)
570
+ if (SYM2ID(sym) == rb_intern(nm_stypestring[i])) return i;
571
+ return S_DENSE;
572
+ }
573
+
574
+
575
+ int8_t nm_dtypestring_to_dtype(VALUE str) {
576
+ int8_t i;
577
+ for (i = 0; i < NM_TYPES; ++i)
578
+ if ( !strncmp(RSTRING_PTR(str), nm_dtypestring[i], RSTRING_LEN(str)) ) return i;
579
+ return NM_NONE;
580
+ }
581
+
582
+ int8_t nm_dtypesymbol_to_dtype(VALUE sym) {
583
+ int8_t i;
584
+ for (i = 0; i < NM_TYPES; ++i)
585
+ if (SYM2ID(sym) == rb_intern(nm_dtypestring[i])) return i;
586
+ return NM_NONE;
587
+ }
588
+
589
+
590
+ // TODO: Probably needs some work for Bignum.
591
+ int8_t nm_guess_dtype(VALUE v) {
592
+ switch(TYPE(v)) {
593
+ case T_TRUE:
594
+ case T_FALSE:
595
+ return NM_BYTE;
596
+ case T_STRING:
597
+ if (RSTRING_LEN(v) == 1) return NM_BYTE;
598
+ else return NM_NONE;
599
+
600
+ #if SIZEOF_INT == 8
601
+ case T_FIXNUM:
602
+ return NM_INT64;
603
+ case T_RATIONAL:
604
+ return NM_RATIONAL128;
605
+ #else
606
+ # if SIZEOF_INT == 4
607
+ case T_FIXNUM:
608
+ return NM_INT32;
609
+ case T_RATIONAL:
610
+ return NM_RATIONAL64;
611
+ #else
612
+ case T_FIXNUM:
613
+ return NM_INT16;
614
+ case T_RATIONAL:
615
+ return NM_RATIONAL32;
616
+ # endif
617
+ #endif
618
+
619
+ case T_BIGNUM:
620
+ return NM_INT64;
621
+
622
+ #if SIZEOF_FLOAT == 4
623
+ case T_COMPLEX:
624
+ return NM_COMPLEX128;
625
+ case T_FLOAT:
626
+ return NM_FLOAT64;
627
+ #else
628
+ # if SIZEOF_FLOAT == 2
629
+ case T_COMPLEX:
630
+ return NM_COMPLEX64;
631
+ case T_FLOAT:
632
+ return NM_FLOAT32;
633
+ # endif
634
+ #endif
635
+
636
+ case T_ARRAY: // may be passed for dense -- for now, just look at the first element.
637
+ return nm_guess_dtype(RARRAY_PTR(v)[0]);
638
+ // TODO: Look at entire array for most specific type.
639
+
640
+ case T_NIL:
641
+ default:
642
+ return NM_NONE;
643
+ }
644
+ }
645
+
646
+ // Used for scasting (changing stype)
647
+ inline void cast_copy_value_single(void* to, const void* from, int8_t l_dtype, int8_t r_dtype) {
648
+ if (l_dtype == r_dtype) memcpy(to, from, nm_sizeof[l_dtype]);
649
+ else SetFuncs[l_dtype][r_dtype](1, to, 0, from, 0);
650
+ }
651
+
652
+
653
+
654
+ // Read the shape argument to NMatrix.new, which may be either an array or a single number.
655
+ // Second argument is where the shape is stored at the end of the function; returns the rank.
656
+ // You are responsible for freeing shape!
657
+ size_t* nm_interpret_shape_arg(VALUE arg, size_t* rank) {
658
+ size_t i;
659
+ size_t* shape;
660
+
661
+ if (TYPE(arg) == T_ARRAY) {
662
+ *rank = RARRAY_LEN(arg);
663
+ shape = ALLOC_N(size_t, *rank);
664
+ for (i = 0; i < *rank; ++i)
665
+ shape[i] = (size_t)(FIX2UINT(RARRAY_PTR(arg)[i]));
666
+ } else if (FIXNUM_P(arg)) {
667
+ *rank = 2;
668
+ shape = ALLOC_N(size_t, *rank);
669
+ for (i = 0; i < *rank; ++i)
670
+ shape[i] = (size_t)(FIX2UINT(arg));
671
+ } else {
672
+ *rank = 0;
673
+ shape = NULL;
674
+ rb_raise(rb_eArgError, "Expected an array of numbers or a single fixnum for matrix shape");
675
+ }
676
+
677
+ return shape;
678
+ }
679
+
680
+
681
+ // argv will be either 1 or 2 elements. If 1, could be either initial or dtype. If 2, is initial and dtype.
682
+ // This function returns the dtype.
683
+ int8_t nm_interpret_dtype(int argc, VALUE* argv, int8_t stype) {
684
+ int offset = 0; // if argc == 1
685
+ if (argc == 2) offset = 1;
686
+ else if (argc != 1) rb_raise(rb_eArgError, "Need an initial value or a dtype");
687
+
688
+ if (SYMBOL_P(argv[offset])) return nm_dtypesymbol_to_dtype(argv[offset]);
689
+ else if (IS_STRING(argv[offset])) return nm_dtypestring_to_dtype(StringValue(argv[offset]));
690
+ else if (stype == S_YALE) rb_raise(rb_eArgError, "yale requires dtype");
691
+ else return nm_guess_dtype(argv[0]);
692
+
693
+ return NM_NONE;
694
+ }
695
+
696
+ int8_t nm_interpret_stype(VALUE arg) {
697
+ if (SYMBOL_P(arg)) return nm_stypesymbol_to_stype(arg);
698
+ else if (IS_STRING(arg)) return nm_stypestring_to_stype(StringValue(arg));
699
+ else rb_raise(rb_eArgError, "Expected storage type");
700
+ return S_DENSE;
701
+ }
702
+
703
+
704
// Convert the initial-value argument (a single Ruby object or an Array of
// them) into a freshly-allocated buffer of the given dtype. The caller owns
// the returned buffer.
void* nm_interpret_initial_value(VALUE arg, int8_t dtype) {
  void* init_val;

  if (TYPE(arg) == T_ARRAY) { // array: convert every element
    init_val = ALLOC_N(char, nm_sizeof[dtype] * RARRAY_LEN(arg));
    SetFuncs[dtype][NM_ROBJ](RARRAY_LEN(arg), init_val, nm_sizeof[dtype], RARRAY_PTR(arg), nm_sizeof[NM_ROBJ]);
  } else { // single value
    init_val = ALLOC_N(char, nm_sizeof[dtype]);
    SetFuncs[dtype][NM_ROBJ](1, init_val, 0, &arg, 0);
  }

  return init_val;
}
717
+
718
+
719
+ size_t* nm_interpret_initial_capacity(VALUE arg) {
720
+ size_t* init_cap = ALLOC(size_t);
721
+ *init_cap = FIX2UINT(arg);
722
+ return init_cap;
723
+ }
724
+
725
+
726
+ /*
727
+ * Create a new NMatrix.
728
+ *
729
+ * There are several ways to do this. At a minimum, dimensions and either a dtype or initial values are needed, e.g.,
730
+ *
731
+ * NMatrix.new(3, :int64) # square 3x3 dense matrix
732
+ * NMatrix.new([3,4], :float32) # 3x4 matrix
733
+ * NMatrix.new(3, 0) # 3x3 dense matrix initialized to all zeros
734
+ * NMatrix.new([3,3], [1,2,3]) # [[1,2,3],[1,2,3],[1,2,3]]
735
+ *
736
+ * NMatrix will try to guess the dtype from the first value in the initial values array.
737
+ *
738
+ * You can also provide the stype prior to the dimensions. However, non-dense matrices cannot take initial values, and
739
+ * require a dtype (e.g., :int64):
740
+ *
741
+ * NMatrix.new(:yale, [4,3], :int64)
742
+ * NMatrix.new(:list, 5, :rational128)
743
+ *
744
+ * Finally, you can be extremely specific, and define a matrix very exactly:
745
+ *
746
+ * NMatrix.new(:dense, [2,2,2], [0,1,2,3,4,5,6,7], :int8)
747
+ *
748
+ * Just be careful! There are no overflow warnings in NMatrix.
749
+ */
750
// NMatrix#initialize: parses [stype,] shape, [initial value(s),] [dtype]
// and builds the storage via CreateFuncs. See the doc comment above for the
// accepted argument forms. Order of operations is delicate: stype first
// (optional leading Symbol/String), then shape, then dtype, then the initial
// value or (for yale) initial capacity.
static VALUE nm_init(int argc, VALUE* argv, VALUE nm) {
  char    ZERO = 0;
  VALUE   QNIL = Qnil;
  int8_t  dtype, stype, offset = 0;
  size_t  rank;
  size_t* shape;
  size_t  init_val_len = 0;
  void*   init_val = NULL;
  NMATRIX* nmatrix;

  // READ ARGUMENTS

  //fprintf(stderr, "Called nmatrix new with %d arguments\n", argc);

  if (argc < 2 || argc > 4) { rb_raise(rb_eArgError, "Expected 2, 3, or 4 arguments"); return Qnil; }

  // Optional leading stype; when absent, default to dense and don't shift.
  if (!SYMBOL_P(argv[0]) && !IS_STRING(argv[0])) {
    stype  = S_DENSE;
  } else {
    stype  = nm_interpret_stype(argv[0]);                        // 0: String or Symbol
    offset = 1;
  }
  shape    = nm_interpret_shape_arg(argv[offset], &rank);        // 1: shape (array or fixnum)
  dtype    = nm_interpret_dtype(argc-1-offset, argv+offset+1, stype); // 2-3: dtype

  if (IS_NUMERIC(argv[1+offset]) || TYPE(argv[1+offset]) == T_ARRAY) { // initial value provided (could also be initial capacity, if yale)
    if (stype == S_YALE) {
      // Yale interprets the numeric argument as an initial capacity.
      init_val = nm_interpret_initial_capacity(argv[1+offset]);
      init_val_len = 1;
    } else {
      init_val = nm_interpret_initial_value(argv[1+offset], dtype);// 4: initial value / dtype
      if (TYPE(argv[1+offset]) == T_ARRAY) init_val_len = RARRAY_LEN(argv[1+offset]);
      else                                 init_val_len = 1;
    }
  } else {
    // No initial value given: pick a sensible default per stype.
    if (stype == S_DENSE) { // no need to initialize dense with any kind of default value unless it's an NM_ROBJ matrix
      if (dtype == NM_ROBJ) { // pretend [nil] was passed for ROBJ.
        init_val = ALLOC(VALUE);
        SetFuncs[NM_ROBJ][NM_ROBJ](1, init_val, 0, &QNIL, 0);
        init_val_len = 1;
      } else init_val = NULL;
    } else if (stype == S_YALE) { // if it's a list or compressed, we want to assume default of 0 even if none provided
      init_val = ALLOC(size_t);
      *(size_t*)init_val = 0;
    } else {
      init_val = ALLOC_N(char, nm_sizeof[dtype]);
      //memset(init_val, 0, nm_sizeof[dtype]); // TODO: See if this works instead of the next line (with NM_ROBJ matrix).  Cleaner.
      SetFuncs[dtype][NM_BYTE](1, init_val, 0, &ZERO, 0);
    }
  }


  // TODO: Update to allow an array as the initial value.

  if (dtype == NM_NONE) {
    rb_raise(rb_eArgError, "Could not recognize dtype");
    // NOTE(review): unreachable -- rb_raise does not return, so these frees
    // never run and init_val/shape leak on this path. Confirm and reorder.
    free(init_val);
    free(shape);
    return nm;
  }

  if (stype < S_TYPES) {
    UnwrapNMatrix( nm, nmatrix );

    nmatrix->stype   = stype;
    // NOTE(review): storage constructors presumably take ownership of shape
    // and init_val (nm_yale_new frees init_val itself) -- confirm for dense/list.
    nmatrix->storage = CreateFuncs[stype](shape, rank, dtype, init_val, init_val_len, nm);

    return nm;
  } else
    rb_raise(rb_eNotImpError, "Unrecognized storage type");
    // NOTE(review): the frees below are likewise unreachable after rb_raise.

  free(shape);
  free(init_val);
  return nm;
}
825
+
826
+
827
// Allocation function for NMatrix: creates an empty wrapper whose storage is
// filled in later by nm_init. stype is set to the S_TYPES sentinel to mark
// "not yet initialized".
// NOTE(review): MarkFuncs[S_TYPES] indexes one past the three initializers
// visible above -- unless nm_mark_t is declared with an extra slot, this
// reads out of bounds. Confirm nm_mark_t's size in nmatrix.h.
static VALUE nm_alloc(VALUE klass) {
  NMATRIX* mat = ALLOC(NMATRIX);
  mat->storage = NULL;
  mat->stype   = S_TYPES;
  return Data_Wrap_Struct(klass, MarkFuncs[mat->stype], nm_delete, mat);
}
833
+
834
+
835
+ // This is the "back-door initializer," for when Ruby needs to create the object in an atypical way.
836
+ //
837
+ // Note that objects created this way will have NULL storage.
838
+ /*static VALUE nm_initialize(VALUE self, VALUE stype, VALUE dtype) {
839
+ NMATRIX* matrix;
840
+ UnwrapNMatrix(self, matrix);
841
+
842
+ matrix->stype = nm_interpret_stype(stype);
843
+ matrix->dtype = nm_interpret_dtype(1, &dtype, stype);
844
+ matrix->storage = NULL;
845
+
846
+ return self;
847
+ }*/
848
+
849
+
850
// initialize_copy hook (dup/clone): deep-copies the original's storage into
// copy, preserving stype and dtype. No-op when copy and original are the
// same object.
static VALUE nm_init_copy(VALUE copy, VALUE original) {
  NMATRIX *lhs, *rhs;

  CheckNMatrixType(original);

  if (copy == original) return copy;

  UnwrapNMatrix( original, rhs );
  UnwrapNMatrix( copy,     lhs );

  lhs->stype = rhs->stype;

  // Copy the storage (same dtype, so this is a plain deep copy).
  lhs->storage = CastCopyFuncs[rhs->stype](rhs->storage, rhs->storage->dtype);

  return copy;
}
867
+
868
+
869
// Like nm_init_copy, but converts the storage to new_dtype_symbol's dtype
// while copying. The stype is preserved.
static VALUE nm_init_cast_copy(VALUE copy, VALUE original, VALUE new_dtype_symbol) {
  NMATRIX *lhs, *rhs;
  int8_t new_dtype = nm_dtypesymbol_to_dtype(new_dtype_symbol);
  //fprintf(stderr,"In copy constructor\n");

  CheckNMatrixType(original);

  if (copy == original) return copy;

  UnwrapNMatrix( original, rhs );
  UnwrapNMatrix( copy,     lhs );

  lhs->stype = rhs->stype;

  // Copy the storage, converting to the requested dtype.
  lhs->storage = CastCopyFuncs[rhs->stype](rhs->storage, new_dtype);

  return copy;
}
888
+
889
+
890
+ /*
891
+ * Create a copy of an NMatrix with a different dtype. See also cast.
892
+ */
893
+ // TODO: Deprecate this function and farm it out to scast_copy. as_dtype will still work, but it'll be in pure Ruby and
894
+ // just use ::cast instead.
895
+ static VALUE nm_cast_copy(VALUE self, VALUE new_dtype_symbol) {
896
+ NMATRIX *original, *copy;
897
+ int8_t new_dtype = nm_dtypesymbol_to_dtype(new_dtype_symbol);
898
+
899
+ CheckNMatrixType(self);
900
+
901
+ UnwrapNMatrix(self, original);
902
+
903
+ copy = ALLOC(NMATRIX);
904
+ copy->stype = original->stype;
905
+ copy->storage = CastCopyFuncs[original->stype](original->storage, new_dtype);
906
+
907
+ return Data_Wrap_Struct(cNMatrix, MarkFuncs[copy->stype], nm_delete, copy);
908
+ }
909
+
910
+
911
+ /*
912
+ * Create a copy of an NMatrix with a different stype and dtype. See also cast.
913
+ *
914
+ * m.cast(:dense, :int64)
915
+ *
916
+ */
917
+ static VALUE nm_scast_copy(VALUE self, VALUE new_stype_symbol, VALUE new_dtype_symbol) {
918
+ NMATRIX* original, *copy;
919
+ int8_t new_dtype = nm_dtypesymbol_to_dtype(new_dtype_symbol);
920
+ int8_t new_stype = nm_stypesymbol_to_stype(new_stype_symbol);
921
+
922
+ CheckNMatrixType(self);
923
+
924
+ UnwrapNMatrix(self, original);
925
+
926
+ copy = ALLOC(NMATRIX);
927
+ copy->stype = new_stype;
928
+
929
+ // Copy and scast the storage.
930
+ if (new_stype == original->stype) copy->storage = CastCopyFuncs[original->stype](original->storage, new_dtype);
931
+ else copy->storage = ScastCopyFuncs[copy->stype][original->stype](original->storage, new_dtype);
932
+
933
+ return Data_Wrap_Struct(cNMatrix, MarkFuncs[copy->stype], nm_delete, copy);
934
+ }
935
+
936
+
937
+
938
+ // Cast a single matrix to a new dtype (unless it's already casted, then just return it). Helper for binary_storage_cast_alloc.
939
+ static inline STORAGE* storage_cast_alloc(NMATRIX* matrix, int8_t new_dtype) {
940
+ if (matrix->storage->dtype == new_dtype) return matrix->storage;
941
+ else return CastCopyFuncs[matrix->stype](matrix->storage, new_dtype);
942
+ }
943
+
944
+
945
+ // Cast a pair of matrices for a binary operation to a new dtype (which this function determines). Technically, only
946
+ // does an actual cast on matrices that are the wrong dtype; otherwise returns a reference to the original. Bear this in
947
+ // mind when freeing memory!
948
+ static inline STORAGE_PAIR binary_storage_cast_alloc(NMATRIX* left_matrix, NMATRIX* right_matrix) {
949
+ STORAGE_PAIR casted;
950
+ int8_t new_dtype = Upcast[left_matrix->storage->dtype][right_matrix->storage->dtype];
951
+
952
+ casted.left = storage_cast_alloc(left_matrix, new_dtype);
953
+ casted.right = storage_cast_alloc(right_matrix, new_dtype);
954
+
955
+ return casted;
956
+ }
957
+
958
+ /*
959
+ * Equality operator. Returns a single true or false value indicating whether the matrices are equivalent.
960
+ *
961
+ * For elementwise, use == instead.
962
+ *
963
+ * This method will raise an exception if dimensions do not match.
964
+ */
965
+ static VALUE nm_eqeq(VALUE left, VALUE right) {
966
+ bool result;
967
+ NMATRIX *l, *r;
968
+ STORAGE_PAIR casted;
969
+
970
+ CheckNMatrixType(left);
971
+ CheckNMatrixType(right);
972
+
973
+ UnwrapNMatrix(left, l);
974
+ UnwrapNMatrix(right, r);
975
+
976
+ if (l->stype != r->stype) //rb_raise(nm_eStorageTypeError, "wrong storage type");
977
+ rb_raise(rb_eNotImpError, "comparison between different matrix stypes not yet implemented");
978
+
979
+ casted = binary_storage_cast_alloc(l, r);
980
+
981
+ result = EqEqFuncs[l->stype](casted.left, casted.right);
982
+
983
+ // Free any casted-storage we created for the comparison.
984
+ // TODO: Can we make the Ruby GC take care of this stuff now that we're using it?
985
+ // If we did that, we night not have to re-create these every time, right? Or wrong? Need to do
986
+ // more research.
987
+ if (l->storage != casted.left) DeleteFuncs[l->stype](casted.left);
988
+ if (r->storage != casted.right) DeleteFuncs[l->stype](casted.right);
989
+
990
+ return result ? Qtrue : Qfalse;
991
+ }
992
+
993
+
994
+ static VALUE multiply_matrix(NMATRIX* left, NMATRIX* right) {
995
+ ///TODO: multiplication for non-dense and/or non-decimal matrices
996
+ size_t* resulting_shape = ALLOC_N(size_t, 2);
997
+ NMATRIX* result;
998
+ bool vector = false;
999
+
1000
+ // Make sure both of our matrices are of the correct type.
1001
+ STORAGE_PAIR casted = binary_storage_cast_alloc(left, right);
1002
+
1003
+ resulting_shape[0] = left->storage->shape[0];
1004
+ resulting_shape[1] = right->storage->shape[1];
1005
+
1006
+ // Sometimes we only need to use matrix-vector multiplication (e.g., GEMM versus GEMV). Find out.
1007
+ if (resulting_shape[1] == 1) vector = true;
1008
+
1009
+ result = CastedMultiplyFuncs[left->stype](casted, resulting_shape, vector);
1010
+
1011
+ // Free any casted-storage we created for the multiplication.
1012
+ // TODO: Can we make the Ruby GC take care of this stuff now that we're using it?
1013
+ // If we did that, we night not have to re-create these every time, right? Or wrong? Need to do
1014
+ // more research.
1015
+ if (left->storage != casted.left) DeleteFuncs[left->stype](casted.left);
1016
+ if (right->storage != casted.right) DeleteFuncs[left->stype](casted.right);
1017
+
1018
+ if (result) return Data_Wrap_Struct(cNMatrix, MarkFuncs[result->stype], nm_delete, result);
1019
+ return Qnil; // Only if we try to multiply list matrices should we return Qnil.
1020
+ }
1021
+
1022
+
1023
+ static VALUE multiply_scalar(NMATRIX* left, VALUE scalar) {
1024
+ rb_raise(rb_eNotImpError, "matrix-scalar multiplication not implemented yet");
1025
+ return Qnil;
1026
+ }
1027
+
1028
+
1029
+ /*
1030
+ * Matrix multiply (dot product): against another matrix or a vector.
1031
+ *
1032
+ * For elementwise, use * instead.
1033
+ *
1034
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1035
+ */
1036
+ static VALUE nm_multiply(VALUE left_v, VALUE right_v) {
1037
+ NMATRIX *left, *right;
1038
+
1039
+ // left has to be of type NMatrix.
1040
+ CheckNMatrixType(left_v);
1041
+
1042
+ UnwrapNMatrix( left_v, left );
1043
+
1044
+ if (IS_NUMERIC(right_v))
1045
+ return multiply_scalar(left, right_v);
1046
+
1047
+ else if (TYPE(right_v) == T_ARRAY)
1048
+ rb_raise(rb_eNotImpError, "for matrix-vector multiplication, please use an NVector instead of an Array for now");
1049
+
1050
+ //if (RDATA(right_v)->dfree != (RUBY_DATA_FUNC)nm_delete) {
1051
+ else if (TYPE(right_v) == T_DATA && RDATA(right_v)->dfree == (RUBY_DATA_FUNC)nm_delete) { // both are matrices
1052
+ UnwrapNMatrix( right_v, right );
1053
+
1054
+ if (left->storage->shape[1] != right->storage->shape[0])
1055
+ rb_raise(rb_eArgError, "incompatible dimensions");
1056
+
1057
+ if (left->stype != right->stype)
1058
+ rb_raise(rb_eNotImpError, "matrices must have same stype");
1059
+
1060
+ return multiply_matrix(left, right);
1061
+
1062
+ } else rb_raise(rb_eTypeError, "expected right operand to be NMatrix, NVector, or single numeric value");
1063
+
1064
+ return Qnil;
1065
+ }
1066
+
1067
+
1068
+ static VALUE nm_elementwise(VALUE leftv, VALUE rightv, char op) {
1069
+ ///TODO: multiplication for non-dense and/or non-decimal matrices
1070
+ NMATRIX *result, *left, *right;
1071
+ STORAGE_PAIR casted;
1072
+
1073
+ CheckNMatrixType(leftv);
1074
+ CheckNMatrixType(rightv);
1075
+
1076
+ UnwrapNMatrix(rightv, right);
1077
+ UnwrapNMatrix(leftv, left);
1078
+
1079
+ // Make sure both of our matrices are of the correct type.
1080
+ casted = binary_storage_cast_alloc(left, right);
1081
+
1082
+ result = CastedElementwiseFuncs[left->stype](casted, op);
1083
+
1084
+ // Free up temporary casted matrices
1085
+ if (left->storage != casted.left) DeleteFuncs[left->stype](casted.left);
1086
+ if (right->storage != casted.right) DeleteFuncs[left->stype](casted.right);
1087
+
1088
+ if (result) return Data_Wrap_Struct(cNMatrix, MarkFuncs[result->stype], nm_delete, result);
1089
+ return Qnil; // Only if we try to multiply list matrices should we return Qnil.
1090
+ }
1091
+
1092
+
1093
+ /*
1094
+ * Matrix element-wise addition.
1095
+ *
1096
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1097
+ *
1098
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1099
+ */
1100
+ static VALUE nm_ew_add(VALUE left, VALUE right) {
1101
+ return nm_elementwise(left, right, '+');
1102
+ }
1103
+
1104
+ /*
1105
+ * Matrix element-wise subtraction.
1106
+ *
1107
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1108
+ *
1109
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1110
+ */
1111
+ static VALUE nm_ew_subtract(VALUE left, VALUE right) {
1112
+ return nm_elementwise(left, right, '-');
1113
+ }
1114
+
1115
+ /*
1116
+ * Matrix element-wise multiplication.
1117
+ *
1118
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1119
+ *
1120
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1121
+ *
1122
+ * For dot product, use +dot+ instead.
1123
+ */
1124
+ static VALUE nm_ew_multiply(VALUE left, VALUE right) {
1125
+ return nm_elementwise(left, right, '*');
1126
+ }
1127
+
1128
+ /*
1129
+ * Matrix element-wise division.
1130
+ *
1131
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1132
+ *
1133
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1134
+ */
1135
+ static VALUE nm_ew_divide(VALUE left, VALUE right) {
1136
+ return nm_elementwise(left, right, '/');
1137
+ }
1138
+
1139
+
1140
+ /*
1141
+ * Matrix element-wise comparison (equality) operator.
1142
+ *
1143
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1144
+ *
1145
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1146
+ *
1147
+ * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
1148
+ * want, use +cast+.
1149
+ */
1150
+ static VALUE nm_ew_eqeq(VALUE left, VALUE right) {
1151
+ return nm_elementwise(left, right, NM_OP_EQEQ);
1152
+ }
1153
+
1154
+ /*
1155
+ * Matrix element-wise less-than-or-equals operator.
1156
+ *
1157
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1158
+ *
1159
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1160
+ *
1161
+ * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
1162
+ * want, use +cast+.
1163
+ */
1164
+ static VALUE nm_ew_leq(VALUE left, VALUE right) {
1165
+ return nm_elementwise(left, right, NM_OP_LTE);
1166
+ }
1167
+
1168
+
1169
+ /*
1170
+ * Matrix element-wise greater-than-or-equals operator.
1171
+ *
1172
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1173
+ *
1174
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1175
+ *
1176
+ * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
1177
+ * want, use +cast+.
1178
+ */
1179
+ static VALUE nm_ew_geq(VALUE left, VALUE right) {
1180
+ return nm_elementwise(left, right, NM_OP_GTE);
1181
+ }
1182
+
1183
+
1184
+ /*
1185
+ * Matrix element-wise strictly-less-than operator.
1186
+ *
1187
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1188
+ *
1189
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1190
+ *
1191
+ * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
1192
+ * want, use +cast+.
1193
+ */
1194
+ static VALUE nm_ew_lt(VALUE left, VALUE right) {
1195
+ return nm_elementwise(left, right, '<');
1196
+ }
1197
+
1198
+
1199
+ /*
1200
+ * Matrix element-wise strictly-greater-than operator.
1201
+ *
1202
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1203
+ *
1204
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1205
+ *
1206
+ * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
1207
+ * want, use +cast+.
1208
+ */
1209
+ static VALUE nm_ew_gt(VALUE left, VALUE right) {
1210
+ return nm_elementwise(left, right, '>');
1211
+ }
1212
+
1213
+
1214
+ /*
1215
+ * Matrix element-wise inequality operator.
1216
+ *
1217
+ * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
1218
+ *
1219
+ * Not available for list matrices. You should cast to a yale or dense matrix first.
1220
+ *
1221
+ * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
1222
+ * want, use +cast+.
1223
+ */
1224
+ static VALUE nm_ew_neq(VALUE left, VALUE right) {
1225
+ return nm_elementwise(left, right, NM_OP_NEQ);
1226
+ }
1227
+
1228
+
1229
+ // Borrowed this function from NArray. Handles 'each' iteration on a dense matrix.
1230
+ //
1231
+ // Additionally, handles separately matrices containing VALUEs and matrices containing
1232
+ // other types of data.
1233
+ static VALUE nm_dense_each(VALUE nmatrix) {
1234
+ DENSE_STORAGE* s = (DENSE_STORAGE*)(NM_STORAGE(nmatrix));
1235
+ VALUE v;
1236
+ size_t i;
1237
+
1238
+ void (*copy)();
1239
+
1240
+ if (NM_DTYPE(nmatrix) == NM_ROBJ) {
1241
+
1242
+ // matrix of Ruby objects -- yield directly
1243
+ for (i = 0; i < count_dense_storage_elements(s); ++i)
1244
+ rb_yield( *((VALUE*)((char*)(s->elements) + i*nm_sizeof[NM_DTYPE(nmatrix)])) );
1245
+
1246
+ } else {
1247
+ // We're going to copy the matrix element into a Ruby VALUE and then operate on it.
1248
+ copy = SetFuncs[NM_ROBJ][NM_DTYPE(nmatrix)];
1249
+
1250
+ for (i = 0; i < count_dense_storage_elements(s); ++i) {
1251
+ (*copy)(1, &v, 0, (char*)(s->elements) + i*nm_sizeof[NM_DTYPE(nmatrix)], 0);
1252
+ rb_yield(v); // yield to the copy we made
1253
+ }
1254
+ }
1255
+
1256
+ return nmatrix;
1257
+ }
1258
+
1259
+
1260
+ /*
1261
+ * Iterate over the matrix as you would an Enumerable (e.g., Array).
1262
+ *
1263
+ * Currently only works for dense.
1264
+ */
1265
+ static VALUE nm_each(VALUE nmatrix) {
1266
+ volatile VALUE nm = nmatrix; // not sure why we do this, but it gets done in ruby's array.c.
1267
+
1268
+ switch(NM_STYPE(nm)) {
1269
+ case S_DENSE:
1270
+ return nm_dense_each(nm);
1271
+ default:
1272
+ rb_raise(rb_eNotImpError, "only dense each works right now");
1273
+ }
1274
+ }
1275
+
1276
+
1277
+ // Does not create storage, but does destroy it.
1278
+ NMATRIX* nm_create(int8_t stype, void* storage) {
1279
+ NMATRIX* mat = ALLOC(NMATRIX);
1280
+
1281
+ mat->stype = stype;
1282
+ mat->storage = storage;
1283
+
1284
+ return mat;
1285
+ }
1286
+
1287
+
1288
+ static size_t* convert_coords(size_t rank, VALUE* c, VALUE self) {
1289
+ size_t r;
1290
+ size_t* coords = ALLOC_N(size_t,rank);
1291
+
1292
+ for (r = 0; r < rank; ++r) {
1293
+ coords[r] = FIX2UINT(c[r]);
1294
+ if (coords[r] >= NM_SHAPE(self,r)) rb_raise(rb_eArgError, "out of range");
1295
+ }
1296
+
1297
+ return coords;
1298
+ }
1299
+
1300
+
1301
+ /*
1302
+ * Access the contents of an NMatrix at given coordinates.
1303
+ *
1304
+ * n[3,3] # => 5.0
1305
+ *
1306
+ */
1307
+ VALUE nm_mref(int argc, VALUE* argv, VALUE self) {
1308
+ VALUE v;
1309
+
1310
+ if (NM_RANK(self) == (size_t)(argc)) {
1311
+
1312
+ SetFuncs[NM_ROBJ][NM_DTYPE(self)](1, &v, 0,
1313
+ RefFuncs[NM_STYPE(self)](NM_STORAGE(self),
1314
+ convert_coords((size_t)(argc), argv, self)
1315
+ ), 0);
1316
+ return v;
1317
+
1318
+ } else if (NM_RANK(self) < (size_t)(argc)) {
1319
+ rb_raise(rb_eArgError, "Coordinates given exceed matrix rank");
1320
+ } else {
1321
+ rb_raise(rb_eNotImpError, "Slicing not supported yet");
1322
+ }
1323
+ return Qnil;
1324
+ }
1325
+
1326
+
1327
+ /*
1328
+ * Modify the contents of an NMatrix in the given cell
1329
+ *
1330
+ * n[3,3] = 5.0
1331
+ *
1332
+ * Also returns the new contents, so you can chain:
1333
+ *
1334
+ * n[3,3] = n[2,3] = 5.0
1335
+ */
1336
+ VALUE nm_mset(int argc, VALUE* argv, VALUE self) {
1337
+ size_t rank = argc - 1; // last arg is the value
1338
+
1339
+ if (argc <= 1) {
1340
+ rb_raise(rb_eArgError, "Expected coordinates and r-value");
1341
+
1342
+ } else if (NM_RANK(self) == rank) {
1343
+ return (*(InsFuncs[NM_STYPE(self)]))( NM_STORAGE(self),
1344
+ convert_coords(rank, argv, self),
1345
+ argv[rank] );
1346
+
1347
+ } else if (NM_RANK(self) < rank) {
1348
+ rb_raise(rb_eArgError, "Coordinates given exceed matrix rank");
1349
+ } else {
1350
+ rb_raise(rb_eNotImpError, "Slicing not supported yet");
1351
+ }
1352
+ return Qnil;
1353
+ }
1354
+
1355
+
1356
+ /*
1357
+ * Get the rank of an NMatrix (the number of dimensions).
1358
+ *
1359
+ * In other words, if you set your matrix to be 3x4, the rank is 2. If the matrix was initialized as 3x4x3, the rank
1360
+ * is 3.
1361
+ *
1362
+ * This function may lie slightly for NVectors, which are internally stored as rank 2 (and have an orientation), but
1363
+ * act as if they're rank 1.
1364
+ */
1365
+ VALUE nm_rank(VALUE self) {
1366
+ VALUE ret;
1367
+ SetFuncs[NM_ROBJ][NM_INT64]( 1, &ret, 0, &(NM_STORAGE(self)->rank), 0 );
1368
+ return ret;
1369
+ }
1370
+
1371
+
1372
+ /*
1373
+ * Get the shape (dimensions) of a matrix.
1374
+ */
1375
+ VALUE nm_shape(VALUE self) {
1376
+ STORAGE* s = NM_STORAGE(self);
1377
+
1378
+ // Copy elements into a VALUE array and then use those to create a Ruby array with rb_ary_new4.
1379
+ VALUE* shape = ALLOCA_N(VALUE, s->rank);
1380
+ SetFuncs[NM_ROBJ][NM_SIZE_T]( s->rank, shape, sizeof(VALUE), s->shape, sizeof(size_t));
1381
+
1382
+ return rb_ary_new4(s->rank, shape);
1383
+ }
1384
+
1385
+
1386
+ /*
1387
+ * Get the storage type (stype) of a matrix, e.g., :yale, :dense, or :list.
1388
+ */
1389
+ static VALUE nm_stype(VALUE self) {
1390
+ ID stype = rb_intern(nm_stypestring[NM_STYPE(self)]);
1391
+ return ID2SYM(stype);
1392
+ }
1393
+
1394
+
1395
+ /*
1396
+ * Get the data type (dtype) of a matrix, e.g., :byte, :int8, :int16, :int32, :int64, :float32, :float64, :complex64,
1397
+ * :complex128, :rational32, :rational64, :rational128, or :object (the last is a Ruby object).
1398
+ */
1399
+ static VALUE nm_dtype(VALUE self) {
1400
+ ID dtype = rb_intern(nm_dtypestring[NM_DTYPE(self)]);
1401
+ return ID2SYM(dtype);
1402
+ }
1403
+
1404
+
1405
+ /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
1406
+ * into an enum recognized by cblas.
1407
+ *
1408
+ * Called by nm_cblas_gemm -- basically inline.
1409
+ *
1410
+ */
1411
+ static char gemm_op_sym(VALUE op) {
1412
+ if (op == false || rb_to_id(op) == nm_id_no_transpose) return CblasNoTrans;
1413
+ else if (rb_to_id(op) == nm_id_transpose) return CblasTrans;
1414
+ else if (rb_to_id(op) == nm_id_complex_conjugate) return CblasConjTrans;
1415
+ else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
1416
+ return CblasNoTrans;
1417
+ }
1418
+
1419
+
1420
+ /* Call any of the cblas_xgemm functions as directly as possible.
1421
+ *
1422
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
1423
+ *
1424
+ * C = alpha*op(A)*op(B) + beta*C
1425
+ *
1426
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
1427
+ *
1428
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
1429
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
1430
+ * expose the ultra-optimized ATLAS versions.
1431
+ *
1432
+ * == Arguments
1433
+ * See: http://www.netlib.org/blas/dgemm.f
1434
+ *
1435
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemm, which is more flexible
1436
+ * with its arguments?
1437
+ *
1438
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1439
+ * handling, so you can easily crash Ruby!
1440
+ */
1441
+ static VALUE nm_cblas_gemm(VALUE self,
1442
+ VALUE trans_a, VALUE trans_b,
1443
+ VALUE m, VALUE n, VALUE k,
1444
+ VALUE alpha,
1445
+ VALUE a, VALUE lda,
1446
+ VALUE b, VALUE ldb,
1447
+ VALUE beta,
1448
+ VALUE c, VALUE ldc)
1449
+ {
1450
+ struct cblas_param_t p = cblas_params_for_multiply(((DENSE_STORAGE*)(NM_STORAGE(a))), ((DENSE_STORAGE*)(NM_STORAGE(b))), ((DENSE_STORAGE*)(NM_STORAGE(c))), false);
1451
+ p.M = FIX2INT(m);
1452
+ p.N = FIX2INT(n);
1453
+ p.K = FIX2INT(k);
1454
+
1455
+ p.lda = FIX2INT(lda);
1456
+ p.ldb = FIX2INT(ldb);
1457
+ p.ldc = FIX2INT(ldc);
1458
+
1459
+ switch(NM_DTYPE(c)) {
1460
+ case NM_FLOAT32:
1461
+ case NM_FLOAT64:
1462
+ p.alpha.d[0] = NUM2DBL(alpha);
1463
+ p.beta.d[0] = NUM2DBL(beta);
1464
+ break;
1465
+
1466
+ case NM_COMPLEX64:
1467
+ p.alpha.c[0].r = REAL2DBL(alpha);
1468
+ p.alpha.c[0].i = IMAG2DBL(alpha);
1469
+ p.beta.c[0].r = REAL2DBL(beta);
1470
+ p.beta.c[0].i = IMAG2DBL(beta);
1471
+ break;
1472
+
1473
+ case NM_COMPLEX128:
1474
+ p.alpha.z.r = REAL2DBL(alpha);
1475
+ p.alpha.z.i = IMAG2DBL(alpha);
1476
+ p.beta.z.r = REAL2DBL(beta);
1477
+ p.beta.z.i = IMAG2DBL(beta);
1478
+ break;
1479
+
1480
+ case NM_BYTE:
1481
+ p.alpha.b[0] = FIX2INT(alpha);
1482
+ p.beta.b[0] = FIX2INT(beta);
1483
+ break;
1484
+
1485
+ case NM_INT8:
1486
+ case NM_INT16:
1487
+ case NM_INT32:
1488
+ case NM_INT64:
1489
+ p.alpha.i[0] = FIX2INT(alpha);
1490
+ p.beta.i[0] = FIX2INT(beta);
1491
+ break;
1492
+
1493
+ case NM_RATIONAL32:
1494
+ p.alpha.r[0].n = NUMER2INT(alpha);
1495
+ p.alpha.r[0].d = DENOM2INT(alpha);
1496
+ p.beta.r[0].n = NUMER2INT(beta);
1497
+ p.beta.r[0].d = DENOM2INT(beta);
1498
+ break;
1499
+
1500
+ case NM_RATIONAL64:
1501
+ p.alpha.ra[0].n = NUMER2INT(alpha);
1502
+ p.alpha.ra[0].d = DENOM2INT(alpha);
1503
+ p.beta.ra[0].n = NUMER2INT(beta);
1504
+ p.beta.ra[0].d = DENOM2INT(beta);
1505
+ break;
1506
+
1507
+ case NM_RATIONAL128:
1508
+ p.alpha.rat.n = NUMER2INT(alpha);
1509
+ p.alpha.rat.d = DENOM2INT(alpha);
1510
+ p.beta.rat.n = NUMER2INT(beta);
1511
+ p.beta.rat.d = DENOM2INT(beta);
1512
+ break;
1513
+
1514
+ case NM_ROBJ:
1515
+ p.alpha.v[0] = alpha;
1516
+ p.beta.v[0] = beta;
1517
+ break;
1518
+
1519
+ default:
1520
+ rb_raise(nm_eDataTypeError, "unexpected dtype");
1521
+
1522
+ }
1523
+
1524
+ /* fprintf(stderr, "cblas_gemm: %d %d %d %d %d %f %d %d %f %d\n", trans_a_, trans_b_,
1525
+ m_, n_, k_, alpha_, lda_, ldb_, beta_, ldc_); */
1526
+
1527
+ GemmFuncs[NM_DTYPE(c)](CblasRowMajor, gemm_op_sym(trans_a), gemm_op_sym(trans_b), p);
1528
+
1529
+ return Qtrue;
1530
+ }
1531
+
1532
+
1533
+ /* Call any of the cblas_xgemv functions as directly as possible.
1534
+ *
1535
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
1536
+ *
1537
+ * y = alpha*op(A)*x + beta*y
1538
+ *
1539
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
1540
+ *
1541
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
1542
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
1543
+ * expose the ultra-optimized ATLAS versions.
1544
+ *
1545
+ * == Arguments
1546
+ * See: http://www.netlib.org/blas/dgemm.f
1547
+ *
1548
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
1549
+ * with its arguments?
1550
+ *
1551
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1552
+ * handling, so you can easily crash Ruby!
1553
+ */
1554
+ static VALUE nm_cblas_gemv(VALUE self,
1555
+ VALUE trans_a,
1556
+ VALUE m, VALUE n,
1557
+ VALUE alpha,
1558
+ VALUE a, VALUE lda,
1559
+ VALUE x, VALUE incx,
1560
+ VALUE beta,
1561
+ VALUE y, VALUE incy) {
1562
+
1563
+ struct cblas_param_t p;
1564
+ p.M = FIX2INT(m);
1565
+ p.N = FIX2INT(n);
1566
+ p.A = ((DENSE_STORAGE*)(NM_STORAGE(a)))->elements;
1567
+ p.B = ((DENSE_STORAGE*)(NM_STORAGE(x)))->elements;
1568
+ p.C = ((DENSE_STORAGE*)(NM_STORAGE(y)))->elements;
1569
+ p.lda = FIX2INT(lda);
1570
+ p.ldb = FIX2INT(incx);
1571
+ p.ldc = FIX2INT(incy);
1572
+
1573
+ switch(NM_DTYPE(y)) {
1574
+ case NM_FLOAT32:
1575
+ case NM_FLOAT64:
1576
+ p.alpha.d[0] = REAL2DBL(alpha);
1577
+ p.beta.d[0] = REAL2DBL(beta);
1578
+ break;
1579
+ case NM_COMPLEX64:
1580
+ p.alpha.c[0].r = REAL2DBL(alpha);
1581
+ p.alpha.c[0].i = IMAG2DBL(alpha);
1582
+ p.beta.c[0].r = REAL2DBL(beta);
1583
+ p.beta.c[0].i = IMAG2DBL(beta);
1584
+ break;
1585
+ case NM_COMPLEX128:
1586
+ p.alpha.z.r = REAL2DBL(alpha);
1587
+ p.alpha.z.i = IMAG2DBL(alpha);
1588
+ p.beta.z.r = REAL2DBL(beta);
1589
+ p.beta.z.i = IMAG2DBL(beta);
1590
+ break;
1591
+ }
1592
+
1593
+ /* fprintf(stderr, "cblas_gemm: %d %d %d %d %d %f %d %d %f %d\n", trans_a_, trans_b_,
1594
+ m_, n_, k_, alpha_, lda_, ldb_, beta_, ldc_); */
1595
+
1596
+ GemvFuncs[NM_DTYPE(y)](CblasRowMajor, gemm_op_sym(trans_a), p);
1597
+
1598
+ return Qtrue;
1599
+ }
1600
+
1601
+
1602
+ /*
1603
+ * Find the capacity of an NMatrix. The capacity only differs from the size for Yale matrices, which occasionally
1604
+ * allocate more space than they need. For list and dense, capacity gives the number of elements in the matrix.
1605
+ */
1606
+ static VALUE nm_capacity(VALUE self) {
1607
+ VALUE cap;
1608
+
1609
+ switch(NM_STYPE(self)) {
1610
+ case S_YALE:
1611
+ cap = UINT2NUM(((YALE_STORAGE*)(NM_STORAGE(self)))->capacity);
1612
+ break;
1613
+
1614
+ case S_DENSE:
1615
+ cap = UINT2NUM(count_dense_storage_elements( (DENSE_STORAGE*)(NM_STORAGE(self)) ));
1616
+ break;
1617
+
1618
+ case S_LIST:
1619
+ cap = UINT2NUM(count_list_storage_elements( (LIST_STORAGE*)(NM_STORAGE(self)) ));
1620
+ break;
1621
+
1622
+ default:
1623
+ //rb_raise(rb_eNotImpError, "TODO: implement capacity/size on other storage types");
1624
+ rb_raise(nm_eStorageTypeError, "unrecognized stype");
1625
+ }
1626
+
1627
+ return cap;
1628
+ }
1629
+
1630
+
1631
+ /*
1632
+ * Get the size of a Yale matrix (the number of elements actually stored).
1633
+ *
1634
+ * For capacity (the maximum number of elements that can be stored without a resize), use capacity instead.
1635
+ */
1636
+ static VALUE nm_yale_size(VALUE self) {
1637
+ VALUE sz;
1638
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1639
+
1640
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1641
+
1642
+ SetFuncs[NM_ROBJ][s->index_dtype](1, &sz, 0, (YALE_SIZE_PTR((s), nm_sizeof[s->index_dtype])), 0);
1643
+ return sz;
1644
+ }
1645
+
1646
+
1647
+ /*
1648
+ * Get the A array of a Yale matrix (which stores the diagonal and the LU portions of the matrix).
1649
+ */
1650
+ static VALUE nm_yale_a(VALUE self) {
1651
+ y_size_t sz, i;
1652
+ void* vals;
1653
+ VALUE ary;
1654
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1655
+
1656
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1657
+
1658
+ YaleGetSize(sz, s);
1659
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*sz);
1660
+
1661
+ SetFuncs[NM_ROBJ][s->dtype](sz, vals, nm_sizeof[NM_ROBJ], s->a, nm_sizeof[s->dtype]);
1662
+ ary = rb_ary_new4(sz, vals);
1663
+
1664
+ for (i = sz; i < s->capacity; ++i)
1665
+ rb_ary_push(ary, Qnil);
1666
+
1667
+ return ary;
1668
+ }
1669
+
1670
+
1671
+ /*
1672
+ * Get the diagonal ("D") portion of the A array of a Yale matrix.
1673
+ */
1674
+ static VALUE nm_yale_d(VALUE self) {
1675
+ y_size_t sz;
1676
+ void* vals;
1677
+ VALUE ary;
1678
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1679
+
1680
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1681
+
1682
+ YaleGetSize(sz, s);
1683
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*s->shape[0]);
1684
+
1685
+ SetFuncs[NM_ROBJ][s->dtype](s->shape[0], vals, nm_sizeof[NM_ROBJ], s->a, nm_sizeof[s->dtype]);
1686
+ ary = rb_ary_new4(s->shape[0], vals);
1687
+
1688
+ return ary;
1689
+ }
1690
+
1691
+
1692
+ /*
1693
+ * Get the non-diagonal ("LU") portion of the A array of a Yale matrix.
1694
+ */
1695
+ static VALUE nm_yale_lu(VALUE self) {
1696
+ y_size_t sz, i;
1697
+ void* vals;
1698
+ VALUE ary;
1699
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1700
+
1701
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1702
+
1703
+ YaleGetSize(sz, s);
1704
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*(s->capacity - s->shape[0]));
1705
+
1706
+ SetFuncs[NM_ROBJ][s->dtype](sz - s->shape[0] - 1, vals, nm_sizeof[NM_ROBJ], (char*)(s->a) + (s->shape[0] + 1)*nm_sizeof[s->dtype], nm_sizeof[s->dtype]);
1707
+ ary = rb_ary_new4(sz - s->shape[0] - 1, vals);
1708
+
1709
+ for (i = sz; i < s->capacity; ++i)
1710
+ rb_ary_push(ary, Qnil);
1711
+
1712
+ return ary;
1713
+ }
1714
+
1715
+
1716
+ /*
1717
+ * Get the IA portion of the IJA array of a Yale matrix. This gives the start and end positions of rows in the
1718
+ * JA and LU portions of the IJA and A arrays, respectively.
1719
+ */
1720
+ static VALUE nm_yale_ia(VALUE self) {
1721
+ y_size_t sz;
1722
+ void* vals;
1723
+ VALUE ary;
1724
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1725
+
1726
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1727
+
1728
+ YaleGetSize(sz, s);
1729
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*(s->shape[0]+1));
1730
+
1731
+ SetFuncs[NM_ROBJ][s->index_dtype](s->shape[0]+1, vals, nm_sizeof[NM_ROBJ], s->ija, nm_sizeof[s->index_dtype]);
1732
+ ary = rb_ary_new4(s->shape[0]+1, vals);
1733
+
1734
+ return ary;
1735
+ }
1736
+
1737
+
1738
+ /*
1739
+ * Get the JA portion of the IJA array of a Yale matrix. This gives the column indices for entries in corresponding
1740
+ * positions in the LU portion of the A array.
1741
+ */
1742
+ static VALUE nm_yale_ja(VALUE self) {
1743
+ y_size_t sz, i;
1744
+ void* vals;
1745
+ VALUE ary;
1746
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1747
+
1748
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1749
+
1750
+ YaleGetSize(sz, s);
1751
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*(s->capacity - s->shape[0]));
1752
+
1753
+ SetFuncs[NM_ROBJ][s->index_dtype](sz - s->shape[0] - 1, vals, nm_sizeof[NM_ROBJ], (char*)(s->ija) + (s->shape[0] + 1)*nm_sizeof[s->index_dtype], nm_sizeof[s->index_dtype]);
1754
+ ary = rb_ary_new4(sz - s->shape[0] - 1, vals);
1755
+
1756
+ for (i = sz; i < s->capacity; ++i)
1757
+ rb_ary_push(ary, Qnil);
1758
+
1759
+ return ary;
1760
+ }
1761
+
1762
+
1763
+ /*
1764
+ * Get the IJA array of a Yale matrix.
1765
+ */
1766
+ static VALUE nm_yale_ija(VALUE self) {
1767
+ y_size_t sz, i;
1768
+ void* vals;
1769
+ VALUE ary;
1770
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1771
+
1772
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1773
+
1774
+ YaleGetSize(sz, s);
1775
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*s->capacity);
1776
+
1777
+ SetFuncs[NM_ROBJ][s->index_dtype](sz, vals, nm_sizeof[NM_ROBJ], s->ija, nm_sizeof[s->index_dtype]);
1778
+ ary = rb_ary_new4(sz, vals);
1779
+
1780
+ for (i = sz; i < s->capacity; ++i)
1781
+ rb_ary_push(ary, Qnil);
1782
+
1783
+ return ary;
1784
+ }
1785
+
1786
+
1787
// Generic copy-transpose for dense storage of any element width: B (N x M,
// leading dim ldb) receives the transpose of A (M x N, leading dim lda), moving
// dtype_size bytes per element. Probably faster and smaller than an array of
// per-dtype transpose functions; see transp.template.c (not the yale one) for
// the alternative. In-place transpose is a whole different bag of worms.
static void dense_transpose_generic(const unsigned int M, const unsigned int N, const char* A, const int lda, char* B, const int ldb, size_t dtype_size) {
  unsigned int row, col;

  for (row = 0; row < N; ++row)
    for (col = 0; col < M; ++col)
      memcpy(B + (row*ldb + col)*dtype_size,
             A + (col*lda + row)*dtype_size,
             dtype_size);
}
1800
+
1801
+
1802
+ /*
1803
+ * Create a transposed copy of this matrix.
1804
+ */
1805
+ static VALUE nm_transpose_new(VALUE self) {
1806
+ NMATRIX *self_m, *result, *result2;
1807
+ size_t sz;
1808
+ size_t* shape = ALLOC_N(size_t, 2);
1809
+ YALE_PARAM A, B;
1810
+ #ifdef BENCHMARK
1811
+ double t1, t2;
1812
+ #endif
1813
+
1814
+ UnwrapNMatrix( self, self_m );
1815
+
1816
+ // switch the dimensions
1817
+ shape[1] = self_m->storage->shape[0];
1818
+ shape[0] = self_m->storage->shape[1];
1819
+
1820
+ switch(self_m->stype) {
1821
+ case S_DENSE:
1822
+ result = nm_create(S_DENSE, create_dense_storage(self_m->storage->dtype, shape, 2, NULL, 0));
1823
+ dense_transpose_generic(
1824
+ self_m->storage->shape[0],
1825
+ self_m->storage->shape[1],
1826
+ ((DENSE_STORAGE*)(self_m->storage))->elements,
1827
+ self_m->storage->shape[1],
1828
+ ((DENSE_STORAGE*)(result->storage))->elements,
1829
+ result->storage->shape[1],
1830
+ nm_sizeof[self_m->storage->dtype]);
1831
+
1832
+ break;
1833
+ case S_YALE:
1834
+ YaleGetSize(sz, (YALE_STORAGE*)(self_m->storage)); // size of new matrix is going to be size of old matrix
1835
+ result = nm_create(S_YALE, create_yale_storage(self_m->storage->dtype, shape, 2, sz));
1836
+
1837
+ // TODO: Do we really need to initialize the whole thing? Or just the A portion?
1838
+ init_yale_storage((YALE_STORAGE*)(result->storage));
1839
+
1840
+ result2 = nm_create(S_YALE, create_yale_storage(self_m->storage->dtype, shape, 2, sz));
1841
+ init_yale_storage((YALE_STORAGE*)(result2->storage));
1842
+
1843
+ A.ia = A.ja = ((YALE_STORAGE*)(self_m->storage))->ija;
1844
+ B.ia = B.ja = ((YALE_STORAGE*)(result->storage))->ija;
1845
+ A.a = ((YALE_STORAGE*)(self_m->storage))->a;
1846
+ B.a = ((YALE_STORAGE*)(result->storage))->a;
1847
+ A.diag = true;
1848
+
1849
+ #ifdef BENCHMARK
1850
+ t1 = get_time();
1851
+ #endif
1852
+
1853
+ // call the appropriate function pointer
1854
+ SparseTransposeFuncs[ self_m->storage->dtype ][ ((YALE_STORAGE*)(self_m->storage))->index_dtype ](shape[0], shape[1], A, B, true);
1855
+ #ifdef BENCHMARK
1856
+ t1 = get_time() - t1;
1857
+ /*
1858
+ t2 = get_time();
1859
+ transp(
1860
+ shape[0],
1861
+ shape[1],
1862
+ ((YALE_STORAGE*)(self_m->storage))->ija,
1863
+ ((YALE_STORAGE*)(self_m->storage))->ija,
1864
+ true,
1865
+ ((YALE_STORAGE*)(self_m->storage))->a,
1866
+ ((YALE_STORAGE*)(result2->storage))->ija,
1867
+ ((YALE_STORAGE*)(result2->storage))->ija,
1868
+ ((YALE_STORAGE*)(result2->storage))->a,
1869
+ true, // move
1870
+ ((YALE_STORAGE*)(self_m->storage))->index_dtype,
1871
+ self_m->storage->dtype
1872
+ );
1873
+
1874
+ t2 = get_time() - t2;
1875
+ fprintf(stderr, "t1: %f\nt2: %f\n", t1, t2);
1876
+ */
1877
+ #endif
1878
+
1879
+ break;
1880
+ default:
1881
+ rb_raise(rb_eNotImpError, "transpose for this type not implemented yet");
1882
+ }
1883
+
1884
+ return Data_Wrap_Struct(cNMatrix, MarkFuncs[result->stype], nm_delete, result);
1885
+ }
1886
+
1887
+ //static VALUE nm_transpose_auto(VALUE self) {
1888
+ //
1889
+ //}
1890
+
1891
+ void Init_nmatrix() {
1892
+ /* Require Complex class */
1893
+ //rb_require("complex");
1894
+ //cComplex = rb_const_get( rb_cObject, rb_intern("Complex") );
1895
+
1896
+ /* Define NMatrix class */
1897
+ cNMatrix = rb_define_class("NMatrix", rb_cObject);
1898
+
1899
+ /* class methods */
1900
+ rb_define_singleton_method(cNMatrix, "__cblas_gemm__", nm_cblas_gemm, 13);
1901
+ rb_define_singleton_method(cNMatrix, "__cblas_gemv__", nm_cblas_gemv, 11);
1902
+
1903
+ rb_define_alloc_func(cNMatrix, nm_alloc);
1904
+ rb_define_method(cNMatrix, "initialize", nm_init, -1);
1905
+ // rb_define_singleton_method(cNMatrix, "new", nm_init, -1);
1906
+
1907
+
1908
+ rb_define_method(cNMatrix, "initialize_copy", nm_init_copy, 1);
1909
+ rb_define_method(cNMatrix, "initialize_cast_copy", nm_init_cast_copy, 2);
1910
+ rb_define_method(cNMatrix, "as_dtype", nm_cast_copy, 1);
1911
+
1912
+ /* methods */
1913
+ rb_define_method(cNMatrix, "dtype", nm_dtype, 0);
1914
+ rb_define_method(cNMatrix, "stype", nm_stype, 0);
1915
+ rb_define_method(cNMatrix, "cast", nm_scast_copy, 2);
1916
+
1917
+ rb_define_method(cNMatrix, "[]", nm_mref, -1);
1918
+ rb_define_method(cNMatrix, "[]=", nm_mset, -1);
1919
+ rb_define_method(cNMatrix, "rank", nm_rank, 0);
1920
+ rb_define_alias(cNMatrix, "dim", "rank");
1921
+ rb_define_method(cNMatrix, "shape", nm_shape, 0);
1922
+ rb_define_method(cNMatrix, "transpose", nm_transpose_new, 0);
1923
+ //rb_define_method(cNMatrix, "transpose!", nm_transpose_auto, 0);
1924
+
1925
+ rb_define_method(cNMatrix, "each", nm_each, 0);
1926
+
1927
+ rb_define_method(cNMatrix, "*", nm_ew_multiply, 1);
1928
+ rb_define_method(cNMatrix, "/", nm_ew_divide, 1);
1929
+ rb_define_method(cNMatrix, "+", nm_ew_add, 1);
1930
+ rb_define_method(cNMatrix, "-", nm_ew_subtract, 1);
1931
+ rb_define_method(cNMatrix, "==", nm_ew_eqeq, 1);
1932
+ rb_define_method(cNMatrix, "!=", nm_ew_neq, 1);
1933
+ rb_define_method(cNMatrix, "<=", nm_ew_leq, 1);
1934
+ rb_define_method(cNMatrix, ">=", nm_ew_geq, 1);
1935
+ rb_define_method(cNMatrix, "<", nm_ew_lt, 1);
1936
+ rb_define_method(cNMatrix, ">", nm_ew_gt, 1);
1937
+ rb_define_method(cNMatrix, "equal?", nm_eqeq, 1);
1938
+ rb_define_method(cNMatrix, "dot", nm_multiply, 1);
1939
+ rb_define_alias(cNMatrix, "equal?", "eql?");
1940
+
1941
+
1942
+ rb_define_method(cNMatrix, "capacity", nm_capacity, 0);
1943
+
1944
+ rb_define_method(cNMatrix, "__yale_ija__", nm_yale_ija, 0);
1945
+ rb_define_method(cNMatrix, "__yale_a__", nm_yale_a, 0);
1946
+ rb_define_method(cNMatrix, "__yale_size__", nm_yale_size, 0);
1947
+ rb_define_method(cNMatrix, "__yale_ia__", nm_yale_ia, 0);
1948
+ rb_define_method(cNMatrix, "__yale_ja__", nm_yale_ja, 0);
1949
+ rb_define_method(cNMatrix, "__yale_d__", nm_yale_d, 0);
1950
+ rb_define_method(cNMatrix, "__yale_lu__", nm_yale_lu, 0);
1951
+ rb_define_const(cNMatrix, "YALE_GROWTH_CONSTANT", rb_float_new(YALE_GROWTH_CONSTANT));
1952
+
1953
+
1954
+ cNVector = rb_define_class("NVector", cNMatrix);
1955
+
1956
+ // Special exceptions
1957
+ nm_eDataTypeError = rb_define_class("DataTypeError", rb_eStandardError);
1958
+ nm_eStorageTypeError = rb_define_class("StorageTypeError", rb_eStandardError);
1959
+
1960
+ nm_id_real = rb_intern("real");
1961
+ nm_id_imag = rb_intern("imag");
1962
+ nm_id_numer = rb_intern("numerator");
1963
+ nm_id_denom = rb_intern("denominator");
1964
+ nm_id_mult = rb_intern("*");
1965
+ nm_id_add = rb_intern("+");
1966
+ nm_id_multeq= rb_intern("*=");
1967
+
1968
+ nm_id_transpose = rb_intern("transpose");
1969
+ nm_id_no_transpose = rb_intern("no_transpose");
1970
+ nm_id_complex_conjugate = rb_intern("complex_conjugate");
1971
+
1972
+ nm_id_dense = rb_intern("dense");
1973
+ nm_id_list = rb_intern("list");
1974
+
1975
+ }
1976
+
1977
+ #endif