nmatrix 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/.autotest +23 -0
  2. data/.gemtest +0 -0
  3. data/Gemfile +7 -0
  4. data/History.txt +6 -0
  5. data/LICENSE.txt +21 -0
  6. data/Manifest.txt +51 -0
  7. data/README.rdoc +63 -0
  8. data/Rakefile +154 -0
  9. data/ext/nmatrix/cblas.c +150 -0
  10. data/ext/nmatrix/dense.c +307 -0
  11. data/ext/nmatrix/dense/blas_header.template.c +52 -0
  12. data/ext/nmatrix/dense/elementwise.template.c +107 -0
  13. data/ext/nmatrix/dense/gemm.template.c +159 -0
  14. data/ext/nmatrix/dense/gemv.template.c +130 -0
  15. data/ext/nmatrix/dense/rationalmath.template.c +68 -0
  16. data/ext/nmatrix/depend +18 -0
  17. data/ext/nmatrix/extconf.rb +143 -0
  18. data/ext/nmatrix/generator.rb +594 -0
  19. data/ext/nmatrix/generator/syntax_tree.rb +481 -0
  20. data/ext/nmatrix/list.c +774 -0
  21. data/ext/nmatrix/nmatrix.c +1977 -0
  22. data/ext/nmatrix/nmatrix.h +912 -0
  23. data/ext/nmatrix/rational.c +98 -0
  24. data/ext/nmatrix/yale.c +726 -0
  25. data/ext/nmatrix/yale/complexmath.template.c +71 -0
  26. data/ext/nmatrix/yale/elementwise.template.c +46 -0
  27. data/ext/nmatrix/yale/elementwise_op.template.c +73 -0
  28. data/ext/nmatrix/yale/numbmm.template.c +94 -0
  29. data/ext/nmatrix/yale/smmp1.template.c +21 -0
  30. data/ext/nmatrix/yale/smmp1_header.template.c +38 -0
  31. data/ext/nmatrix/yale/smmp2.template.c +43 -0
  32. data/ext/nmatrix/yale/smmp2_header.template.c +46 -0
  33. data/ext/nmatrix/yale/sort_columns.template.c +56 -0
  34. data/ext/nmatrix/yale/symbmm.template.c +54 -0
  35. data/ext/nmatrix/yale/transp.template.c +68 -0
  36. data/lib/array.rb +67 -0
  37. data/lib/nmatrix.rb +263 -0
  38. data/lib/string.rb +65 -0
  39. data/spec/nmatrix_spec.rb +395 -0
  40. data/spec/nmatrix_yale_spec.rb +239 -0
  41. data/spec/nvector_spec.rb +43 -0
  42. data/spec/syntax_tree_spec.rb +46 -0
  43. metadata +150 -0
@@ -0,0 +1,912 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == nmatrix.h
25
+ //
26
+
27
+ #ifndef NMATRIX_H
28
+ #define NMATRIX_H
29
+
30
+ #include "nmatrix_config.h"
31
+
32
+ #include <cblas.h>
33
+
34
+ #include <math.h>
35
+
36
+ #include <stdlib.h>
37
+ #include <stdio.h>
38
+ #include <string.h>
39
+ #include <ruby.h>
40
+ #define RUBY_ZERO INT2FIX(0)
41
+
42
+ #ifdef BENCHMARK // SOURCE: http://stackoverflow.com/questions/2349776/how-can-i-benchmark-a-c-program-easily
43
+ # include <sys/time.h>
44
+ # include <sys/resource.h>
45
+ #endif
46
+
47
+ #include "dtypes.h"
48
+
49
+ #include <stddef.h>
50
+ #ifdef HAVE_STDINT_H
51
+ # include <stdint.h>
52
+ #endif
53
+
54
+ /*
55
+ Data types used in NArray / NMatrix :
56
+ Please modify these types if your system has any different type.
57
+ */
58
+
59
+
60
+ /* NM_BYTE : unsigned 8-bit integer */
61
+ #ifndef HAVE_U_INT8_T
62
+ # ifdef HAVE_UINT8_T
63
+ typedef uint8_t u_int8_t;
64
+ # else
65
+ typedef unsigned char u_int8_t;
66
+ # endif
67
+ #endif
68
+
69
+ //#ifndef HAVE_INT8_T
70
+ //typedef char int8_t;
71
+ //#endif
72
+
73
+ #ifndef HAVE_INT16_T
74
+ # if SIZEOF_SHORT == 2
75
+ typedef short int16_t;
76
+ # else
77
+ ---->> Please define int16_t manually because sizeof(short) != 2. <<----
78
+ # endif
79
+ #endif /* HAVE_INT16_T */
80
+
81
+ #ifndef HAVE_INT32_T
82
+ # if SIZEOF_LONG == 4
83
+ typedef long int32_t;
84
+ # else
85
+ # if SIZEOF_INT == 4
86
+ typedef int int32_t;
87
+ # else
88
+ ---->> Please define int32_t manually because sizeof(long) != 4. <<----
89
+ # endif
90
+ # endif
91
+ #endif /* HAVE_INT32_T */
92
+
93
+ /* unsigned 32-bit integer */
94
+ #ifndef HAVE_U_INT32_T
95
+ # ifdef HAVE_UINT32_T
96
+ typedef uint32_t u_int32_t;
97
+ # else
98
+ # if SIZEOF_LONG == 4
99
+ typedef unsigned long u_int32_t;
100
+ # else
101
+ # if SIZEOF_INT == 4
102
+ typedef unsigned int u_int32_t;
103
+ # else
104
+ ---->> Please define u_int32_t manually because sizeof(long) != 4. <<----
105
+ # endif
106
+ # endif
107
+ # endif
108
+ #endif /* HAVE_U_INT32_T */
109
+
110
+ #ifndef HAVE_INT64_T
111
+ # if SIZEOF_QUAD == 8
112
+ typedef quad int64_t;
113
+ # else
114
+ # if SIZEOF_LONG == 8
115
+ typedef long int64_t;
116
+ # else
117
+ ---->> Please define int64_t manually because sizeof(quad) != 8. <<----
118
+ # endif
119
+ # endif
120
+ #endif /* HAVE_INT64_T */
121
+
122
+ /* unsigned 64-bit integer */
123
+ #ifndef HAVE_U_INT64_T
124
+ # ifdef HAVE_UINT64_T
125
+ typedef uint64_t u_int64_t;
126
+ # else
127
+ # if SIZEOF_QUAD == 8
128
+ typedef unsigned quad u_int64_t;
129
+ # else
130
+ # if SIZEOF_LONG == 8
131
+ typedef unsigned long u_int64_t;
132
+ # else
133
+ ---->> Please define u_int64_t manually because sizeof(quad) != 8. <<----
134
+ # endif
135
+ # endif
136
+ # endif
137
+ #endif /* HAVE_U_INT64_T */
138
+
139
+
140
+ #ifndef HAVE_SIZE_T /// If you modify this, make sure to modify the definition of y_size_t and Y_SIZE_T!
141
+ typedef u_int64_t size_t;
142
+ # define NM_SIZE_T NM_INT64
143
+ #else
144
+ # if SIZEOF_SIZE_T == 8
145
+ # define NM_SIZE_T NM_INT64
146
+ # else
147
+ # if SIZEOF_SIZE_T == 4
148
+ # define NM_SIZE_T NM_INT32
149
+ # else
150
+ ---->> Please define size_t and y_size_t manually because sizeof(size_t) is neither 8 nor 4. <<----
151
+ # endif
152
+ # endif
153
+ #endif
154
+
155
+ // for when we need to return array indices.
156
+ // This must never be larger than size_t
157
+ typedef uint32_t y_size_t;
158
+ #define Y_SIZE_T NM_INT32
159
+
160
+
161
+ #ifdef HAVE_STDBOOL_H
162
+ # include <stdbool.h>
163
+ #else
164
+ typedef char bool;
165
+ # define true 1 // fallback when <stdbool.h> is unavailable; no trailing ';' so the macro can be used inside expressions
166
+ # define false 0 // fallback when <stdbool.h> is unavailable; no trailing ';' so the macro can be used inside expressions
167
+ #endif
168
+
169
+
170
+ typedef struct { float r,i; } complex64;
171
+ typedef struct { double r,i; } complex128;
172
+ typedef struct { int16_t n,d; } rational32;
173
+ typedef struct { int32_t n,d; } rational64;
174
+ typedef struct { int64_t n,d; } rational128;
175
+
176
+
177
+ #if SIZEOF_INT == 8
178
+ # define DEFAULT_DTYPE NM_INT64
179
+ #else
180
+ # if SIZEOF_INT == 4
181
+ # define DEFAULT_DTYPE NM_INT32
182
+ # else
183
+ # define DEFAULT_DTYPE NM_INT16
184
+ # endif
185
+ #endif
186
+
187
+
188
+ #define YALE_GROWTH_CONSTANT 1.5
189
+
190
+
191
+ enum NMatrix_STypes {
192
+ S_DENSE,
193
+ S_LIST,
194
+ S_YALE,
195
+ S_TYPES
196
+ };
197
+
198
+
199
+ // Element-wise operation codes (see dense/elementwise.template.c). Most codes are the operator's own character; enumerators without an initializer take the previous value + 1.
200
+ enum NMatrix_Ops {
201
+ NM_OP_ADD = '+',
202
+ NM_OP_SUB = '-',
203
+ NM_OP_MUL = '*',
204
+ NM_OP_DIV = '/',
205
+ NM_OP_MOD = '%',
206
+ NM_OP_BANG = '!',
207
+ NM_OP_NEG, // unary minus (implicit value: '!' + 1)
208
+ NM_OP_EQEQ, // == (implicit value: '!' + 2)
209
+ NM_OP_NEQ, // != (implicit value: '!' + 3)
210
+ NM_OP_GT = '>', // >
211
+ NM_OP_LT = '<', // <
212
+ NM_OP_GTE = ',', // >= (encoded as the single character ',')
213
+ NM_OP_LTE = '.', // <= (encoded as the single character '.')
214
+ NM_OP_NOT = '~',
215
+ NM_OP_AND = '&',
216
+ NM_OP_OR = '|',
217
+ NM_OP_XOR = '^',
218
+ NM_OP_LSH, // << (implicit value: '^' + 1)
219
+ NM_OP_RSH // >> (implicit value: '^' + 2)
220
+ };
221
+
222
+
223
+ /* Singly-linked ordered list
224
+ * - holds keys and values
225
+ * - no duplicate keys
226
+ * - keys are ordered
227
+ * - values may be lists themselves
228
+ */
229
+ typedef struct l_node { /* Linked list node */
230
+ size_t key;
231
+ void* val;
232
+ struct l_node * next; // next
233
+ } NODE;
234
+
235
+ typedef struct l_list {
236
+ NODE* first;
237
+ } LIST;
238
+
239
+
240
+ // two vectors and a capacity
241
+ typedef struct y_vector {
242
+ void* ija;
243
+ void* a;
244
+ size_t capacity;
245
+ } VECTOR;
246
+
247
+
248
+ typedef struct common_s { // Common elements found in all _s types.
249
+ int8_t dtype;
250
+ size_t rank;
251
+ size_t* shape;
252
+ } STORAGE;
253
+
254
+
255
+ typedef struct list_s {
256
+ int8_t dtype;
257
+ size_t rank;
258
+ size_t* shape;
259
+ void* default_val;
260
+ LIST* rows;
261
+ } LIST_STORAGE;
262
+
263
+
264
+ typedef struct dense_s {
265
+ int8_t dtype;
266
+ size_t rank;
267
+ size_t* shape;
268
+ void* elements;
269
+ } DENSE_STORAGE;
270
+
271
+
272
+ typedef struct yale_s {
273
+ int8_t dtype;
274
+ size_t rank;
275
+ size_t* shape;
276
+ size_t ndnz; // strictly non-diagonal non-zero count!
277
+ size_t capacity;
278
+ int8_t index_dtype;
279
+ void* ija;
280
+ void* a;
281
+ } YALE_STORAGE;
282
+
283
+
284
+ typedef struct numeric_matrix {
285
+ int8_t stype; /* method of storage (csc, dense, etc) */
286
+ STORAGE* storage; /* pointer to storage struct */
287
+ } NMATRIX;
288
+
289
+
290
+ /* Local */
291
+
292
+ typedef union {
293
+ u_int8_t b[2];
294
+ int16_t s;
295
+ } nm_size16_t;
296
+
297
+ typedef union {
298
+ u_int8_t b[4];
299
+ int32_t i;
300
+ float f;
301
+ } nm_size32_t;
302
+
303
+ typedef union {
304
+ u_int8_t b[8];
305
+ int64_t q;
306
+ float f[2];
307
+ double d;
308
+ complex64 c;
309
+ } nm_size64_t;
310
+
311
+ typedef union {
312
+ u_int8_t b[16];
313
+ int64_t i[2];
314
+ double d[2];
315
+ float f[4];
316
+ complex64 c[2];
317
+ complex128 z;
318
+ rational32 r[4];
319
+ rational64 ra[2];
320
+ rational128 rat;
321
+ VALUE v[2];
322
+ } nm_size128_t;
323
+
324
+
325
+ // For calling cblas_gemm functions (see cblas.c)
326
+ typedef struct cblas_param_t {
327
+ int M, N, K, lda, ldb, ldc;
328
+ void *A, *B, *C;
329
+ nm_size128_t alpha, beta;
330
+ } DENSE_PARAM;
331
+
332
+
333
+ // Formerly in smmp.h:
334
+ typedef struct smmp_param_t {
335
+ void *ia, *ja, *a;
336
+ bool diag;
337
+ } YALE_PARAM;
338
+
339
+ // Shouldn't be necessary, as they're defined in nmatrix.h:
340
+ // (Oddly, though, these fix the error.)
341
+ /*typedef uint8_t u_int8_t;
342
+ typedef uint16_t u_int16_t;
343
+ typedef uint32_t u_int32_t;
344
+ typedef uint64_t u_int64_t; */
345
+
346
+
347
+ // rational.c
348
+ int64_t nmrb_gcd(int64_t x, int64_t y);
349
+
350
+ // BLAS functions
351
+ #define SMMP_MAX_THREE(a,b,c) ((a)>(b) ? ( (a)>(c) ? (a) : (c) ) : ( (b)>(c) ? (b) : (c) ))
352
+ #define SMMP_MIN(a,b) ((a)>(b) ? (b) : (a))
353
+ #define SMMP_MAX(a,b) ((a)>(b) ? (a) : (b))
354
+
355
+ void transp(y_size_t n, y_size_t m, void* ia, void* ja, bool diaga, void* a, void* ib, void* jb, void* b, bool move, int8_t itype, int8_t dtype);
356
+
357
+ void i8_symbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
358
+ void i16_symbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
359
+ void i32_symbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
360
+ void i64_symbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
361
+
362
+ void i8_b_smmp_sort_columns_(y_size_t, YALE_PARAM);
363
+ void i16_b_smmp_sort_columns_(y_size_t, YALE_PARAM);
364
+ void i32_b_smmp_sort_columns_(y_size_t, YALE_PARAM);
365
+ void i64_b_smmp_sort_columns_(y_size_t, YALE_PARAM);
366
+ void i8_i8_smmp_sort_columns_(y_size_t, YALE_PARAM);
367
+ void i16_i8_smmp_sort_columns_(y_size_t, YALE_PARAM);
368
+ void i32_i8_smmp_sort_columns_(y_size_t, YALE_PARAM);
369
+ void i64_i8_smmp_sort_columns_(y_size_t, YALE_PARAM);
370
+ void i8_i16_smmp_sort_columns_(y_size_t, YALE_PARAM);
371
+ void i16_i16_smmp_sort_columns_(y_size_t, YALE_PARAM);
372
+ void i32_i16_smmp_sort_columns_(y_size_t, YALE_PARAM);
373
+ void i64_i16_smmp_sort_columns_(y_size_t, YALE_PARAM);
374
+ void i8_i32_smmp_sort_columns_(y_size_t, YALE_PARAM);
375
+ void i16_i32_smmp_sort_columns_(y_size_t, YALE_PARAM);
376
+ void i32_i32_smmp_sort_columns_(y_size_t, YALE_PARAM);
377
+ void i64_i32_smmp_sort_columns_(y_size_t, YALE_PARAM);
378
+ void i8_i64_smmp_sort_columns_(y_size_t, YALE_PARAM);
379
+ void i16_i64_smmp_sort_columns_(y_size_t, YALE_PARAM);
380
+ void i32_i64_smmp_sort_columns_(y_size_t, YALE_PARAM);
381
+ void i64_i64_smmp_sort_columns_(y_size_t, YALE_PARAM);
382
+ void i8_f32_smmp_sort_columns_(y_size_t, YALE_PARAM);
383
+ void i16_f32_smmp_sort_columns_(y_size_t, YALE_PARAM);
384
+ void i32_f32_smmp_sort_columns_(y_size_t, YALE_PARAM);
385
+ void i64_f32_smmp_sort_columns_(y_size_t, YALE_PARAM);
386
+ void i8_f64_smmp_sort_columns_(y_size_t, YALE_PARAM);
387
+ void i16_f64_smmp_sort_columns_(y_size_t, YALE_PARAM);
388
+ void i32_f64_smmp_sort_columns_(y_size_t, YALE_PARAM);
389
+ void i64_f64_smmp_sort_columns_(y_size_t, YALE_PARAM);
390
+ void i8_c64_smmp_sort_columns_(y_size_t, YALE_PARAM);
391
+ void i16_c64_smmp_sort_columns_(y_size_t, YALE_PARAM);
392
+ void i32_c64_smmp_sort_columns_(y_size_t, YALE_PARAM);
393
+ void i64_c64_smmp_sort_columns_(y_size_t, YALE_PARAM);
394
+ void i8_c128_smmp_sort_columns_(y_size_t, YALE_PARAM);
395
+ void i16_c128_smmp_sort_columns_(y_size_t, YALE_PARAM);
396
+ void i32_c128_smmp_sort_columns_(y_size_t, YALE_PARAM);
397
+ void i64_c128_smmp_sort_columns_(y_size_t, YALE_PARAM);
398
+ void i8_r32_smmp_sort_columns_(y_size_t, YALE_PARAM);
399
+ void i16_r32_smmp_sort_columns_(y_size_t, YALE_PARAM);
400
+ void i32_r32_smmp_sort_columns_(y_size_t, YALE_PARAM);
401
+ void i64_r32_smmp_sort_columns_(y_size_t, YALE_PARAM);
402
+ void i8_r64_smmp_sort_columns_(y_size_t, YALE_PARAM);
403
+ void i16_r64_smmp_sort_columns_(y_size_t, YALE_PARAM);
404
+ void i32_r64_smmp_sort_columns_(y_size_t, YALE_PARAM);
405
+ void i64_r64_smmp_sort_columns_(y_size_t, YALE_PARAM);
406
+ void i8_r128_smmp_sort_columns_(y_size_t, YALE_PARAM);
407
+ void i16_r128_smmp_sort_columns_(y_size_t, YALE_PARAM);
408
+ void i32_r128_smmp_sort_columns_(y_size_t, YALE_PARAM);
409
+ void i64_r128_smmp_sort_columns_(y_size_t, YALE_PARAM);
410
+ void i8_v_smmp_sort_columns_(y_size_t, YALE_PARAM);
411
+ void i16_v_smmp_sort_columns_(y_size_t, YALE_PARAM);
412
+ void i32_v_smmp_sort_columns_(y_size_t, YALE_PARAM);
413
+ void i64_v_smmp_sort_columns_(y_size_t, YALE_PARAM);
414
+
415
+ void i8_b_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
416
+ void i16_b_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
417
+ void i32_b_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
418
+ void i64_b_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
419
+ void i8_i8_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
420
+ void i16_i8_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
421
+ void i32_i8_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
422
+ void i64_i8_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
423
+ void i8_i16_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
424
+ void i16_i16_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
425
+ void i32_i16_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
426
+ void i64_i16_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
427
+ void i8_i32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
428
+ void i16_i32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
429
+ void i32_i32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
430
+ void i64_i32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
431
+ void i8_i64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
432
+ void i16_i64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
433
+ void i32_i64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
434
+ void i64_i64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
435
+ void i8_f32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
436
+ void i16_f32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
437
+ void i32_f32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
438
+ void i64_f32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
439
+ void i8_f64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
440
+ void i16_f64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
441
+ void i32_f64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
442
+ void i64_f64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
443
+ void i8_c64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
444
+ void i16_c64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
445
+ void i32_c64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
446
+ void i64_c64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
447
+ void i8_c128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
448
+ void i16_c128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
449
+ void i32_c128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
450
+ void i64_c128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
451
+ void i8_r32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
452
+ void i16_r32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
453
+ void i32_r32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
454
+ void i64_r32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
455
+ void i8_r64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
456
+ void i16_r64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
457
+ void i32_r64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
458
+ void i64_r64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
459
+ void i8_r128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
460
+ void i16_r128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
461
+ void i32_r128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
462
+ void i64_r128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
463
+ void i8_v_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
464
+ void i16_v_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
465
+ void i32_v_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
466
+ void i64_v_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
467
+
468
+ void i8_b_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
469
+ void i16_b_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
470
+ void i32_b_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
471
+ void i64_b_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
472
+ void i8_i8_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
473
+ void i16_i8_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
474
+ void i32_i8_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
475
+ void i64_i8_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
476
+ void i8_i16_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
477
+ void i16_i16_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
478
+ void i32_i16_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
479
+ void i64_i16_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
480
+ void i8_i32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
481
+ void i16_i32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
482
+ void i32_i32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
483
+ void i64_i32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
484
+ void i8_i64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
485
+ void i16_i64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
486
+ void i32_i64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
487
+ void i64_i64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
488
+ void i8_f32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
489
+ void i16_f32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
490
+ void i32_f32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
491
+ void i64_f32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
492
+ void i8_f64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
493
+ void i16_f64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
494
+ void i32_f64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
495
+ void i64_f64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
496
+ void i8_c64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
497
+ void i16_c64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
498
+ void i32_c64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
499
+ void i64_c64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
500
+ void i8_c128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
501
+ void i16_c128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
502
+ void i32_c128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
503
+ void i64_c128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
504
+ void i8_r32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
505
+ void i16_r32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
506
+ void i32_r32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
507
+ void i64_r32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
508
+ void i8_r64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
509
+ void i16_r64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
510
+ void i32_r64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
511
+ void i64_r64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
512
+ void i8_r128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
513
+ void i16_r128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
514
+ void i32_r128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
515
+ void i64_r128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
516
+ void i8_v_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
517
+ void i16_v_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
518
+ void i32_v_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
519
+ void i64_v_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
520
+
521
+ void i8_b_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
522
+ void i16_b_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
523
+ void i32_b_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
524
+ void i64_b_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
525
+ void i8_i8_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
526
+ void i16_i8_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
527
+ void i32_i8_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
528
+ void i64_i8_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
529
+ void i8_i16_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
530
+ void i16_i16_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
531
+ void i32_i16_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
532
+ void i64_i16_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
533
+ void i8_i32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
534
+ void i16_i32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
535
+ void i32_i32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
536
+ void i64_i32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
537
+ void i8_i64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
538
+ void i16_i64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
539
+ void i32_i64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
540
+ void i64_i64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
541
+ void i8_f32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
542
+ void i16_f32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
543
+ void i32_f32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
544
+ void i64_f32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
545
+ void i8_f64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
546
+ void i16_f64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
547
+ void i32_f64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
548
+ void i64_f64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
549
+ void i8_c64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
550
+ void i16_c64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
551
+ void i32_c64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
552
+ void i64_c64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
553
+ void i8_c128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
554
+ void i16_c128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
555
+ void i32_c128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
556
+ void i64_c128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
557
+ void i8_r32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
558
+ void i16_r32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
559
+ void i32_r32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
560
+ void i64_r32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
561
+ void i8_r64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
562
+ void i16_r64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
563
+ void i32_r64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
564
+ void i64_r64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
565
+ void i8_r128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
566
+ void i16_r128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
567
+ void i32_r128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
568
+ void i64_r128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
569
+ void i8_v_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
570
+ void i16_v_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
571
+ void i32_v_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
572
+ void i64_v_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
573
+
574
+
575
+
576
+ // For binary operations involving matrices that need to be casted.
577
+ typedef struct storage_pair_t {
578
+ STORAGE* left;
579
+ STORAGE* right;
580
+ } STORAGE_PAIR;
581
+
582
+
583
+ #ifndef NMATRIX_C
584
+ extern VALUE cNMatrix;
585
+
586
+ extern const int nm_sizeof[NM_TYPES+1];
587
+ #endif
588
+
589
+
590
+ #define NM_MAX_RANK 15
591
+
592
+ #define UnwrapNMatrix(obj,var) Data_Get_Struct(obj, struct numeric_matrix, var)
593
+ #define IsNMatrix(obj) (rb_obj_is_kind_of((obj), cNMatrix)==Qtrue) // tests obj against the cNMatrix class handle declared above (was the undeclared name CNMatrix)
594
+
595
+ #define NM_STORAGE(val) (((struct numeric_matrix*)DATA_PTR(val))->storage)
596
+ //#define NM_PTR(a, p) ((a)->ptr+(p)*nm_sizeof[(a)->type])
597
+ #define NM_STRUCT(val) ((struct numeric_matrix*)DATA_PTR(val))
598
+ //#define NM_PTR_TYPE(val,type) (type)(((struct numeric_matrix*)DATA_PTR(val))->ptr)
599
+ #define NM_RANK(val) (((STORAGE*)(NM_STORAGE(val)))->rank)
600
+ #define NM_DTYPE(val) (((STORAGE*)(NM_STORAGE(val)))->dtype)
601
+ #define NM_STYPE(val) (((struct numeric_matrix*)DATA_PTR(val))->stype)
602
+ #define NM_SHAPE(val,i) (((STORAGE*)(NM_STORAGE(val)))->shape[(i)])
603
+ #define NM_SHAPE0(val) (NM_SHAPE((val),0)) // extent of dimension 0; shape is a member of the STORAGE struct, not of struct numeric_matrix
604
+ #define NM_SHAPE1(val) (NM_SHAPE((val),1)) // extent of dimension 1; shape is a member of the STORAGE struct, not of struct numeric_matrix
605
+ #define NM_SIZEOF_DTYPE(val) (nm_sizeof[NM_DTYPE(val)])
606
+ #define NM_REF(val,coords) (RefFuncs[NM_STYPE(val)]( NM_STORAGE(val), coords, NM_SIZEOF_DTYPE(val) ))
607
+
608
+ #define NM_IsNMatrix(obj) (rb_obj_is_kind_of(obj, cNMatrix)==Qtrue)
609
+ #define NM_IsArray(obj) (TYPE(obj)==T_ARRAY || rb_obj_is_kind_of(obj,cNMatrix)==Qtrue)
610
+ #define NM_IsROBJ(d) ((d)->dtype==NM_ROBJ)
611
+ #define NM_IsINTEGER(a) \
612
+ (NM_DTYPE(a)==NM_BYTE || NM_DTYPE(a)==NM_INT8 || NM_DTYPE(a)==NM_INT16 || NM_DTYPE(a)==NM_INT32 || NM_DTYPE(a)==NM_INT64)
613
+ #define NM_IsCOMPLEX(a) /* true when the matrix dtype is complex. NOTE(review): dtype names assumed to mirror the complex64/complex128 typedefs -- confirm against dtypes.h */ \
613
+ (NM_DTYPE(a)==NM_COMPLEX64 || NM_DTYPE(a)==NM_COMPLEX128)
615
+ #define NM_MAX(a,b) (((a)>(b))?(a):(b))
616
+ #define NM_SWAP(a,b,tmp) {(tmp)=(a);(a)=(b);(b)=(tmp);}
617
+
618
+ //#define NUM2REAL(v) NUM2DBL( rb_funcall((v),nm_id_real,0) ) // deprecated
619
+ #define REAL2DBL(v) NUM2DBL( rb_funcall((v),nm_id_real,0) )
620
+ //#define NUM2IMAG(v) NUM2DBL( rb_funcall((v),nm_id_imag,0) ) // deprecated
621
+ #define IMAG2DBL(v) NUM2DBL( rb_funcall((v),nm_id_imag,0) )
622
+
623
+ #define NUM2NUMER(v) NUM2INT( rb_funcall((v), nm_id_numer,0) ) // deprecated
624
+ #define NUMER2INT(v) NUM2INT( rb_funcall((v), nm_id_numer,0) )
625
+ #define NUM2DENOM(v) NUM2INT( rb_funcall((v), nm_id_denom,0) ) // deprecated
626
+ #define DENOM2INT(v) NUM2INT( rb_funcall((v), nm_id_denom,0) )
627
+
628
+ #define IS_NUMERIC(v) (FIXNUM_P(v) || TYPE(v) == T_FLOAT || TYPE(v) == T_COMPLEX || TYPE(v) == T_RATIONAL)
629
+ #define IS_STRING(v) (TYPE(v) == T_STRING)
630
+
631
+ #define CheckNMatrixType(v) if (TYPE(v) != T_DATA || RDATA(v)->dfree != (RUBY_DATA_FUNC)nm_delete) rb_raise(rb_eTypeError, "expected NMatrix on left-hand side of operation");
632
+
633
+ //#define YALE_JA_START(sptr) (((YALE_STORAGE*)(sptr))->shape[0]+1)
634
+ #define YALE_IJA(sptr,elem_size,i) ((void*)( (char*)(((YALE_STORAGE*)(sptr))->ija) + (i) * (elem_size) )) // address of the i-th IJA entry, stepping elem_size bytes per entry; arguments parenthesized so expressions such as k+1 index correctly
635
+ //#define YALE_JA(sptr,dtype,j) ((((dtype)*)((YALE_STORAGE*)(sptr))->ija)[(YALE_JA_START(sptr))+j])
636
+ #define YALE_ROW_LENGTH(sptr,elem_size,i) (*(size_t*)YALE_IJA((sptr),(elem_size),(i)+1) - *(size_t*)YALE_IJA((sptr),(elem_size),(i))) // IJA[i+1] - IJA[i]; uses YALE_IJA because no YALE_IA macro is defined in this header. NOTE(review): both entries are read as size_t regardless of elem_size -- confirm callers only use this with size_t-wide indices
637
+ #define YALE_A(sptr,elem_size,i) ((void*)((char*)(((YALE_STORAGE*)(sptr))->a) + (elem_size) * (i))) // address of the i-th entry of the A (values) array, stepping elem_size bytes per entry; arguments parenthesized for safe expansion
638
+ #define YALE_DIAG(sptr, elem_size, i) ( YALE_A((sptr),(elem_size),(i)) )
639
+ //#define YALE_LU(sptr,dtype,i,j) (((dtype)*)(((YALE_STORAGE*)(sptr))->a)[ YALE_JA_START(sptr) + ])
640
+ #define YALE_MINIMUM(sptr) (((YALE_STORAGE*)(sptr))->shape[0]*2 + 1) // arbitrarily defined
641
+ #define YALE_SIZE_PTR(sptr,elem_size) ((void*)((char*)((YALE_STORAGE*)(sptr))->ija + ((YALE_STORAGE*)(sptr))->shape[0]*(elem_size) )) // address of IJA entry number shape[0], which YaleGetSize reads as the size; elem_size parenthesized for safe expansion
642
+ #define YALE_MAX_SIZE(sptr) (((YALE_STORAGE*)(sptr))->shape[0] * ((YALE_STORAGE*)(sptr))->shape[1] + 1)
643
+ #define YALE_IA_SIZE(sptr) ((YALE_STORAGE*)(sptr))->shape[0]
644
+
645
+ // None of these next three return anything. They set a reference directly.
646
+ #define YaleGetIJA(victim,s,i) (SetFuncs[Y_SIZE_T][(s)->index_dtype](1, &(victim), 0, YALE_IJA((s), nm_sizeof[(s)->index_dtype], (i)), 0)) // copies IJA entry i into victim via the SetFuncs copy table (indexed [Y_SIZE_T][index_dtype]); s is now parenthesized everywhere
647
+ #define YaleSetIJA(i,s,from) (SetFuncs[(s)->index_dtype][Y_SIZE_T](1, YALE_IJA((s), nm_sizeof[(s)->index_dtype], (i)), 0, &(from), 0)) // copies from into IJA entry i via the SetFuncs copy table (indexed [index_dtype][Y_SIZE_T]); s is now parenthesized everywhere
648
+ #define YaleGetSize(sz,s) (SetFuncs[Y_SIZE_T][(s)->index_dtype](1, &(sz), 0, (YALE_SIZE_PTR((s), nm_sizeof[(s)->index_dtype])), 0)) // copies the entry at YALE_SIZE_PTR(s) into sz via the SetFuncs copy table; sz is now parenthesized
649
+ //#define YALE_FIRST_NZ_ROW_ENTRY(sptr,elem_size,i)
650
+
651
+
652
+ #if !defined RSTRING_LEN // Fallback accessors: each macro in this group is defined only when nothing included earlier already provides it. NOTE(review): presumably for Ruby headers that predate these accessors -- confirm.
653
+ #define RSTRING_LEN(a) RSTRING(a)->len // reads the `len` member through RSTRING(a)
654
+ #endif
655
+ #if !defined RSTRING_PTR
656
+ #define RSTRING_PTR(a) RSTRING(a)->ptr // reads the `ptr` member through RSTRING(a)
657
+ #endif
658
+ #if !defined RARRAY_LEN
659
+ #define RARRAY_LEN(a) RARRAY(a)->len // reads the `len` member through RARRAY(a)
660
+ #endif
661
+ #if !defined RARRAY_PTR
662
+ #define RARRAY_PTR(a) RARRAY(a)->ptr // reads the `ptr` member through RARRAY(a)
663
+ #endif
664
+
665
+ #define NM_INDEX_TYPES NM_FLOAT32 // Size of the second dimension of the [NM_TYPES][NM_INDEX_TYPES] function tables. NOTE(review): presumably the number of integer dtypes that precede NM_FLOAT32 in the dtype enum -- confirm against the enum definition.
666
+
667
+
668
+ typedef void (*nm_setfunc_t[NM_TYPES][NM_TYPES])(); // copy functions, indexed by two dtypes (YaleGetIJA/YaleSetIJA pass the destination dtype first, the source dtype second); the empty parameter list leaves call arguments unchecked
669
+ typedef void (*nm_incfunc_t[NM_TYPES])(); // increment functions, one per dtype
670
+ typedef void* (*nm_stype_ref_t[S_TYPES])(STORAGE*, size_t*); // get/ref, one per storage type
671
+ typedef VALUE (*nm_stype_ins_t[S_TYPES])(STORAGE*, size_t*, VALUE); // insert, one per storage type
672
+ typedef STORAGE* (*nm_create_storage_t[S_TYPES])(); // create-storage functions, one per storage type
673
+ typedef STORAGE* (*nm_cast_copy_storage_t[S_TYPES])(); // cast-copy functions, one per storage type
674
+ typedef STORAGE* (*nm_scast_copy_storage_t[S_TYPES][S_TYPES])(); // storage-type cast-copy functions, indexed by a pair of storage types
675
+ typedef NMATRIX* (*nm_matrix_multiply_op_t[S_TYPES])(); // matrix multiply, one per storage type
676
+ typedef NMATRIX* (*nm_elementwise_binary_op_casted_t[S_TYPES])(); // element-wise binary op, one per storage type
677
+ typedef int (*nm_d_elementwise_binary_op_t[NM_TYPES])(); // element-wise binary op, one per dtype. NOTE(review): "d" presumably stands for dense -- confirm
678
+ typedef int (*nm_y_elementwise_binary_op_t[NM_TYPES][NM_INDEX_TYPES])(); // element-wise binary op, indexed [dtype][index dtype]. NOTE(review): "y" presumably stands for yale -- confirm
679
+ typedef bool (*nm_compare_t[S_TYPES])(); // comparison, one per storage type
680
+ typedef void (*nm_delete_t[S_TYPES])(); // delete, one per storage type
681
+ typedef void (*nm_mark_t[S_TYPES])(void*); // mark, one per storage type
682
+ typedef void (*nm_gemm_t[NM_TYPES])(); // general matrix/matrix multiply, one per dtype
683
+ typedef void (*nm_dense_transpose_t[NM_TYPES])(); // dense transpose, one per dtype
684
+ typedef void (*nm_gemv_t[NM_TYPES])(); // general matrix/vector multiply, one per dtype
685
+ typedef void (*nm_smmp_t[NM_TYPES][NM_INDEX_TYPES])(); // sparse (yale) multiply, indexed [dtype][index dtype]
686
+ typedef void (*nm_smmp_transpose_t[NM_TYPES][NM_INDEX_TYPES])(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool); // sparse (yale) transpose, indexed [dtype][index dtype]; the only table in this group with a fully specified parameter list
687
+ //typedef void (*nm_setsf_t[S_TYPES][S_TYPES])();
688
+ //typedef void (*nm_setdf_t[NM_DTYPES][NM_DTYPES])();
689
+
690
+ extern nm_setfunc_t SetFuncs; // table of copy functions (see nm_setfunc_t); defined outside this header
691
+ extern nm_incfunc_t Increment; // table of increment functions (see nm_incfunc_t)
692
+ extern ID nm_id_real, nm_id_imag; // NOTE(review): presumably interned Ruby IDs for the names suggested by the suffixes -- confirm where they are initialized
693
+ extern ID nm_id_denom, nm_id_numer;
694
+ extern ID nm_id_mult, nm_id_multeq, nm_id_add;
695
+
696
+ /* blas.c: gemm (matrix/matrix) and gemv (matrix/vector) multiply prototypes. The name prefix selects the element type: r32/r64/r128 rationals, b = u_int8_t, i8..i64 signed integers, v = VALUE. NOTE(review): the gemm variants take int dimensions while the gemv variants take size_t M, N and lda, and vgemm has no gemv counterpart declared here -- confirm both are intentional. */
697
+ int r32gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const rational32 alpha, const rational32* A, const int lda, const rational32* B, const int ldb, const rational32 beta, rational32* C, const int ldc);
698
+ int r32gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const rational32 alpha, const rational32* A, const size_t lda, const rational32* X, const int incX, const rational32 beta, rational32* Y, const int incY);
699
+ int r64gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const rational64 alpha, const rational64* A, const int lda, const rational64* B, const int ldb, const rational64 beta, rational64* C, const int ldc);
700
+ int r64gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const rational64 alpha, const rational64* A, const size_t lda, const rational64* X, const int incX, const rational64 beta, rational64* Y, const int incY);
701
+ int r128gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const rational128 alpha, const rational128* A, const int lda, const rational128* B, const int ldb, const rational128 beta, rational128* C, const int ldc);
702
+ int r128gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const rational128 alpha, const rational128* A, const size_t lda, const rational128* X, const int incX, const rational128 beta, rational128* Y, const int incY);
703
+ int bgemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const u_int8_t alpha, const u_int8_t* A, const int lda, const u_int8_t* B, const int ldb, const u_int8_t beta, u_int8_t* C, const int ldc);
704
+ int bgemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const u_int8_t alpha, const u_int8_t* A, const size_t lda, const u_int8_t* X, const int incX, const u_int8_t beta, u_int8_t* Y, const int incY);
705
+ int i8gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const int8_t alpha, const int8_t* A, const int lda, const int8_t* B, const int ldb, const int8_t beta, int8_t* C, const int ldc);
706
+ int i8gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const int8_t alpha, const int8_t* A, const size_t lda, const int8_t* X, const int incX, const int8_t beta, int8_t* Y, const int incY);
707
+ int i16gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const int16_t alpha, const int16_t* A, const int lda, const int16_t* B, const int ldb, const int16_t beta, int16_t* C, const int ldc);
708
+ int i16gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const int16_t alpha, const int16_t* A, const size_t lda, const int16_t* X, const int incX, const int16_t beta, int16_t* Y, const int incY);
709
+ int i32gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const int32_t alpha, const int32_t* A, const int lda, const int32_t* B, const int ldb, const int32_t beta, int32_t* C, const int ldc);
710
+ int i32gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const int32_t alpha, const int32_t* A, const size_t lda, const int32_t* X, const int incX, const int32_t beta, int32_t* Y, const int incY);
711
+ int i64gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const int64_t alpha, const int64_t* A, const int lda, const int64_t* B, const int ldb, const int64_t beta, int64_t* C, const int ldc);
712
+ int i64gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const int64_t alpha, const int64_t* A, const size_t lda, const int64_t* X, const int incX, const int64_t beta, int64_t* Y, const int incY);
713
+ int vgemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const VALUE alpha, const VALUE* A, const int lda, const VALUE* B, const int ldb, const VALUE beta, VALUE* C, const int ldc);
714
+
715
+ // Transpose prototypes, one per element type, all following the template: void %%TYPE_ABBREV%%transp(unsigned int M, unsigned int N, const %%TYPE%%* A, int lda, %%TYPE%%* B, int ldb); A is read-only and B is writable. NOTE(review): presumably these are the entries of nm_dense_transpose_t -- confirm.
716
+ void btransp(const unsigned int, const unsigned int, const u_int8_t*, const int, u_int8_t*, const int);
717
+ void i8transp(const unsigned int, const unsigned int, const int8_t*, const int, int8_t*, const int);
718
+ void i16transp(const unsigned int, const unsigned int, const int16_t*, const int, int16_t*, const int);
719
+ void i32transp(const unsigned int, const unsigned int, const int32_t*, const int, int32_t*, const int);
720
+ void i64transp(const unsigned int, const unsigned int, const int64_t*, const int, int64_t*, const int);
721
+ void f32transp(const unsigned int, const unsigned int, const float*, const int, float*, const int);
722
+ void f64transp(const unsigned int, const unsigned int, const double*, const int, double*, const int);
723
+ void c64transp(const unsigned int, const unsigned int, const complex64*, const int, complex64*, const int);
724
+ void c128transp(const unsigned int, const unsigned int, const complex128*, const int, complex128*, const int);
725
+ void r32transp(const unsigned int, const unsigned int, const rational32*, const int, rational32*, const int);
726
+ void r64transp(const unsigned int, const unsigned int, const rational64*, const int, rational64*, const int);
727
+ void r128transp(const unsigned int, const unsigned int, const rational128*, const int, rational128*, const int);
728
+ void vtransp(const unsigned int, const unsigned int, const VALUE*, const int, VALUE*, const int);
729
+
730
+ int nm_d_b_elementwise(const u_int8_t* A, const u_int8_t* B, u_int8_t* C, size_t n, enum NMatrix_Ops op); // Element-wise prototypes, one per element type: A and B are read-only inputs, C is writable, n is a count and op selects the operation. NOTE(review): presumably C[i] = A[i] op B[i] for n elements, filling nm_d_elementwise_binary_op_t -- confirm in the implementation.
731
+ int nm_d_i8_elementwise(const int8_t* A, const int8_t* B, int8_t* C, size_t n, enum NMatrix_Ops op);
732
+ int nm_d_i16_elementwise(const int16_t* A, const int16_t* B, int16_t* C, size_t n, enum NMatrix_Ops op);
733
+ int nm_d_i32_elementwise(const int32_t* A, const int32_t* B, int32_t* C, size_t n, enum NMatrix_Ops op);
734
+ int nm_d_i64_elementwise(const int64_t* A, const int64_t* B, int64_t* C, size_t n, enum NMatrix_Ops op);
735
+ int nm_d_f32_elementwise(const float* A, const float* B, float* C, size_t n, enum NMatrix_Ops op);
736
+ int nm_d_f64_elementwise(const double* A, const double* B, double* C, size_t n, enum NMatrix_Ops op);
737
+ int nm_d_c64_elementwise(const complex64* A, const complex64* B, complex64* C, size_t n, enum NMatrix_Ops op);
738
+ int nm_d_c128_elementwise(const complex128* A, const complex128* B, complex128* C, size_t n, enum NMatrix_Ops op);
739
+ int nm_d_r32_elementwise(const rational32* A, const rational32* B, rational32* C, size_t n, enum NMatrix_Ops op);
740
+ int nm_d_r64_elementwise(const rational64* A, const rational64* B, rational64* C, size_t n, enum NMatrix_Ops op);
741
+ int nm_d_r128_elementwise(const rational128* A, const rational128* B, rational128* C, size_t n, enum NMatrix_Ops op);
742
+ int nm_d_v_elementwise(const VALUE* A, const VALUE* B, VALUE* C, size_t n, enum NMatrix_Ops op);
743
+
744
+ // Rational arithmetic helpers. These are in blas.c but are needed by smmp2.c (the smmp template stuff). Operands are passed as separate numerator/denominator integers and the result is returned by value. NOTE(review): `char k` presumably selects between the two operations named in each function (mul/div, add/sub) -- confirm. Also note that r64_/r32_ muldiv and addsub take int64_t operands, whereas the matching mod/bang/negate helpers take int32_t and int16_t respectively.
745
+ rational128 r128_muldiv(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
746
+ rational128 r128_addsub(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
747
+ rational128 r128_mod(int64_t anum, int64_t aden, int64_t bnum, int64_t bden);
748
+ rational128 r128_bang(int64_t, int64_t);
749
+ rational128 r128_negate(int64_t, int64_t);
750
+ rational64 r64_muldiv(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
751
+ rational64 r64_addsub(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
752
+ rational64 r64_mod(int32_t anum, int32_t aden, int32_t bnum, int32_t bden);
753
+ rational64 r64_bang(int32_t, int32_t);
754
+ rational64 r64_negate(int32_t, int32_t);
755
+ rational32 r32_muldiv(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
756
+ rational32 r32_addsub(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
757
+ rational32 r32_mod(int16_t anum, int16_t aden, int16_t bnum, int16_t bden);
758
+ rational32 r32_bang(int16_t, int16_t);
759
+ rational32 r32_negate(int16_t, int16_t);
760
+
761
+ rational32 BOOL2R32(bool); // bool-to-rational conversions, one per rational width (declared as functions despite the upper-case names)
762
+ rational64 BOOL2R64(bool);
763
+ rational128 BOOL2R128(bool);
764
+
765
+
766
+ /* cblas.c: gemm/gemv wrappers that take their operands bundled in a DENSE_PARAM instead of as individual arguments. The b/i8..i64 variants declare their enum parameters without const while the s/d/c/z/r/v variants add it; for by-value parameters this does not change the function type. NOTE(review): cblas_vgemm_ has no gemv counterpart declared here -- confirm that is intentional. */
767
+ DENSE_PARAM init_cblas_params_for_nm_multiply_matrix(int8_t dtype); // returns a DENSE_PARAM by value for the given dtype
768
+ void cblas_bgemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
769
+ void cblas_bgemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
770
+ void cblas_i8gemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
771
+ void cblas_i8gemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
772
+ void cblas_i16gemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
773
+ void cblas_i16gemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
774
+ void cblas_i32gemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
775
+ void cblas_i32gemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
776
+ void cblas_i64gemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
777
+ void cblas_i64gemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
778
+ void cblas_sgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
779
+ void cblas_sgemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
780
+ void cblas_dgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
781
+ void cblas_dgemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
782
+ void cblas_cgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
783
+ void cblas_cgemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
784
+ void cblas_zgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
785
+ void cblas_zgemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
786
+ void cblas_r32gemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
787
+ void cblas_r32gemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
788
+ void cblas_r64gemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
789
+ void cblas_r64gemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
790
+ void cblas_r128gemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
791
+ void cblas_r128gemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
792
+ void cblas_vgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
793
+
794
+
795
+ /* smmp2.c: element-wise operation prototypes named <index>_<value>_ew. The first prefix (i8/i16/i32/i64) matches the unsigned integer width of the read-only ija/ijb/ijc index arrays; the second (b, i8..i64, f32, f64, c64, c128, r32..r128, v) matches the element type of the a/b/c value arrays. NOTE(review): presumably these fill nm_y_elementwise_binary_op_t for Yale storage -- confirm. */
796
+ int i8_b_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, u_int8_t* a, u_int8_t* b, u_int8_t* c);
797
+ int i16_b_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, u_int8_t* a, u_int8_t* b, u_int8_t* c);
798
+ int i32_b_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, u_int8_t* a, u_int8_t* b, u_int8_t* c);
799
+ int i64_b_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, u_int8_t* a, u_int8_t* b, u_int8_t* c);
800
+ int i8_i8_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, int8_t* a, int8_t* b, int8_t* c);
801
+ int i16_i8_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, int8_t* a, int8_t* b, int8_t* c);
802
+ int i32_i8_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, int8_t* a, int8_t* b, int8_t* c);
803
+ int i64_i8_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, int8_t* a, int8_t* b, int8_t* c);
804
+ int i8_i16_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, int16_t* a, int16_t* b, int16_t* c);
805
+ int i16_i16_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, int16_t* a, int16_t* b, int16_t* c);
806
+ int i32_i16_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, int16_t* a, int16_t* b, int16_t* c);
807
+ int i64_i16_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, int16_t* a, int16_t* b, int16_t* c);
808
+ int i8_i32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, int32_t* a, int32_t* b, int32_t* c);
809
+ int i16_i32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, int32_t* a, int32_t* b, int32_t* c);
810
+ int i32_i32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, int32_t* a, int32_t* b, int32_t* c);
811
+ int i64_i32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, int32_t* a, int32_t* b, int32_t* c);
812
+ int i8_i64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, int64_t* a, int64_t* b, int64_t* c);
813
+ int i16_i64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, int64_t* a, int64_t* b, int64_t* c);
814
+ int i32_i64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, int64_t* a, int64_t* b, int64_t* c);
815
+ int i64_i64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, int64_t* a, int64_t* b, int64_t* c);
816
+ int i8_f32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, float* a, float* b, float* c);
817
+ int i16_f32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, float* a, float* b, float* c);
818
+ int i32_f32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, float* a, float* b, float* c);
819
+ int i64_f32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, float* a, float* b, float* c);
820
+ int i8_f64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, double* a, double* b, double* c);
821
+ int i16_f64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, double* a, double* b, double* c);
822
+ int i32_f64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, double* a, double* b, double* c);
823
+ int i64_f64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, double* a, double* b, double* c);
824
+ int i8_c64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, complex64* a, complex64* b, complex64* c);
825
+ int i16_c64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, complex64* a, complex64* b, complex64* c);
826
+ int i32_c64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, complex64* a, complex64* b, complex64* c);
827
+ int i64_c64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, complex64* a, complex64* b, complex64* c);
828
+ int i8_c128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, complex128* a, complex128* b, complex128* c);
829
+ int i16_c128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, complex128* a, complex128* b, complex128* c);
830
+ int i32_c128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, complex128* a, complex128* b, complex128* c);
831
+ int i64_c128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, complex128* a, complex128* b, complex128* c);
832
+ int i8_r32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, rational32* a, rational32* b, rational32* c);
833
+ int i16_r32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, rational32* a, rational32* b, rational32* c);
834
+ int i32_r32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, rational32* a, rational32* b, rational32* c);
835
+ int i64_r32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, rational32* a, rational32* b, rational32* c);
836
+ int i8_r64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, rational64* a, rational64* b, rational64* c);
837
+ int i16_r64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, rational64* a, rational64* b, rational64* c);
838
+ int i32_r64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, rational64* a, rational64* b, rational64* c);
839
+ int i64_r64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, rational64* a, rational64* b, rational64* c);
840
+ int i8_r128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, rational128* a, rational128* b, rational128* c);
841
+ int i16_r128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, rational128* a, rational128* b, rational128* c);
842
+ int i32_r128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, rational128* a, rational128* b, rational128* c);
843
+ int i64_r128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, rational128* a, rational128* b, rational128* c);
844
+ int i8_v_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, VALUE* a, VALUE* b, VALUE* c);
845
+ int i16_v_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, VALUE* a, VALUE* b, VALUE* c);
846
+ int i32_v_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, VALUE* a, VALUE* b, VALUE* c);
847
+ int i64_v_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, VALUE* a, VALUE* b, VALUE* c);
848
+
849
+
850
+ /* dense.c: creation, deletion, marking, cast-copy, counting, comparison and coordinate-based access for DENSE_STORAGE. */
851
+ DENSE_STORAGE* create_dense_storage(int8_t dtype, size_t* shape, size_t rank, void* elements, size_t elements_length); // NOTE(review): ownership of `shape` and `elements` after the call is not visible here -- confirm in dense.c
852
+ void delete_dense_storage(DENSE_STORAGE* s);
853
+ void mark_dense_storage(void* s); // takes void*, matching the nm_mark_t signature
854
+ DENSE_STORAGE* cast_copy_dense_storage(DENSE_STORAGE* rhs, int8_t new_dtype);
855
+
856
+ size_t count_dense_storage_elements(const DENSE_STORAGE* s);
857
+ bool dense_storage_eqeq(const DENSE_STORAGE*, const DENSE_STORAGE*);
858
+
859
+ size_t dense_storage_pos(DENSE_STORAGE* s, size_t* coords); // the accessors below all take a coords array
860
+ void* dense_storage_get(DENSE_STORAGE* s, size_t* coords);
861
+ void dense_storage_set(DENSE_STORAGE* s, size_t* coords, void* val);
862
+
863
+ /* list.c: creation, deletion, marking, cast-copy, coordinate-based get/insert/remove and comparison for LIST_STORAGE. */
864
+ LIST_STORAGE* create_list_storage(int8_t dtype, size_t* shape, size_t rank, void* init_val);
865
+ void delete_list_storage(LIST_STORAGE* s);
866
+ void mark_list_storage(void* s); // takes void*, matching the nm_mark_t signature
867
+ LIST_STORAGE* cast_copy_list_storage(LIST_STORAGE* rhs, int8_t new_dtype);
868
+ size_t count_storage_max_elements(const STORAGE*); // accepts the generic STORAGE type rather than LIST_STORAGE
869
+
870
+ void* list_storage_get(LIST_STORAGE* s, size_t* coords);
871
+ void* list_storage_insert(LIST_STORAGE* s, size_t* coords, void* val); // NOTE(review): meaning of the returned pointer (and of list_storage_remove's) is not visible here -- confirm in list.c
872
+ void* list_storage_remove(LIST_STORAGE* s, size_t* coords);
873
+ bool list_storage_eqeq(const LIST_STORAGE*, const LIST_STORAGE*);
874
+
875
+ /* yale.c: creation, initialization, deletion, marking, cast-copy, comparison, coordinate-based ref/set and merging for YALE_STORAGE. */
876
+ void print_vectors(YALE_STORAGE* s);
877
+ YALE_STORAGE* create_yale_storage(int8_t dtype, size_t* shape, size_t rank, size_t init_capacity);
878
+ void init_yale_storage(YALE_STORAGE* s);
879
+ void delete_yale_storage(YALE_STORAGE* s);
880
+ void mark_yale_storage(void* s); // takes void*, matching the nm_mark_t signature
881
+ YALE_STORAGE* cast_copy_yale_storage(YALE_STORAGE* rhs, int8_t new_dtype);
882
+ bool yale_storage_eqeq(const YALE_STORAGE*, const YALE_STORAGE*);
883
+
884
+ void* yale_storage_ref(YALE_STORAGE* s, size_t* coords);
885
+ char yale_storage_set(YALE_STORAGE* s, size_t* coords, void* v); // NOTE(review): the meaning of the char result is not visible here -- confirm in yale.c
886
+
887
+ YALE_STORAGE* create_merged_yale_storage(const YALE_STORAGE*, const YALE_STORAGE*);
888
+
889
+ size_t count_list_storage_nd_elements(const LIST_STORAGE*); // NOTE(review): these two operate on LIST_STORAGE although they sit under the yale.c heading -- confirm which file defines them
890
+ size_t count_list_storage_elements(const LIST_STORAGE*);
891
+
892
+
893
+ /* stype casts: scast_copy_<to>_<from> takes a read-only source storage of the <from> type plus a dtype and returns a storage of the <to> type. */
894
+ DENSE_STORAGE* scast_copy_dense_yale(const YALE_STORAGE* rhs, int8_t l_dtype);
895
+ DENSE_STORAGE* scast_copy_dense_list(const LIST_STORAGE* rhs, int8_t l_dtype);
896
+ YALE_STORAGE* scast_copy_yale_dense(const DENSE_STORAGE* rhs, int8_t l_dtype);
897
+ YALE_STORAGE* scast_copy_yale_list(const LIST_STORAGE* rhs, int8_t l_dtype);
898
+ LIST_STORAGE* scast_copy_list_yale(const YALE_STORAGE* rhs, int8_t l_dtype);
899
+ LIST_STORAGE* scast_copy_list_dense(const DENSE_STORAGE* rhs, int8_t l_dtype);
900
+
901
+ /* nmatrix.c: value casting, dtype/stype lookup from Ruby strings and symbols, shape parsing, matrix creation and the extension entry point. */
902
+ void cast_copy_value_single(void* to, const void* from, int8_t l_dtype, int8_t r_dtype); // writes through `to`, reads from `from`; NOTE(review): presumably l_dtype describes `to` and r_dtype describes `from` -- confirm
903
+ int8_t nm_dtypestring_to_dtype(VALUE str);
904
+ int8_t nm_dtypesymbol_to_dtype(VALUE sym);
905
+ int8_t nm_stypestring_to_stype(VALUE str);
906
+ int8_t nm_stypesymbol_to_stype(VALUE sym);
907
+ int8_t nm_guess_dtype(VALUE v);
908
+ size_t* nm_interpret_shape_arg(VALUE arg, size_t* rank); // `rank` is a non-const pointer; NOTE(review): presumably an out-parameter, and ownership of the returned array is not visible here -- confirm
909
+ NMATRIX* nm_create(int8_t stype, void* storage);
910
+ void Init_nmatrix(); // NOTE(review): the empty parameter list declares no prototype in C; `void Init_nmatrix(void);` would be the checked form
911
+
912
+ #endif