nmatrix 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/.autotest +23 -0
  2. data/.gemtest +0 -0
  3. data/Gemfile +7 -0
  4. data/History.txt +6 -0
  5. data/LICENSE.txt +21 -0
  6. data/Manifest.txt +51 -0
  7. data/README.rdoc +63 -0
  8. data/Rakefile +154 -0
  9. data/ext/nmatrix/cblas.c +150 -0
  10. data/ext/nmatrix/dense.c +307 -0
  11. data/ext/nmatrix/dense/blas_header.template.c +52 -0
  12. data/ext/nmatrix/dense/elementwise.template.c +107 -0
  13. data/ext/nmatrix/dense/gemm.template.c +159 -0
  14. data/ext/nmatrix/dense/gemv.template.c +130 -0
  15. data/ext/nmatrix/dense/rationalmath.template.c +68 -0
  16. data/ext/nmatrix/depend +18 -0
  17. data/ext/nmatrix/extconf.rb +143 -0
  18. data/ext/nmatrix/generator.rb +594 -0
  19. data/ext/nmatrix/generator/syntax_tree.rb +481 -0
  20. data/ext/nmatrix/list.c +774 -0
  21. data/ext/nmatrix/nmatrix.c +1977 -0
  22. data/ext/nmatrix/nmatrix.h +912 -0
  23. data/ext/nmatrix/rational.c +98 -0
  24. data/ext/nmatrix/yale.c +726 -0
  25. data/ext/nmatrix/yale/complexmath.template.c +71 -0
  26. data/ext/nmatrix/yale/elementwise.template.c +46 -0
  27. data/ext/nmatrix/yale/elementwise_op.template.c +73 -0
  28. data/ext/nmatrix/yale/numbmm.template.c +94 -0
  29. data/ext/nmatrix/yale/smmp1.template.c +21 -0
  30. data/ext/nmatrix/yale/smmp1_header.template.c +38 -0
  31. data/ext/nmatrix/yale/smmp2.template.c +43 -0
  32. data/ext/nmatrix/yale/smmp2_header.template.c +46 -0
  33. data/ext/nmatrix/yale/sort_columns.template.c +56 -0
  34. data/ext/nmatrix/yale/symbmm.template.c +54 -0
  35. data/ext/nmatrix/yale/transp.template.c +68 -0
  36. data/lib/array.rb +67 -0
  37. data/lib/nmatrix.rb +263 -0
  38. data/lib/string.rb +65 -0
  39. data/spec/nmatrix_spec.rb +395 -0
  40. data/spec/nmatrix_yale_spec.rb +239 -0
  41. data/spec/nvector_spec.rb +43 -0
  42. data/spec/syntax_tree_spec.rb +46 -0
  43. metadata +150 -0
@@ -0,0 +1,912 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == nmatrix.h
25
+ //
26
+
27
+ #ifndef NMATRIX_H
28
+ #define NMATRIX_H
29
+
30
+ #include "nmatrix_config.h"
31
+
32
+ #include <cblas.h>
33
+
34
+ #include <math.h>
35
+
36
+ #include <stdlib.h>
37
+ #include <stdio.h>
38
+ #include <string.h>
39
+ #include <ruby.h>
40
+ #define RUBY_ZERO INT2FIX(0)
41
+
42
+ #ifdef BENCHMARK // SOURCE: http://stackoverflow.com/questions/2349776/how-can-i-benchmark-a-c-program-easily
43
+ # include <sys/time.h>
44
+ # include <sys/resource.h>
45
+ #endif
46
+
47
+ #include "dtypes.h"
48
+
49
+ #include <stddef.h>
50
+ #ifdef HAVE_STDINT_H
51
+ # include <stdint.h>
52
+ #endif
53
+
54
+ /*
55
+ Data types used in NArray / NMatrix :
56
+ Please modify these types if your system has any different type.
57
+ */
58
+
59
+
60
+ /* NM_BYTE : unsigned 8-bit integer */
61
+ #ifndef HAVE_U_INT8_T
62
+ # ifdef HAVE_UINT8_T
63
+ typedef uint8_t u_int8_t;
64
+ # else
65
+ typedef unsigned char u_int8_t;
66
+ # endif
67
+ #endif
68
+
69
+ //#ifndef HAVE_INT8_T
70
+ //typedef char int8_t;
71
+ //#endif
72
+
73
+ #ifndef HAVE_INT16_T
74
+ # if SIZEOF_SHORT == 2
75
+ typedef short int16_t;
76
+ # else
77
+ ---->> Please define int16_t manually because sizeof(short) != 2. <<----
78
+ # endif
79
+ #endif /* HAVE_INT16_T */
80
+
81
+ #ifndef HAVE_INT32_T
82
+ # if SIZEOF_LONG == 4
83
+ typedef long int32_t;
84
+ # else
85
+ # if SIZEOF_INT == 4
86
+ typedef int int32_t;
87
+ # else
88
+ ---->> Please define int32_t manually because sizeof(long) != 4. <<----
89
+ # endif
90
+ # endif
91
+ #endif /* HAVE_INT32_T */
92
+
93
+ /* unsigned 32-bit integer */
94
+ #ifndef HAVE_U_INT32_T
95
+ # ifdef HAVE_UINT32_T
96
+ typedef uint32_t u_int32_t;
97
+ # else
98
+ # if SIZEOF_LONG == 4
99
+ typedef unsigned long u_int32_t;
100
+ # else
101
+ # if SIZEOF_INT == 4
102
+ typedef unsigned int u_int32_t;
103
+ # else
104
+ ---->> Please define u_int32_t manually because sizeof(long) != 4. <<----
105
+ # endif
106
+ # endif
107
+ # endif
108
+ #endif /* HAVE_U_INT32_T */
109
+
110
+ #ifndef HAVE_INT64_T
111
+ # if SIZEOF_QUAD == 8
112
+ typedef quad int64_t;
113
+ # else
114
+ # if SIZEOF_LONG == 8
115
+ typedef long int64_t;
116
+ # else
117
+ ---->> Please define int64_t manually because sizeof(quad) != 8. <<----
118
+ # endif
119
+ # endif
120
+ #endif /* HAVE_INT64_T */
121
+
122
+ /* unsigned 64-bit integer */
123
+ #ifndef HAVE_U_INT64_T
124
+ # ifdef HAVE_UINT64_T
125
+ typedef uint64_t u_int64_t;
126
+ # else
127
+ # if SIZEOF_QUAD == 8
128
+ typedef unsigned quad u_int64_t;
129
+ # else
130
+ # if SIZEOF_LONG == 8
131
+ typedef unsigned long u_int64_t;
132
+ # else
133
+ ---->> Please define u_int64_t manually because sizeof(quad) != 8. <<----
134
+ # endif
135
+ # endif
136
+ # endif
137
+ #endif /* HAVE_U_INT64_T */
138
+
139
+
140
+ #ifndef HAVE_SIZE_T /// If you modify this, make sure to modify the definition of y_size_t and Y_SIZE_T!
141
+ typedef u_int64_t size_t;
142
+ # define NM_SIZE_T NM_INT64
143
+ #else
144
+ # if SIZEOF_SIZE_T == 8
145
+ # define NM_SIZE_T NM_INT64
146
+ # else
147
+ # if SIZEOF_SIZE_T == 4
148
+ # define NM_SIZE_T NM_INT32
149
+ # else
150
+ ---->> Please define size_t and y_size_t manually because sizeof(size_t) is neither 8 nor 4. <<----
151
+ # endif
152
+ # endif
153
+ #endif
154
+
155
+ // for when we need to return array indices.
156
+ // This must never be larger than size_t
157
+ typedef u_int32_t y_size_t; // u_int32_t has a fallback typedef in this header; uint32_t is only available when <stdint.h> is included
158
+ #define Y_SIZE_T NM_INT32
159
+
160
+
161
+ #ifdef HAVE_STDBOOL_H
162
+ # include <stdbool.h>
163
+ #else /* no <stdbool.h>: provide a minimal bool/true/false fallback */
164
+ typedef char bool;
165
+ # define true 1 /* no trailing ';' so the macro can be used inside expressions and argument lists */
166
+ # define false 0
167
+ #endif
168
+
169
+
170
+ typedef struct { float r,i; } complex64;
171
+ typedef struct { double r,i; } complex128;
172
+ typedef struct { int16_t n,d; } rational32;
173
+ typedef struct { int32_t n,d; } rational64;
174
+ typedef struct { int64_t n,d; } rational128;
175
+
176
+
177
+ #if SIZEOF_INT == 8
178
+ # define DEFAULT_DTYPE NM_INT64
179
+ #else
180
+ # if SIZEOF_INT == 4
181
+ # define DEFAULT_DTYPE NM_INT32
182
+ # else
183
+ # define DEFAULT_DTYPE NM_INT16
184
+ # endif
185
+ #endif
186
+
187
+
188
+ #define YALE_GROWTH_CONSTANT 1.5
189
+
190
+
191
+ enum NMatrix_STypes {
192
+ S_DENSE,
193
+ S_LIST,
194
+ S_YALE,
195
+ S_TYPES
196
+ };
197
+
198
+
199
+ // Element-wise operations (see dense/elementwise.template.c)
200
+ enum NMatrix_Ops { // operator codes; most are the ASCII value of the operator character
200
+ NM_OP_ADD = '+',
201
+ NM_OP_SUB = '-',
202
+ NM_OP_MUL = '*',
203
+ NM_OP_DIV = '/',
204
+ NM_OP_MOD = '%',
205
+ NM_OP_BANG = '!',
206
+ NM_OP_NEG, // unary minus; no explicit value, so it is NM_OP_BANG + 1
207
+ NM_OP_EQEQ, // ==; implicit value NM_OP_BANG + 2
208
+ NM_OP_NEQ, // !=; implicit value NM_OP_BANG + 3
209
+ NM_OP_GT = '>', // >
210
+ NM_OP_LT = '<', // <
211
+ NM_OP_GTE = ',', // >= (two-character operator, encoded as ',')
212
+ NM_OP_LTE = '.', // <= (two-character operator, encoded as '.')
213
+ NM_OP_NOT = '~',
214
+ NM_OP_AND = '&',
215
+ NM_OP_OR = '|',
216
+ NM_OP_XOR = '^',
217
+ NM_OP_LSH, // <<; no explicit value, so it is NM_OP_XOR + 1
218
+ NM_OP_RSH // >>; implicit value NM_OP_XOR + 2
219
+ };
221
+
222
+
223
+ /* Singly-linked ordered list
224
+ * - holds keys and values
225
+ * - no duplicate keys
226
+ * - keys are ordered
227
+ * - values may be lists themselves
228
+ */
229
+ typedef struct l_node { /* Linked list node */
230
+ size_t key;
231
+ void* val;
232
+ struct l_node * next; // next
233
+ } NODE;
234
+
235
+ typedef struct l_list {
236
+ NODE* first;
237
+ } LIST;
238
+
239
+
240
+ // two vectors and a capacity
241
+ typedef struct y_vector {
242
+ void* ija;
243
+ void* a;
244
+ size_t capacity;
245
+ } VECTOR;
246
+
247
+
248
+ typedef struct common_s { // Common elements found in all _s types.
249
+ int8_t dtype;
250
+ size_t rank;
251
+ size_t* shape;
252
+ } STORAGE;
253
+
254
+
255
+ typedef struct list_s {
256
+ int8_t dtype;
257
+ size_t rank;
258
+ size_t* shape;
259
+ void* default_val;
260
+ LIST* rows;
261
+ } LIST_STORAGE;
262
+
263
+
264
+ typedef struct dense_s {
265
+ int8_t dtype;
266
+ size_t rank;
267
+ size_t* shape;
268
+ void* elements;
269
+ } DENSE_STORAGE;
270
+
271
+
272
+ typedef struct yale_s {
273
+ int8_t dtype;
274
+ size_t rank;
275
+ size_t* shape;
276
+ size_t ndnz; // strictly non-diagonal non-zero count!
277
+ size_t capacity;
278
+ int8_t index_dtype;
279
+ void* ija;
280
+ void* a;
281
+ } YALE_STORAGE;
282
+
283
+
284
+ typedef struct numeric_matrix { /* pairs a storage-type tag with a pointer to the storage itself */
285
+ int8_t stype; /* method of storage; NM_REF uses it to index a per-stype table -- presumably an enum NMatrix_STypes value (S_DENSE, S_LIST, S_YALE), TODO confirm */
286
+ STORAGE* storage; /* pointer to storage struct */
287
+ } NMATRIX;
288
+
289
+
290
+ /* Local */
291
+
292
+ typedef union {
293
+ u_int8_t b[2];
294
+ int16_t s;
295
+ } nm_size16_t;
296
+
297
+ typedef union {
298
+ u_int8_t b[4];
299
+ int32_t i;
300
+ float f;
301
+ } nm_size32_t;
302
+
303
+ typedef union {
304
+ u_int8_t b[8];
305
+ int64_t q;
306
+ float f[2];
307
+ double d;
308
+ complex64 c;
309
+ } nm_size64_t;
310
+
311
+ typedef union {
312
+ u_int8_t b[16];
313
+ int64_t i[2];
314
+ double d[2];
315
+ float f[4];
316
+ complex64 c[2];
317
+ complex128 z;
318
+ rational32 r[4];
319
+ rational64 ra[2];
320
+ rational128 rat;
321
+ VALUE v[2];
322
+ } nm_size128_t;
323
+
324
+
325
+ // For calling cblas_gemm functions (see cblas.c)
326
+ typedef struct cblas_param_t {
327
+ int M, N, K, lda, ldb, ldc;
328
+ void *A, *B, *C;
329
+ nm_size128_t alpha, beta;
330
+ } DENSE_PARAM;
331
+
332
+
333
+ // Formerly in smmp.h:
334
+ typedef struct smmp_param_t {
335
+ void *ia, *ja, *a;
336
+ bool diag;
337
+ } YALE_PARAM;
338
+
339
+ // Shouldn't be necessary, as they're defined in nmatrix.h:
340
+ // (Oddly, though, these fix the error.)
341
+ /*typedef uint8_t u_int8_t;
342
+ typedef uint16_t u_int16_t;
343
+ typedef uint32_t u_int32_t;
344
+ typedef uint64_t u_int64_t; */
345
+
346
+
347
+ // rational.c
348
+ int64_t nmrb_gcd(int64_t x, int64_t y);
349
+
350
+ // BLAS functions
351
+ #define SMMP_MAX_THREE(a,b,c) ((a)>(b) ? ( (a)>(c) ? (a) : (c) ) : ( (b)>(c) ? (b) : (c) ))
352
+ #define SMMP_MIN(a,b) ((a)>(b) ? (b) : (a))
353
+ #define SMMP_MAX(a,b) ((a)>(b) ? (a) : (b))
354
+
355
+ void transp(y_size_t n, y_size_t m, void* ia, void* ja, bool diaga, void* a, void* ib, void* jb, void* b, bool move, int8_t itype, int8_t dtype);
356
+
357
+ void i8_symbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
358
+ void i16_symbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
359
+ void i32_symbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
360
+ void i64_symbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
361
+
362
+ void i8_b_smmp_sort_columns_(y_size_t, YALE_PARAM);
363
+ void i16_b_smmp_sort_columns_(y_size_t, YALE_PARAM);
364
+ void i32_b_smmp_sort_columns_(y_size_t, YALE_PARAM);
365
+ void i64_b_smmp_sort_columns_(y_size_t, YALE_PARAM);
366
+ void i8_i8_smmp_sort_columns_(y_size_t, YALE_PARAM);
367
+ void i16_i8_smmp_sort_columns_(y_size_t, YALE_PARAM);
368
+ void i32_i8_smmp_sort_columns_(y_size_t, YALE_PARAM);
369
+ void i64_i8_smmp_sort_columns_(y_size_t, YALE_PARAM);
370
+ void i8_i16_smmp_sort_columns_(y_size_t, YALE_PARAM);
371
+ void i16_i16_smmp_sort_columns_(y_size_t, YALE_PARAM);
372
+ void i32_i16_smmp_sort_columns_(y_size_t, YALE_PARAM);
373
+ void i64_i16_smmp_sort_columns_(y_size_t, YALE_PARAM);
374
+ void i8_i32_smmp_sort_columns_(y_size_t, YALE_PARAM);
375
+ void i16_i32_smmp_sort_columns_(y_size_t, YALE_PARAM);
376
+ void i32_i32_smmp_sort_columns_(y_size_t, YALE_PARAM);
377
+ void i64_i32_smmp_sort_columns_(y_size_t, YALE_PARAM);
378
+ void i8_i64_smmp_sort_columns_(y_size_t, YALE_PARAM);
379
+ void i16_i64_smmp_sort_columns_(y_size_t, YALE_PARAM);
380
+ void i32_i64_smmp_sort_columns_(y_size_t, YALE_PARAM);
381
+ void i64_i64_smmp_sort_columns_(y_size_t, YALE_PARAM);
382
+ void i8_f32_smmp_sort_columns_(y_size_t, YALE_PARAM);
383
+ void i16_f32_smmp_sort_columns_(y_size_t, YALE_PARAM);
384
+ void i32_f32_smmp_sort_columns_(y_size_t, YALE_PARAM);
385
+ void i64_f32_smmp_sort_columns_(y_size_t, YALE_PARAM);
386
+ void i8_f64_smmp_sort_columns_(y_size_t, YALE_PARAM);
387
+ void i16_f64_smmp_sort_columns_(y_size_t, YALE_PARAM);
388
+ void i32_f64_smmp_sort_columns_(y_size_t, YALE_PARAM);
389
+ void i64_f64_smmp_sort_columns_(y_size_t, YALE_PARAM);
390
+ void i8_c64_smmp_sort_columns_(y_size_t, YALE_PARAM);
391
+ void i16_c64_smmp_sort_columns_(y_size_t, YALE_PARAM);
392
+ void i32_c64_smmp_sort_columns_(y_size_t, YALE_PARAM);
393
+ void i64_c64_smmp_sort_columns_(y_size_t, YALE_PARAM);
394
+ void i8_c128_smmp_sort_columns_(y_size_t, YALE_PARAM);
395
+ void i16_c128_smmp_sort_columns_(y_size_t, YALE_PARAM);
396
+ void i32_c128_smmp_sort_columns_(y_size_t, YALE_PARAM);
397
+ void i64_c128_smmp_sort_columns_(y_size_t, YALE_PARAM);
398
+ void i8_r32_smmp_sort_columns_(y_size_t, YALE_PARAM);
399
+ void i16_r32_smmp_sort_columns_(y_size_t, YALE_PARAM);
400
+ void i32_r32_smmp_sort_columns_(y_size_t, YALE_PARAM);
401
+ void i64_r32_smmp_sort_columns_(y_size_t, YALE_PARAM);
402
+ void i8_r64_smmp_sort_columns_(y_size_t, YALE_PARAM);
403
+ void i16_r64_smmp_sort_columns_(y_size_t, YALE_PARAM);
404
+ void i32_r64_smmp_sort_columns_(y_size_t, YALE_PARAM);
405
+ void i64_r64_smmp_sort_columns_(y_size_t, YALE_PARAM);
406
+ void i8_r128_smmp_sort_columns_(y_size_t, YALE_PARAM);
407
+ void i16_r128_smmp_sort_columns_(y_size_t, YALE_PARAM);
408
+ void i32_r128_smmp_sort_columns_(y_size_t, YALE_PARAM);
409
+ void i64_r128_smmp_sort_columns_(y_size_t, YALE_PARAM);
410
+ void i8_v_smmp_sort_columns_(y_size_t, YALE_PARAM);
411
+ void i16_v_smmp_sort_columns_(y_size_t, YALE_PARAM);
412
+ void i32_v_smmp_sort_columns_(y_size_t, YALE_PARAM);
413
+ void i64_v_smmp_sort_columns_(y_size_t, YALE_PARAM);
414
+
415
+ void i8_b_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
416
+ void i16_b_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
417
+ void i32_b_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
418
+ void i64_b_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
419
+ void i8_i8_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
420
+ void i16_i8_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
421
+ void i32_i8_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
422
+ void i64_i8_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
423
+ void i8_i16_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
424
+ void i16_i16_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
425
+ void i32_i16_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
426
+ void i64_i16_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
427
+ void i8_i32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
428
+ void i16_i32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
429
+ void i32_i32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
430
+ void i64_i32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
431
+ void i8_i64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
432
+ void i16_i64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
433
+ void i32_i64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
434
+ void i64_i64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
435
+ void i8_f32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
436
+ void i16_f32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
437
+ void i32_f32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
438
+ void i64_f32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
439
+ void i8_f64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
440
+ void i16_f64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
441
+ void i32_f64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
442
+ void i64_f64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
443
+ void i8_c64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
444
+ void i16_c64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
445
+ void i32_c64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
446
+ void i64_c64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
447
+ void i8_c128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
448
+ void i16_c128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
449
+ void i32_c128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
450
+ void i64_c128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
451
+ void i8_r32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
452
+ void i16_r32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
453
+ void i32_r32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
454
+ void i64_r32_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
455
+ void i8_r64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
456
+ void i16_r64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
457
+ void i32_r64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
458
+ void i64_r64_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
459
+ void i8_r128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
460
+ void i16_r128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
461
+ void i32_r128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
462
+ void i64_r128_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
463
+ void i8_v_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
464
+ void i16_v_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
465
+ void i32_v_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
466
+ void i64_v_smmp(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
467
+
468
+ void i8_b_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
469
+ void i16_b_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
470
+ void i32_b_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
471
+ void i64_b_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
472
+ void i8_i8_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
473
+ void i16_i8_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
474
+ void i32_i8_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
475
+ void i64_i8_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
476
+ void i8_i16_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
477
+ void i16_i16_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
478
+ void i32_i16_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
479
+ void i64_i16_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
480
+ void i8_i32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
481
+ void i16_i32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
482
+ void i32_i32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
483
+ void i64_i32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
484
+ void i8_i64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
485
+ void i16_i64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
486
+ void i32_i64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
487
+ void i64_i64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
488
+ void i8_f32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
489
+ void i16_f32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
490
+ void i32_f32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
491
+ void i64_f32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
492
+ void i8_f64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
493
+ void i16_f64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
494
+ void i32_f64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
495
+ void i64_f64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
496
+ void i8_c64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
497
+ void i16_c64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
498
+ void i32_c64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
499
+ void i64_c64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
500
+ void i8_c128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
501
+ void i16_c128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
502
+ void i32_c128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
503
+ void i64_c128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
504
+ void i8_r32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
505
+ void i16_r32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
506
+ void i32_r32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
507
+ void i64_r32_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
508
+ void i8_r64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
509
+ void i16_r64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
510
+ void i32_r64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
511
+ void i64_r64_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
512
+ void i8_r128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
513
+ void i16_r128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
514
+ void i32_r128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
515
+ void i64_r128_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
516
+ void i8_v_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
517
+ void i16_v_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
518
+ void i32_v_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
519
+ void i64_v_numbmm_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, YALE_PARAM);
520
+
521
+ void i8_b_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
522
+ void i16_b_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
523
+ void i32_b_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
524
+ void i64_b_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
525
+ void i8_i8_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
526
+ void i16_i8_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
527
+ void i32_i8_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
528
+ void i64_i8_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
529
+ void i8_i16_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
530
+ void i16_i16_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
531
+ void i32_i16_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
532
+ void i64_i16_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
533
+ void i8_i32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
534
+ void i16_i32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
535
+ void i32_i32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
536
+ void i64_i32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
537
+ void i8_i64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
538
+ void i16_i64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
539
+ void i32_i64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
540
+ void i64_i64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
541
+ void i8_f32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
542
+ void i16_f32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
543
+ void i32_f32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
544
+ void i64_f32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
545
+ void i8_f64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
546
+ void i16_f64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
547
+ void i32_f64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
548
+ void i64_f64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
549
+ void i8_c64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
550
+ void i16_c64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
551
+ void i32_c64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
552
+ void i64_c64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
553
+ void i8_c128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
554
+ void i16_c128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
555
+ void i32_c128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
556
+ void i64_c128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
557
+ void i8_r32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
558
+ void i16_r32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
559
+ void i32_r32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
560
+ void i64_r32_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
561
+ void i8_r64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
562
+ void i16_r64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
563
+ void i32_r64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
564
+ void i64_r64_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
565
+ void i8_r128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
566
+ void i16_r128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
567
+ void i32_r128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
568
+ void i64_r128_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
569
+ void i8_v_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
570
+ void i16_v_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
571
+ void i32_v_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
572
+ void i64_v_transp_(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool);
573
+
574
+
575
+
576
+ // For binary operations involving matrices that need to be casted.
577
+ typedef struct storage_pair_t {
578
+ STORAGE* left;
579
+ STORAGE* right;
580
+ } STORAGE_PAIR;
581
+
582
+
583
+ #ifndef NMATRIX_C
584
+ extern VALUE cNMatrix;
585
+
586
+ extern const int nm_sizeof[NM_TYPES+1];
587
+ #endif
588
+
589
+
590
+ #define NM_MAX_RANK 15
591
+
592
+ #define UnwrapNMatrix(obj,var) Data_Get_Struct(obj, struct numeric_matrix, var)
593
+ #define IsNMatrix(obj) (rb_obj_is_kind_of(obj, cNMatrix)==Qtrue) // cNMatrix is the class VALUE declared extern in this header (was misspelled CNMatrix)
594
+
595
+ #define NM_STORAGE(val) (((struct numeric_matrix*)DATA_PTR(val))->storage)
596
+ //#define NM_PTR(a, p) ((a)->ptr+(p)*nm_sizeof[(a)->type])
597
+ #define NM_STRUCT(val) ((struct numeric_matrix*)DATA_PTR(val))
598
+ //#define NM_PTR_TYPE(val,type) (type)(((struct numeric_matrix*)DATA_PTR(val))->ptr)
599
+ #define NM_RANK(val) (((STORAGE*)(NM_STORAGE(val)))->rank)
600
+ #define NM_DTYPE(val) (((STORAGE*)(NM_STORAGE(val)))->dtype)
601
+ #define NM_STYPE(val) (((struct numeric_matrix*)DATA_PTR(val))->stype)
602
+ #define NM_SHAPE(val,i) (((STORAGE*)(NM_STORAGE(val)))->shape[(i)])
603
+ #define NM_SHAPE0(val) (((STORAGE*)(NM_STORAGE(val)))->shape[0]) // shape is a member of the storage struct; struct numeric_matrix has no shape field
604
+ #define NM_SHAPE1(val) (((STORAGE*)(NM_STORAGE(val)))->shape[1])
605
+ #define NM_SIZEOF_DTYPE(val) (nm_sizeof[NM_DTYPE(val)])
606
+ #define NM_REF(val,coords) (RefFuncs[NM_STYPE(val)]( NM_STORAGE(val), coords, NM_SIZEOF_DTYPE(val) )) // dispatches on stype; NOTE(review): passes 3 arguments but nm_stype_ref_t is declared as (STORAGE*, size_t*) -- confirm RefFuncs' signature
607
+
608
+ #define NM_IsNMatrix(obj) (rb_obj_is_kind_of(obj, cNMatrix)==Qtrue)
609
+ #define NM_IsArray(obj) (TYPE(obj)==T_ARRAY || rb_obj_is_kind_of(obj,cNMatrix)==Qtrue)
610
+ #define NM_IsROBJ(d) ((d)->dtype==NM_ROBJ)
611
+ #define NM_IsINTEGER(a) \
612
+ (NM_DTYPE(a)==NM_BYTE || NM_DTYPE(a)==NM_INT8 || NM_DTYPE(a)==NM_INT16 || NM_DTYPE(a)==NM_INT32 || NM_DTYPE(a)==NM_INT64)
613
+ #define NM_IsCOMPLEX(a) \
614
+ (NM_DTYPE(a)==NM_COMPLEX64 || NM_DTYPE(a)==NM_COMPLEX128) // dtype names follow the complex64/complex128 typedefs; NOTE(review): confirm against dtypes.h
615
+ #define NM_MAX(a,b) (((a)>(b))?(a):(b))
616
+ #define NM_SWAP(a,b,tmp) {(tmp)=(a);(a)=(b);(b)=(tmp);}
617
+
618
+ //#define NUM2REAL(v) NUM2DBL( rb_funcall((v),nm_id_real,0) ) // deprecated
619
+ #define REAL2DBL(v) NUM2DBL( rb_funcall((v),nm_id_real,0) )
620
+ //#define NUM2IMAG(v) NUM2DBL( rb_funcall((v),nm_id_imag,0) ) // deprecated
621
+ #define IMAG2DBL(v) NUM2DBL( rb_funcall((v),nm_id_imag,0) )
622
+
623
+ #define NUM2NUMER(v) NUM2INT( rb_funcall((v), nm_id_numer,0) ) // deprecated
624
+ #define NUMER2INT(v) NUM2INT( rb_funcall((v), nm_id_numer,0) )
625
+ #define NUM2DENOM(v) NUM2INT( rb_funcall((v), nm_id_denom,0) ) // deprecated
626
+ #define DENOM2INT(v) NUM2INT( rb_funcall((v), nm_id_denom,0) )
627
+
628
+ #define IS_NUMERIC(v) (FIXNUM_P(v) || TYPE(v) == T_FLOAT || TYPE(v) == T_COMPLEX || TYPE(v) == T_RATIONAL)
629
+ #define IS_STRING(v) (TYPE(v) == T_STRING)
630
+
631
+ #define CheckNMatrixType(v) if (TYPE(v) != T_DATA || RDATA(v)->dfree != (RUBY_DATA_FUNC)nm_delete) rb_raise(rb_eTypeError, "expected NMatrix on left-hand side of operation"); // raises TypeError unless v is a T_DATA whose free function is nm_delete; NOTE: expands to a bare 'if' with its own ';', so do not place it directly before an 'else'
632
+
633
+ //#define YALE_JA_START(sptr) (((YALE_STORAGE*)(sptr))->shape[0]+1)
634
+ #define YALE_IJA(sptr,elem_size,i) ((void*)( (char*)(((YALE_STORAGE*)(sptr))->ija) + (i) * (elem_size) )) // address of the i-th IJA entry; arguments parenthesized so expressions like i+1 scale correctly
635
+ //#define YALE_JA(sptr,dtype,j) ((((dtype)*)((YALE_STORAGE*)(sptr))->ija)[(YALE_JA_START(sptr))+j])
636
+ #define YALE_ROW_LENGTH(sptr,elem_size,i) (*(size_t*)YALE_IA((sptr),(elem_size),(i)+1) - *(size_t*)YALE_IJA((sptr),(elem_size),(i)))
637
+ #define YALE_A(sptr,elem_size,i) ((void*)((char*)(((YALE_STORAGE*)(sptr))->a) + (elem_size) * (i))) // address of the i-th entry of the A vector; arguments parenthesized against precedence surprises
638
+ #define YALE_DIAG(sptr, elem_size, i) ( YALE_A((sptr),(elem_size),(i)) )
639
+ //#define YALE_LU(sptr,dtype,i,j) (((dtype)*)(((YALE_STORAGE*)(sptr))->a)[ YALE_JA_START(sptr) + ])
640
+ #define YALE_MINIMUM(sptr) (((YALE_STORAGE*)(sptr))->shape[0]*2 + 1) // arbitrarily defined
641
+ #define YALE_SIZE_PTR(sptr,elem_size) ((void*)((char*)((YALE_STORAGE*)(sptr))->ija + ((YALE_STORAGE*)(sptr))->shape[0]*(elem_size) )) // address of IJA entry number shape[0]
642
+ #define YALE_MAX_SIZE(sptr) (((YALE_STORAGE*)(sptr))->shape[0] * ((YALE_STORAGE*)(sptr))->shape[1] + 1)
643
+ #define YALE_IA_SIZE(sptr) ((YALE_STORAGE*)(sptr))->shape[0]
644
+
645
+ // None of these next three return anything. They set a reference directly.
646
+ #define YaleGetIJA(victim,s,i) (SetFuncs[Y_SIZE_T][(s)->index_dtype](1, &(victim), 0, YALE_IJA((s), nm_sizeof[(s)->index_dtype], (i)), 0)) // copies IJA entry i into victim, converting index_dtype -> Y_SIZE_T
647
+ #define YaleSetIJA(i,s,from) (SetFuncs[(s)->index_dtype][Y_SIZE_T](1, YALE_IJA((s), nm_sizeof[(s)->index_dtype], (i)), 0, &(from), 0)) // copies from into IJA entry i, converting Y_SIZE_T -> index_dtype
648
+ #define YaleGetSize(sz,s) (SetFuncs[Y_SIZE_T][(s)->index_dtype](1, &(sz), 0, (YALE_SIZE_PTR((s), nm_sizeof[(s)->index_dtype])), 0)) // copies the entry at YALE_SIZE_PTR into sz
649
+ //#define YALE_FIRST_NZ_ROW_ENTRY(sptr,elem_size,i)
650
+
651
+
652
+ #if !defined RSTRING_LEN
653
+ #define RSTRING_LEN(a) RSTRING(a)->len
654
+ #endif
655
+ #if !defined RSTRING_PTR
656
+ #define RSTRING_PTR(a) RSTRING(a)->ptr
657
+ #endif
658
+ #if !defined RARRAY_LEN
659
+ #define RARRAY_LEN(a) RARRAY(a)->len
660
+ #endif
661
+ #if !defined RARRAY_PTR
662
+ #define RARRAY_PTR(a) RARRAY(a)->ptr
663
+ #endif
664
+
665
+ #define NM_INDEX_TYPES NM_FLOAT32
666
+
667
+
668
+ typedef void (*nm_setfunc_t[NM_TYPES][NM_TYPES])(); // copy functions; YaleGetIJA/YaleSetIJA suggest the indexing is [destination dtype][source dtype] -- TODO confirm
669
+ typedef void (*nm_incfunc_t[NM_TYPES])(); // increment functions, one per dtype
670
+ typedef void* (*nm_stype_ref_t[S_TYPES])(STORAGE*, size_t*); // get/ref, one per stype
671
+ typedef VALUE (*nm_stype_ins_t[S_TYPES])(STORAGE*, size_t*, VALUE); // insert, one per stype
672
+ typedef STORAGE* (*nm_create_storage_t[S_TYPES])(); // table of functions returning STORAGE*, one per stype
673
+ typedef STORAGE* (*nm_cast_copy_storage_t[S_TYPES])(); // table of functions returning STORAGE*, one per stype
674
+ typedef STORAGE* (*nm_scast_copy_storage_t[S_TYPES][S_TYPES])(); // table indexed by two stypes; NOTE(review): index order (to/from) is not visible here -- confirm
675
+ typedef NMATRIX* (*nm_matrix_multiply_op_t[S_TYPES])(); // table of functions returning NMATRIX*, one per stype
676
+ typedef NMATRIX* (*nm_elementwise_binary_op_casted_t[S_TYPES])(); // table of functions returning NMATRIX*, one per stype
677
+ typedef int (*nm_d_elementwise_binary_op_t[NM_TYPES])(); // table of int-returning functions, one per dtype
678
+ typedef int (*nm_y_elementwise_binary_op_t[NM_TYPES][NM_INDEX_TYPES])(); // table of int-returning functions, indexed [dtype][index dtype]
679
+ typedef bool (*nm_compare_t[S_TYPES])(); // table of bool-returning functions, one per stype
680
+ typedef void (*nm_delete_t[S_TYPES])(); // table of void functions, one per stype
681
+ typedef void (*nm_mark_t[S_TYPES])(void*); // table of void functions taking void*, one per stype
682
+ typedef void (*nm_gemm_t[NM_TYPES])(); // general matrix/matrix multiply
683
+ typedef void (*nm_dense_transpose_t[NM_TYPES])(); // dense transpose
684
+ typedef void (*nm_gemv_t[NM_TYPES])(); // general matrix/vector multiply
685
+ typedef void (*nm_smmp_t[NM_TYPES][NM_INDEX_TYPES])(); // sparse (yale) multiply
686
+ typedef void (*nm_smmp_transpose_t[NM_TYPES][NM_INDEX_TYPES])(y_size_t, y_size_t, YALE_PARAM, YALE_PARAM, bool); // sparse (yale) transpose
687
+ //typedef void (*nm_setsf_t[S_TYPES][S_TYPES])();
688
+ //typedef void (*nm_setdf_t[NM_DTYPES][NM_DTYPES])();
689
+
690
+ extern nm_setfunc_t SetFuncs;
691
+ extern nm_incfunc_t Increment;
692
+ extern ID nm_id_real, nm_id_imag;
693
+ extern ID nm_id_denom, nm_id_numer;
694
+ extern ID nm_id_mult, nm_id_multeq, nm_id_add;
695
+
696
+ /* blas.c */
697
+ int r32gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const rational32 alpha, const rational32* A, const int lda, const rational32* B, const int ldb, const rational32 beta, rational32* C, const int ldc);
698
+ int r32gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const rational32 alpha, const rational32* A, const size_t lda, const rational32* X, const int incX, const rational32 beta, rational32* Y, const int incY);
699
+ int r64gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const rational64 alpha, const rational64* A, const int lda, const rational64* B, const int ldb, const rational64 beta, rational64* C, const int ldc);
700
+ int r64gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const rational64 alpha, const rational64* A, const size_t lda, const rational64* X, const int incX, const rational64 beta, rational64* Y, const int incY);
701
+ int r128gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const rational128 alpha, const rational128* A, const int lda, const rational128* B, const int ldb, const rational128 beta, rational128* C, const int ldc);
702
+ int r128gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const rational128 alpha, const rational128* A, const size_t lda, const rational128* X, const int incX, const rational128 beta, rational128* Y, const int incY);
703
+ int bgemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const u_int8_t alpha, const u_int8_t* A, const int lda, const u_int8_t* B, const int ldb, const u_int8_t beta, u_int8_t* C, const int ldc);
704
+ int bgemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const u_int8_t alpha, const u_int8_t* A, const size_t lda, const u_int8_t* X, const int incX, const u_int8_t beta, u_int8_t* Y, const int incY);
705
+ int i8gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const int8_t alpha, const int8_t* A, const int lda, const int8_t* B, const int ldb, const int8_t beta, int8_t* C, const int ldc);
706
+ int i8gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const int8_t alpha, const int8_t* A, const size_t lda, const int8_t* X, const int incX, const int8_t beta, int8_t* Y, const int incY);
707
+ int i16gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const int16_t alpha, const int16_t* A, const int lda, const int16_t* B, const int ldb, const int16_t beta, int16_t* C, const int ldc);
708
+ int i16gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const int16_t alpha, const int16_t* A, const size_t lda, const int16_t* X, const int incX, const int16_t beta, int16_t* Y, const int incY);
709
+ int i32gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const int32_t alpha, const int32_t* A, const int lda, const int32_t* B, const int ldb, const int32_t beta, int32_t* C, const int ldc);
710
+ int i32gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const int32_t alpha, const int32_t* A, const size_t lda, const int32_t* X, const int incX, const int32_t beta, int32_t* Y, const int incY);
711
+ int i64gemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const int64_t alpha, const int64_t* A, const int lda, const int64_t* B, const int ldb, const int64_t beta, int64_t* C, const int ldc);
712
+ int i64gemv(enum CBLAS_TRANSPOSE Trans, const size_t M, const size_t N, const int64_t alpha, const int64_t* A, const size_t lda, const int64_t* X, const int incX, const int64_t beta, int64_t* Y, const int incY);
713
+ int vgemm(enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const VALUE alpha, const VALUE* A, const int lda, const VALUE* B, const int ldb, const VALUE beta, VALUE* C, const int ldc);
714
+
715
+ // void %%TYPE_ABBREV%%transp(unsigned int M, unsigned int N, const %%TYPE%%* A, int lda, %%TYPE%%* B, int ldb);
716
+ void btransp(const unsigned int, const unsigned int, const u_int8_t*, const int, u_int8_t*, const int);
717
+ void i8transp(const unsigned int, const unsigned int, const int8_t*, const int, int8_t*, const int);
718
+ void i16transp(const unsigned int, const unsigned int, const int16_t*, const int, int16_t*, const int);
719
+ void i32transp(const unsigned int, const unsigned int, const int32_t*, const int, int32_t*, const int);
720
+ void i64transp(const unsigned int, const unsigned int, const int64_t*, const int, int64_t*, const int);
721
+ void f32transp(const unsigned int, const unsigned int, const float*, const int, float*, const int);
722
+ void f64transp(const unsigned int, const unsigned int, const double*, const int, double*, const int);
723
+ void c64transp(const unsigned int, const unsigned int, const complex64*, const int, complex64*, const int);
724
+ void c128transp(const unsigned int, const unsigned int, const complex128*, const int, complex128*, const int);
725
+ void r32transp(const unsigned int, const unsigned int, const rational32*, const int, rational32*, const int);
726
+ void r64transp(const unsigned int, const unsigned int, const rational64*, const int, rational64*, const int);
727
+ void r128transp(const unsigned int, const unsigned int, const rational128*, const int, rational128*, const int);
728
+ void vtransp(const unsigned int, const unsigned int, const VALUE*, const int, VALUE*, const int);
729
+
730
+ int nm_d_b_elementwise(const u_int8_t* A, const u_int8_t* B, u_int8_t* C, size_t n, enum NMatrix_Ops op);
731
+ int nm_d_i8_elementwise(const int8_t* A, const int8_t* B, int8_t* C, size_t n, enum NMatrix_Ops op);
732
+ int nm_d_i16_elementwise(const int16_t* A, const int16_t* B, int16_t* C, size_t n, enum NMatrix_Ops op);
733
+ int nm_d_i32_elementwise(const int32_t* A, const int32_t* B, int32_t* C, size_t n, enum NMatrix_Ops op);
734
+ int nm_d_i64_elementwise(const int64_t* A, const int64_t* B, int64_t* C, size_t n, enum NMatrix_Ops op);
735
+ int nm_d_f32_elementwise(const float* A, const float* B, float* C, size_t n, enum NMatrix_Ops op);
736
+ int nm_d_f64_elementwise(const double* A, const double* B, double* C, size_t n, enum NMatrix_Ops op);
737
+ int nm_d_c64_elementwise(const complex64* A, const complex64* B, complex64* C, size_t n, enum NMatrix_Ops op);
738
+ int nm_d_c128_elementwise(const complex128* A, const complex128* B, complex128* C, size_t n, enum NMatrix_Ops op);
739
+ int nm_d_r32_elementwise(const rational32* A, const rational32* B, rational32* C, size_t n, enum NMatrix_Ops op);
740
+ int nm_d_r64_elementwise(const rational64* A, const rational64* B, rational64* C, size_t n, enum NMatrix_Ops op);
741
+ int nm_d_r128_elementwise(const rational128* A, const rational128* B, rational128* C, size_t n, enum NMatrix_Ops op);
742
+ int nm_d_v_elementwise(const VALUE* A, const VALUE* B, VALUE* C, size_t n, enum NMatrix_Ops op);
743
+
744
+ // These are in blas.c but are needed by smmp2.c (the smmp template stuff)
745
+ rational128 r128_muldiv(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
746
+ rational128 r128_addsub(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
747
+ rational128 r128_mod(int64_t anum, int64_t aden, int64_t bnum, int64_t bden);
748
+ rational128 r128_bang(int64_t, int64_t);
749
+ rational128 r128_negate(int64_t, int64_t);
750
+ rational64 r64_muldiv(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
751
+ rational64 r64_addsub(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
752
+ rational64 r64_mod(int32_t anum, int32_t aden, int32_t bnum, int32_t bden);
753
+ rational64 r64_bang(int32_t, int32_t);
754
+ rational64 r64_negate(int32_t, int32_t);
755
+ rational32 r32_muldiv(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
756
+ rational32 r32_addsub(int64_t anum, int64_t aden, int64_t bnum, int64_t bden, char k);
757
+ rational32 r32_mod(int16_t anum, int16_t aden, int16_t bnum, int16_t bden);
758
+ rational32 r32_bang(int16_t, int16_t);
759
+ rational32 r32_negate(int16_t, int16_t);
760
+
761
+ rational32 BOOL2R32(bool);
762
+ rational64 BOOL2R64(bool);
763
+ rational128 BOOL2R128(bool);
764
+
765
+
766
+ /* cblas.c */
767
+ DENSE_PARAM init_cblas_params_for_nm_multiply_matrix(int8_t dtype);
768
+ void cblas_bgemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
769
+ void cblas_bgemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
770
+ void cblas_i8gemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
771
+ void cblas_i8gemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
772
+ void cblas_i16gemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
773
+ void cblas_i16gemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
774
+ void cblas_i32gemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
775
+ void cblas_i32gemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
776
+ void cblas_i64gemm_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
777
+ void cblas_i64gemv_(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
778
+ void cblas_sgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
779
+ void cblas_sgemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
780
+ void cblas_dgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
781
+ void cblas_dgemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
782
+ void cblas_cgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
783
+ void cblas_cgemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
784
+ void cblas_zgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
785
+ void cblas_zgemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
786
+ void cblas_r32gemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
787
+ void cblas_r32gemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
788
+ void cblas_r64gemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
789
+ void cblas_r64gemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
790
+ void cblas_r128gemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
791
+ void cblas_r128gemv_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, DENSE_PARAM p);
792
+ void cblas_vgemm_(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, DENSE_PARAM p);
793
+
794
+
795
+ /* smmp2.c */
796
+ int i8_b_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, u_int8_t* a, u_int8_t* b, u_int8_t* c);
797
+ int i16_b_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, u_int8_t* a, u_int8_t* b, u_int8_t* c);
798
+ int i32_b_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, u_int8_t* a, u_int8_t* b, u_int8_t* c);
799
+ int i64_b_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, u_int8_t* a, u_int8_t* b, u_int8_t* c);
800
+ int i8_i8_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, int8_t* a, int8_t* b, int8_t* c);
801
+ int i16_i8_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, int8_t* a, int8_t* b, int8_t* c);
802
+ int i32_i8_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, int8_t* a, int8_t* b, int8_t* c);
803
+ int i64_i8_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, int8_t* a, int8_t* b, int8_t* c);
804
+ int i8_i16_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, int16_t* a, int16_t* b, int16_t* c);
805
+ int i16_i16_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, int16_t* a, int16_t* b, int16_t* c);
806
+ int i32_i16_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, int16_t* a, int16_t* b, int16_t* c);
807
+ int i64_i16_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, int16_t* a, int16_t* b, int16_t* c);
808
+ int i8_i32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, int32_t* a, int32_t* b, int32_t* c);
809
+ int i16_i32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, int32_t* a, int32_t* b, int32_t* c);
810
+ int i32_i32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, int32_t* a, int32_t* b, int32_t* c);
811
+ int i64_i32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, int32_t* a, int32_t* b, int32_t* c);
812
+ int i8_i64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, int64_t* a, int64_t* b, int64_t* c);
813
+ int i16_i64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, int64_t* a, int64_t* b, int64_t* c);
814
+ int i32_i64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, int64_t* a, int64_t* b, int64_t* c);
815
+ int i64_i64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, int64_t* a, int64_t* b, int64_t* c);
816
+ int i8_f32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, float* a, float* b, float* c);
817
+ int i16_f32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, float* a, float* b, float* c);
818
+ int i32_f32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, float* a, float* b, float* c);
819
+ int i64_f32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, float* a, float* b, float* c);
820
+ int i8_f64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, double* a, double* b, double* c);
821
+ int i16_f64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, double* a, double* b, double* c);
822
+ int i32_f64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, double* a, double* b, double* c);
823
+ int i64_f64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, double* a, double* b, double* c);
824
+ int i8_c64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, complex64* a, complex64* b, complex64* c);
825
+ int i16_c64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, complex64* a, complex64* b, complex64* c);
826
+ int i32_c64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, complex64* a, complex64* b, complex64* c);
827
+ int i64_c64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, complex64* a, complex64* b, complex64* c);
828
+ int i8_c128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, complex128* a, complex128* b, complex128* c);
829
+ int i16_c128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, complex128* a, complex128* b, complex128* c);
830
+ int i32_c128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, complex128* a, complex128* b, complex128* c);
831
+ int i64_c128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, complex128* a, complex128* b, complex128* c);
832
+ int i8_r32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, rational32* a, rational32* b, rational32* c);
833
+ int i16_r32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, rational32* a, rational32* b, rational32* c);
834
+ int i32_r32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, rational32* a, rational32* b, rational32* c);
835
+ int i64_r32_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, rational32* a, rational32* b, rational32* c);
836
+ int i8_r64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, rational64* a, rational64* b, rational64* c);
837
+ int i16_r64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, rational64* a, rational64* b, rational64* c);
838
+ int i32_r64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, rational64* a, rational64* b, rational64* c);
839
+ int i64_r64_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, rational64* a, rational64* b, rational64* c);
840
+ int i8_r128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, rational128* a, rational128* b, rational128* c);
841
+ int i16_r128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, rational128* a, rational128* b, rational128* c);
842
+ int i32_r128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, rational128* a, rational128* b, rational128* c);
843
+ int i64_r128_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, rational128* a, rational128* b, rational128* c);
844
+ int i8_v_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int8_t* ija, const u_int8_t* ijb, const u_int8_t* ijc, VALUE* a, VALUE* b, VALUE* c);
845
+ int i16_v_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int16_t* ija, const u_int16_t* ijb, const u_int16_t* ijc, VALUE* a, VALUE* b, VALUE* c);
846
+ int i32_v_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int32_t* ija, const u_int32_t* ijb, const u_int32_t* ijc, VALUE* a, VALUE* b, VALUE* c);
847
+ int i64_v_ew(y_size_t n, y_size_t m, enum NMatrix_Ops op, const u_int64_t* ija, const u_int64_t* ijb, const u_int64_t* ijc, VALUE* a, VALUE* b, VALUE* c);
848
+
849
+
850
+ /* dense.c */
851
+ DENSE_STORAGE* create_dense_storage(int8_t dtype, size_t* shape, size_t rank, void* elements, size_t elements_length);
852
+ void delete_dense_storage(DENSE_STORAGE* s);
853
+ void mark_dense_storage(void* s);
854
+ DENSE_STORAGE* cast_copy_dense_storage(DENSE_STORAGE* rhs, int8_t new_dtype);
855
+
856
+ size_t count_dense_storage_elements(const DENSE_STORAGE* s);
857
+ bool dense_storage_eqeq(const DENSE_STORAGE*, const DENSE_STORAGE*);
858
+
859
+ size_t dense_storage_pos(DENSE_STORAGE* s, size_t* coords);
860
+ void* dense_storage_get(DENSE_STORAGE* s, size_t* coords);
861
+ void dense_storage_set(DENSE_STORAGE* s, size_t* coords, void* val);
862
+
863
+ /* list.c */
864
+ LIST_STORAGE* create_list_storage(int8_t dtype, size_t* shape, size_t rank, void* init_val);
865
+ void delete_list_storage(LIST_STORAGE* s);
866
+ void mark_list_storage(void* s);
867
+ LIST_STORAGE* cast_copy_list_storage(LIST_STORAGE* rhs, int8_t new_dtype);
868
+ size_t count_storage_max_elements(const STORAGE*);
869
+
870
+ void* list_storage_get(LIST_STORAGE* s, size_t* coords);
871
+ void* list_storage_insert(LIST_STORAGE* s, size_t* coords, void* val);
872
+ void* list_storage_remove(LIST_STORAGE* s, size_t* coords);
873
+ bool list_storage_eqeq(const LIST_STORAGE*, const LIST_STORAGE*);
874
+
875
+ /* yale.c */
876
+ void print_vectors(YALE_STORAGE* s);
877
+ YALE_STORAGE* create_yale_storage(int8_t dtype, size_t* shape, size_t rank, size_t init_capacity);
878
+ void init_yale_storage(YALE_STORAGE* s);
879
+ void delete_yale_storage(YALE_STORAGE* s);
880
+ void mark_yale_storage(void* s);
881
+ YALE_STORAGE* cast_copy_yale_storage(YALE_STORAGE* rhs, int8_t new_dtype);
882
+ bool yale_storage_eqeq(const YALE_STORAGE*, const YALE_STORAGE*);
883
+
884
+ void* yale_storage_ref(YALE_STORAGE* s, size_t* coords);
885
+ char yale_storage_set(YALE_STORAGE* s, size_t* coords, void* v);
886
+
887
+ YALE_STORAGE* create_merged_yale_storage(const YALE_STORAGE*, const YALE_STORAGE*);
888
+
889
+ size_t count_list_storage_nd_elements(const LIST_STORAGE*);
890
+ size_t count_list_storage_elements(const LIST_STORAGE*);
891
+
892
+
893
+ /* stype casts */
894
+ DENSE_STORAGE* scast_copy_dense_yale(const YALE_STORAGE* rhs, int8_t l_dtype);
895
+ DENSE_STORAGE* scast_copy_dense_list(const LIST_STORAGE* rhs, int8_t l_dtype);
896
+ YALE_STORAGE* scast_copy_yale_dense(const DENSE_STORAGE* rhs, int8_t l_dtype);
897
+ YALE_STORAGE* scast_copy_yale_list(const LIST_STORAGE* rhs, int8_t l_dtype);
898
+ LIST_STORAGE* scast_copy_list_yale(const YALE_STORAGE* rhs, int8_t l_dtype);
899
+ LIST_STORAGE* scast_copy_list_dense(const DENSE_STORAGE* rhs, int8_t l_dtype);
900
+
901
+ /* nmatrix.c */
902
+ void cast_copy_value_single(void* to, const void* from, int8_t l_dtype, int8_t r_dtype);
903
+ int8_t nm_dtypestring_to_dtype(VALUE str);
904
+ int8_t nm_dtypesymbol_to_dtype(VALUE sym);
905
+ int8_t nm_stypestring_to_stype(VALUE str);
906
+ int8_t nm_stypesymbol_to_stype(VALUE sym);
907
+ int8_t nm_guess_dtype(VALUE v);
908
+ size_t* nm_interpret_shape_arg(VALUE arg, size_t* rank);
909
+ NMATRIX* nm_create(int8_t stype, void* storage);
910
+ void Init_nmatrix(void); // takes no arguments; (void) makes this a real prototype, where () left the argument list unspecified
911
+
912
+ #endif