ryeppp 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/templates/ryeppp.c.rb +51 -51
- data/lib/ryeppp/bench.rb +284 -1
- data/lib/ryeppp/version.rb +1 -1
- metadata +2 -2
data/ext/templates/ryeppp.c.rb
CHANGED
@@ -142,14 +142,14 @@ VALUE cRyeppp;
|
|
142
142
|
FUNCS = Proc.new do |verb_name|
|
143
143
|
%{#{if verb_name == 'Multiply'
|
144
144
|
typed_variants(%{
|
145
|
-
static VALUE
|
145
|
+
static VALUE multiply_iv64{{type}}s64{{type}}_iv64{{type}}(VALUE self, VALUE x, VALUE multiply_by) {
|
146
146
|
enum YepStatus status;
|
147
147
|
long i;
|
148
148
|
VALUE new_ary;
|
149
149
|
VALUE *x_a;
|
150
150
|
long l;
|
151
151
|
Yep64{{type}} mult_by;
|
152
|
-
#{declare_yep64_typed_array(%w{x
|
152
|
+
#{declare_yep64_typed_array(%w{x})}
|
153
153
|
|
154
154
|
#{ensure_array_argument('x', 'first')}
|
155
155
|
if (TYPE(multiply_by) != T_FIXNUM && TYPE(multiply_by) != T_BIGNUM && TYPE(multiply_by) != T_FLOAT) {
|
@@ -161,21 +161,21 @@ FUNCS = Proc.new do |verb_name|
|
|
161
161
|
mult_by = (Yep64{{type}})NUM2DBL(multiply_by);
|
162
162
|
|
163
163
|
/* Allocate arrays of inputs and outputs */
|
164
|
-
#{allocate_yep64_typed_array(%w{x
|
164
|
+
#{allocate_yep64_typed_array(%w{x}, 'l')}
|
165
165
|
|
166
166
|
#{initialize_yeppp}
|
167
167
|
|
168
|
-
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x
|
168
|
+
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x})}
|
169
169
|
|
170
170
|
/* Perform the operation */
|
171
|
-
status =
|
171
|
+
status = yepCore_Multiply_IV64{{type}}S64{{type}}_IV64{{type}}(yep_x, mult_by, (YepSize)l);
|
172
172
|
assert(status == YepStatusOk);
|
173
173
|
|
174
|
-
#{load_ruby_array_from_yeppp_array_parameterized('
|
174
|
+
#{load_ruby_array_from_yeppp_array_parameterized('x', 'i', 'l')}
|
175
175
|
|
176
176
|
#{deinitialize_yeppp}
|
177
177
|
|
178
|
-
#{release_array_memory(%w{x
|
178
|
+
#{release_array_memory(%w{x})}
|
179
179
|
|
180
180
|
return new_ary;
|
181
181
|
}
|
@@ -185,14 +185,14 @@ FUNCS = Proc.new do |verb_name|
|
|
185
185
|
|
186
186
|
#{typed_variants(%{
|
187
187
|
// #{verb_name} Arrays of Fixnums.
|
188
|
-
static VALUE #{verb_name.downcase}
|
188
|
+
static VALUE #{verb_name.downcase}_iv64{{type}}v64{{type}}_iv64{{type}}(VALUE self, VALUE x, VALUE y) {
|
189
189
|
enum YepStatus status;
|
190
190
|
VALUE new_ary;
|
191
191
|
long i;
|
192
192
|
VALUE *x_a;
|
193
193
|
VALUE *y_a;
|
194
194
|
long l;
|
195
|
-
#{declare_yep64_typed_array(%w{x y
|
195
|
+
#{declare_yep64_typed_array(%w{x y})}
|
196
196
|
|
197
197
|
#{ensure_array_argument('x', 'first')}
|
198
198
|
#{ensure_array_argument('y', 'second')}
|
@@ -202,22 +202,22 @@ FUNCS = Proc.new do |verb_name|
|
|
202
202
|
l = RARRAY_LEN(x);
|
203
203
|
|
204
204
|
/* Allocate arrays of inputs and outputs */
|
205
|
-
#{allocate_yep64_typed_array(%w{x y
|
205
|
+
#{allocate_yep64_typed_array(%w{x y}, 'l')}
|
206
206
|
|
207
207
|
#{initialize_yeppp}
|
208
208
|
|
209
|
-
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x y
|
210
|
-
#{load_ruby_array_into_yeppp_array_parameterized('y', 'i', 'l', :allocated_arrays => %w{x y
|
209
|
+
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x y})}
|
210
|
+
#{load_ruby_array_into_yeppp_array_parameterized('y', 'i', 'l', :allocated_arrays => %w{x y})}
|
211
211
|
|
212
212
|
/* Perform the #{verb_name} */
|
213
|
-
status = yepCore_#{verb_name}
|
213
|
+
status = yepCore_#{verb_name}_IV64{{type}}V64{{type}}_IV64{{type}}(yep_x, yep_y, (YepSize)l);
|
214
214
|
assert(status == YepStatusOk);
|
215
215
|
|
216
|
-
#{load_ruby_array_from_yeppp_array_parameterized('
|
216
|
+
#{load_ruby_array_from_yeppp_array_parameterized('x', 'i', 'l')}
|
217
217
|
|
218
218
|
#{deinitialize_yeppp}
|
219
219
|
|
220
|
-
#{release_array_memory(%w{x y
|
220
|
+
#{release_array_memory(%w{x y})}
|
221
221
|
|
222
222
|
return new_ary;
|
223
223
|
}
|
@@ -302,14 +302,14 @@ end.join("\n\n"))
|
|
302
302
|
PAIRWISE_MIN_MAX = typed_variants(%w{Min Max}.map do |kind|
|
303
303
|
%{
|
304
304
|
// Get the pairwise #{kind.downcase}ima from Arrays.
|
305
|
-
static VALUE #{kind.downcase}
|
305
|
+
static VALUE #{kind.downcase}_iv64{{type}}v64{{type}}_iv64{{type}}(VALUE self, VALUE x, VALUE y) {
|
306
306
|
enum YepStatus status;
|
307
307
|
long i;
|
308
308
|
VALUE new_ary;
|
309
309
|
VALUE *x_a;
|
310
310
|
VALUE *y_a;
|
311
311
|
long l;
|
312
|
-
#{declare_yep64_typed_array(%w{x y
|
312
|
+
#{declare_yep64_typed_array(%w{x y})}
|
313
313
|
|
314
314
|
#{ensure_array_argument('x', 'first')}
|
315
315
|
#{ensure_array_argument('y', 'second')}
|
@@ -319,22 +319,22 @@ PAIRWISE_MIN_MAX = typed_variants(%w{Min Max}.map do |kind|
|
|
319
319
|
l = RARRAY_LEN(x);
|
320
320
|
|
321
321
|
/* Allocate arrays of inputs and outputs */
|
322
|
-
#{allocate_yep64_typed_array(%w{x y
|
322
|
+
#{allocate_yep64_typed_array(%w{x y}, 'l')}
|
323
323
|
|
324
324
|
#{initialize_yeppp}
|
325
325
|
|
326
|
-
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x y
|
327
|
-
#{load_ruby_array_into_yeppp_array_parameterized('y', 'i', 'l', :allocated_arrays => %w{x y
|
326
|
+
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x y})}
|
327
|
+
#{load_ruby_array_into_yeppp_array_parameterized('y', 'i', 'l', :allocated_arrays => %w{x y})}
|
328
328
|
|
329
329
|
/* Perform the operation */
|
330
|
-
status = yepCore_#{kind}
|
330
|
+
status = yepCore_#{kind}_IV64{{type}}V64{{type}}_IV64{{type}}(yep_x, yep_y, (YepSize)l);
|
331
331
|
assert(status == YepStatusOk);
|
332
332
|
|
333
|
-
#{load_ruby_array_from_yeppp_array_parameterized('
|
333
|
+
#{load_ruby_array_from_yeppp_array_parameterized('x', 'i', 'l')}
|
334
334
|
|
335
335
|
#{deinitialize_yeppp}
|
336
336
|
|
337
|
-
#{release_array_memory(%w{x y
|
337
|
+
#{release_array_memory(%w{x y})}
|
338
338
|
|
339
339
|
return new_ary;
|
340
340
|
}
|
@@ -344,14 +344,14 @@ end.join("\n\n"), :only_type => 'f')
|
|
344
344
|
CONSTANT_MIN_MAX = typed_variants(%w{Min Max}.map do |kind|
|
345
345
|
%{
|
346
346
|
// Get the #{kind.downcase}ima from an Array and a constant.
|
347
|
-
static VALUE #{kind.downcase}
|
347
|
+
static VALUE #{kind.downcase}_iv64{{type}}s64{{type}}_iv64{{type}}(VALUE self, VALUE x, VALUE c) {
|
348
348
|
enum YepStatus status;
|
349
349
|
long i;
|
350
350
|
VALUE new_ary;
|
351
351
|
VALUE *x_a;
|
352
352
|
long l;
|
353
353
|
Yep64f konst;
|
354
|
-
#{declare_yep64_typed_array(%w{x
|
354
|
+
#{declare_yep64_typed_array(%w{x})}
|
355
355
|
|
356
356
|
#{ensure_array_argument('x', 'first')}
|
357
357
|
if (TYPE(c) != T_FIXNUM && TYPE(c) != T_BIGNUM && TYPE(c) != T_FLOAT) {
|
@@ -363,21 +363,21 @@ CONSTANT_MIN_MAX = typed_variants(%w{Min Max}.map do |kind|
|
|
363
363
|
konst = (Yep64f)NUM2{{ruby_type}}(c);
|
364
364
|
|
365
365
|
/* Allocate arrays of inputs and outputs */
|
366
|
-
#{allocate_yep64_typed_array(%w{x
|
366
|
+
#{allocate_yep64_typed_array(%w{x}, 'l')}
|
367
367
|
|
368
368
|
#{initialize_yeppp}
|
369
369
|
|
370
|
-
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x
|
370
|
+
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x})}
|
371
371
|
|
372
372
|
/* Perform the operation */
|
373
|
-
status = yepCore_#{kind}
|
373
|
+
status = yepCore_#{kind}_IV64{{type}}S64{{type}}_IV64{{type}}(yep_x, konst, (YepSize)l);
|
374
374
|
assert(status == YepStatusOk);
|
375
375
|
|
376
|
-
#{load_ruby_array_from_yeppp_array_parameterized('
|
376
|
+
#{load_ruby_array_from_yeppp_array_parameterized('x', 'i', 'l')}
|
377
377
|
|
378
378
|
#{deinitialize_yeppp}
|
379
379
|
|
380
|
-
#{release_array_memory(%w{x
|
380
|
+
#{release_array_memory(%w{x})}
|
381
381
|
|
382
382
|
return new_ary;
|
383
383
|
}
|
@@ -386,13 +386,13 @@ end.join("\n\n"), :only_type => 'f')
|
|
386
386
|
|
387
387
|
NEGATE = typed_variants(%{
|
388
388
|
// Negate an Array.
|
389
|
-
static VALUE
|
389
|
+
static VALUE negate_iv64{{type}}_is64{{type}}(VALUE self, VALUE x) {
|
390
390
|
enum YepStatus status;
|
391
391
|
long i;
|
392
392
|
VALUE new_ary;
|
393
393
|
VALUE *x_a;
|
394
394
|
long l;
|
395
|
-
#{declare_yep64_typed_array(%w{x
|
395
|
+
#{declare_yep64_typed_array(%w{x})}
|
396
396
|
|
397
397
|
#{ensure_array_argument('x', 'first')}
|
398
398
|
|
@@ -400,21 +400,21 @@ NEGATE = typed_variants(%{
|
|
400
400
|
l = RARRAY_LEN(x);
|
401
401
|
|
402
402
|
/* Allocate arrays of inputs and outputs */
|
403
|
-
#{allocate_yep64_typed_array(%w{x
|
403
|
+
#{allocate_yep64_typed_array(%w{x}, 'l')}
|
404
404
|
|
405
405
|
#{initialize_yeppp}
|
406
406
|
|
407
|
-
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x
|
407
|
+
#{load_ruby_array_into_yeppp_array_parameterized('x', 'i', 'l', :allocated_arrays => %w{x})}
|
408
408
|
|
409
409
|
/* Perform the negation */
|
410
|
-
status =
|
410
|
+
status = yepCore_Negate_IV64{{type}}_IV64{{type}}(yep_x, (YepSize)l);
|
411
411
|
assert(status == YepStatusOk);
|
412
412
|
|
413
|
-
#{load_ruby_array_from_yeppp_array_parameterized('
|
413
|
+
#{load_ruby_array_from_yeppp_array_parameterized('x', 'i', 'l')}
|
414
414
|
|
415
415
|
#{deinitialize_yeppp}
|
416
416
|
|
417
|
-
#{release_array_memory(%w{x
|
417
|
+
#{release_array_memory(%w{x})}
|
418
418
|
|
419
419
|
return new_ary;
|
420
420
|
}
|
@@ -545,18 +545,18 @@ void Init_ryeppp() {
|
|
545
545
|
cRyeppp = rb_define_class("Ryeppp", rb_cObject);
|
546
546
|
|
547
547
|
/* Addition */
|
548
|
-
rb_define_singleton_method(cRyeppp, "add_v64fv64f_v64f",
|
549
|
-
rb_define_singleton_method(cRyeppp, "add_v64sv64s_v64s",
|
548
|
+
rb_define_singleton_method(cRyeppp, "add_v64fv64f_v64f", add_iv64fv64f_iv64f, 2);
|
549
|
+
rb_define_singleton_method(cRyeppp, "add_v64sv64s_v64s", add_iv64sv64s_iv64s, 2);
|
550
550
|
|
551
551
|
/* Subtraction */
|
552
|
-
rb_define_singleton_method(cRyeppp, "subtract_v64fv64f_v64f",
|
553
|
-
rb_define_singleton_method(cRyeppp, "subtract_v64sv64s_v64s",
|
552
|
+
rb_define_singleton_method(cRyeppp, "subtract_v64fv64f_v64f", subtract_iv64fv64f_iv64f, 2);
|
553
|
+
rb_define_singleton_method(cRyeppp, "subtract_v64sv64s_v64s", subtract_iv64sv64s_iv64s, 2);
|
554
554
|
|
555
555
|
/* Multiplication */
|
556
|
-
rb_define_singleton_method(cRyeppp, "multiply_v64fs64f_v64f",
|
557
|
-
rb_define_singleton_method(cRyeppp, "multiply_v64sv64s_v64s",
|
558
|
-
rb_define_singleton_method(cRyeppp, "multiply_v64fv64f_v64f",
|
559
|
-
rb_define_singleton_method(cRyeppp, "multiply_v64ss64s_v64s",
|
556
|
+
rb_define_singleton_method(cRyeppp, "multiply_v64fs64f_v64f", multiply_iv64fs64f_iv64f, 2);
|
557
|
+
rb_define_singleton_method(cRyeppp, "multiply_v64sv64s_v64s", multiply_iv64sv64s_iv64s, 2);
|
558
|
+
rb_define_singleton_method(cRyeppp, "multiply_v64fv64f_v64f", multiply_iv64fv64f_iv64f, 2);
|
559
|
+
rb_define_singleton_method(cRyeppp, "multiply_v64ss64s_v64s", multiply_iv64ss64s_iv64s, 2);
|
560
560
|
|
561
561
|
/* Dot Product */
|
562
562
|
rb_define_singleton_method(cRyeppp, "dotproduct_v64fv64f_s64f", dotproduct_v64fv64f_s64f, 2);
|
@@ -570,24 +570,24 @@ void Init_ryeppp() {
|
|
570
570
|
rb_define_singleton_method(cRyeppp, "max_v64s_s64s", max_v64s_s64s, 1);
|
571
571
|
|
572
572
|
/* Pairwise Minima */
|
573
|
-
rb_define_singleton_method(cRyeppp, "min_v64fv64f_v64f",
|
573
|
+
rb_define_singleton_method(cRyeppp, "min_v64fv64f_v64f", min_iv64fv64f_iv64f, 2);
|
574
574
|
// Pairwise signed min is not available.
|
575
575
|
|
576
576
|
/* Pairwise Maxima */
|
577
|
-
rb_define_singleton_method(cRyeppp, "max_v64fv64f_v64f",
|
577
|
+
rb_define_singleton_method(cRyeppp, "max_v64fv64f_v64f", max_iv64fv64f_iv64f, 2);
|
578
578
|
// Pairwise signed max is not available.
|
579
579
|
|
580
580
|
/* Constant Minima */
|
581
|
-
rb_define_singleton_method(cRyeppp, "min_v64fs64f_v64f",
|
581
|
+
rb_define_singleton_method(cRyeppp, "min_v64fs64f_v64f", min_iv64fs64f_iv64f, 2);
|
582
582
|
// Constant signed min is not available.
|
583
583
|
|
584
584
|
/* Constant Maxima */
|
585
|
-
rb_define_singleton_method(cRyeppp, "max_v64fs64f_v64f",
|
585
|
+
rb_define_singleton_method(cRyeppp, "max_v64fs64f_v64f", max_iv64fs64f_iv64f, 2);
|
586
586
|
// Constant signed max is not available.
|
587
587
|
|
588
588
|
/* Negation */
|
589
|
-
rb_define_singleton_method(cRyeppp, "negate_v64f_s64f",
|
590
|
-
rb_define_singleton_method(cRyeppp, "negate_v64s_s64s",
|
589
|
+
rb_define_singleton_method(cRyeppp, "negate_v64f_s64f", negate_iv64f_is64f, 1);
|
590
|
+
rb_define_singleton_method(cRyeppp, "negate_v64s_s64s", negate_iv64s_is64s, 1);
|
591
591
|
|
592
592
|
/* Sums */
|
593
593
|
rb_define_singleton_method(cRyeppp, "sum_v64f_s64f", sum_v64f_s64f, 1);
|
data/lib/ryeppp/bench.rb
CHANGED
@@ -11,6 +11,260 @@ def puts_with_pounds(s)
|
|
11
11
|
end
|
12
12
|
|
13
13
|
class Array
|
14
|
+
def min_v64f_s64f
|
15
|
+
self.inject(self[0]){|minimum, o| o < minimum ? o : minimum}
|
16
|
+
end
|
17
|
+
alias_method :min_v64s_s64s, :min_v64f_s64f
|
18
|
+
def max_v64f_s64f
|
19
|
+
self.inject(self[0]){|maximum, o| o > maximum ? o : maximum}
|
20
|
+
end
|
21
|
+
alias_method :max_v64s_s64s, :max_v64f_s64f
|
22
|
+
inline do |builder|
|
23
|
+
builder.c %{
|
24
|
+
static VALUE c_min_v64f_s64f() {
|
25
|
+
long n = RARRAY_LEN(self);
|
26
|
+
VALUE *x_a = RARRAY_PTR(self);
|
27
|
+
double minimum = x_a[0];
|
28
|
+
|
29
|
+
long i;
|
30
|
+
for (i=1; i<n; i++) {
|
31
|
+
if (TYPE(x_a[i]) != T_FIXNUM && TYPE(x_a[i]) != T_BIGNUM && TYPE(x_a[i]) != T_FLOAT) {
|
32
|
+
rb_raise(rb_eTypeError, "input was not all integers and floats");
|
33
|
+
}
|
34
|
+
if (NUM2DBL(x_a[i]) < minimum) {
|
35
|
+
minimum = NUM2DBL(x_a[i]);
|
36
|
+
}
|
37
|
+
}
|
38
|
+
return DBL2NUM(minimum);
|
39
|
+
}
|
40
|
+
}
|
41
|
+
inline do |builder|
|
42
|
+
builder.c %{
|
43
|
+
static VALUE c_min_v64s_s64s() {
|
44
|
+
long n = RARRAY_LEN(self);
|
45
|
+
VALUE *x_a = RARRAY_PTR(self);
|
46
|
+
long minimum = x_a[0];
|
47
|
+
|
48
|
+
long i;
|
49
|
+
for (i=1; i<n; i++) {
|
50
|
+
if (TYPE(x_a[i]) != T_FIXNUM && TYPE(x_a[i]) != T_BIGNUM && TYPE(x_a[i]) != T_FLOAT) {
|
51
|
+
rb_raise(rb_eTypeError, "input was not all integers and floats");
|
52
|
+
}
|
53
|
+
if (NUM2LONG(x_a[i]) < minimum) {
|
54
|
+
minimum = NUM2LONG(x_a[i]);
|
55
|
+
}
|
56
|
+
}
|
57
|
+
return LONG2NUM(minimum);
|
58
|
+
}
|
59
|
+
}
|
60
|
+
end
|
61
|
+
builder.c %{
|
62
|
+
static VALUE c_max_v64f_s64f() {
|
63
|
+
long n = RARRAY_LEN(self);
|
64
|
+
VALUE *x_a = RARRAY_PTR(self);
|
65
|
+
double maximum = x_a[0];
|
66
|
+
|
67
|
+
long i;
|
68
|
+
for (i=1; i<n; i++) {
|
69
|
+
if (TYPE(x_a[i]) != T_FIXNUM && TYPE(x_a[i]) != T_BIGNUM && TYPE(x_a[i]) != T_FLOAT) {
|
70
|
+
rb_raise(rb_eTypeError, "input was not all integers and floats");
|
71
|
+
}
|
72
|
+
if (NUM2DBL(x_a[i]) > maximum) {
|
73
|
+
maximum = NUM2DBL(x_a[i]);
|
74
|
+
}
|
75
|
+
}
|
76
|
+
return DBL2NUM(maximum);
|
77
|
+
}
|
78
|
+
}
|
79
|
+
inline do |builder|
|
80
|
+
builder.c %{
|
81
|
+
static VALUE c_max_v64s_s64s() {
|
82
|
+
long n = RARRAY_LEN(self);
|
83
|
+
VALUE *x_a = RARRAY_PTR(self);
|
84
|
+
long maximum = x_a[0];
|
85
|
+
|
86
|
+
long i;
|
87
|
+
for (i=1; i<n; i++) {
|
88
|
+
if (TYPE(x_a[i]) != T_FIXNUM && TYPE(x_a[i]) != T_BIGNUM && TYPE(x_a[i]) != T_FLOAT) {
|
89
|
+
rb_raise(rb_eTypeError, "input was not all integers and floats");
|
90
|
+
}
|
91
|
+
if (NUM2LONG(x_a[i]) > maximum) {
|
92
|
+
maximum = NUM2LONG(x_a[i]);
|
93
|
+
}
|
94
|
+
}
|
95
|
+
return LONG2NUM(maximum);
|
96
|
+
}
|
97
|
+
}
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def min_v64fv64f_v64f(b)
|
102
|
+
raise "Invalid sizes: #{self.size}, #{b.size}" unless self.size == b.size
|
103
|
+
self.zip(b).map(&:min)
|
104
|
+
end
|
105
|
+
def max_v64fv64f_v64f(b)
|
106
|
+
raise "Invalid sizes: #{self.size}, #{b.size}" unless self.size == b.size
|
107
|
+
self.zip(b).map(&:max)
|
108
|
+
end
|
109
|
+
inline do |builder|
|
110
|
+
builder.c %{
|
111
|
+
static VALUE c_min_v64fv64f_v64f(VALUE b) {
|
112
|
+
long n = RARRAY_LEN(self);
|
113
|
+
VALUE *x_a = RARRAY_PTR(self);
|
114
|
+
VALUE *x_b = RARRAY_PTR(b);
|
115
|
+
VALUE new_ary = rb_ary_new2(n);
|
116
|
+
|
117
|
+
long i;
|
118
|
+
|
119
|
+
if (n != RARRAY_LEN(b)) {
|
120
|
+
rb_raise(rb_eArgError, "mismatched Array sizes");
|
121
|
+
}
|
122
|
+
for (i=0; i<n; i++) {
|
123
|
+
rb_ary_push(new_ary, (NUM2DBL(x_a[i]) < NUM2DBL(x_b[i])) ? x_a[i] : x_b[i]);
|
124
|
+
}
|
125
|
+
return new_ary;
|
126
|
+
}
|
127
|
+
}
|
128
|
+
builder.c %{
|
129
|
+
static VALUE c_max_v64fv64f_v64f(VALUE b) {
|
130
|
+
long n = RARRAY_LEN(self);
|
131
|
+
VALUE *x_a = RARRAY_PTR(self);
|
132
|
+
VALUE *x_b = RARRAY_PTR(b);
|
133
|
+
VALUE new_ary = rb_ary_new2(n);
|
134
|
+
|
135
|
+
long i;
|
136
|
+
|
137
|
+
if (n != RARRAY_LEN(b)) {
|
138
|
+
rb_raise(rb_eArgError, "mismatched Array sizes");
|
139
|
+
}
|
140
|
+
for (i=0; i<n; i++) {
|
141
|
+
rb_ary_push(new_ary, (NUM2DBL(x_a[i]) > NUM2DBL(x_b[i])) ? x_a[i] : x_b[i]);
|
142
|
+
}
|
143
|
+
return new_ary;
|
144
|
+
}
|
145
|
+
}
|
146
|
+
end
|
147
|
+
def min_v64fs64f_v64f(c)
|
148
|
+
self.map{|o| o < c ? o : c}
|
149
|
+
end
|
150
|
+
def max_v64fs64f_v64f(c)
|
151
|
+
self.map{|o| o > c ? o : c}
|
152
|
+
end
|
153
|
+
inline do |builder|
|
154
|
+
builder.c %{
|
155
|
+
static VALUE c_min_v64fs64f_v64f(VALUE c) {
|
156
|
+
long n = RARRAY_LEN(self);
|
157
|
+
VALUE *x_a = RARRAY_PTR(self);
|
158
|
+
double konst = NUM2DBL(c);
|
159
|
+
VALUE new_ary = rb_ary_new2(n);
|
160
|
+
|
161
|
+
long i;
|
162
|
+
for (i=0; i<n; i++) {
|
163
|
+
rb_ary_push(new_ary, (NUM2DBL(x_a[i]) < konst) ? x_a[i] : c);
|
164
|
+
}
|
165
|
+
return new_ary;
|
166
|
+
}
|
167
|
+
}
|
168
|
+
builder.c %{
|
169
|
+
static VALUE c_max_v64fs64f_v64f(VALUE c) {
|
170
|
+
long n = RARRAY_LEN(self);
|
171
|
+
VALUE *x_a = RARRAY_PTR(self);
|
172
|
+
double konst = NUM2DBL(c);
|
173
|
+
VALUE new_ary = rb_ary_new2(n);
|
174
|
+
|
175
|
+
long i;
|
176
|
+
for (i=0; i<n; i++) {
|
177
|
+
rb_ary_push(new_ary, (NUM2DBL(x_a[i]) > konst) ? x_a[i] : c);
|
178
|
+
}
|
179
|
+
return new_ary;
|
180
|
+
}
|
181
|
+
}
|
182
|
+
end
|
183
|
+
|
184
|
+
def log
|
185
|
+
self.map{|o| Math.log(o)}
|
186
|
+
end
|
187
|
+
def exp
|
188
|
+
self.map{|o| Math::E ** o}
|
189
|
+
end
|
190
|
+
def sin
|
191
|
+
self.map{|o| Math.sin(o)}
|
192
|
+
end
|
193
|
+
def cos
|
194
|
+
self.map{|o| Math.cos(o)}
|
195
|
+
end
|
196
|
+
def tan
|
197
|
+
self.map{|o| Math.tan(o)}
|
198
|
+
end
|
199
|
+
inline do |builder|
|
200
|
+
builder.include('<math.h>')
|
201
|
+
builder.c %{
|
202
|
+
static VALUE c_log() {
|
203
|
+
long n = RARRAY_LEN(self);
|
204
|
+
VALUE *x_a = RARRAY_PTR(self);
|
205
|
+
VALUE new_ary = rb_ary_new2(n);
|
206
|
+
|
207
|
+
long i;
|
208
|
+
for (i=0; i<n; i++) {
|
209
|
+
rb_ary_push(new_ary, DBL2NUM(log(NUM2DBL(x_a[i]))));
|
210
|
+
}
|
211
|
+
return new_ary;
|
212
|
+
}
|
213
|
+
}
|
214
|
+
builder.c %{
|
215
|
+
static VALUE c_exp() {
|
216
|
+
long n = RARRAY_LEN(self);
|
217
|
+
VALUE *x_a = RARRAY_PTR(self);
|
218
|
+
VALUE new_ary = rb_ary_new2(n);
|
219
|
+
|
220
|
+
long i;
|
221
|
+
for (i=0; i<n; i++) {
|
222
|
+
rb_ary_push(new_ary, DBL2NUM(pow(2.717, NUM2DBL(x_a[i]))));
|
223
|
+
}
|
224
|
+
return new_ary;
|
225
|
+
}
|
226
|
+
}
|
227
|
+
builder.c %{
|
228
|
+
static VALUE c_sin() {
|
229
|
+
long n = RARRAY_LEN(self);
|
230
|
+
VALUE *x_a = RARRAY_PTR(self);
|
231
|
+
VALUE new_ary = rb_ary_new2(n);
|
232
|
+
|
233
|
+
long i;
|
234
|
+
for (i=0; i<n; i++) {
|
235
|
+
rb_ary_push(new_ary, DBL2NUM(sin(NUM2DBL(x_a[i]))));
|
236
|
+
}
|
237
|
+
return new_ary;
|
238
|
+
}
|
239
|
+
}
|
240
|
+
builder.c %{
|
241
|
+
static VALUE c_cos() {
|
242
|
+
long n = RARRAY_LEN(self);
|
243
|
+
VALUE *x_a = RARRAY_PTR(self);
|
244
|
+
VALUE new_ary = rb_ary_new2(n);
|
245
|
+
|
246
|
+
long i;
|
247
|
+
for (i=0; i<n; i++) {
|
248
|
+
rb_ary_push(new_ary, DBL2NUM(cos(NUM2DBL(x_a[i]))));
|
249
|
+
}
|
250
|
+
return new_ary;
|
251
|
+
}
|
252
|
+
}
|
253
|
+
builder.c %{
|
254
|
+
static VALUE c_tan() {
|
255
|
+
long n = RARRAY_LEN(self);
|
256
|
+
VALUE *x_a = RARRAY_PTR(self);
|
257
|
+
VALUE new_ary = rb_ary_new2(n);
|
258
|
+
|
259
|
+
long i;
|
260
|
+
for (i=0; i<n; i++) {
|
261
|
+
rb_ary_push(new_ary, DBL2NUM(tan(NUM2DBL(x_a[i]))));
|
262
|
+
}
|
263
|
+
return new_ary;
|
264
|
+
}
|
265
|
+
}
|
266
|
+
end
|
267
|
+
|
14
268
|
def sum
|
15
269
|
self.inject(0){|sum, o| sum + o}
|
16
270
|
end
|
@@ -191,8 +445,10 @@ class Array
|
|
191
445
|
end
|
192
446
|
end
|
193
447
|
|
448
|
+
K = Random.rand
|
194
449
|
WIDTH = 40
|
195
450
|
V_f = (0..1_024*1_024).to_a.map{Random.rand}
|
451
|
+
V_s = (0..1_024*1_024).to_a.map{Random.rand(1_024)}
|
196
452
|
|
197
453
|
# Dot Product
|
198
454
|
puts_with_pounds "Dot Product"
|
@@ -208,6 +464,32 @@ Benchmark.bm(WIDTH) do |x|
|
|
208
464
|
x.report("Ryeppp.dotproduct_v64fv64f_s64f:") { n.times { Ryeppp.dotproduct_v64fv64f_s64f(V_f, V_f) } }
|
209
465
|
end
|
210
466
|
|
467
|
+
# Min and Max
|
468
|
+
puts_with_pounds "Min and Max"
|
469
|
+
Benchmark.bm(WIDTH) do |x|
|
470
|
+
%w{min max}.each do |prefix|
|
471
|
+
x.report("#{prefix}_v64f_s64f:") { n.times { V_f.send("#{prefix}_v64f_s64f") } }
|
472
|
+
x.report("#{prefix}_v64s_s64s:") { n.times { V_s.send("#{prefix}_v64s_s64s") } }
|
473
|
+
x.report("c_#{prefix}_v64f_s64f:") { n.times { V_f.send("c_#{prefix}_v64f_s64f") } }
|
474
|
+
x.report("c_#{prefix}_v64s_s64s:") { n.times { V_s.send("c_#{prefix}_v64s_s64s") } }
|
475
|
+
x.report("Ryeppp.#{prefix}_v64f_s64f:") { n.times { Ryeppp.send("#{prefix}_v64f_s64f", V_f) } }
|
476
|
+
x.report("Ryeppp.#{prefix}_v64s_s64s:") { n.times { Ryeppp.send("#{prefix}_v64s_s64s", V_s) } }
|
477
|
+
end
|
478
|
+
end
|
479
|
+
|
480
|
+
# Pairwise Min and Max
|
481
|
+
puts_with_pounds "Pairwise Min and Max"
|
482
|
+
Benchmark.bm(WIDTH) do |x|
|
483
|
+
%w{min max}.each do |prefix|
|
484
|
+
x.report("#{prefix}_v64fv64f_v64f:") { n.times { V_f.send("#{prefix}_v64fv64f_v64f", V_f) } }
|
485
|
+
x.report("c_#{prefix}_v64fv64f_v64f:") { n.times { V_f.send("c_#{prefix}_v64fv64f_v64f", V_f) } }
|
486
|
+
x.report("Ryeppp.#{prefix}_v64fv64f_v64f:") { n.times { Ryeppp.send("#{prefix}_v64fv64f_v64f", V_f, V_f) } }
|
487
|
+
x.report("#{prefix}_v64fs64f_v64f:") { n.times { V_f.send("#{prefix}_v64fs64f_v64f", K) } }
|
488
|
+
x.report("c_#{prefix}_v64fs64f_v64f:") { n.times { V_f.send("c_#{prefix}_v64fs64f_v64f", K) } }
|
489
|
+
x.report("Ryeppp.#{prefix}_v64fs64f_v64f:") { n.times { Ryeppp.send("#{prefix}_v64fs64f_v64f", V_f, K) } }
|
490
|
+
end
|
491
|
+
end
|
492
|
+
|
211
493
|
# Sums
|
212
494
|
puts_with_pounds "Sums"
|
213
495
|
n = 1
|
@@ -224,7 +506,8 @@ puts_with_pounds "Math Functions"
|
|
224
506
|
n = 1
|
225
507
|
Benchmark.bm(WIDTH) do |x|
|
226
508
|
%w{log exp sin cos tan}.each do |f|
|
227
|
-
x.report("#{f}:") { n.times { V_f.
|
509
|
+
x.report("#{f}:") { n.times { V_f.send(f) } }
|
510
|
+
x.report("c_#{f}:") { n.times { V_f.send("c_#{f}") } }
|
228
511
|
x.report("Ryeppp.#{f}_v64f_v64f:") { n.times { Ryeppp.send("#{f}_v64f_v64f", V_f) } }
|
229
512
|
end
|
230
513
|
end
|
data/lib/ryeppp/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ryeppp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-10-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|