@stdlib/blas-ext-base-dsnansumpw 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NOTICE +1 -1
- package/README.md +141 -38
- package/dist/index.js +7 -7
- package/dist/index.js.map +3 -3
- package/docs/types/index.d.ts +7 -7
- package/include/stdlib/blas/ext/base/dsnansumpw.h +8 -6
- package/lib/dsnansumpw.js +6 -36
- package/lib/dsnansumpw.native.js +4 -5
- package/lib/index.js +2 -5
- package/lib/ndarray.js +28 -30
- package/lib/ndarray.native.js +6 -13
- package/manifest.json +80 -40
- package/package.json +14 -8
- package/src/addon.c +62 -0
- package/src/{dsnansumpw.c → main.c} +58 -48
- package/include.gypi +0 -53
- package/src/addon.cpp +0 -117
package/lib/ndarray.js
CHANGED
|
@@ -45,21 +45,19 @@ var BLOCKSIZE = 128;
|
|
|
45
45
|
*
|
|
46
46
|
* @param {PositiveInteger} N - number of indexed elements
|
|
47
47
|
* @param {Float32Array} x - input array
|
|
48
|
-
* @param {integer} stride - stride length
|
|
49
|
-
* @param {NonNegativeInteger} offset - starting index
|
|
48
|
+
* @param {integer} strideX - stride length
|
|
49
|
+
* @param {NonNegativeInteger} offsetX - starting index
|
|
50
50
|
* @returns {number} sum
|
|
51
51
|
*
|
|
52
52
|
* @example
|
|
53
53
|
* var Float32Array = require( '@stdlib/array-float32' );
|
|
54
|
-
* var floor = require( '@stdlib/math-base-special-floor' );
|
|
55
54
|
*
|
|
56
55
|
* var x = new Float32Array( [ 2.0, 1.0, 2.0, -2.0, -2.0, 2.0, 3.0, 4.0, NaN, NaN ] );
|
|
57
|
-
* var N = floor( x.length / 2 );
|
|
58
56
|
*
|
|
59
|
-
* var v = dsnansumpw( N, x, 2, 1 );
|
|
57
|
+
* var v = dsnansumpw( 5, x, 2, 1 );
|
|
60
58
|
* // returns 5.0
|
|
61
59
|
*/
|
|
62
|
-
function dsnansumpw( N, x, stride, offset ) {
|
|
60
|
+
function dsnansumpw( N, x, strideX, offsetX ) {
|
|
63
61
|
var ix;
|
|
64
62
|
var s0;
|
|
65
63
|
var s1;
|
|
@@ -77,13 +75,13 @@ function dsnansumpw( N, x, stride, offset ) {
|
|
|
77
75
|
if ( N <= 0 ) {
|
|
78
76
|
return 0.0;
|
|
79
77
|
}
|
|
80
|
-
|
|
81
|
-
|
|
78
|
+
ix = offsetX;
|
|
79
|
+
if ( strideX === 0 ) {
|
|
80
|
+
if ( isnanf( x[ ix ] ) ) {
|
|
82
81
|
return 0.0;
|
|
83
82
|
}
|
|
84
|
-
return x[ offset ];
|
|
83
|
+
return N * x[ ix ];
|
|
85
84
|
}
|
|
86
|
-
ix = offset;
|
|
87
85
|
if ( N < 8 ) {
|
|
88
86
|
// Use simple summation...
|
|
89
87
|
s = 0.0;
|
|
@@ -91,64 +89,64 @@ function dsnansumpw( N, x, stride, offset ) {
|
|
|
91
89
|
if ( isnanf( x[ ix ] ) === false ) {
|
|
92
90
|
s += x[ ix ];
|
|
93
91
|
}
|
|
94
|
-
ix += stride;
|
|
92
|
+
ix += strideX;
|
|
95
93
|
}
|
|
96
94
|
return s;
|
|
97
95
|
}
|
|
98
96
|
if ( N <= BLOCKSIZE ) {
|
|
99
97
|
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
|
|
100
98
|
s0 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
101
|
-
ix += stride;
|
|
99
|
+
ix += strideX;
|
|
102
100
|
s1 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
103
|
-
ix += stride;
|
|
101
|
+
ix += strideX;
|
|
104
102
|
s2 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
105
|
-
ix += stride;
|
|
103
|
+
ix += strideX;
|
|
106
104
|
s3 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
107
|
-
ix += stride;
|
|
105
|
+
ix += strideX;
|
|
108
106
|
s4 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
109
|
-
ix += stride;
|
|
107
|
+
ix += strideX;
|
|
110
108
|
s5 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
111
|
-
ix += stride;
|
|
109
|
+
ix += strideX;
|
|
112
110
|
s6 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
113
|
-
ix += stride;
|
|
111
|
+
ix += strideX;
|
|
114
112
|
s7 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
115
|
-
ix += stride;
|
|
113
|
+
ix += strideX;
|
|
116
114
|
|
|
117
115
|
M = N % 8;
|
|
118
116
|
for ( i = 8; i < N-M; i += 8 ) {
|
|
119
117
|
s0 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
120
|
-
ix += stride;
|
|
118
|
+
ix += strideX;
|
|
121
119
|
s1 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
122
|
-
ix += stride;
|
|
120
|
+
ix += strideX;
|
|
123
121
|
s2 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
124
|
-
ix += stride;
|
|
122
|
+
ix += strideX;
|
|
125
123
|
s3 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
126
|
-
ix += stride;
|
|
124
|
+
ix += strideX;
|
|
127
125
|
s4 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
128
|
-
ix += stride;
|
|
126
|
+
ix += strideX;
|
|
129
127
|
s5 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
130
|
-
ix += stride;
|
|
128
|
+
ix += strideX;
|
|
131
129
|
s6 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
132
|
-
ix += stride;
|
|
130
|
+
ix += strideX;
|
|
133
131
|
s7 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
|
|
134
|
-
ix += stride;
|
|
132
|
+
ix += strideX;
|
|
135
133
|
}
|
|
136
134
|
// Pairwise sum the accumulators:
|
|
137
|
-
s = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
|
|
135
|
+
s = ( (s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7) );
|
|
138
136
|
|
|
139
137
|
// Clean-up loop...
|
|
140
138
|
for ( i; i < N; i++ ) {
|
|
141
139
|
if ( isnanf( x[ ix ] ) === false ) {
|
|
142
140
|
s += x[ ix ];
|
|
143
141
|
}
|
|
144
|
-
ix += stride;
|
|
142
|
+
ix += strideX;
|
|
145
143
|
}
|
|
146
144
|
return s;
|
|
147
145
|
}
|
|
148
146
|
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
|
|
149
147
|
n = floor( N/2 );
|
|
150
148
|
n -= n % 8;
|
|
151
|
-
return dsnansumpw( n, x, stride, ix ) + dsnansumpw( N-n, x, stride, ix+(n*stride) );
|
|
149
|
+
return dsnansumpw( n, x, strideX, ix ) + dsnansumpw( N-n, x, strideX, ix+(n*strideX) ); // eslint-disable-line max-len
|
|
152
150
|
}
|
|
153
151
|
|
|
154
152
|
|
package/lib/ndarray.native.js
CHANGED
|
@@ -20,8 +20,7 @@
|
|
|
20
20
|
|
|
21
21
|
// MODULES //
|
|
22
22
|
|
|
23
|
-
var
|
|
24
|
-
var addon = require( './dsnansumpw.native.js' );
|
|
23
|
+
var addon = require( './../src/addon.node' );
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
// MAIN //
|
|
@@ -31,8 +30,8 @@ var addon = require( './dsnansumpw.native.js' );
|
|
|
31
30
|
*
|
|
32
31
|
* @param {PositiveInteger} N - number of indexed elements
|
|
33
32
|
* @param {Float32Array} x - input array
|
|
34
|
-
* @param {integer} stride - stride length
|
|
35
|
-
* @param {NonNegativeInteger} offset - starting index
|
|
33
|
+
* @param {integer} strideX - stride length
|
|
34
|
+
* @param {NonNegativeInteger} offsetX - starting index
|
|
36
35
|
* @returns {number} sum
|
|
37
36
|
*
|
|
38
37
|
* @example
|
|
@@ -40,18 +39,12 @@ var addon = require( './dsnansumpw.native.js' );
|
|
|
40
39
|
* var floor = require( '@stdlib/math-base-special-floor' );
|
|
41
40
|
*
|
|
42
41
|
* var x = new Float32Array( [ 2.0, 1.0, 2.0, -2.0, -2.0, 2.0, 3.0, 4.0, NaN, NaN ] );
|
|
43
|
-
* var N = floor( x.length / 2 );
|
|
44
42
|
*
|
|
45
|
-
* var v = dsnansumpw( N, x, 2, 1 );
|
|
43
|
+
* var v = dsnansumpw( 5, x, 2, 1 );
|
|
46
44
|
* // returns 5.0
|
|
47
45
|
*/
|
|
48
|
-
function dsnansumpw( N, x, stride, offset ) {
|
|
49
|
-
|
|
50
|
-
if ( stride < 0 ) {
|
|
51
|
-
offset += (N-1) * stride;
|
|
52
|
-
}
|
|
53
|
-
view = new Float32Array( x.buffer, x.byteOffset+(x.BYTES_PER_ELEMENT*offset), x.length-offset ); // eslint-disable-line max-len
|
|
54
|
-
return addon( N, view, stride );
|
|
46
|
+
function dsnansumpw( N, x, strideX, offsetX ) {
|
|
47
|
+
return addon.ndarray( N, x, strideX, offsetX );
|
|
55
48
|
}
|
|
56
49
|
|
|
57
50
|
|
package/manifest.json
CHANGED
|
@@ -1,42 +1,82 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
2
|
+
"options": {
|
|
3
|
+
"task": "build"
|
|
4
|
+
},
|
|
5
|
+
"fields": [
|
|
6
|
+
{
|
|
7
|
+
"field": "src",
|
|
8
|
+
"resolve": true,
|
|
9
|
+
"relative": true
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"field": "include",
|
|
13
|
+
"resolve": true,
|
|
14
|
+
"relative": true
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"field": "libraries",
|
|
18
|
+
"resolve": false,
|
|
19
|
+
"relative": false
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"field": "libpath",
|
|
23
|
+
"resolve": true,
|
|
24
|
+
"relative": false
|
|
25
|
+
}
|
|
26
|
+
],
|
|
27
|
+
"confs": [
|
|
28
|
+
{
|
|
29
|
+
"task": "build",
|
|
30
|
+
"src": [
|
|
31
|
+
"./src/main.c"
|
|
32
|
+
],
|
|
33
|
+
"include": [
|
|
34
|
+
"./include"
|
|
35
|
+
],
|
|
36
|
+
"libraries": [],
|
|
37
|
+
"libpath": [],
|
|
38
|
+
"dependencies": [
|
|
39
|
+
"@stdlib/napi-export",
|
|
40
|
+
"@stdlib/napi-argv",
|
|
41
|
+
"@stdlib/napi-argv-int64",
|
|
42
|
+
"@stdlib/napi-argv-strided-float32array",
|
|
43
|
+
"@stdlib/math-base-assert-is-nanf",
|
|
44
|
+
"@stdlib/napi-create-double",
|
|
45
|
+
"@stdlib/strided-base-stride2offset",
|
|
46
|
+
"@stdlib/blas-base-shared"
|
|
47
|
+
]
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"task": "benchmark",
|
|
51
|
+
"src": [
|
|
52
|
+
"./src/main.c"
|
|
53
|
+
],
|
|
54
|
+
"include": [
|
|
55
|
+
"./include"
|
|
56
|
+
],
|
|
57
|
+
"libraries": [],
|
|
58
|
+
"libpath": [],
|
|
59
|
+
"dependencies": [
|
|
60
|
+
"@stdlib/math-base-assert-is-nanf",
|
|
61
|
+
"@stdlib/strided-base-stride2offset",
|
|
62
|
+
"@stdlib/blas-base-shared"
|
|
63
|
+
]
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
"task": "examples",
|
|
67
|
+
"src": [
|
|
68
|
+
"./src/main.c"
|
|
69
|
+
],
|
|
70
|
+
"include": [
|
|
71
|
+
"./include"
|
|
72
|
+
],
|
|
73
|
+
"libraries": [],
|
|
74
|
+
"libpath": [],
|
|
75
|
+
"dependencies": [
|
|
76
|
+
"@stdlib/math-base-assert-is-nanf",
|
|
77
|
+
"@stdlib/strided-base-stride2offset",
|
|
78
|
+
"@stdlib/blas-base-shared"
|
|
79
|
+
]
|
|
80
|
+
}
|
|
81
|
+
]
|
|
42
82
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stdlib/blas-ext-base-dsnansumpw",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Calculate the sum of single-precision floating-point strided array elements, ignoring NaN values, using pairwise summation with extended accumulation, and returning an extended precision result.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"author": {
|
|
@@ -34,12 +34,19 @@
|
|
|
34
34
|
"url": "https://github.com/stdlib-js/stdlib/issues"
|
|
35
35
|
},
|
|
36
36
|
"dependencies": {
|
|
37
|
-
"@stdlib/assert-is-error": "^0.2.
|
|
38
|
-
"@stdlib/
|
|
39
|
-
"@stdlib/math-base-
|
|
40
|
-
"@stdlib/
|
|
41
|
-
"@stdlib/
|
|
42
|
-
"@stdlib/
|
|
37
|
+
"@stdlib/assert-is-error": "^0.2.2",
|
|
38
|
+
"@stdlib/blas-base-shared": "^0.1.0",
|
|
39
|
+
"@stdlib/math-base-assert-is-nanf": "^0.2.2",
|
|
40
|
+
"@stdlib/math-base-special-floor": "^0.2.3",
|
|
41
|
+
"@stdlib/napi-argv": "^0.2.2",
|
|
42
|
+
"@stdlib/napi-argv-int64": "^0.2.2",
|
|
43
|
+
"@stdlib/napi-argv-strided-float32array": "^0.2.2",
|
|
44
|
+
"@stdlib/napi-create-double": "^0.0.2",
|
|
45
|
+
"@stdlib/napi-export": "^0.3.0",
|
|
46
|
+
"@stdlib/strided-base-stride2offset": "^0.1.0",
|
|
47
|
+
"@stdlib/utils-define-nonenumerable-read-only-property": "^0.2.2",
|
|
48
|
+
"@stdlib/utils-library-manifest": "^0.2.3",
|
|
49
|
+
"@stdlib/utils-try-require": "^0.2.2"
|
|
43
50
|
},
|
|
44
51
|
"devDependencies": {},
|
|
45
52
|
"engines": {
|
|
@@ -80,7 +87,6 @@
|
|
|
80
87
|
"float",
|
|
81
88
|
"float32array"
|
|
82
89
|
],
|
|
83
|
-
"__stdlib__": {},
|
|
84
90
|
"funding": {
|
|
85
91
|
"type": "opencollective",
|
|
86
92
|
"url": "https://opencollective.com/stdlib"
|
package/src/addon.c
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license Apache-2.0
|
|
3
|
+
*
|
|
4
|
+
* Copyright (c) 2024 The Stdlib Authors.
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#include "stdlib/blas/ext/base/dsnansumpw.h"
|
|
20
|
+
#include "stdlib/blas/base/shared.h"
|
|
21
|
+
#include "stdlib/napi/export.h"
|
|
22
|
+
#include "stdlib/napi/argv.h"
|
|
23
|
+
#include "stdlib/napi/argv_int64.h"
|
|
24
|
+
#include "stdlib/napi/argv_strided_float32array.h"
|
|
25
|
+
#include "stdlib/strided/base/stride2offset.h"
|
|
26
|
+
#include "stdlib/napi/create_double.h"
|
|
27
|
+
#include <node_api.h>
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Receives JavaScript callback invocation data.
|
|
31
|
+
*
|
|
32
|
+
* @param env environment under which the function is invoked
|
|
33
|
+
* @param info callback data
|
|
34
|
+
* @return Node-API value
|
|
35
|
+
*/
|
|
36
|
+
static napi_value addon( napi_env env, napi_callback_info info ) {
|
|
37
|
+
STDLIB_NAPI_ARGV( env, info, argv, argc, 3 );
|
|
38
|
+
STDLIB_NAPI_ARGV_INT64( env, N, argv, 0 );
|
|
39
|
+
STDLIB_NAPI_ARGV_INT64( env, strideX, argv, 2 );
|
|
40
|
+
STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, strideX, argv, 1 );
|
|
41
|
+
STDLIB_NAPI_CREATE_DOUBLE( env, API_SUFFIX(stdlib_strided_dsnansumpw)( N, X, strideX ), v );
|
|
42
|
+
return v;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Receives JavaScript callback invocation data.
|
|
47
|
+
*
|
|
48
|
+
* @param env environment under which the function is invoked
|
|
49
|
+
* @param info callback data
|
|
50
|
+
* @return Node-API value
|
|
51
|
+
*/
|
|
52
|
+
static napi_value addon_method( napi_env env, napi_callback_info info ) {
|
|
53
|
+
STDLIB_NAPI_ARGV( env, info, argv, argc, 4 );
|
|
54
|
+
STDLIB_NAPI_ARGV_INT64( env, N, argv, 0 );
|
|
55
|
+
STDLIB_NAPI_ARGV_INT64( env, strideX, argv, 2 );
|
|
56
|
+
STDLIB_NAPI_ARGV_INT64( env, offsetX, argv, 3 );
|
|
57
|
+
STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, strideX, argv, 1 );
|
|
58
|
+
STDLIB_NAPI_CREATE_DOUBLE( env, API_SUFFIX(stdlib_strided_dsnansumpw_ndarray)( N, X, strideX, offsetX ), v );
|
|
59
|
+
return v;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
STDLIB_NAPI_MODULE_EXPORT_FCN_WITH_METHOD( addon, "ndarray", addon_method );
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @license Apache-2.0
|
|
3
3
|
*
|
|
4
|
-
* Copyright (c)
|
|
4
|
+
* Copyright (c) 2024 The Stdlib Authors.
|
|
5
5
|
*
|
|
6
6
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
7
|
* you may not use this file except in compliance with the License.
|
|
@@ -18,7 +18,8 @@
|
|
|
18
18
|
|
|
19
19
|
#include "stdlib/blas/ext/base/dsnansumpw.h"
|
|
20
20
|
#include "stdlib/math/base/assert/is_nanf.h"
|
|
21
|
-
#include
|
|
21
|
+
#include "stdlib/strided/base/stride2offset.h"
|
|
22
|
+
#include "stdlib/blas/base/shared.h"
|
|
22
23
|
|
|
23
24
|
/**
|
|
24
25
|
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation, and returning an extended precision result.
|
|
@@ -31,19 +32,39 @@
|
|
|
31
32
|
*
|
|
32
33
|
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
|
|
33
34
|
*
|
|
34
|
-
* @param N
|
|
35
|
-
* @param X
|
|
36
|
-
* @param
|
|
37
|
-
* @return
|
|
35
|
+
* @param N number of indexed elements
|
|
36
|
+
* @param X input array
|
|
37
|
+
* @param strideX stride length
|
|
38
|
+
* @return output value
|
|
38
39
|
*/
|
|
39
|
-
double stdlib_strided_dsnansumpw( const int64_t N, const float *X, const int64_t stride ) {
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
double API_SUFFIX(stdlib_strided_dsnansumpw)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX ) {
|
|
41
|
+
CBLAS_INT ox = stdlib_strided_stride2offset( N, strideX );
|
|
42
|
+
return API_SUFFIX( stdlib_strided_dsnansumpw_ndarray )( N, X, strideX, ox );
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation and alternative indexing semantics, and returning an extended precision result.
|
|
47
|
+
*
|
|
48
|
+
* ## Method
|
|
49
|
+
*
|
|
50
|
+
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
|
|
51
|
+
*
|
|
52
|
+
* ## References
|
|
53
|
+
*
|
|
54
|
+
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
|
|
55
|
+
*
|
|
56
|
+
* @param N number of indexed elements
|
|
57
|
+
* @param X input array
|
|
58
|
+
* @param strideX stride length
|
|
59
|
+
* @param offsetX starting index
|
|
60
|
+
* @return output value
|
|
61
|
+
*/
|
|
62
|
+
double API_SUFFIX(stdlib_strided_dsnansumpw_ndarray)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX ) {
|
|
63
|
+
CBLAS_INT ix;
|
|
64
|
+
CBLAS_INT M;
|
|
65
|
+
CBLAS_INT n;
|
|
66
|
+
CBLAS_INT i;
|
|
42
67
|
double sum;
|
|
43
|
-
int64_t ix;
|
|
44
|
-
int64_t M;
|
|
45
|
-
int64_t n;
|
|
46
|
-
int64_t i;
|
|
47
68
|
double s0;
|
|
48
69
|
double s1;
|
|
49
70
|
double s2;
|
|
@@ -56,16 +77,12 @@ double stdlib_strided_dsnansumpw( const int64_t N, const float *X, const int64_t
|
|
|
56
77
|
if ( N <= 0 ) {
|
|
57
78
|
return 0.0;
|
|
58
79
|
}
|
|
59
|
-
|
|
60
|
-
|
|
80
|
+
ix = offsetX;
|
|
81
|
+
if ( strideX == 0 ) {
|
|
82
|
+
if ( stdlib_base_is_nanf( X[ ix ] ) ) {
|
|
61
83
|
return 0.0;
|
|
62
84
|
}
|
|
63
|
-
return X[
|
|
64
|
-
}
|
|
65
|
-
if ( stride < 0 ) {
|
|
66
|
-
ix = (1-N) * stride;
|
|
67
|
-
} else {
|
|
68
|
-
ix = 0;
|
|
85
|
+
return N * X[ ix ];
|
|
69
86
|
}
|
|
70
87
|
if ( N < 8 ) {
|
|
71
88
|
// Use simple summation...
|
|
@@ -74,7 +91,7 @@ double stdlib_strided_dsnansumpw( const int64_t N, const float *X, const int64_t
|
|
|
74
91
|
if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
|
|
75
92
|
sum += (double)X[ ix ];
|
|
76
93
|
}
|
|
77
|
-
ix += stride;
|
|
94
|
+
ix += strideX;
|
|
78
95
|
}
|
|
79
96
|
return sum;
|
|
80
97
|
}
|
|
@@ -82,62 +99,55 @@ double stdlib_strided_dsnansumpw( const int64_t N, const float *X, const int64_t
|
|
|
82
99
|
if ( N <= 128 ) {
|
|
83
100
|
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
|
|
84
101
|
s0 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
85
|
-
ix += stride;
|
|
102
|
+
ix += strideX;
|
|
86
103
|
s1 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
87
|
-
ix += stride;
|
|
104
|
+
ix += strideX;
|
|
88
105
|
s2 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
89
|
-
ix += stride;
|
|
106
|
+
ix += strideX;
|
|
90
107
|
s3 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
91
|
-
ix += stride;
|
|
108
|
+
ix += strideX;
|
|
92
109
|
s4 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
93
|
-
ix += stride;
|
|
110
|
+
ix += strideX;
|
|
94
111
|
s5 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
95
|
-
ix += stride;
|
|
112
|
+
ix += strideX;
|
|
96
113
|
s6 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
97
|
-
ix += stride;
|
|
114
|
+
ix += strideX;
|
|
98
115
|
s7 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
99
|
-
ix += stride;
|
|
116
|
+
ix += strideX;
|
|
100
117
|
|
|
101
118
|
M = N % 8;
|
|
102
119
|
for ( i = 8; i < N-M; i += 8 ) {
|
|
103
120
|
s0 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
104
|
-
ix += stride;
|
|
121
|
+
ix += strideX;
|
|
105
122
|
s1 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
106
|
-
ix += stride;
|
|
123
|
+
ix += strideX;
|
|
107
124
|
s2 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
108
|
-
ix += stride;
|
|
125
|
+
ix += strideX;
|
|
109
126
|
s3 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
110
|
-
ix += stride;
|
|
127
|
+
ix += strideX;
|
|
111
128
|
s4 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
112
|
-
ix += stride;
|
|
129
|
+
ix += strideX;
|
|
113
130
|
s5 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
114
|
-
ix += stride;
|
|
131
|
+
ix += strideX;
|
|
115
132
|
s6 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
116
|
-
ix += stride;
|
|
133
|
+
ix += strideX;
|
|
117
134
|
s7 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
|
|
118
|
-
ix += stride;
|
|
135
|
+
ix += strideX;
|
|
119
136
|
}
|
|
120
137
|
// Pairwise sum the accumulators:
|
|
121
|
-
sum = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
|
|
138
|
+
sum = ( (s0+s1) + (s2+s3) ) + ( (s4+s5) + (s6+s7) );
|
|
122
139
|
|
|
123
140
|
// Clean-up loop...
|
|
124
141
|
for (; i < N; i++ ) {
|
|
125
142
|
if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
|
|
126
143
|
sum += (double)X[ ix ];
|
|
127
144
|
}
|
|
128
|
-
ix += stride;
|
|
145
|
+
ix += strideX;
|
|
129
146
|
}
|
|
130
147
|
return sum;
|
|
131
148
|
}
|
|
132
149
|
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
|
|
133
150
|
n = N / 2;
|
|
134
151
|
n -= n % 8;
|
|
135
|
-
|
|
136
|
-
xp1 = (float *)X + ( (n-N)*stride );
|
|
137
|
-
xp2 = (float *)X;
|
|
138
|
-
} else {
|
|
139
|
-
xp1 = (float *)X;
|
|
140
|
-
xp2 = (float *)X + ( n*stride );
|
|
141
|
-
}
|
|
142
|
-
return stdlib_strided_dsnansumpw( n, xp1, stride ) + stdlib_strided_dsnansumpw( N-n, xp2, stride );
|
|
152
|
+
return API_SUFFIX(stdlib_strided_dsnansumpw_ndarray)( n, X, strideX, ix ) + API_SUFFIX(stdlib_strided_dsnansumpw_ndarray)( N-n, X, strideX, ix+(n*strideX) );
|
|
143
153
|
}
|
package/include.gypi
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
# @license Apache-2.0
|
|
2
|
-
#
|
|
3
|
-
# Copyright (c) 2020 The Stdlib Authors.
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
16
|
-
|
|
17
|
-
# A GYP include file for building a Node.js native add-on.
|
|
18
|
-
#
|
|
19
|
-
# Main documentation:
|
|
20
|
-
#
|
|
21
|
-
# [1]: https://gyp.gsrc.io/docs/InputFormatReference.md
|
|
22
|
-
# [2]: https://gyp.gsrc.io/docs/UserDocumentation.md
|
|
23
|
-
{
|
|
24
|
-
# Define variables to be used throughout the configuration for all targets:
|
|
25
|
-
'variables': {
|
|
26
|
-
# Source directory:
|
|
27
|
-
'src_dir': './src',
|
|
28
|
-
|
|
29
|
-
# Include directories:
|
|
30
|
-
'include_dirs': [
|
|
31
|
-
'<!@(node -e "var arr = require(\'@stdlib/utils-library-manifest\')(\'./manifest.json\',{},{\'basedir\':process.cwd(),\'paths\':\'posix\'}).include; for ( var i = 0; i < arr.length; i++ ) { console.log( arr[ i ] ); }")',
|
|
32
|
-
],
|
|
33
|
-
|
|
34
|
-
# Add-on destination directory:
|
|
35
|
-
'addon_output_dir': './src',
|
|
36
|
-
|
|
37
|
-
# Source files:
|
|
38
|
-
'src_files': [
|
|
39
|
-
'<(src_dir)/addon.cpp',
|
|
40
|
-
'<!@(node -e "var arr = require(\'@stdlib/utils-library-manifest\')(\'./manifest.json\',{},{\'basedir\':process.cwd(),\'paths\':\'posix\'}).src; for ( var i = 0; i < arr.length; i++ ) { console.log( arr[ i ] ); }")',
|
|
41
|
-
],
|
|
42
|
-
|
|
43
|
-
# Library dependencies:
|
|
44
|
-
'libraries': [
|
|
45
|
-
'<!@(node -e "var arr = require(\'@stdlib/utils-library-manifest\')(\'./manifest.json\',{},{\'basedir\':process.cwd(),\'paths\':\'posix\'}).libraries; for ( var i = 0; i < arr.length; i++ ) { console.log( arr[ i ] ); }")',
|
|
46
|
-
],
|
|
47
|
-
|
|
48
|
-
# Library directories:
|
|
49
|
-
'library_dirs': [
|
|
50
|
-
'<!@(node -e "var arr = require(\'@stdlib/utils-library-manifest\')(\'./manifest.json\',{},{\'basedir\':process.cwd(),\'paths\':\'posix\'}).libpath; for ( var i = 0; i < arr.length; i++ ) { console.log( arr[ i ] ); }")',
|
|
51
|
-
],
|
|
52
|
-
}, # end variables
|
|
53
|
-
}
|