@datagrok-libraries/statistics 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "access": "public"
5
5
  },
6
6
  "friendlyName": "statistics",
7
- "version": "0.1.3",
7
+ "version": "0.1.4",
8
8
  "description": "",
9
9
  "dependencies": {
10
10
  "datagrok-api": ">0.95.4",
@@ -1,234 +0,0 @@
1
/**
 * Dictionary of the elementwise binary operations, keyed by operator name.
 *
 * Every entry takes two numbers and returns a number. The entries are typed with a
 * concrete signature (rather than the opaque `Function` type) so that call sites are
 * checked by the compiler and yield `number` instead of `any`.
 *
 * `min` and `max` are plain comparisons: when either operand is NaN the comparison is
 * false and the second operand is returned.
 */
const _operations: {[name: string]: (a: number, b: number) => number} = {
  '+': (a: number, b: number): number => a + b,
  '-': (a: number, b: number): number => a - b,
  '*': (a: number, b: number): number => a * b,
  '/': (a: number, b: number): number => a / b,
  'min': (a: number, b: number): number => (a < b ? a : b),
  'max': (a: number, b: number): number => (a > b ? a : b),
};
10
-
11
/**
 * Returns the indices that would sort an array in ascending order.
 *
 * The sort is stable: equal values keep their original relative order. NaN values are
 * ordered after every number, so the comparator stays consistent and the result is
 * well defined even when the input contains NaN.
 *
 * @param {Float32Array} values Array to sort. It is not modified.
 * @return {Int32Array} Indices such that values[result[0]] <= values[result[1]] <= ...
 */
function _argsort(values: Float32Array): Int32Array {
  // Sort a plain array of positions by looking the values up, instead of building
  // [value, index] pairs first.
  const indices = Array.from(values, (_, i) => i);

  indices.sort((a, b) => {
    const x = values[a];
    const y = values[b];

    if (x === y)
      return 0;
    // NaN compares false against everything, so it has to be placed explicitly.
    if (Number.isNaN(x))
      return Number.isNaN(y) ? 0 : 1;
    if (Number.isNaN(y))
      return -1;
    return x < y ? -1 : 1;
  });

  return Int32Array.from(indices);
}
29
-
30
-
31
/**
 * Takes elements from an array at the given positions.
 *
 * The result has one element per entry of `order` (result[i] = values[order[i]]), so
 * `order` may be shorter or longer than `values`. An index outside of `values` yields NaN.
 *
 * @param {Int32Array} order The indices of the values to extract.
 * @param {Float32Array} values The source array. It is not modified.
 * @return {Float32Array} The selected values, in the order given by `order`.
 */
function _take(order: Int32Array, values: Float32Array): Float32Array {
  // TODO: Implement a general function for TypedArray.
  return Float32Array.from(order, (index) => values[index]);
}
42
-
43
/**
 * Scatters the elements of a floating-point array to the positions given by `order`.
 *
 * Starting from a copy of `values`, element i of the source is written to position
 * order[i] of the copy. Positions that no entry of `order` points to keep their
 * original value.
 *
 * @param {Float32Array} values The source array. It is not modified.
 * @param {Int32Array} order Destination index for each source element.
 * @return {Float32Array} A new array of the same type and length as `values`.
 */
function _give(values: Float32Array, order: Int32Array): Float32Array {
  const scattered = values.slice();

  order.forEach((destination, source) => {
    scattered[destination] = values[source];
  });
  return scattered;
}
58
-
59
/**
 * Scatters the elements of a boolean array to the positions given by `order`.
 *
 * Starting from a copy of `values`, element i of the source is written to position
 * order[i] of the copy. Positions that no entry of `order` points to keep their
 * original value.
 *
 * @param {Array<boolean>} values The source array. It is not modified.
 * @param {Int32Array} order Destination index for each source element.
 * @return {Array<boolean>} A new array holding the scattered flags.
 */
function _giveb(values: Array<boolean>, order: Int32Array): Array<boolean> {
  const scattered = [...values];

  order.forEach((destination, source) => {
    scattered[destination] = values[source];
  });
  return scattered;
}
74
-
75
/**
 * No-frills empirical CDF used by fdrcorrection.
 *
 * Only the length of `x` is used: position i of the result holds (i + 1) / x.length.
 *
 * @param {Float32Array} x The array whose length defines the number of observations.
 * @return {Float32Array} The values 1/n, 2/n, ..., 1 for n = x.length.
 */
function _ecdf(x: Float32Array): Float32Array {
  const count = x.length;
  const steps = new Float32Array(count);

  for (let k = 0; k < count; ++k) {
    steps[k] = (k + 1) / count;
  }
  return steps;
}
85
-
86
/**
 * Sums the reciprocals 1/1 + 1/2 + ... for as long as the zero-based term index is
 * below `n`. Equivalent to the NumPy expression
 * np.sum(1./np.arange(1, len(pvals_sorted)+1)) with n = len(pvals_sorted).
 *
 * @param {number} n The number of terms to add.
 * @return {number} The cm value; 0 when n is not positive.
 */
function _cm(n: number): number {
  let total = 0;
  let term = 0;

  while (term < n) {
    term += 1;
    total += 1 / term;
  }
  return total;
}
99
-
100
/**
 * Applies a binary operation between every element of a vector and a scalar.
 *
 * The operation is looked up by name in `_operations` and called as
 * operation(element, scale).
 *
 * @param {Float32Array} values The source vector. It is not modified.
 * @param {number} scale The scalar used as the second operand.
 * @param {string} [op='*'] Name of the operation to perform.
 * @return {Float32Array} New vector holding the result for each element.
 */
function _factor(values: Float32Array, scale: number, op = '*'): Float32Array {
  const result = new Float32Array(values.length);

  for (let k = 0; k < values.length; ++k) {
    result[k] = _operations[op](values[k], scale);
  }
  return result;
}
111
-
112
/**
 * Applies a binary operation elementwise between two vectors.
 *
 * The operation is looked up by name in `_operations` and called as
 * operation(values[i], scale[i]) for every index of `values`.
 *
 * @param {Float32Array} values The first vector. It is not modified.
 * @param {Float32Array} scale The second vector, read at the same indices.
 * @param {string} [op='*'] Name of the operation to perform.
 * @return {Float32Array} New vector with one result per element of `values`.
 */
function _vfactor(values: Float32Array, scale: Float32Array, op = '*'): Float32Array {
  const result = new Float32Array(values.length);

  for (let k = 0; k < values.length; ++k) {
    result[k] = _operations[op](values[k], scale[k]);
  }
  return result;
}
123
-
124
/**
 * Accumulates the running minimum of an array: element i of the result is the
 * minimum of values[0..i].
 *
 * Runs in a single pass by comparing each element with the previous running minimum
 * (instead of re-reducing the whole prefix for every element). The comparison is
 * `previous < current ? previous : current`, so a NaN element is written through but
 * does not stick: the following element replaces it.
 *
 * @param {Float32Array} values The array to act on. It is not modified.
 * @return {Float32Array} The accumulated values.
 */
function _minimumAccumulate(values: Float32Array): Float32Array {
  const result = Float32Array.from(values);

  for (let i = 1; i < result.length; ++i) {
    const previous = result[i - 1];
    if (previous < result[i]) {
      result[i] = previous;
    }
  }
  return result;
}
139
-
140
/**
 * p-value correction for false discovery rate.
 *
 * The p-values are sorted in ascending order (unless `isSorted` is set), compared with
 * the step-up thresholds `ecdf * alpha`, and every hypothesis up to and including the
 * largest one that passes its threshold is rejected. Adjusted p-values are the
 * p-values divided by the thresholds' factor, made monotone from the right and capped
 * at 1. Results are returned in the original order of `pvals`.
 *
 * If there is prior information on the fraction of true hypotheses, then alpha
 * should be set to ``alpha * m/m_0`` where m is the number of tests,
 * given by the p-values, and m_0 is an estimate of the true hypotheses
 * (see Benjamini, Krieger and Yekutieli).
 *
 * @export
 * @param {Float32Array} pvals Set of p-values of the individual tests. It is not modified.
 * @param {number} [alpha=0.05] Family-wise error rate. Defaults to 0.05.
 * @param {string} [method='n'] {'i', 'indep', 'p', 'poscorr', 'n', 'negcorr'}, optional
 * Which method to use for FDR correction.
 * ``{'i', 'indep', 'p', 'poscorr'}`` all refer to ``fdr_bh``
 * (Benjamini/Hochberg for independent or positively
 * correlated tests). ``{'n', 'negcorr'}`` both refer to ``fdr_by``
 * (Benjamini/Yekutieli for general or negatively correlated tests).
 * Defaults to 'n'.
 * @param {boolean} [isSorted=false] If false (default), the p-values will be sorted, but the
 * corrected p-values are in the original order. If true, it is assumed that the
 * p-values are already sorted in ascending order.
 * @return {[Array<boolean>, Float32Array]} A pair [rejected, pvalue-corrected]:
 * rejected is true where a hypothesis is rejected, false otherwise;
 * pvalue-corrected holds the p-values adjusted for multiple hypothesis testing.
 * @throws {Error} If `method` is not one of the supported names.
 */
export function fdrcorrection(
  pvals: Float32Array,
  alpha: number = 0.05,
  method: string ='n',
  isSorted: boolean = false): [Array<boolean>, Float32Array]
// eslint-disable-next-line brace-style
{
  const nItems = pvals.length;

  // The permutation is only needed when the input has to be sorted here.
  const pvalsSortind = isSorted ? null : _argsort(pvals);
  const pvalsSorted = pvalsSortind === null ? pvals : _take(pvalsSortind, pvals);

  let ecdffactor = _ecdf(pvalsSorted);

  if (['n', 'negcorr'].includes(method)) {
    // Benjamini/Yekutieli: shrink the thresholds by the harmonic sum.
    ecdffactor = _factor(ecdffactor, _cm(nItems), '/');
  } else if (!['i', 'indep', 'p', 'poscorr'].includes(method)) {
    throw new Error('only indep and negcorr implemented');
  }

  // Index of the largest sorted p-value that passes its threshold, or -1 if none does.
  let rejectmax = -1;
  for (let i = 0; i < nItems; ++i) {
    if (pvalsSorted[i] <= ecdffactor[i]*alpha) {
      rejectmax = i;
    }
  }

  // Step-up rule: reject everything up to AND including rejectmax. Stopping one short
  // would leave the hypothesis that passed its own threshold unrejected.
  const reject: boolean[] = new Array<boolean>(nItems).fill(false);
  reject.fill(true, 0, rejectmax + 1);

  // _vfactor and _minimumAccumulate both return fresh arrays, so the in-place
  // reversals never touch the caller's data.
  const pvalsCorrected = _minimumAccumulate(_vfactor(pvalsSorted, ecdffactor, '/').reverse()).reverse();

  for (let i = 0; i < nItems; ++i) {
    if (pvalsCorrected[i] > 1) {
      pvalsCorrected[i] = 1;
    }
  }

  if (pvalsSortind !== null) {
    // Put the results back into the order of the input.
    return [_giveb(reject, pvalsSortind), _give(pvalsCorrected, pvalsSortind)];
  }
  return [reject, pvalsCorrected];
}
package/src/tests.ts DELETED
@@ -1,101 +0,0 @@
1
- //@ts-ignore: no types
2
- import * as jStat from 'jstat';
3
-
4
/**
 * Result record returned by the two-sample tests in this module.
 *
 * The three p-value fields are always present; a test reports either the mean or the
 * median difference of its two samples, so both of those fields are optional.
 */
type testStats = {
  'p-value': number;
  'p-value more': number;
  'p-value less': number;
  'Mean difference'?: number;
  'Median difference'?: number;
};
11
-
12
/**
 * Two-sample test for the difference of means.
 *
 * Three variants are selected by the flags:
 * - `devKnown` true: z-test, the statistic is referred to the standard normal
 * distribution (the variances computed from the data are treated as known);
 * - `devKnown` false, `devEqual` true: Student's t-test with a pooled variance and
 * n1 + n2 - 2 degrees of freedom;
 * - both false (default): Welch's t-test with Welch-Satterthwaite degrees of freedom.
 *
 * @export
 * @param {number[]} arr1 First sample.
 * @param {number[]} arr2 Second sample.
 * @param {boolean} [devKnown=false] Whether the deviations are considered known.
 * @param {boolean} [devEqual=false] Whether the deviations are assumed equal
 * (only used when `devKnown` is false).
 * @return {testStats} 'p-value' is the two-sided p-value, 'p-value less' the lower-tail
 * probability of the statistic, 'p-value more' the upper-tail probability, and
 * 'Mean difference' is mean(arr1) - mean(arr2).
 */
export function tTest(arr1: number[], arr2: number[], devKnown=false, devEqual=false): testStats {
  const m1: number = jStat.mean(arr1);
  const m2: number = jStat.mean(arr2);
  const n1 = arr1.length;
  const n2 = arr2.length;
  const meanDifference = m1 - m2;

  let pLess: number;

  if (devKnown) {
    const wv1 = jStat.variance(arr1) / n1;
    const wv2 = jStat.variance(arr2) / n2;
    const Z = meanDifference / Math.sqrt(wv1 + wv2);

    // Lower-tail probability: this must be the CDF; the density is not a probability.
    pLess = jStat.normal.cdf(Z, 0, 1);
  } else {
    // The t-test formulas below (n - 1 weights and degrees of freedom) are written for
    // the unbiased sample variance, which jStat computes when the flag is true.
    const v1: number = jStat.variance(arr1, true);
    const v2: number = jStat.variance(arr2, true);

    if (devEqual) {
      const K = n1 + n2 - 2;
      const pooledVariance = (v1 * (n1 - 1) + v2 * (n2 - 1)) / K;
      // The statistic is scaled by the pooled standard deviation, not the variance.
      const Z = Math.sqrt(n1 * n2 / (n1 + n2)) * meanDifference / Math.sqrt(pooledVariance);

      pLess = jStat.studentt.cdf(Z, K);
    } else {
      const wv1 = v1 / n1;
      const wv2 = v2 / n2;
      const Z = meanDifference / Math.sqrt(wv1 + wv2);
      const K = Math.pow((wv1 + wv2), 2) / (wv1 * wv1 / (n1 - 1) + wv2 * wv2 / (n2 - 1));

      pLess = jStat.studentt.cdf(Z, K);
    }
  }

  const pMore = 1 - pLess;
  const pTot = 2 * (pLess < pMore ? pLess : pMore);

  return {'p-value': pTot, 'Mean difference': meanDifference, 'p-value more': pMore, 'p-value less': pLess};
}
59
-
60
/**
 * Mann-Whitney U test for two independent samples, using the normal approximation.
 *
 * The samples are ranked together; U1 is derived from the rank sum of `x` and
 * U2 = n1 * n2 - U1. Each p-value is the upper tail of the standard normal
 * distribution at (U - mean - continuity shift) / standard deviation.
 *
 * @export
 * @param {number[]} x First sample.
 * @param {number[]} y Second sample.
 * @param {boolean} [continuity=true] Whether to apply the 0.5 continuity correction.
 * @return {testStats} 'p-value' is the two-sided p-value (never above 1),
 * 'p-value more' the one-sided p-value computed from U1 (large when `x` ranks low),
 * 'p-value less' the one-sided p-value computed from U2, and 'Median difference' is
 * median(x) - median(y).
 */
export function uTest(x: number[], y: number[], continuity=true): testStats {
  const n1 = x.length;
  const n2 = y.length;
  const n = n1 + n2;
  const medianDifference = jStat.median(x) - jStat.median(y);

  const ranks: number[] = jStat.rank(x.concat(y));

  const U1 = jStat.sum(ranks.slice(0, n1)) - n1 * (n1 + 1) / 2;
  const U2 = n1 * n2 - U1;

  const mu = n1 * n2 / 2;
  const s = Math.sqrt(n1 * n2 / 12 * ((n + 1) - _tieTerm(ranks) / (n * (n - 1))));

  if (s === 0) {
    // The normal approximation is undefined with zero spread (for example when one
    // sample is empty); report "no evidence" rather than NaN or a value above 1.
    return {'p-value': 1, 'Median difference': medianDifference, 'p-value more': 1, 'p-value less': 1};
  }

  const shift = continuity ? 0.5 : 0;
  const upperTail = (U: number): number => 1 - jStat.normal.cdf((U - mu - shift) / s, 0, 1);

  const pMore = upperTail(U1);
  const pLess = upperTail(U2);
  // The larger U has the smaller tail, so this is the usual 2 * tail(max(U1, U2));
  // the continuity shift can push it past 1, hence the cap.
  const p = Math.min(1, 2 * Math.min(pMore, pLess));

  return {'p-value': p, 'Median difference': medianDifference, 'p-value more': pMore, 'p-value less': pLess};
}
92
-
93
/**
 * Computes the tie correction term of the Mann-Whitney U variance: the sum of
 * t^3 - t over every group of t equal ranks.
 *
 * Untied ranks form groups of size 1 and contribute nothing, so the term is 0 when
 * there are no ties.
 *
 * @param {number[]} ranks Ranks of the pooled observations (tied values share a rank).
 * @return {number} The sum of t^3 - t over all groups of tied ranks.
 */
function _tieTerm(ranks: number[]): number {
  const groupSizes = new Map<number, number>();

  ranks.forEach((rank) => {
    groupSizes.set(rank, (groupSizes.get(rank) || 0) + 1);
  });

  let term = 0;
  groupSizes.forEach((size) => {
    term += size * size * size - size;
  });
  return term;
}