sstat 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/#console# +14 -0
- data/ext/extconf.rb +4 -0
- data/ext/lib/distribution.h +274 -0
- data/ext/lib/global_utility.h +17 -0
- data/ext/lib/survival.h +6 -0
- data/ext/lib/survival_def.h +47 -0
- data/ext/lib/survival_func.h +204 -0
- data/ext/lib/survival_utility.h +292 -0
- data/ext/lib/type_def.h +35 -0
- data/ext/sstat/lib/histogram/histogram.h +8 -0
- data/ext/sstat/lib/histogram/histogram_error.h +35 -0
- data/ext/sstat/lib/histogram/histogram_stat.h +73 -0
- data/ext/sstat/lib/histogram/histogram_type.h +14 -0
- data/ext/sstat/lib/survival.h +1 -0
- data/ext/sstat/lib/survival_def.h +100 -4
- data/ext/sstat/lib/survival_func.h +16 -43
- data/ext/sstat/lib/survival_kaplan_meier.h +314 -0
- data/ext/sstat/lib/survival_utility.h +2 -123
- data/ext/sstat/lib/type_def.h +15 -0
- data/ext/sstat/sstat.c +80 -14
- data/ext/sstat/sstat.h +20 -9
- data/lib/simple_statistics/version.rb +1 -1
- data/lib/sstat.so +0 -0
- metadata +17 -3
- data/ext/sstat/Makefile +0 -238
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
#ifndef _SURVIVAL_STAT_UTILITY_H_
|
|
2
|
+
/* Include-guard macro: the spelling must match the name tested by the #ifndef. */
#define _SURVIVAL_STAT_UTILITY_H_
|
|
3
|
+
|
|
4
|
+
#include <stdio.h>
|
|
5
|
+
#include <stdlib.h>
|
|
6
|
+
#include "global_utility.h"
|
|
7
|
+
#include "survival_def.h"
|
|
8
|
+
#include "type_def.h"
|
|
9
|
+
|
|
10
|
+
struct array create_sorted_unique_array(double* array, int size)
|
|
11
|
+
{
|
|
12
|
+
struct array arr;
|
|
13
|
+
int i;
|
|
14
|
+
/* For performance, we have not used memset here. This might be risky */
|
|
15
|
+
arr.D_ptr = NULL;
|
|
16
|
+
int count;
|
|
17
|
+
|
|
18
|
+
qsort(array, size, sizeof(double), &compare_double);
|
|
19
|
+
|
|
20
|
+
count = 1;
|
|
21
|
+
//calcualte number of unique
|
|
22
|
+
for(i = 1; i < size; ++i)
|
|
23
|
+
{
|
|
24
|
+
if(array[i] != array[i-1])
|
|
25
|
+
{
|
|
26
|
+
count++;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
double * unique_arr = (double *) malloc(count * sizeof(double));
|
|
31
|
+
|
|
32
|
+
//assign unique elements
|
|
33
|
+
count = 1;
|
|
34
|
+
unique_arr[0] = array[0];
|
|
35
|
+
for(i = 1; i < size; ++i)
|
|
36
|
+
{
|
|
37
|
+
if(array[i] != array[i-1])
|
|
38
|
+
{
|
|
39
|
+
unique_arr[count] = array[i];
|
|
40
|
+
count++;
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
arr.D_ptr = unique_arr;
|
|
45
|
+
arr.size = count;
|
|
46
|
+
|
|
47
|
+
return arr;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
array merge_two_array(double* array_1, int size_1, double* array_2, int size_2)
|
|
51
|
+
{
|
|
52
|
+
int i;
|
|
53
|
+
struct array arr;
|
|
54
|
+
|
|
55
|
+
//To speed up, choose to do not memset arr.
|
|
56
|
+
arr.D_ptr = NULL;
|
|
57
|
+
|
|
58
|
+
int total_size = size_1 + size_2;
|
|
59
|
+
double * merged_array = (double *) malloc(total_size * sizeof(double));
|
|
60
|
+
for(i = 0; i < size_1; i++)
|
|
61
|
+
{
|
|
62
|
+
merged_array[i] = array_1[i];
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
for( i = 0; i < size_2; i++)
|
|
66
|
+
{
|
|
67
|
+
merged_array[i + size_1] = array_2[i];
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
arr.D_ptr = merged_array;
|
|
71
|
+
arr.size = total_size;
|
|
72
|
+
|
|
73
|
+
return arr;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
int find_first_index_has(double* arr, int size, double value)
|
|
77
|
+
{
|
|
78
|
+
int i;
|
|
79
|
+
for(i = 0; i < size; i++)
|
|
80
|
+
{
|
|
81
|
+
if(fabs(arr[i] - value) < EPSILON)
|
|
82
|
+
{
|
|
83
|
+
return i;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
//no value in the array
|
|
88
|
+
return -1;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
struct Group_N group_N_self_range(double* time, int* censored, int size)
|
|
92
|
+
{
|
|
93
|
+
int i, count_at, uncensored_num_at, censored_num_at;
|
|
94
|
+
double tmp, time_at;
|
|
95
|
+
|
|
96
|
+
// sort time and censored based on time together, time can censored array
|
|
97
|
+
struct point* time_censored_array = (struct point*) malloc(size * sizeof(struct point));
|
|
98
|
+
|
|
99
|
+
//censored, if censored[] is positive
|
|
100
|
+
for (i = 0; i < size; i++)
|
|
101
|
+
{
|
|
102
|
+
time_censored_array[i].x = time[i];
|
|
103
|
+
if (censored[i] > 0)
|
|
104
|
+
time_censored_array[i].y = 1;
|
|
105
|
+
else
|
|
106
|
+
time_censored_array[i].y = -1;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
qsort(time_censored_array, size, sizeof(struct point), &point_compare_x);
|
|
110
|
+
|
|
111
|
+
//count unique uncensored time point
|
|
112
|
+
int count = 0;
|
|
113
|
+
for (i = 0; i < size; i++)
|
|
114
|
+
{ //uncensored
|
|
115
|
+
if (time_censored_array[i].y < 0)
|
|
116
|
+
{
|
|
117
|
+
if (count == 0)
|
|
118
|
+
{
|
|
119
|
+
count++;
|
|
120
|
+
tmp = time_censored_array[i].x;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
if (count > 0)
|
|
124
|
+
{ //unique
|
|
125
|
+
if (time_censored_array[i].x != tmp)
|
|
126
|
+
{
|
|
127
|
+
count++;
|
|
128
|
+
tmp = time_censored_array[i].x;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
double* unique_uncensored_time = (double *) malloc(count * sizeof(double));
|
|
135
|
+
|
|
136
|
+
count = 0;
|
|
137
|
+
|
|
138
|
+
for (i = 0; i < size; i++)
|
|
139
|
+
{
|
|
140
|
+
if (time_censored_array[i].y < 0)
|
|
141
|
+
{
|
|
142
|
+
if (count == 0)
|
|
143
|
+
{
|
|
144
|
+
count++;
|
|
145
|
+
unique_uncensored_time[count] = time_censored_array[i].x;
|
|
146
|
+
tmp = time_censored_array[i].x;
|
|
147
|
+
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
if (count > 0)
|
|
151
|
+
{
|
|
152
|
+
if (time_censored_array[i].x != tmp)
|
|
153
|
+
{
|
|
154
|
+
unique_uncensored_time[count] = time_censored_array[i].x;
|
|
155
|
+
|
|
156
|
+
count++;
|
|
157
|
+
tmp = time_censored_array[i].x;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
int* uncensored_num = (int *) malloc(count * sizeof(int));
|
|
164
|
+
int* censored_num = (int *) malloc(count * sizeof(int));
|
|
165
|
+
|
|
166
|
+
//record current time point
|
|
167
|
+
time_at = unique_uncensored_time[0];
|
|
168
|
+
count_at = 0;
|
|
169
|
+
uncensored_num_at = 0;
|
|
170
|
+
censored_num_at = 0;
|
|
171
|
+
|
|
172
|
+
for (i = 0; i < size; i++)
|
|
173
|
+
{
|
|
174
|
+
if (time_censored_array[i].x <= time_at)
|
|
175
|
+
{
|
|
176
|
+
if (time_censored_array[i].y > 0)
|
|
177
|
+
censored_num_at++;
|
|
178
|
+
else
|
|
179
|
+
uncensored_num_at++;
|
|
180
|
+
|
|
181
|
+
if (i == size - 1)
|
|
182
|
+
{
|
|
183
|
+
uncensored_num[count_at] = uncensored_num_at;
|
|
184
|
+
censored_num[count_at] = censored_num_at;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
} else {
|
|
188
|
+
uncensored_num[count_at] = uncensored_num_at;
|
|
189
|
+
censored_num[count_at] = censored_num_at;
|
|
190
|
+
count_at++;
|
|
191
|
+
|
|
192
|
+
uncensored_num_at = 0;
|
|
193
|
+
censored_num_at = 0;
|
|
194
|
+
time_at = unique_uncensored_time[count_at];
|
|
195
|
+
|
|
196
|
+
if (time_censored_array[i].y > 0)
|
|
197
|
+
censored_num_at++;
|
|
198
|
+
else
|
|
199
|
+
uncensored_num_at++;
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
Group_N at_risk_result;
|
|
204
|
+
at_risk_result.uncensored = uncensored_num;
|
|
205
|
+
at_risk_result.censored = censored_num;
|
|
206
|
+
at_risk_result.size = count;
|
|
207
|
+
at_risk_result.time = unique_uncensored_time;
|
|
208
|
+
free(time_censored_array);
|
|
209
|
+
return at_risk_result;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
struct Group_N group_N_given_range(double* time, int* censored, int size, double* unique_time, int unique_time_size)
|
|
213
|
+
{
|
|
214
|
+
int i, count_at, uncensored_num_at, censored_num_at;
|
|
215
|
+
double time_at;
|
|
216
|
+
|
|
217
|
+
struct point* time_censored_array = (struct point*) malloc(size * sizeof(struct point));
|
|
218
|
+
|
|
219
|
+
for (i = 0; i < size; i++)
|
|
220
|
+
{
|
|
221
|
+
time_censored_array[i].x = time[i];
|
|
222
|
+
//not very fast here, prefer to define another point
|
|
223
|
+
if (censored[i] > 0)
|
|
224
|
+
time_censored_array[i].y = 1;
|
|
225
|
+
else
|
|
226
|
+
time_censored_array[i].y = -1;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
qsort(time_censored_array, size, sizeof(struct point), &point_compare_x);
|
|
230
|
+
|
|
231
|
+
int* uncensored_num = (int *) malloc(unique_time_size * sizeof(int));
|
|
232
|
+
int* censored_num = (int *) malloc(unique_time_size * sizeof(int));
|
|
233
|
+
|
|
234
|
+
for (i = 0; i < unique_time_size; i++)
|
|
235
|
+
{
|
|
236
|
+
uncensored_num[i] = 0;
|
|
237
|
+
censored_num[i] = 0;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
//record current time point
|
|
241
|
+
time_at = unique_time[0];
|
|
242
|
+
count_at = 0;
|
|
243
|
+
uncensored_num_at = 0;
|
|
244
|
+
censored_num_at = 0;
|
|
245
|
+
|
|
246
|
+
for (i = 0; i < size; i++)
|
|
247
|
+
{
|
|
248
|
+
if (time_censored_array[i].x <= time_at)
|
|
249
|
+
{
|
|
250
|
+
if (time_censored_array[i].y > 0)
|
|
251
|
+
censored_num_at++;
|
|
252
|
+
else
|
|
253
|
+
uncensored_num_at++;
|
|
254
|
+
|
|
255
|
+
if (i == size - 1)
|
|
256
|
+
{
|
|
257
|
+
uncensored_num[count_at] = uncensored_num_at;
|
|
258
|
+
censored_num[count_at] = censored_num_at;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
} else {
|
|
262
|
+
|
|
263
|
+
while (time_censored_array[i].x > time_at)
|
|
264
|
+
{
|
|
265
|
+
uncensored_num[count_at] = uncensored_num_at;
|
|
266
|
+
censored_num[count_at] = censored_num_at;
|
|
267
|
+
count_at++;
|
|
268
|
+
|
|
269
|
+
uncensored_num_at = 0;
|
|
270
|
+
censored_num_at = 0;
|
|
271
|
+
time_at = unique_time[count_at];
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
if (time_censored_array[i].y > 0)
|
|
275
|
+
censored_num_at++;
|
|
276
|
+
else
|
|
277
|
+
uncensored_num_at++;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
Group_N at_risk_result;
|
|
282
|
+
at_risk_result.uncensored = uncensored_num;
|
|
283
|
+
at_risk_result.censored = censored_num;
|
|
284
|
+
at_risk_result.size = unique_time_size;
|
|
285
|
+
at_risk_result.time = unique_time;
|
|
286
|
+
|
|
287
|
+
free(time_censored_array);
|
|
288
|
+
|
|
289
|
+
return at_risk_result;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
#endif
|
data/ext/lib/type_def.h
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#ifndef _SSTAT_TYPE_DEF_H_
|
|
2
|
+
#define _SSTAT_TYPE_DEF_H_
|
|
3
|
+
|
|
4
|
+
/*
 * EPSILON is set based on Ruby's Float; it needs to be changed if this code
 * is reused in another project.
 */
#define EPSILON 2.2204460492503131e-16

/*
 * The following constants are from ATMEL http://www.atmel.com/webdoc/
 * M_2_SQRTPI, M_SQRT1_2 and M_SQRT2 are also provided by <math.h> on POSIX
 * systems, so each is only defined here when it is still missing.
 */
#ifndef M_2_SQRTPI
#define M_2_SQRTPI 1.12837916709551257390
#endif
#ifndef M_SQRT1_2
#define M_SQRT1_2 0.70710678118654752440
#endif
#ifndef M_SQRT2
#define M_SQRT2 1.41421356237309504880
#endif
#define M_1_SQRT2PI (M_2_SQRTPI * M_SQRT1_2 / 2.0)
#define SQRT32 (4.0 * M_SQRT2)

/* GSL constants for the Gaussian distribution */
#define GAUSS_XUPPER (8.572)
#define GAUSS_XLOWER (-37)
#define GAUSS_SCALE (16.0)
#define GAUSS_EPSILON 1e-12
|
|
19
|
+
|
|
20
|
+
/* A pair of double values, addressed as x and y. */
struct point {
    double x;
    double y;
};
typedef struct point point;
|
|
24
|
+
|
|
25
|
+
/* A sequence of points: a pointer to the first point plus a count. */
struct curve {
    struct point* point_array;
    int size;
};
typedef struct curve curve;
|
|
29
|
+
|
|
30
|
+
/* A double buffer together with its element count. */
struct array {
    double* D_ptr;
    int size;
};
typedef struct array array;
|
|
34
|
+
|
|
35
|
+
#endif
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#ifndef _HISTOGRAM_ERROR_H
|
|
2
|
+
#define _HISTOGRAM_ERROR_H
|
|
3
|
+
#include <string.h>
|
|
4
|
+
#include <stdlib.h>
|
|
5
|
+
|
|
6
|
+
/* Status codes used by the histogram routines. */
typedef enum {
    HIST_SUCCESS,   /* 0 */
    M_ALLOC_ERR,    /* 1 */
    OUT_OF_ARANGE   /* 2 */
} HIST_ERR;
|
|
11
|
+
|
|
12
|
+
/*
 * Allocate an n-byte buffer and copy err_msg into it.
 *
 * err_msg: NUL-terminated message to copy.
 * n:       size of the buffer to allocate, in bytes.
 *
 * The copy is truncated to n - 1 characters when the message does not fit,
 * and the result is always NUL-terminated. Returns the new buffer, which the
 * caller must release with free(), or NULL when err_msg is NULL, n is 0, or
 * the allocation fails.
 */
char* alloc_str_err(const char* err_msg, size_t n) {
    char *err_buf;
    size_t len;

    /* A zero-byte buffer cannot even hold the terminator. */
    if (err_msg == NULL || n == 0) {
        return NULL;
    }

    err_buf = malloc(n);
    if (err_buf == NULL) {
        return NULL;
    }

    /* Bounded copy: never write more than the n bytes that were allocated. */
    len = strlen(err_msg);
    if (len >= n) {
        len = n - 1;
    }
    memcpy(err_buf, err_msg, len);
    err_buf[len] = '\0';

    return err_buf;
}
|
|
21
|
+
|
|
22
|
+
/*
 * Write the message for a histogram error code into a caller-owned buffer.
 *
 * err_buf:  destination buffer supplied by the caller.
 * buf_n:    size of err_buf in bytes.
 * hist_err: error code to describe.
 *
 * For M_ALLOC_ERR and OUT_OF_ARANGE the matching message is copied into
 * err_buf, truncated to buf_n - 1 characters if needed and always
 * NUL-terminated. For any other code (including HIST_SUCCESS) the buffer is
 * left untouched. Nothing is allocated: err_buf is passed by value, so a
 * pointer assigned to it inside this function could never reach the caller.
 */
void get_hist_err(char * err_buf, size_t buf_n, HIST_ERR hist_err) {
    const char *msg = NULL;
    size_t len;

    if (hist_err == M_ALLOC_ERR) {
        msg = "Histogram: Bad Memory allocation";
    } else if (hist_err == OUT_OF_ARANGE) {
        msg = "Histogram: size out of range";
    }

    /* No message for this code, or nowhere to put one. */
    if (msg == NULL || err_buf == NULL || buf_n == 0) {
        return;
    }

    len = strlen(msg);
    if (len >= buf_n) {
        len = buf_n - 1;
    }
    memcpy(err_buf, msg, len);
    err_buf[len] = '\0';
}
|
|
35
|
+
#endif
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#ifndef _HISTOGRAM_MEAN_H
|
|
2
|
+
#define _HISTOGRAM_MEAN_H
|
|
3
|
+
#include "histogram_type.h"
|
|
4
|
+
|
|
5
|
+
/*
|
|
6
|
+
*
|
|
7
|
+
*/
|
|
8
|
+
int histogram_mean(const histogram* h, f_T* res)
|
|
9
|
+
{
|
|
10
|
+
int i, n;
|
|
11
|
+
f_T xi, wi;
|
|
12
|
+
f_T hmean = 0;
|
|
13
|
+
f_T W = 0;
|
|
14
|
+
n = h->n;
|
|
15
|
+
|
|
16
|
+
for(i = 0; i < n; i++)
|
|
17
|
+
{
|
|
18
|
+
//make sure size of h->range is n + 1
|
|
19
|
+
xi = (h->range[i+1] + h->range[i]) / 2;
|
|
20
|
+
wi = h->bin[i];
|
|
21
|
+
if(wi > 0)
|
|
22
|
+
{
|
|
23
|
+
W += wi;
|
|
24
|
+
hmean += (hmean - xi) * wi / W;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
(*res) = hmean;
|
|
29
|
+
return 0;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/*
 * Add up the values of all h->n bins of the histogram, in index order, and
 * store the total in *res. Always returns 0.
 */
int histogram_bin_sum(const histogram* h, f_T* res)
{
    f_T total = 0;
    size_t bins = h->n;
    size_t idx = 0;

    while (idx < bins)
    {
        total += h->bin[idx];
        idx++;
    }

    *res = total;
    return 0;
}
|
|
45
|
+
|
|
46
|
+
/*
 * Approximate median of a histogram: the midpoint of the first bin at which
 * the cumulative bin total reaches half of the overall total.
 *
 * h:   histogram with h->n bins; h->range must hold h->n + 1 edges.
 * res: receives the midpoint of the selected bin.
 *
 * Returns 0 when *res was written. Returns -1 when h or res is NULL, when
 * histogram_bin_sum reports an error, or when no bin reaches the halfway
 * total (for example a histogram with zero bins) -- in that case *res is
 * left untouched.
 */
int histogram_median(const histogram* h, f_T* res)
{
    size_t i, n;
    f_T sum, sum_50;

    if (h == NULL || res == NULL)
        return -1;

    if (histogram_bin_sum(h, &sum) != 0)
        return -1; //unexpected error

    n = h->n;

    sum_50 = sum / 2.0;
    sum = 0;
    for(i = 0; i < n; i++)
    {
        sum += h->bin[i];

        if(sum >= sum_50)
        {
            (*res) = (h->range[i] + h->range[i+1]) / 2;
            return 0;
        }
    }

    /* No bin reached the halfway total, so there is nothing to report. */
    return -1;
}
|
|
72
|
+
|
|
73
|
+
#endif
|