sstat 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,5 +2,6 @@
2
2
  #define _SURVIVAL_STAT_H_
3
3
 
4
4
  #include "survival_func.h"
5
+ #include "survival_kaplan_meier.h"
5
6
 
6
7
  #endif
@@ -2,14 +2,110 @@
2
2
  #define _SURVIVAL_STAT_DEF_H_
3
3
 
4
4
  #include "type_def.h"
5
- //define survival stat data
6
5
 
7
- typedef struct Group_N
6
+ /* Debug macro from http://c.learncodethehardway.org/book/ex20.html */
7
+
8
+ #include <stdio.h>
9
+ #include <errno.h>
10
+ #include <string.h>
11
+
12
/*
 * Logging and error-checking helpers.
 *
 * debug()      prints a message tagged with file and line to stderr; it
 *              compiles to nothing when NDEBUG is defined.
 * log_err(), log_warn(), log_info()
 *              print a tagged message to stderr; the first two also report
 *              the current errno text.
 * check()      if condition A is false: logs the message, resets errno and
 *              jumps to the label `error` of the calling function.
 * check_1()    same as check() but jumps to the label `error_1`.
 * sentinel()   unconditionally logs, resets errno and jumps to `error`.
 * check_mem()/check_mem_1()
 *              check()/check_1() with a fixed "Out of memory." message.
 * check_debug() same as check() but reports through debug() instead of log_err().
 *
 * The jumping macros are wrapped in do { ... } while (0) so that each use is
 * exactly one statement: `if (x) check(...); else ...` then parses the way it
 * reads instead of failing to compile or binding the else to the hidden if.
 */
#ifdef NDEBUG
#define debug(M, ...)
#else
#define debug(M, ...) fprintf(stderr, "DEBUG %s:%d: " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
#endif

#define clean_errno() (errno == 0 ? "None" : strerror(errno))

#define log_err(M, ...) fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)

#define log_warn(M, ...) fprintf(stderr, "[WARN] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)

#define log_info(M, ...) fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)

#define check(A, M, ...) do { if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error; } } while (0)
#define check_1(A, M, ...) do { if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error_1; } } while (0)

#define sentinel(M, ...) do { log_err(M, ##__VA_ARGS__); errno=0; goto error; } while (0)

#define check_mem(A) check((A), "Out of memory.")
#define check_mem_1(A) check_1((A), "Out of memory.")

#define check_debug(A, M, ...) do { if(!(A)) { debug(M, ##__VA_ARGS__); errno=0; goto error; } } while (0)
35
+
36
/* Non-zero status codes returned by the allocation helpers in this header. */
enum ERRORS
{
    /* a memory allocation failed */
    OUTOF_MEMORY_ERROR = 1,
    /* not referenced by the code visible in this header */
    NOT_EMPTY_ERROR = 2
};
40
+
41
/**
 * @brief Table holding, for each time range, how many samples were
 *        uncensored and how many were censored.
 *
 * The three arrays run in parallel: entry i of each describes time range i.
 */
typedef struct CENS_UC_NUM
{
    int *uncensored; //number of uncensored samples in each time range
    int *censored;   //number of censored samples in each time range
    double *time;    //time series
    int size;        //number of time ranges stored in the arrays
} CENS_UC_NUM;
51
+
52
+ void free_CENS_UC_NUM(struct CENS_UC_NUM* instance)
53
+ {
54
+ if (instance != NULL)
55
+ {
56
+ if(instance->uncensored != NULL)
57
+ free(instance->uncensored);
58
+
59
+ if(instance->censored != NULL)
60
+ free(instance->censored);
61
+
62
+ if(instance->time != NULL)
63
+ free(instance->time);
64
+
65
+ free(instance);
66
+ }
67
+ }
68
+
69
+
70
+ void print_CENS_UC_NUM(struct CENS_UC_NUM *cens_uncens_instance)
71
+ {
72
+ int i;
73
+ puts("Start to print out Group N: ");
74
+ for( i = 0; i < cens_uncens_instance->size; i++ )
75
+ {
76
+ printf("Time : %f -- Uncensored : %i -- Censored : %i \n",
77
+ cens_uncens_instance-> time[i],
78
+ cens_uncens_instance-> uncensored[i],
79
+ cens_uncens_instance-> censored[i]);
80
+ }
81
+ }
82
+
83
+ int alloc_CENS_UC_NUM(struct CENS_UC_NUM** cens_uncens_instance, int size)
84
+ {
85
+
86
+ (*cens_uncens_instance) = malloc(sizeof(struct CENS_UC_NUM));
87
+ check_mem_1(cens_uncens_instance);
88
+
89
+ (*cens_uncens_instance)->size = size;
90
+
91
+ (*cens_uncens_instance)->uncensored = malloc(size * sizeof(int));
92
+ check_mem_1(cens_uncens_instance);
93
+
94
+ (*cens_uncens_instance)->censored = malloc(size * sizeof(int));
95
+ check_mem_1(cens_uncens_instance);
96
+
97
+ (*cens_uncens_instance)->time = malloc(size * sizeof(double));
98
+ check_mem_1(cens_uncens_instance);
99
+
100
+ return 0;
101
+
102
+ error_1:
103
+ if((*cens_uncens_instance) == NULL)
104
+ return OUTOF_MEMORY_ERROR;
105
+ else
106
+ free_CENS_UC_NUM((*cens_uncens_instance));
107
+ free((*cens_uncens_instance));
108
+ }
109
+
14
110
 
15
111
  #endif
@@ -6,7 +6,7 @@
6
6
  #include "survival_def.h"
7
7
  #include "survival_utility.h"
8
8
 
9
- double precentile(double* array, int size, double target_percentile)
9
+ int precentile_index(double* array, int size, double target_percentile)
10
10
  {
11
11
  double percentage_each = 1.0 / size;
12
12
  int i;
@@ -14,16 +14,24 @@ double precentile(double* array, int size, double target_percentile)
14
14
 
15
15
  for (i = 1; i <= size; i++)
16
16
  {
17
- if ( i * percentage_each < target_percentile )
17
+ if ( i * percentage_each <= target_percentile )
18
18
  {
19
19
  count++;
20
20
  } else
21
21
  {
22
- return array[count];
22
+ return count;
23
23
  }
24
24
  }
25
25
 
26
- return array[size - 1];
26
+ return size -1;
27
+ }
28
+
29
/* Return the element of `array` at the position chosen by precentile_index()
 * for `target_percentile`. */
double precentile(double* array, int size, double target_percentile)
{
    return array[precentile_index(array, size, target_percentile)];
}
28
36
 
29
37
  int index_less_equal(double* array, int size, double target)
@@ -48,14 +56,14 @@ double log_rank_test(double* time_1, int* censored_1, double* time_2, int* censo
48
56
  array merged_uniq_time_pnts = create_sorted_unique_array(merged_time_pnts.D_ptr, merged_time_pnts.size);
49
57
 
50
58
  //The lengths of Group_N_1 and Group_N_2 are not expected to be same. Step 1. create unique time array which inlcude time points for both
51
- Group_N Group_N_1 = group_N_given_range(time_1, censored_1, size_1, merged_uniq_time_pnts.D_ptr, merged_uniq_time_pnts.size);
52
- Group_N Group_N_2 = group_N_given_range(time_2, censored_2, size_2, merged_uniq_time_pnts.D_ptr, merged_uniq_time_pnts.size);
59
+ CENS_UC_NUM Group_N_1 = group_N_given_range(time_1, censored_1, size_1, merged_uniq_time_pnts.D_ptr, merged_uniq_time_pnts.size);
60
+ CENS_UC_NUM Group_N_2 = group_N_given_range(time_2, censored_2, size_2, merged_uniq_time_pnts.D_ptr, merged_uniq_time_pnts.size);
53
61
 
54
62
  double Z = 0;
55
63
  double V_i_sum = 0;
56
64
 
57
- Group_N combined_Group_N_1;
58
- Group_N combined_Group_N_2;
65
+ CENS_UC_NUM combined_Group_N_1;
66
+ CENS_UC_NUM combined_Group_N_2;
59
67
 
60
68
  combined_Group_N_1.uncensored = (int*) malloc(merged_uniq_time_pnts.size * sizeof(int));
61
69
  combined_Group_N_1.censored = (int*) malloc(merged_uniq_time_pnts.size * sizeof(int));
@@ -152,39 +160,4 @@ double log_rank_test(double* time_1, int* censored_1, double* time_2, int* censo
152
160
  return Z;
153
161
  }
154
162
 
155
- struct curve kaplan_meier(double* time, int* censored, int size)
156
- {
157
-
158
- int i, N;
159
-
160
- Group_N at_risk = group_N_self_range(time, censored, size);
161
-
162
- N = size;
163
-
164
- struct point* KM = (struct point*) malloc(at_risk.size * sizeof(struct point));
165
-
166
- for (i = 0; i < at_risk.size; i++)
167
- {
168
- if (i > 0)
169
- {
170
- N = (N - at_risk.uncensored[i - 1] - at_risk.censored[i - 1]);
171
- KM[i].x = at_risk.time[i];
172
- KM[i].y = 1.0 * (N - at_risk.uncensored[i] - at_risk.censored[i]) / (N - at_risk.censored[i]) * KM[i - 1].y;
173
- } else {
174
- KM[0].x = at_risk.time[i];
175
- KM[0].y = 1.0 * (N - at_risk.uncensored[0] - at_risk.censored[0]) / (N - at_risk.censored[0]);
176
- }
177
- }
178
-
179
- curve KM_curve;
180
- KM_curve.point_array = KM;
181
- KM_curve.size = at_risk.size;
182
-
183
- free(at_risk.uncensored);
184
- free(at_risk.censored);
185
- free(at_risk.time);
186
-
187
- return KM_curve;
188
- }
189
-
190
163
  #endif
@@ -0,0 +1,314 @@
1
+ #ifndef _SURVIVAL_STAT_KAPLAN_MEIER_H_
2
+ #define _SURVIVAL_STAT_KAPLAN_MEIER_H_
3
+
4
+ #include "survival_def.h"
5
+ #include <math.h>
6
+
7
+ /**
8
+ * @brief calculate the number of samples censored or uncenosored (die) at each time ragne
9
+ * @param time Event time array
10
+ * @param censored censored information: positive -> censored; zero or negative -> uncensored
11
+ * @return CENS_UC_NUM structure
12
+ */
13
+ int censored_uncensred_each_time_range(double* time, int* censored, int size, struct CENS_UC_NUM** cens_ucens_number)
14
+ {
15
+ int i, count_at, uncensored_num_at, censored_num_at;
16
+ double tmp, time_at;
17
+
18
+ //sort time and censored based on time together, time can censored array
19
+ struct point* time_censored_array = alloc_points(size);
20
+
21
+ //censored, if censored[] is positive
22
+ for (i = 0; i < size; i++)
23
+ {
24
+ time_censored_array[i].x = time[i];
25
+ if (censored[i] > 0)
26
+ time_censored_array[i].y = 1;
27
+ else
28
+ time_censored_array[i].y = -1;
29
+ }
30
+
31
+ qsort(time_censored_array, size, sizeof(struct point), &point_compare_x);
32
+
33
+ //count number of unique uncensored time point
34
+ int count = 0;
35
+ for (i = 0; i < size; i++)
36
+ { //uncensored
37
+ if (time_censored_array[i].y < 0)
38
+ {
39
+ if (count == 0)
40
+ {
41
+ count++;
42
+ tmp = time_censored_array[i].x;
43
+ }
44
+
45
+ if (count > 0)
46
+ { //unique
47
+ if (time_censored_array[i].x != tmp)
48
+ {
49
+ count++;
50
+ tmp = time_censored_array[i].x;
51
+ }
52
+ }
53
+ }
54
+ }
55
+
56
+ double* unique_uncensored_time = (double *) malloc(count * sizeof(double));
57
+
58
+ count = 0;
59
+
60
+ for (i = 0; i < size; i++)
61
+ {
62
+ if (time_censored_array[i].y < 0)
63
+ {
64
+ if (count == 0)
65
+ {
66
+ unique_uncensored_time[count] = time_censored_array[i].x;
67
+ tmp = time_censored_array[i].x;
68
+ count++;
69
+ }
70
+
71
+ if (count > 0)
72
+ {
73
+ if (time_censored_array[i].x != tmp)
74
+ {
75
+ unique_uncensored_time[count] = time_censored_array[i].x;
76
+ tmp = time_censored_array[i].x;
77
+ count++;
78
+ }
79
+ }
80
+ }
81
+ }
82
+
83
+ int* uncensored_num = (int *) malloc(count * sizeof(int));
84
+ int* censored_num = (int *) malloc(count * sizeof(int));
85
+
86
+ //record current time point
87
+ time_at = unique_uncensored_time[0];
88
+ count_at = 0;
89
+ uncensored_num_at = 0;
90
+ censored_num_at = 0;
91
+
92
+ for (i = 0; i < size; i++)
93
+ {
94
+
95
+ if (time_censored_array[i].x <= time_at + 1e-5)
96
+ {
97
+
98
+ if (time_censored_array[i].y > 0)
99
+ censored_num_at++;
100
+ else
101
+ uncensored_num_at++;
102
+
103
+ //if the last sample is censored, follow block stores counting for last time unique uncensored period
104
+ if (i == size - 1)
105
+ {
106
+ count_at++;
107
+ uncensored_num[count_at] = uncensored_num_at;
108
+ censored_num[count_at] = censored_num_at;
109
+ }
110
+
111
+ } else {
112
+ uncensored_num[count_at] = uncensored_num_at;
113
+ censored_num[count_at] = censored_num_at;
114
+ count_at++;
115
+
116
+ //reset uncensored_num_at and censored_num_at
117
+ uncensored_num_at = 0;
118
+ censored_num_at = 0;
119
+
120
+ //go to next time range
121
+ time_at = unique_uncensored_time[count_at];
122
+
123
+ if (time_censored_array[i].y > 0)
124
+ censored_num_at++;
125
+ else
126
+ uncensored_num_at++;
127
+
128
+ /* If the last sample is censored, follow block stores counting for last time unique uncensored period */
129
+ if (i == size - 1)
130
+ {
131
+ uncensored_num[count_at] = uncensored_num_at;
132
+ censored_num[count_at] = censored_num_at;
133
+ }
134
+ }
135
+ }
136
+
137
+ alloc_CENS_UC_NUM(cens_ucens_number, count);
138
+
139
+ for(i = 0; i < count; i++)
140
+ {
141
+ (*cens_ucens_number)->uncensored[i] = uncensored_num[i];
142
+ (*cens_ucens_number)->censored[i] = censored_num[i];
143
+ (*cens_ucens_number)->time[i] = unique_uncensored_time[i];
144
+ }
145
+
146
+ free(time_censored_array);
147
+ free(uncensored_num);
148
+ free(censored_num);
149
+ free(unique_uncensored_time);
150
+
151
+ return 0;
152
+ }
153
+
154
+ /**
155
+ * @brief calculate the kaplan meier
156
+ * @param time Event time array
157
+ * @param censored censored information: positive -> censored; zero or negative -> uncensored
158
+ * @param size of the time array and censored array
159
+ * @return CENS_UC_NUM structure
160
+ */
161
+ int kaplan_meier(double* time, int* censored, int size, curve* KM_curve)
162
+ {
163
+
164
+ int i, N;
165
+ struct CENS_UC_NUM* cens_ucens_number;
166
+
167
+ censored_uncensred_each_time_range(time, censored, size, &cens_ucens_number);
168
+
169
+ N = size; //total sample number
170
+
171
+ struct point* KM = alloc_points(size);
172
+
173
+ for (i = 0; i < cens_ucens_number->size; i++)
174
+ {
175
+ if (i > 0)
176
+ {
177
+ N = (N - cens_ucens_number->uncensored[i - 1]
178
+ - cens_ucens_number->censored[i - 1]);
179
+
180
+ KM[i].x = cens_ucens_number->time[i];
181
+ KM[i].y = 1.0 * (N - cens_ucens_number->uncensored[i] - cens_ucens_number->censored[i]) / (N - cens_ucens_number->censored[i]) * KM[i - 1].y;
182
+ } else {
183
+ KM[0].x = cens_ucens_number->time[i];
184
+ KM[0].y = 1.0 * (N - cens_ucens_number->uncensored[0] - cens_ucens_number->censored[0]) / (N - cens_ucens_number->censored[0]);
185
+ }
186
+ }
187
+
188
+ KM_curve->point_array = KM;
189
+ KM_curve->size = cens_ucens_number->size;
190
+ //free_CENS_UC_NUM(&cens_ucens_number);
191
+ return 0;
192
+ }
193
+
194
+ /**
195
+ * @brief extend the KM curve based on the last 3 points
196
+ */
197
+ int KM_3p_extrapolation(struct CENS_UC_NUM* cens_uc_num, struct CENS_UC_NUM** updated_cens_uc_num, int sample_size)
198
+ {
199
+ double mean_last_uncensored = 0;
200
+ double mean_last_censored = 0;
201
+ double time_interval_mean = 0;
202
+ int num_left = 0;
203
+ int used_sample_num = 0;
204
+ int extrapolation_size = 0;
205
+ int updated_cens_uc_num_size = 0;
206
+ int i;
207
+
208
+ /* calculate the total number (censored and uncensored) already used */
209
+ for(i = 0; i < cens_uc_num->size; i ++)
210
+ {
211
+ used_sample_num += cens_uc_num->censored[i];
212
+ used_sample_num += cens_uc_num->uncensored[i];
213
+ }
214
+
215
+ /* TODO should error check here */
216
+ num_left = sample_size - used_sample_num;
217
+
218
+ for(i = 0; i < 3; i++)
219
+ {
220
+ /*
221
+ * censored uncensored
222
+ * x_1 y_1 (should be included)
223
+ * x_2 y_2 (should be included)
224
+ * x_3 y_3 (should be included)
225
+ * x_last y_last (not used)
226
+ * why? when we calculate the last 3 time intervals, we need 4 points
227
+ */
228
+ mean_last_uncensored += cens_uc_num->uncensored[cens_uc_num->size - 2 - i];
229
+ mean_last_censored += cens_uc_num->censored[cens_uc_num->size - 2 - i];
230
+ }
231
+
232
+ time_interval_mean = cens_uc_num->time[cens_uc_num->size - 2] - cens_uc_num->time[cens_uc_num->size - 5];
233
+
234
+ mean_last_uncensored = mean_last_uncensored / 3;
235
+ mean_last_censored = mean_last_censored / 3;
236
+ time_interval_mean = time_interval_mean / 3;
237
+
238
+ /* Calculate how many points we should extrapolate */
239
+ extrapolation_size = ceil((double)num_left / (mean_last_uncensored + mean_last_censored));
240
+ updated_cens_uc_num_size = cens_uc_num->size + extrapolation_size;
241
+
242
+ check(alloc_CENS_UC_NUM(updated_cens_uc_num, updated_cens_uc_num_size) == 0, "Failed in allocating CENS_UC_NUM structure");
243
+
244
+ for(i = 0; i < cens_uc_num->size; i++)
245
+ {
246
+ (*updated_cens_uc_num)->censored[i] = cens_uc_num->censored[i];
247
+ (*updated_cens_uc_num)->uncensored[i] = cens_uc_num->uncensored[i];
248
+ (*updated_cens_uc_num)->time[i] = cens_uc_num->time[i];
249
+ }
250
+
251
+ for(i = cens_uc_num->size; i < (cens_uc_num->size + extrapolation_size); i++)
252
+ {
253
+ (*updated_cens_uc_num)->time[i] = (*updated_cens_uc_num)->time[i-1] + time_interval_mean;
254
+
255
+ if (mean_last_uncensored<num_left)
256
+ (*updated_cens_uc_num)->uncensored[i] = mean_last_uncensored;
257
+ else
258
+ (*updated_cens_uc_num)->uncensored[i] = num_left;
259
+
260
+ /* We update the num_left to make sure that the total number of samples in the extrapolation group cannot be larger than the total number of samples */
261
+ num_left = num_left - mean_last_uncensored;
262
+
263
+ if (mean_last_censored<num_left)
264
+ (*updated_cens_uc_num)->censored[i] = mean_last_censored;
265
+ else
266
+ (*updated_cens_uc_num)->censored[i] = num_left;
267
+
268
+ num_left = num_left - mean_last_censored;
269
+ }
270
+
271
+ return 0;
272
+
273
+ error:
274
+ free_CENS_UC_NUM((*updated_cens_uc_num));
275
+ return 1;
276
+ }
277
+
278
+ int kaplan_meier_3p_extrapolation(double* time, int* censored, int size, struct curve* KM_curve)
279
+ {
280
+ int proc_state = 0;
281
+ int i;
282
+ struct CENS_UC_NUM* cens_ucens_number = NULL;
283
+ censored_uncensred_each_time_range(time, censored, size, &cens_ucens_number);
284
+ struct CENS_UC_NUM* updated_cens_ucens_number = NULL;
285
+
286
+ proc_state = KM_3p_extrapolation(cens_ucens_number, &updated_cens_ucens_number, size);
287
+ int N = size;
288
+
289
+ struct point* KM = alloc_points(size);
290
+
291
+ for (i = 0; i < updated_cens_ucens_number->size; i++)
292
+ {
293
+ if (i > 0)
294
+ {
295
+ N = (N - updated_cens_ucens_number->uncensored[i - 1]
296
+ - updated_cens_ucens_number->censored[i - 1]);
297
+
298
+ KM[i].x = updated_cens_ucens_number->time[i];
299
+ KM[i].y = 1.0 * (N - updated_cens_ucens_number->uncensored[i] - updated_cens_ucens_number->censored[i]) / (N - updated_cens_ucens_number->censored[i]) * KM[i - 1].y;
300
+ } else {
301
+ KM[0].x = updated_cens_ucens_number->time[i];
302
+ KM[0].y = 1.0 * (N - updated_cens_ucens_number->uncensored[0] - updated_cens_ucens_number->censored[0]) / (N - updated_cens_ucens_number->censored[0]);
303
+ }
304
+ }
305
+
306
+ KM_curve->point_array = KM;
307
+ KM_curve->size = updated_cens_ucens_number->size;
308
+ //print_curve(KM_curve);
309
+
310
+ free_CENS_UC_NUM(cens_ucens_number);
311
+ free_CENS_UC_NUM(updated_cens_ucens_number);
312
+ return 0;
313
+ }
314
+ #endif