RubyGems - sstat - Versions diffs - 0.0.2 → 0.0.3 - Mend

sstat 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +4 -4
data/bin/#console# +14 -0
data/ext/extconf.rb +4 -0
data/ext/lib/distribution.h +274 -0
data/ext/lib/global_utility.h +17 -0
data/ext/lib/survival.h +6 -0
data/ext/lib/survival_def.h +47 -0
data/ext/lib/survival_func.h +204 -0
data/ext/lib/survival_utility.h +292 -0
data/ext/lib/type_def.h +35 -0
data/ext/sstat/lib/histogram/histogram.h +8 -0
data/ext/sstat/lib/histogram/histogram_error.h +35 -0
data/ext/sstat/lib/histogram/histogram_stat.h +73 -0
data/ext/sstat/lib/histogram/histogram_type.h +14 -0
data/ext/sstat/lib/survival.h +1 -0
data/ext/sstat/lib/survival_def.h +100 -4
data/ext/sstat/lib/survival_func.h +16 -43
data/ext/sstat/lib/survival_kaplan_meier.h +314 -0
data/ext/sstat/lib/survival_utility.h +2 -123
data/ext/sstat/lib/type_def.h +15 -0
data/ext/sstat/sstat.c +80 -14
data/ext/sstat/sstat.h +20 -9
data/lib/simple_statistics/version.rb +1 -1
data/lib/sstat.so +0 -0
metadata +17 -3
data/ext/sstat/Makefile +0 -238

data/ext/sstat/lib/survival.h CHANGED

@@ -2,5 +2,6 @@
 #define _SURVIVAL_STAT_H_
 #include "survival_func.h"
+#include "survival_kaplan_meier.h"
 #endif

data/ext/sstat/lib/survival_def.h CHANGED

@@ -2,14 +2,110 @@
 #define _SURVIVAL_STAT_DEF_H_
 #include "type_def.h"
-//define survival stat data
-typedef struct Group_N
+/* Debug macro from http://c.learncodethehardway.org/book/ex20.html
+ *
+ * debug(M, ...)       prints "DEBUG file:line: M" to stderr; expands to nothing when NDEBUG is defined.
+ * clean_errno()       "None" when errno is 0, otherwise strerror(errno).
+ * log_err / log_warn  print to stderr with file, line and clean_errno(); log_info omits errno.
+ * check(A, M, ...)    when A is false: log_err(M, ...), reset errno to 0 and `goto error`.
+ * check_1(A, M, ...)  same as check() but jumps to the label `error_1`.
+ * sentinel(M, ...)    unconditionally logs, resets errno and jumps to `error`.
+ * check_mem / check_mem_1  check() / check_1() with the message "Out of memory.".
+ * check_debug(A, M, ...)   like check() but reports through debug() instead of log_err().
+ *
+ * A function using check*() or sentinel() must define the matching label itself.
+ * The `##__VA_ARGS__` form is a GNU extension.
+ * NOTE(review): the check macros expand to a bare `if (...) { ... }` and sentinel to a bare
+ * block instead of `do { ... } while (0)`, so they should not be placed directly before an `else`.
+ *
+ * enum ERRORS lists the error codes of this header; alloc_CENS_UC_NUM() returns OUTOF_MEMORY_ERROR.
+ */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#ifdef NDEBUG
+#define debug(M, ...)
+#else
+#define debug(M, ...) fprintf(stderr, "DEBUG %s:%d: " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+#endif
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(M, ...) fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+#define log_warn(M, ...) fprintf(stderr, "[WARN] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+#define log_info(M, ...) fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+#define check(A, M, ...) if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error; }
+#define check_1(A, M, ...) if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error_1; }
+#define sentinel(M, ...)  { log_err(M, ##__VA_ARGS__); errno=0; goto error; }
+#define check_mem(A) check((A), "Out of memory.")
+#define check_mem_1(A) check_1((A), "Out of memory.")
+#define check_debug(A, M, ...) if(!(A)) { debug(M, ##__VA_ARGS__); errno=0; goto error; }
+enum ERRORS {
+	OUTOF_MEMORY_ERROR = 1,
+	NOT_EMPTY_ERROR = 2
+};
+/**
+ * @brief stores the number of censored and uncensored samples for each time range
+ *
+ * alloc_CENS_UC_NUM() allocates the three arrays with `size` entries each, and
+ * entry i of every array describes the same time range. Release the whole
+ * structure with free_CENS_UC_NUM().
+ */
+typedef struct CENS_UC_NUM
 {
 	int * uncensored;
 	int * censored;
-	double* time;
+	double* time; //time value of each range, one entry per range
 	int size;
-} Group_N;
+} CENS_UC_NUM;
+/**
+ * @brief release a CENS_UC_NUM structure together with its three arrays
+ * @param instance structure to release; a NULL pointer is accepted and ignored
+ */
+void free_CENS_UC_NUM(struct CENS_UC_NUM* instance)
+{
+	if (instance == NULL)
+		return;
+	/* free() ignores NULL, so the members need no individual guard */
+	free(instance->time);
+	free(instance->censored);
+	free(instance->uncensored);
+	free(instance);
+}
+/**
+ * @brief write every time range of a CENS_UC_NUM structure to stdout, one line per range
+ * @param cens_uncens_instance structure to print; must not be NULL
+ */
+void print_CENS_UC_NUM(struct CENS_UC_NUM *cens_uncens_instance)
+{
+	const int total = cens_uncens_instance->size;
+	int row = 0;
+	puts("Start to print out Group N: ");
+	while (row < total)
+	{
+		double at_time = cens_uncens_instance->time[row];
+		int uncensored_here = cens_uncens_instance->uncensored[row];
+		int censored_here = cens_uncens_instance->censored[row];
+		printf("Time : %f -- Uncensored : %i -- Censored : %i \n",
+			at_time, uncensored_here, censored_here);
+		row++;
+	}
+}
+/**
+ * @brief allocate a CENS_UC_NUM structure whose three arrays hold `size` entries each
+ *
+ * The array contents are left uninitialized; only the `size` field is set.
+ *
+ * @param cens_uncens_instance receives the new structure; set to NULL on failure.
+ *        The caller releases it with free_CENS_UC_NUM().
+ * @param size number of time ranges; must not be negative
+ * @return 0 on success, OUTOF_MEMORY_ERROR if size is negative or an allocation fails
+ */
+int alloc_CENS_UC_NUM(struct CENS_UC_NUM** cens_uncens_instance, int size)
+{
+	struct CENS_UC_NUM* instance = NULL;
+	size_t slots;
+
+	/* never leave the caller with a dangling or half-built pointer */
+	*cens_uncens_instance = NULL;
+	check_1(size >= 0, "Invalid CENS_UC_NUM size: %d", size);
+	/* malloc(0) may legally return NULL; reserve one slot so that an empty
+	 * table is not mistaken for an out-of-memory condition */
+	slots = (size > 0) ? (size_t) size : 1;
+
+	instance = malloc(sizeof(*instance));
+	check_mem_1(instance);
+	instance->size = size;
+	/* clear the members first so the error path can free a partial structure */
+	instance->uncensored = NULL;
+	instance->censored = NULL;
+	instance->time = NULL;
+
+	instance->uncensored = malloc(slots * sizeof(int));
+	check_mem_1(instance->uncensored);
+	instance->censored = malloc(slots * sizeof(int));
+	check_mem_1(instance->censored);
+	instance->time = malloc(slots * sizeof(double));
+	check_mem_1(instance->time);
+
+	*cens_uncens_instance = instance;
+	return 0;
+error_1:
+	/* free_CENS_UC_NUM() accepts NULL and releases the structure exactly once */
+	free_CENS_UC_NUM(instance);
+	return OUTOF_MEMORY_ERROR;
+}
 #endif

data/ext/sstat/lib/survival_func.h CHANGED

@@ -6,7 +6,7 @@
 #include "survival_def.h"
 #include "survival_utility.h"
-double precentile(double* array, int size, double target_percentile)
+int precentile_index(double* array, int size, double target_percentile)
 {
 	double percentage_each = 1.0 / size;
 	int i;
@@ -14,16 +14,24 @@ double precentile(double* array, int size, double target_percentile)
 	for (i = 1; i <= size; i++)
 	{
-		if ( i * percentage_each < target_percentile )
+		if ( i * percentage_each <= target_percentile )
 		{
 			count++;
 		} else
 		{
-			return array[count];
+			return count;
 		}
 	}
-	return array[size - 1];
+	return size -1;
+}
+/**
+ * @brief value of `array` at the position selected by precentile_index()
+ * @param array values to pick from
+ * @param size number of entries in array
+ * @param target_percentile percentile passed through to precentile_index()
+ * @return array[precentile_index(array, size, target_percentile)]
+ * NOTE(review): presumably size must be positive, otherwise the index is out of range - confirm.
+ */
+double precentile(double* array, int size, double target_percentile)
+{
+	return array[precentile_index(array, size, target_percentile)];
+}
 int index_less_equal(double* array, int size, double target)
@@ -48,14 +56,14 @@ double log_rank_test(double* time_1, int* censored_1, double* time_2, int* censo
 	array merged_uniq_time_pnts = create_sorted_unique_array(merged_time_pnts.D_ptr, merged_time_pnts.size);
 	//The lengths of Group_N_1 and Group_N_2 are not expected to be the same. Step 1. create a unique time array which includes the time points of both groups
-	Group_N Group_N_1 = group_N_given_range(time_1, censored_1, size_1, merged_uniq_time_pnts.D_ptr, merged_uniq_time_pnts.size);
-	Group_N Group_N_2 = group_N_given_range(time_2, censored_2, size_2, merged_uniq_time_pnts.D_ptr, merged_uniq_time_pnts.size);
+	CENS_UC_NUM Group_N_1 = group_N_given_range(time_1, censored_1, size_1, merged_uniq_time_pnts.D_ptr, merged_uniq_time_pnts.size);
+	CENS_UC_NUM Group_N_2 = group_N_given_range(time_2, censored_2, size_2, merged_uniq_time_pnts.D_ptr, merged_uniq_time_pnts.size);
 	double Z = 0;
 	double V_i_sum = 0;
-	Group_N combined_Group_N_1;
-	Group_N combined_Group_N_2;
+	CENS_UC_NUM combined_Group_N_1;
+	CENS_UC_NUM combined_Group_N_2;
 	combined_Group_N_1.uncensored = (int*) malloc(merged_uniq_time_pnts.size * sizeof(int));
 	combined_Group_N_1.censored = (int*) malloc(merged_uniq_time_pnts.size * sizeof(int));
@@ -152,39 +160,4 @@ double log_rank_test(double* time_1, int* censored_1, double* time_2, int* censo
 	return Z;
 }
-struct curve kaplan_meier(double* time, int* censored, int size)
-{
-	int i, N;
-	Group_N at_risk = group_N_self_range(time, censored, size);
-	N = size;
-	struct point* KM = (struct point*) malloc(at_risk.size * sizeof(struct point));
-	for (i = 0; i < at_risk.size; i++)
-	{
-		if (i > 0)
-		{
-			N = (N - at_risk.uncensored[i - 1] - at_risk.censored[i - 1]);
-			KM[i].x = at_risk.time[i];
-			KM[i].y = 1.0 * (N - at_risk.uncensored[i] - at_risk.censored[i]) / (N - at_risk.censored[i]) * KM[i - 1].y;
-		} else {
-			KM[0].x = at_risk.time[i];
-			KM[0].y = 1.0 * (N - at_risk.uncensored[0] - at_risk.censored[0]) / (N - at_risk.censored[0]);
-		}
-	}
-	curve KM_curve;
-	KM_curve.point_array = KM;
-	KM_curve.size = at_risk.size;
-	free(at_risk.uncensored);
-	free(at_risk.censored);
-	free(at_risk.time);
-	return KM_curve;
-}
 #endif

data/ext/sstat/lib/survival_kaplan_meier.h ADDED

@@ -0,0 +1,314 @@
+#ifndef _SURVIVAL_STAT_KAPLAN_MEIER_H_
+#define _SURVIVAL_STAT_KAPLAN_MEIER_H_
+#include "survival_def.h"
+#include <math.h>
+/**
+ * @brief calculate the number of samples censored or uncensored (die) in each time range
+ *
+ * The samples are sorted by time. Every distinct uncensored time closes one
+ * time range, and a sample is counted in the current range while its time is
+ * not larger than that closing time (with a 1e-5 tolerance). Censored samples
+ * later than the last uncensored time belong to no range and are not counted.
+ *
+ * @param time Event time array
+ * @param censored censored information: positive -> censored; zero or negative -> uncensored
+ * @param size number of entries in time and censored
+ * @param cens_ucens_number receives a newly allocated CENS_UC_NUM structure with one
+ *        entry per distinct uncensored time (possibly zero entries); set to NULL on
+ *        failure. The caller releases it with free_CENS_UC_NUM().
+ * @return 0 on success, OUTOF_MEMORY_ERROR when an allocation fails
+ */
+int censored_uncensred_each_time_range(double* time, int* censored, int size,  struct CENS_UC_NUM** cens_ucens_number)
+{
+	int i;
+	int count = 0;
+	int count_at = 0;
+	double* unique_uncensored_time = NULL;
+	struct point* time_censored_array = NULL;
+
+	*cens_ucens_number = NULL;
+
+	if (size > 0)
+	{
+		//sort time and censored together, ordered by time
+		time_censored_array = alloc_points(size);
+		check_mem(time_censored_array);
+		//there cannot be more distinct uncensored times than samples
+		unique_uncensored_time = malloc((size_t) size * sizeof(double));
+		check_mem(unique_uncensored_time);
+
+		//y encodes the state: 1 -> censored, -1 -> uncensored
+		for (i = 0; i < size; i++)
+		{
+			time_censored_array[i].x = time[i];
+			if (censored[i] > 0)
+				time_censored_array[i].y = 1;
+			else
+				time_censored_array[i].y = -1;
+		}
+		qsort(time_censored_array, size, sizeof(struct point), &point_compare_x);
+
+		//collect the distinct uncensored time points in ascending order
+		for (i = 0; i < size; i++)
+		{
+			if (time_censored_array[i].y < 0
+				&& (count == 0 || time_censored_array[i].x != unique_uncensored_time[count - 1]))
+			{
+				unique_uncensored_time[count] = time_censored_array[i].x;
+				count++;
+			}
+		}
+	}
+
+	check(alloc_CENS_UC_NUM(cens_ucens_number, count) == 0 && (*cens_ucens_number) != NULL,
+		"Failed in allocating CENS_UC_NUM structure");
+	for (i = 0; i < count; i++)
+	{
+		(*cens_ucens_number)->uncensored[i] = 0;
+		(*cens_ucens_number)->censored[i] = 0;
+		(*cens_ucens_number)->time[i] = unique_uncensored_time[i];
+	}
+
+	//count each sample in the time range it falls into
+	for (i = 0; i < size && count_at < count; i++)
+	{
+		if (time_censored_array[i].x > unique_uncensored_time[count_at] + 1e-5)
+		{
+			//go to next time range
+			count_at++;
+			//samples after the last uncensored time belong to no range
+			if (count_at == count)
+				break;
+		}
+		if (time_censored_array[i].y > 0)
+			(*cens_ucens_number)->censored[count_at]++;
+		else
+			(*cens_ucens_number)->uncensored[count_at]++;
+	}
+
+	free(time_censored_array);
+	free(unique_uncensored_time);
+	return 0;
+error:
+	free(time_censored_array);
+	free(unique_uncensored_time);
+	free_CENS_UC_NUM((*cens_ucens_number));
+	(*cens_ucens_number) = NULL;
+	return OUTOF_MEMORY_ERROR;
+}
+/**
+ * @brief calculate the Kaplan-Meier curve
+ *
+ * One curve point is produced per distinct uncensored time: x is the time and
+ * y is the running product of (at risk - uncensored) / at risk, where the
+ * samples censored in the range are removed from the risk set first.
+ *
+ * @param time Event time array
+ * @param censored censored information: positive -> censored; zero or negative -> uncensored
+ * @param size size of the time array and censored array
+ * @param KM_curve receives the point array (obtained from alloc_points(); ownership
+ *        passes to the caller) and the number of points. On failure point_array is
+ *        NULL and size is 0.
+ * @return 0 on success, non-zero on failure
+ */
+int kaplan_meier(double* time, int* censored, int size, curve* KM_curve)
+{
+	int i;
+	int N = size; //samples still at risk when the current range starts
+	double survival = 1.0;
+	struct point* KM = NULL;
+	struct CENS_UC_NUM* cens_ucens_number = NULL;
+
+	KM_curve->point_array = NULL;
+	KM_curve->size = 0;
+
+	check(censored_uncensred_each_time_range(time, censored, size, &cens_ucens_number) == 0
+		&& cens_ucens_number != NULL, "Failed in counting censored and uncensored samples");
+	//reserve at least one point so an empty input is not reported as out of memory
+	KM = alloc_points(size > 0 ? size : 1);
+	check_mem(KM);
+
+	for (i = 0; i < cens_ucens_number->size; i++)
+	{
+		int uncensored_at = cens_ucens_number->uncensored[i];
+		int censored_at = cens_ucens_number->censored[i];
+
+		KM[i].x = cens_ucens_number->time[i];
+		survival = 1.0 * (N - uncensored_at - censored_at) / (N - censored_at) * survival;
+		KM[i].y = survival;
+		//everybody counted in this range has left the risk set
+		N = N - uncensored_at - censored_at;
+	}
+	KM_curve->point_array = KM;
+	KM_curve->size = cens_ucens_number->size;
+	//the counting table is only needed while the curve is built
+	free_CENS_UC_NUM(cens_ucens_number);
+	return 0;
+error:
+	free_CENS_UC_NUM(cens_ucens_number);
+	return 1;
+}
+/**
+ * @brief extend the censored/uncensored table based on the last 3 time intervals
+ *
+ * The mean number of uncensored and censored samples and the mean interval
+ * length are taken from the three ranges before the last one. Ranges with
+ * these mean values are then appended until the samples not yet covered by
+ * cens_uc_num (sample_size minus all counted samples) are used up.
+ *
+ * @param cens_uc_num table to extend; must hold at least 5 time ranges
+ * @param updated_cens_uc_num receives a newly allocated table containing the original
+ *        ranges followed by the extrapolated ones; set to NULL on failure. The caller
+ *        releases it with free_CENS_UC_NUM().
+ * @param sample_size total number of samples
+ * @return 0 on success, 1 on failure
+ */
+int KM_3p_extrapolation(struct CENS_UC_NUM* cens_uc_num, struct CENS_UC_NUM** updated_cens_uc_num, int sample_size)
+{
+	double mean_last_uncensored = 0;
+	double mean_last_censored = 0;
+	double time_interval_mean = 0;
+	int num_left = 0;
+	int used_sample_num = 0;
+	int extrapolation_size = 0;
+	int updated_cens_uc_num_size = 0;
+	int last;
+	int i;
+
+	*updated_cens_uc_num = NULL;
+	/* the interval computation below reads entry size - 5, so 5 ranges are the minimum */
+	check(cens_uc_num != NULL && cens_uc_num->size >= 5,
+		"At least 5 time ranges are required for the 3 point extrapolation");
+
+	/* calculate the total number (censored and uncensored) already used */
+	for(i = 0; i < cens_uc_num->size; i ++)
+	{
+		used_sample_num += cens_uc_num->censored[i];
+		used_sample_num += cens_uc_num->uncensored[i];
+	}
+	num_left = sample_size - used_sample_num;
+	/* an inconsistent sample_size must not shrink the table */
+	if (num_left < 0)
+		num_left = 0;
+
+	/*
+	* censored uncensored
+	*	x_1		y_1	(should be included)
+	*	x_2		y_2	(should be included)
+	*	x_3		y_3	(should be included)
+	*	x_last	y_last (not used)
+	*/
+	last = cens_uc_num->size - 2;
+	for(i = 0; i < 3; i++)
+	{
+		mean_last_uncensored += cens_uc_num->uncensored[last - i];
+		mean_last_censored += cens_uc_num->censored[last - i];
+	}
+	time_interval_mean = cens_uc_num->time[last] - cens_uc_num->time[last - 3];
+	mean_last_uncensored = mean_last_uncensored / 3;
+	mean_last_censored = mean_last_censored / 3;
+	time_interval_mean = time_interval_mean / 3;
+
+	check(mean_last_uncensored + mean_last_censored > 0,
+		"No samples in the time ranges used for extrapolation");
+	/* Calculate how many points we should extrapolate */
+	extrapolation_size = ceil((double)num_left / (mean_last_uncensored + mean_last_censored));
+	updated_cens_uc_num_size = cens_uc_num->size + extrapolation_size;
+	check(alloc_CENS_UC_NUM(updated_cens_uc_num, updated_cens_uc_num_size) == 0, "Failed in allocating CENS_UC_NUM structure");
+
+	for(i = 0; i < cens_uc_num->size; i++)
+	{
+		(*updated_cens_uc_num)->censored[i] = cens_uc_num->censored[i];
+		(*updated_cens_uc_num)->uncensored[i] = cens_uc_num->uncensored[i];
+		(*updated_cens_uc_num)->time[i] = cens_uc_num->time[i];
+	}
+	for(i = cens_uc_num->size; i < updated_cens_uc_num_size; i++)
+	{
+		(*updated_cens_uc_num)->time[i] = (*updated_cens_uc_num)->time[i-1] + time_interval_mean;
+		if (mean_last_uncensored<num_left)
+			(*updated_cens_uc_num)->uncensored[i] = mean_last_uncensored;
+		else
+			(*updated_cens_uc_num)->uncensored[i] = num_left;
+		/* We update the num_left to make sure that the total number of samples in the extrapolation group cannot be larger than the total number of samples */
+		num_left = num_left - mean_last_uncensored;
+		/* once the samples are used up the remaining counts are 0, never negative */
+		if (num_left < 0)
+			num_left = 0;
+		if (mean_last_censored<num_left)
+			(*updated_cens_uc_num)->censored[i] =  mean_last_censored;
+		else
+			(*updated_cens_uc_num)->censored[i] =  num_left;
+		num_left = num_left - mean_last_censored;
+		if (num_left < 0)
+			num_left = 0;
+	}
+	return 0;
+error:
+	free_CENS_UC_NUM((*updated_cens_uc_num));
+	(*updated_cens_uc_num) = NULL;
+	return 1;
+}
+/**
+ * @brief calculate the Kaplan-Meier curve over the time ranges extended by KM_3p_extrapolation()
+ *
+ * @param time Event time array
+ * @param censored censored information: positive -> censored; zero or negative -> uncensored
+ * @param size size of the time array and censored array
+ * @param KM_curve receives the point array (obtained from alloc_points(); ownership
+ *        passes to the caller) and the number of points. On failure point_array is
+ *        NULL and size is 0.
+ * @return 0 on success, non-zero on failure
+ */
+int kaplan_meier_3p_extrapolation(double* time, int* censored, int size, struct curve* KM_curve)
+{
+	int proc_state = 0;
+	int i;
+	int N = size; //samples still at risk when the current range starts
+	double survival = 1.0;
+	struct point* KM = NULL;
+	struct CENS_UC_NUM* cens_ucens_number = NULL;
+	struct CENS_UC_NUM* updated_cens_ucens_number = NULL;
+
+	KM_curve->point_array = NULL;
+	KM_curve->size = 0;
+
+	check(censored_uncensred_each_time_range(time, censored, size, &cens_ucens_number) == 0
+		&& cens_ucens_number != NULL, "Failed in counting censored and uncensored samples");
+
+	proc_state = KM_3p_extrapolation(cens_ucens_number, &updated_cens_ucens_number, size);
+	//on failure the callee has already released its partial result; do not free it again
+	if (proc_state != 0)
+		updated_cens_ucens_number = NULL;
+	check(proc_state == 0 && updated_cens_ucens_number != NULL, "Failed in extrapolating the last time ranges");
+
+	//size the curve by the number of points actually written
+	KM = alloc_points(updated_cens_ucens_number->size > 0 ? updated_cens_ucens_number->size : 1);
+	check_mem(KM);
+
+	for (i = 0; i < updated_cens_ucens_number->size; i++)
+	{
+		int uncensored_at = updated_cens_ucens_number->uncensored[i];
+		int censored_at = updated_cens_ucens_number->censored[i];
+		int at_risk = N - censored_at;
+
+		KM[i].x = updated_cens_ucens_number->time[i];
+		//with nobody left at risk the factor would be 0/0; carry the previous value forward
+		if (at_risk > 0)
+			survival = 1.0 * (at_risk - uncensored_at) / at_risk * survival;
+		KM[i].y = survival;
+		N = N - uncensored_at - censored_at;
+	}
+	KM_curve->point_array = KM;
+	KM_curve->size = updated_cens_ucens_number->size;
+	free_CENS_UC_NUM(cens_ucens_number);
+	free_CENS_UC_NUM(updated_cens_ucens_number);
+	return 0;
+error:
+	free_CENS_UC_NUM(cens_ucens_number);
+	free_CENS_UC_NUM(updated_cens_ucens_number);
+	return 1;
+}
+#endif