liblinear-ruby 1.0.2 → 1.0.3
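The hunks below are against the bundled LIBLINEAR C++ solver source (linear.cpp in the upstream distribution) and appear to track an upstream LIBLINEAR update: a diagonal preconditioner for the trust-region Newton solver, warm-startable regression training, a joint C/p parameter search (find_parameters), and full round-trip precision when saving models.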

@@ -20,8 +20,8 @@ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
 	dst = new T[n];
 	memcpy((void *)dst,(void *)src,sizeof(T)*n);
 }
-#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 #define INF HUGE_VAL
+#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 
 static void print_string_stdout(const char *s)
 {
@@ -91,6 +91,7 @@ public:
 	void Hv(double *s, double *Hs);
 
 	int get_nr_variable(void);
+	void get_diag_preconditioner(double *M);
 
 private:
 	void Xv(double *v, double *Xv);
@@ -169,6 +170,27 @@ int l2r_lr_fun::get_nr_variable(void)
 	return prob->n;
 }
 
+void l2r_lr_fun::get_diag_preconditioner(double *M)
+{
+	int i;
+	int l = prob->l;
+	int w_size=get_nr_variable();
+	feature_node **x = prob->x;
+
+	for (i=0; i<w_size; i++)
+		M[i] = 1;
+
+	for (i=0; i<l; i++)
+	{
+		feature_node *s = x[i];
+		while (s->index!=-1)
+		{
+			M[s->index-1] += s->value*s->value*C[i]*D[i];
+			s++;
+		}
+	}
+}
+
 void l2r_lr_fun::Hv(double *s, double *Hs)
 {
 	int i;
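The get_diag_preconditioner additions (this one for logistic loss; the l2r_l2_svc_fun variant further down) expose the diagonal of the Hessian H = I + X^T diag(C_i*D_i) X, which is exactly what the loop above accumulates into M. A diagonal like this is typically consumed as a Jacobi preconditioner inside conjugate gradient, dividing each residual component by M[i] so that features with very different column norms converge at similar rates. A minimal sketch of that pattern, assuming a Hessian-vector product like the Hv these classes already provide; the helper below is illustrative, not code from this diff:

    // Sketch: Jacobi-preconditioned CG for H*d = -g (hypothetical helper).
    // Hv computes the Hessian-vector product; M holds the Hessian diagonal
    // as produced by get_diag_preconditioner.
    #include <cmath>

    static void pcg_sketch(int n, const double *g, const double *M,
                           void Hv(const double *, double *),
                           double *d, double eps, int max_iter)
    {
        double *r = new double[n], *z = new double[n];
        double *p = new double[n], *Hp = new double[n];
        double rz = 0;
        for (int i = 0; i < n; i++)
        {
            d[i] = 0;
            r[i] = -g[i];       // residual with initial guess d = 0
            z[i] = r[i] / M[i]; // preconditioned residual
            p[i] = z[i];
            rz += r[i] * z[i];
        }
        for (int iter = 0; iter < max_iter; iter++)
        {
            Hv(p, Hp);
            double pHp = 0;
            for (int i = 0; i < n; i++)
                pHp += p[i] * Hp[i];
            double alpha = rz / pHp, rnorm = 0;
            for (int i = 0; i < n; i++)
            {
                d[i] += alpha * p[i];
                r[i] -= alpha * Hp[i];
                rnorm += r[i] * r[i];
            }
            if (sqrt(rnorm) < eps)
                break;
            double rz_new = 0;
            for (int i = 0; i < n; i++)
            {
                z[i] = r[i] / M[i];
                rz_new += r[i] * z[i];
            }
            double beta = rz_new / rz;
            rz = rz_new;
            for (int i = 0; i < n; i++)
                p[i] = z[i] + beta * p[i];
        }
        delete[] r; delete[] z; delete[] p; delete[] Hp;
    }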
@@ -225,6 +247,7 @@ public:
 	void Hv(double *s, double *Hs);
 
 	int get_nr_variable(void);
+	void get_diag_preconditioner(double *M);
 
 protected:
 	void Xv(double *v, double *Xv);
@@ -304,6 +327,27 @@ int l2r_l2_svc_fun::get_nr_variable(void)
 	return prob->n;
 }
 
+void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
+{
+	int i;
+	int w_size=get_nr_variable();
+	feature_node **x = prob->x;
+
+	for (i=0; i<w_size; i++)
+		M[i] = 1;
+
+	for (i=0; i<sizeI; i++)
+	{
+		int idx = I[i];
+		feature_node *s = x[idx];
+		while (s->index!=-1)
+		{
+			M[s->index-1] += s->value*s->value*C[idx]*2;
+			s++;
+		}
+	}
+}
+
 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
 {
 	int i;
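In this L2-loss SVC version the accumulation runs only over the active set I (instances with nonzero loss) and weights each instance by 2*C[idx], matching the generalized Hessian of the squared hinge loss, I + 2 * X_I^T diag(C) X_I.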
@@ -1356,7 +1400,7 @@ static void solve_l1r_l2_svc(
 	double Gmax_new, Gnorm1_new;
 	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
 	double d_old, d_diff;
-	double loss_old, loss_new;
+	double loss_old = 0, loss_new;
 	double appxcond, cond;
 
 	int *index = new int[w_size];
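Giving loss_old a definite initial value is presumably a warning fix: in the line search it is only assigned on some branches, so the compiler cannot prove it is initialized before use and may emit a maybe-uninitialized warning.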
@@ -2223,7 +2267,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 static double calc_start_C(const problem *prob, const parameter *param)
 {
 	int i;
-	double xTx,max_xTx;
+	double xTx, max_xTx;
 	max_xTx = 0;
 	for(i=0; i<prob->l; i++)
 	{
@@ -2244,10 +2288,157 @@ static double calc_start_C(const problem *prob, const parameter *param)
 		min_C = 1.0 / (prob->l * max_xTx);
 	else if(param->solver_type == L2R_L2LOSS_SVC)
 		min_C = 1.0 / (2 * prob->l * max_xTx);
+	else if(param->solver_type == L2R_L2LOSS_SVR)
+	{
+		double sum_y, loss, y_abs;
+		double delta2 = 0.1;
+		sum_y = 0, loss = 0;
+		for(i=0; i<prob->l; i++)
+		{
+			y_abs = fabs(prob->y[i]);
+			sum_y += y_abs;
+			loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
+		}
+		if(loss > 0)
+			min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
+		else
+			min_C = INF;
+	}
 
 	return pow( 2, floor(log(min_C) / log(2.0)) );
 }
 
+static double calc_max_p(const problem *prob, const parameter *param)
+{
+	int i;
+	double max_p = 0.0;
+	for(i = 0; i < prob->l; i++)
+		max_p = max(max_p, fabs(prob->y[i]));
+
+	return max_p;
+}
+
+static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
+{
+	// variables for CV
+	int i;
+	double *target = Malloc(double, prob->l);
+
+	// variables for warm start
+	double ratio = 2;
+	double **prev_w = Malloc(double*, nr_fold);
+	for(i = 0; i < nr_fold; i++)
+		prev_w[i] = NULL;
+	int num_unchanged_w = 0;
+	void (*default_print_string) (const char *) = liblinear_print_string;
+
+	if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
+		*best_score = 0.0;
+	else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
+		*best_score = INF;
+	*best_C = start_C;
+
+	param_tmp->C = start_C;
+	while(param_tmp->C <= max_C)
+	{
+		//Output disabled for running CV at a particular C
+		set_print_string_function(&print_null);
+
+		for(i=0; i<nr_fold; i++)
+		{
+			int j;
+			int begin = fold_start[i];
+			int end = fold_start[i+1];
+
+			param_tmp->init_sol = prev_w[i];
+			struct model *submodel = train(&subprob[i],param_tmp);
+
+			int total_w_size;
+			if(submodel->nr_class == 2)
+				total_w_size = subprob[i].n;
+			else
+				total_w_size = subprob[i].n * submodel->nr_class;
+
+			if(prev_w[i] == NULL)
+			{
+				prev_w[i] = Malloc(double, total_w_size);
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+			else if(num_unchanged_w >= 0)
+			{
+				double norm_w_diff = 0;
+				for(j=0; j<total_w_size; j++)
+				{
+					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
+					prev_w[i][j] = submodel->w[j];
+				}
+				norm_w_diff = sqrt(norm_w_diff);
+
+				if(norm_w_diff > 1e-15)
+					num_unchanged_w = -1;
+			}
+			else
+			{
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+
+			for(j=begin; j<end; j++)
+				target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+
+			free_and_destroy_model(&submodel);
+		}
+		set_print_string_function(default_print_string);
+
+		if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
+		{
+			int total_correct = 0;
+			for(i=0; i<prob->l; i++)
+				if(target[i] == prob->y[i])
+					++total_correct;
+			double current_rate = (double)total_correct/prob->l;
+			if(current_rate > *best_score)
+			{
+				*best_C = param_tmp->C;
+				*best_score = current_rate;
+			}
+
+			info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
+		}
+		else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
+		{
+			double total_error = 0.0;
+			for(i=0; i<prob->l; i++)
+			{
+				double y = prob->y[i];
+				double v = target[i];
+				total_error += (v-y)*(v-y);
+			}
+			double current_error = total_error/prob->l;
+			if(current_error < *best_score)
+			{
+				*best_C = param_tmp->C;
+				*best_score = current_error;
+			}
+
+			info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
+		}
+
+		num_unchanged_w++;
+		if(num_unchanged_w == 5)
+			break;
+		param_tmp->C = param_tmp->C*ratio;
+	}
+
+	if(param_tmp->C > max_C)
+		info("warning: maximum C reached.\n");
+	free(target);
+	for(i=0; i<nr_fold; i++)
+		free(prev_w[i]);
+	free(prev_w);
+}
+
 
 //
 // Interface functions
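The C search is now the static helper find_parameter_C, which takes precomputed CV folds so the new find_parameters entry point (further down) can reuse the same folds while sweeping the SVR parameter p. Two behavioral details are visible in this hunk: the warm-start early stop now fires after 5 consecutive rounds in which every fold's solution is unchanged (the code it replaces, removed below, used 3), and for L2R_L2LOSS_SVR the starting C is derived from the epsilon-insensitive loss at w = 0, degenerating to INF (no search needed) when that loss is already zero.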
@@ -2270,8 +2461,14 @@ model* train(const problem *prob, const parameter *param)
 	if(check_regression_model(model_))
 	{
 		model_->w = Malloc(double, w_size);
-		for(i=0; i<w_size; i++)
-			model_->w[i] = 0;
+
+		if(param->init_sol != NULL)
+			for(i=0;i<w_size;i++)
+				model_->w[i] = param->init_sol[i];
+		else
+			for(i=0;i<w_size;i++)
+				model_->w[i] = 0;
+
 		model_->nr_class = 2;
 		model_->label = NULL;
 		train_one(prob, param, model_->w, 0, 0);
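train previously zero-initialized regression weights unconditionally; honoring param->init_sol here is what allows the warm-started C search above to cover L2R_L2LOSS_SVR as well as the classification solvers.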
@@ -2458,25 +2655,17 @@ void cross_validation(const problem *prob, const parameter *param, int nr_fold,
 	free(perm);
 }
 
-void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
+
+void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
 {
-	// variables for CV
+	// prepare CV folds
+
 	int i;
 	int *fold_start;
 	int l = prob->l;
 	int *perm = Malloc(int, l);
-	double *target = Malloc(double, prob->l);
 	struct problem *subprob = Malloc(problem,nr_fold);
 
-	// variables for warm start
-	double ratio = 2;
-	double **prev_w = Malloc(double*, nr_fold);
-	for(i = 0; i < nr_fold; i++)
-		prev_w[i] = NULL;
-	int num_unchanged_w = 0;
-	struct parameter param1 = *param;
-	void (*default_print_string) (const char *) = liblinear_print_string;
-
 	if (nr_fold > l)
 	{
 		nr_fold = l;
@@ -2520,93 +2709,60 @@ void find_parameter_C(const problem *prob, const parameter *param, int nr_fold,
 
 	}
 
-	*best_rate = 0;
-	if(start_C <= 0)
-		start_C = calc_start_C(prob,param);
-	param1.C = start_C;
-
-	while(param1.C <= max_C)
+	struct parameter param_tmp = *param;
+	*best_p = -1;
+	if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
 	{
-		//Output disabled for running CV at a particular C
-		set_print_string_function(&print_null);
+		if(start_C <= 0)
+			start_C = calc_start_C(prob, &param_tmp);
+		double max_C = 1024;
+		start_C = min(start_C, max_C);
+		double best_C_tmp, best_score_tmp;
+
+		find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
+
+		*best_C = best_C_tmp;
+		*best_score = best_score_tmp;
+	}
+	else if(param->solver_type == L2R_L2LOSS_SVR)
+	{
+		double max_p = calc_max_p(prob, &param_tmp);
+		int num_p_steps = 20;
+		double max_C = 1048576;
+		*best_score = INF;
 
-		for(i=0; i<nr_fold; i++)
+		i = num_p_steps-1;
+		if(start_p > 0)
+			i = min((int)(start_p/(max_p/num_p_steps)), i);
+		for(; i >= 0; i--)
 		{
-			int j;
-			int begin = fold_start[i];
-			int end = fold_start[i+1];
-
-			param1.init_sol = prev_w[i];
-			struct model *submodel = train(&subprob[i],&param1);
-
-			int total_w_size;
-			if(submodel->nr_class == 2)
-				total_w_size = subprob[i].n;
-			else
-				total_w_size = subprob[i].n * submodel->nr_class;
-
-			if(prev_w[i] == NULL)
-			{
-				prev_w[i] = Malloc(double, total_w_size);
-				for(j=0; j<total_w_size; j++)
-					prev_w[i][j] = submodel->w[j];
-			}
-			else if(num_unchanged_w >= 0)
-			{
-				double norm_w_diff = 0;
-				for(j=0; j<total_w_size; j++)
-				{
-					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
-					prev_w[i][j] = submodel->w[j];
-				}
-				norm_w_diff = sqrt(norm_w_diff);
-
-				if(norm_w_diff > 1e-15)
-					num_unchanged_w = -1;
-			}
+			param_tmp.p = i*max_p/num_p_steps;
+			double start_C_tmp;
+			if(start_C <= 0)
+				start_C_tmp = calc_start_C(prob, &param_tmp);
 			else
+				start_C_tmp = start_C;
+			start_C_tmp = min(start_C_tmp, max_C);
+			double best_C_tmp, best_score_tmp;
+
+			find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
+
+			if(best_score_tmp < *best_score)
 			{
-				for(j=0; j<total_w_size; j++)
-					prev_w[i][j] = submodel->w[j];
+				*best_p = param_tmp.p;
+				*best_C = best_C_tmp;
+				*best_score = best_score_tmp;
 			}
-
-			for(j=begin; j<end; j++)
-				target[perm[j]] = predict(submodel,prob->x[perm[j]]);
-
-			free_and_destroy_model(&submodel);
 		}
-		set_print_string_function(default_print_string);
-
-		int total_correct = 0;
-		for(i=0; i<prob->l; i++)
-			if(target[i] == prob->y[i])
-				++total_correct;
-		double current_rate = (double)total_correct/prob->l;
-		if(current_rate > *best_rate)
-		{
-			*best_C = param1.C;
-			*best_rate = current_rate;
-		}
-
-		info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
-		num_unchanged_w++;
-		if(num_unchanged_w == 3)
-			break;
-		param1.C = param1.C*ratio;
 	}
 
-	if(param1.C > max_C && max_C > start_C)
-		info("warning: maximum C reached.\n");
 	free(fold_start);
 	free(perm);
-	free(target);
 	for(i=0; i<nr_fold; i++)
 	{
 		free(subprob[i].x);
 		free(subprob[i].y);
-		free(prev_w[i]);
 	}
-	free(prev_w);
 	free(subprob);
 }
 
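Taken together, find_parameters replaces the old find_parameter_C in the public interface: for L2R_LR and L2R_L2LOSS_SVC it searches C only (best_p is left at -1), while for L2R_L2LOSS_SVR it sweeps p from large to small and reruns the warm-started C search at each step, keeping the (C, p) pair with the lowest cross-validation mean squared error. A hedged usage sketch, assuming prob and param were prepared in the usual way; the wrapper itself is illustrative, not part of this diff:

    #include <cstdio>
    #include "linear.h"

    // Pass start_C <= 0 and start_p <= 0 to let the library choose its own
    // starting points (calc_start_C, and the full sweep over p).
    static void tune(const struct problem *prob, const struct parameter *param)
    {
        double best_C, best_p, best_score;
        find_parameters(prob, param, 5 /* nr_fold */, -1, -1,
                        &best_C, &best_p, &best_score);
        if (param->solver_type == L2R_L2LOSS_SVR)
            printf("best C=%g p=%g (CV mean squared error %g)\n",
                   best_C, best_p, best_score);
        else
            printf("best C=%g (CV accuracy %g%%)\n",
                   best_C, 100.0 * best_score);
    }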
@@ -2748,14 +2904,14 @@ int save_model(const char *model_file_name, const struct model *model_)
 
 	fprintf(fp, "nr_feature %d\n", nr_feature);
 
-	fprintf(fp, "bias %.16g\n", model_->bias);
+	fprintf(fp, "bias %.17g\n", model_->bias);
 
 	fprintf(fp, "w\n");
 	for(i=0; i<w_size; i++)
 	{
 		int j;
 		for(j=0; j<nr_w; j++)
-			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
+			fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
 		fprintf(fp, "\n");
 	}
 
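The move from %.16g to %.17g matters for round-tripping: 17 significant digits (DBL_DECIMAL_DIG in C11) are the minimum that guarantee an IEEE-754 double printed as text parses back to the identical value, so a model saved and reloaded keeps bit-identical weights. A quick standalone check, not part of this diff:

    #include <cstdio>
    #include <cstdlib>

    int main()
    {
        double w = 0.1 + 0.2; // 0.30000000000000004..., not exactly 0.3
        char buf[64];
        snprintf(buf, sizeof buf, "%.17g", w); // with %.16g this prints "0.3"
        double back = strtod(buf, NULL);
        printf("%s round-trips: %s\n", buf, back == w ? "yes" : "no");
        return 0;
    }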
@@ -2802,6 +2958,11 @@ struct model *load_model(const char *model_file_name)
 	double bias;
 	model *model_ = Malloc(model,1);
 	parameter& param = model_->param;
+	// parameters for training only won't be assigned, but arrays are assigned as NULL for safety
+	param.nr_weight = 0;
+	param.weight_label = NULL;
+	param.weight = NULL;
+	param.init_sol = NULL;
 
 	model_->label = NULL;
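Zeroing these training-only fields means any cleanup path that frees weight_label, weight, or init_sol is safe to run on a model obtained from load_model rather than train; previously those pointers were left uninitialized garbage from Malloc.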