tomz-liblinear-ruby-swig 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +2 -0
- data/COPYING +24 -0
- data/History.txt +3 -0
- data/Manifest.txt +20 -0
- data/README.rdoc +74 -0
- data/Rakefile +46 -0
- data/ext/blas.h +25 -0
- data/ext/blasp.h +430 -0
- data/ext/daxpy.c +49 -0
- data/ext/ddot.c +50 -0
- data/ext/dnrm2.c +62 -0
- data/ext/dscal.c +44 -0
- data/ext/extconf.rb +17 -0
- data/ext/liblinear_wrap.cxx +4525 -0
- data/ext/linear.cpp +1450 -0
- data/ext/linear.h +69 -0
- data/ext/tron.cpp +214 -0
- data/ext/tron.h +32 -0
- data/lib/linear.rb +349 -0
- data/lib/linear_cv.rb +36 -0
- metadata +85 -0
data/ext/linear.cpp
ADDED
@@ -0,0 +1,1450 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "linear.h"
#include "tron.h"
typedef signed char schar;
template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class S, class T> inline void clone(T*& dst, S* src, int n)
{
    dst = new T[n];
    memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define INF HUGE_VAL

#if 1
int info_on = 0;
static void info(const char *fmt,...)
{
    va_list ap;
    if (info_on==1) {
        va_start(ap,fmt);
        vprintf(fmt,ap);
        va_end(ap);
    }
}
static void info_flush()
{
    if (info_on==1) fflush(stdout);
}
#else
static void info(char *fmt,...) {}
static void info_flush() {}
#endif

class l2_lr_fun : public function
{
public:
    l2_lr_fun(const problem *prob, double Cp, double Cn);
    ~l2_lr_fun();

    double fun(double *w);
    void grad(double *w, double *g);
    void Hv(double *s, double *Hs);

    int get_nr_variable(void);

private:
    void Xv(double *v, double *Xv);
    void XTv(double *v, double *XTv);

    double *C;
    double *z;
    double *D;
    const problem *prob;
};

l2_lr_fun::l2_lr_fun(const problem *prob, double Cp, double Cn)
{
    int i;
    int l=prob->l;
    int *y=prob->y;

    this->prob = prob;

    z = new double[l];
    D = new double[l];
    C = new double[l];

    for (i=0; i<l; i++)
    {
        if (y[i] == 1)
            C[i] = Cp;
        else
            C[i] = Cn;
    }
}

l2_lr_fun::~l2_lr_fun()
{
    delete[] z;
    delete[] D;
    delete[] C;
}


double l2_lr_fun::fun(double *w)
{
    int i;
    double f=0;
    int *y=prob->y;
    int l=prob->l;
    int n=prob->n;

    Xv(w, z);
    for(i=0;i<l;i++)
    {
        double yz = y[i]*z[i];
        if (yz >= 0)
            f += C[i]*log(1 + exp(-yz));
        else
            f += C[i]*(-yz+log(1 + exp(yz)));
    }
    f = 2*f;
    for(i=0;i<n;i++)
        f += w[i]*w[i];
    f /= 2.0;

    return(f);
}

void l2_lr_fun::grad(double *w, double *g)
{
    int i;
    int *y=prob->y;
    int l=prob->l;
    int n=prob->n;

    for(i=0;i<l;i++)
    {
        z[i] = 1/(1 + exp(-y[i]*z[i]));
        D[i] = z[i]*(1-z[i]);
        z[i] = C[i]*(z[i]-1)*y[i];
    }
    XTv(z, g);

    for(i=0;i<n;i++)
        g[i] = w[i] + g[i];
}

int l2_lr_fun::get_nr_variable(void)
{
    return prob->n;
}

void l2_lr_fun::Hv(double *s, double *Hs)
{
    int i;
    int l=prob->l;
    int n=prob->n;
    double *wa = new double[l];

    Xv(s, wa);
    for(i=0;i<l;i++)
        wa[i] = C[i]*D[i]*wa[i];

    XTv(wa, Hs);
    for(i=0;i<n;i++)
        Hs[i] = s[i] + Hs[i];
    delete[] wa;
}

void l2_lr_fun::Xv(double *v, double *Xv)
{
    int i;
    int l=prob->l;
    feature_node **x=prob->x;

    for(i=0;i<l;i++)
    {
        feature_node *s=x[i];
        Xv[i]=0;
        while(s->index!=-1)
        {
            Xv[i]+=v[s->index-1]*s->value;
            s++;
        }
    }
}

void l2_lr_fun::XTv(double *v, double *XTv)
{
    int i;
    int l=prob->l;
    int n=prob->n;
    feature_node **x=prob->x;

    for(i=0;i<n;i++)
        XTv[i]=0;
    for(i=0;i<l;i++)
    {
        feature_node *s=x[i];
        while(s->index!=-1)
        {
            XTv[s->index-1]+=v[i]*s->value;
            s++;
        }
    }
}

class l2loss_svm_fun : public function
{
public:
    l2loss_svm_fun(const problem *prob, double Cp, double Cn);
    ~l2loss_svm_fun();

    double fun(double *w);
    void grad(double *w, double *g);
    void Hv(double *s, double *Hs);

    int get_nr_variable(void);

private:
    void Xv(double *v, double *Xv);
    void subXv(double *v, double *Xv);
    void subXTv(double *v, double *XTv);

    double *C;
    double *z;
    double *D;
    int *I;
    int sizeI;
    const problem *prob;
};

l2loss_svm_fun::l2loss_svm_fun(const problem *prob, double Cp, double Cn)
{
    int i;
    int l=prob->l;
    int *y=prob->y;

    this->prob = prob;

    z = new double[l];
    D = new double[l];
    C = new double[l];
    I = new int[l];

    for (i=0; i<l; i++)
    {
        if (y[i] == 1)
            C[i] = Cp;
        else
            C[i] = Cn;
    }
}

l2loss_svm_fun::~l2loss_svm_fun()
{
    delete[] z;
    delete[] D;
    delete[] C;
    delete[] I;
}

double l2loss_svm_fun::fun(double *w)
{
    int i;
    double f=0;
    int *y=prob->y;
    int l=prob->l;
    int n=prob->n;

    Xv(w, z);
    for(i=0;i<l;i++)
    {
        z[i] = y[i]*z[i];
        double d = 1-z[i];
        if (d > 0)
            f += C[i]*d*d;
    }
    f = 2*f;
    for(i=0;i<n;i++)
        f += w[i]*w[i];
    f /= 2.0;

    return(f);
}

void l2loss_svm_fun::grad(double *w, double *g)
{
    int i;
    int *y=prob->y;
    int l=prob->l;
    int n=prob->n;

    sizeI = 0;
    for (i=0;i<l;i++)
        if (z[i] < 1)
        {
            z[sizeI] = C[i]*y[i]*(z[i]-1);
            I[sizeI] = i;
            sizeI++;
        }
    subXTv(z, g);

    for(i=0;i<n;i++)
        g[i] = w[i] + 2*g[i];
}

int l2loss_svm_fun::get_nr_variable(void)
{
    return prob->n;
}

void l2loss_svm_fun::Hv(double *s, double *Hs)
{
    int i;
    int l=prob->l;
    int n=prob->n;
    double *wa = new double[l];

    subXv(s, wa);
    for(i=0;i<sizeI;i++)
        wa[i] = C[I[i]]*wa[i];

    subXTv(wa, Hs);
    for(i=0;i<n;i++)
        Hs[i] = s[i] + 2*Hs[i];
    delete[] wa;
}

void l2loss_svm_fun::Xv(double *v, double *Xv)
{
    int i;
    int l=prob->l;
    feature_node **x=prob->x;

    for(i=0;i<l;i++)
    {
        feature_node *s=x[i];
        Xv[i]=0;
        while(s->index!=-1)
        {
            Xv[i]+=v[s->index-1]*s->value;
            s++;
        }
    }
}

void l2loss_svm_fun::subXv(double *v, double *Xv)
{
    int i;
    feature_node **x=prob->x;

    for(i=0;i<sizeI;i++)
    {
        feature_node *s=x[I[i]];
        Xv[i]=0;
        while(s->index!=-1)
        {
            Xv[i]+=v[s->index-1]*s->value;
            s++;
        }
    }
}

void l2loss_svm_fun::subXTv(double *v, double *XTv)
{
    int i;
    int n=prob->n;
    feature_node **x=prob->x;

    for(i=0;i<n;i++)
        XTv[i]=0;
    for(i=0;i<sizeI;i++)
    {
        feature_node *s=x[I[i]];
        while(s->index!=-1)
        {
            XTv[s->index-1]+=v[i]*s->value;
            s++;
        }
    }
}

// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//   min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
//     s.t.  \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
//   where e^m_i = 0 if y_i  = m,
//         e^m_i = 1 if y_i != m,
//   C^m_i = C if m  = y_i,
//   C^m_i = 0 if m != y_i,
//   and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w
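//
// (Annotation; a sketch added for clarity, not in the packaged source.)
// For one example i with A_i = x_i^T x_i, solve_sub_problem() below
// minimizes 0.5*A_i*\sum_m alpha_m^2 + \sum_m B_m alpha_m subject to
// alpha_m <= C^m_i and \sum_m alpha_m = 0. The KKT conditions give the
// closed form alpha_m = min(C^m_i, (beta - B_m)/A_i), where beta, the
// multiplier of the equality constraint, is found by the
// sort-and-accumulate loop over D.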
class Solver_MCSVM_CS
{
public:
    Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
    ~Solver_MCSVM_CS();
    void Solve(double *w);
private:
    void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
    bool be_shrunken(int m, int yi, double alpha_i, double minG);
    double *B, *C, *G;
    int n, l;
    int nr_class;
    int max_iter;
    double eps;
    const problem *prob;
};

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps, int max_iter)
{
    this->n = prob->n;
    this->l = prob->l;
    this->nr_class = nr_class;
    this->eps = eps;
    this->max_iter = max_iter;
    this->prob = prob;
    this->C = C;
    this->B = new double[nr_class];
    this->G = new double[nr_class];
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
    delete[] B;
    delete[] G;
}

int compare_double(const void *a, const void *b)
{
    if(*(double *)a > *(double *)b)
        return -1;
    if(*(double *)a < *(double *)b)
        return 1;
    return 0;
}

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
    int r;
    double *D;

    clone(D, B, active_i);
    if(yi < active_i)
        D[yi] += A_i*C_yi;
    qsort(D, active_i, sizeof(double), compare_double);

    double beta = D[0] - A_i*C_yi;
    for(r=1;r<active_i && beta<r*D[r];r++)
        beta += D[r];

    beta /= r;
    for(r=0;r<active_i;r++)
    {
        if(r == yi)
            alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
        else
            alpha_new[r] = min((double)0, (beta - B[r])/A_i);
    }
    delete[] D;
}

bool Solver_MCSVM_CS::be_shrunken(int m, int yi, double alpha_i, double minG)
{
    double bound = 0;
    if(m == yi)
        bound = C[yi];
    if(alpha_i == bound && G[m] < minG)
        return true;
    return false;
}

void Solver_MCSVM_CS::Solve(double *w)
{
    int i, m, s;
    int iter = 0;
    double *alpha = new double[l*nr_class];
    double *alpha_new = new double[nr_class];
    int *index = new int[l];
    double *QD = new double[l];
    int *d_ind = new int[nr_class];
    double *d_val = new double[nr_class];
    int *alpha_index = new int[nr_class*l];
    int *y_index = new int[l];
    int active_size = l;
    int *active_size_i = new int[l];
    double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
    bool start_from_all = true;
    // initial
    for(i=0;i<l*nr_class;i++)
        alpha[i] = 0;
    for(i=0;i<n*nr_class;i++)
        w[i] = 0;
    for(i=0;i<l;i++)
    {
        for(m=0;m<nr_class;m++)
            alpha_index[i*nr_class+m] = m;
        feature_node *xi = prob->x[i];
        QD[i] = 0;
        while(xi->index != -1)
        {
            QD[i] += (xi->value)*(xi->value);
            xi++;
        }
        active_size_i[i] = nr_class;
        y_index[i] = prob->y[i];
        index[i] = i;
    }

    while(iter < max_iter)
    {
        double stopping = -INF;
        for(i=0;i<active_size;i++)
        {
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        }
        for(s=0;s<active_size;s++)
        {
            i = index[s];
            double Ai = QD[i];
            double *alpha_i = &alpha[i*nr_class];
            int *alpha_index_i = &alpha_index[i*nr_class];

            if(Ai > 0)
            {
                for(m=0;m<active_size_i[i];m++)
                    G[m] = 1;
                if(y_index[i] < active_size_i[i])
                    G[y_index[i]] = 0;

                feature_node *xi = prob->x[i];
                while(xi->index!= -1)
                {
                    double *w_i = &w[(xi->index-1)*nr_class];
                    for(m=0;m<active_size_i[i];m++)
                        G[m] += w_i[alpha_index_i[m]]*(xi->value);
                    xi++;
                }

                double minG = INF;
                double maxG = -INF;
                for(m=0;m<active_size_i[i];m++)
                {
                    if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
                        minG = G[m];
                    if(G[m] > maxG)
                        maxG = G[m];
                }
                if(y_index[i] < active_size_i[i])
                    if(alpha_i[prob->y[i]] < C[prob->y[i]] && G[y_index[i]] < minG)
                        minG = G[y_index[i]];

                for(m=0;m<active_size_i[i];m++)
                {
                    if(be_shrunken(m, y_index[i], alpha_i[alpha_index_i[m]], minG))
                    {
                        active_size_i[i]--;
                        while(active_size_i[i]>m)
                        {
                            if(!be_shrunken(active_size_i[i], y_index[i],
                                    alpha_i[alpha_index_i[active_size_i[i]]], minG))
                            {
                                swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
                                swap(G[m], G[active_size_i[i]]);
                                if(y_index[i] == active_size_i[i])
                                    y_index[i] = m;
                                else if(y_index[i] == m)
                                    y_index[i] = active_size_i[i];
                                break;
                            }
                            active_size_i[i]--;
                        }
                    }
                }

                if(active_size_i[i] <= 1)
                {
                    active_size--;
                    swap(index[s], index[active_size]);
                    s--;
                    continue;
                }

                if(maxG-minG <= 1e-12)
                    continue;
                else
                    stopping = max(maxG - minG, stopping);

                for(m=0;m<active_size_i[i];m++)
                    B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];

                solve_sub_problem(Ai, y_index[i], C[prob->y[i]], active_size_i[i], alpha_new);
                int nz_d = 0;
                for(m=0;m<active_size_i[i];m++)
                {
                    double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
                    alpha_i[alpha_index_i[m]] = alpha_new[m];
                    if(fabs(d) >= 1e-12)
                    {
                        d_ind[nz_d] = alpha_index_i[m];
                        d_val[nz_d] = d;
                        nz_d++;
                    }
                }

                xi = prob->x[i];
                while(xi->index != -1)
                {
                    double *w_i = &w[(xi->index-1)*nr_class];
                    for(m=0;m<nz_d;m++)
                        w_i[d_ind[m]] += d_val[m]*xi->value;
                    xi++;
                }
            }
        }

        iter++;
        if(iter % 10 == 0)
        {
            info(".");
            info_flush();
        }

        if(stopping < eps_shrink)
        {
            if(stopping < eps && start_from_all == true)
                break;
            else
            {
                active_size = l;
                for(i=0;i<l;i++)
                    active_size_i[i] = nr_class;
                info("*"); info_flush();
                eps_shrink = max(eps_shrink/2, eps);
                start_from_all = true;
            }
        }
        else
            start_from_all = false;
    }

    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("Warning: reaching max number of iterations\n");

    // calculate objective value
    double v = 0;
    int nSV = 0;
    for(i=0;i<n*nr_class;i++)
        v += w[i]*w[i];
    v = 0.5*v;
    for(i=0;i<l*nr_class;i++)
    {
        v += alpha[i];
        if(fabs(alpha[i]) > 0)
            nSV++;
    }
    for(i=0;i<l;i++)
        v -= alpha[i*nr_class+prob->y[i]];
    info("Objective value = %lf\n",v);
    info("nSV = %d\n",nSV);

    delete [] alpha;
    delete [] alpha_new;
    delete [] index;
    delete [] QD;
    delete [] d_ind;
    delete [] d_val;
    delete [] alpha_index;
    delete [] y_index;
    delete [] active_size_i;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//   min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//     s.t.      0 <= alpha_i <= upper_bound_i,
//
//   where Qij = yi yj xi^T xj and
//   D is a diagonal matrix
//
// In L1-SVM case:
//     upper_bound_i = Cp if y_i = 1
//     upper_bound_i = Cn if y_i = -1
//     D_ii = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     D_ii = 1/(2*Cp) if y_i = 1
//     D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
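//
// (Annotation; a sketch added for clarity, not in the packaged source.)
// Each pass below performs an exact one-variable minimization of the
// dual in coordinate i: with G = y_i*(w^T x_i) - 1 + D_ii*alpha_i and
// QD[i] = x_i^T x_i + D_ii, the update is
//     alpha_i <- min(max(alpha_i - G/QD[i], 0), C),
// after which w += (alpha_i_new - alpha_i_old)*y_i*x_i keeps
// w = \sum_j alpha_j y_j x_j consistent with the new alpha.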

static void solve_linear_c_svc(
    const problem *prob, double *w, double eps,
    double Cp, double Cn, int solver_type)
{
    int l = prob->l;
    int n = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = new double[l];
    int max_iter = 20000;
    int *index = new int[l];
    double *alpha = new double[l];
    schar *y = new schar[l];
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = INF;
    double PGmin_old = -INF;
    double PGmax_new, PGmin_new;

    // default solver_type: L2LOSS_SVM_DUAL
    double diag_p = 0.5/Cp, diag_n = 0.5/Cn;
    double upper_bound_p = INF, upper_bound_n = INF;
    if(solver_type == L1LOSS_SVM_DUAL)
    {
        diag_p = 0; diag_n = 0;
        upper_bound_p = Cp; upper_bound_n = Cn;
    }

    for(i=0; i<n; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        alpha[i] = 0;
        if(prob->y[i] > 0)
        {
            y[i] = +1;
            QD[i] = diag_p;
        }
        else
        {
            y[i] = -1;
            QD[i] = diag_n;
        }

        feature_node *xi = prob->x[i];
        while (xi->index != -1)
        {
            QD[i] += (xi->value)*(xi->value);
            xi++;
        }
        index[i] = i;
    }

    while (iter < max_iter)
    {
        PGmax_new = -INF;
        PGmin_new = INF;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        }

        for (s=0;s<active_size;s++)
        {
            i = index[s];
            G = 0;
            schar yi = y[i];

            feature_node *xi = prob->x[i];
            while(xi->index!= -1)
            {
                G += w[xi->index-1]*(xi->value);
                xi++;
            }
            G = G*yi-1;

            if(yi == 1)
            {
                C = upper_bound_p;
                G += alpha[i]*diag_p;
            }
            else
            {
                C = upper_bound_n;
                G += alpha[i]*diag_n;
            }

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = max(PGmax_new, PG);
            PGmin_new = min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;
                xi = prob->x[i];
                while (xi->index != -1)
                {
                    w[xi->index-1] += d*xi->value;
                    xi++;
                }
            }
        }

        iter++;
        if(iter % 10 == 0)
        {
            info(".");
            info_flush();
        }

        if(PGmax_new - PGmin_new <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                info("*"); info_flush();
                PGmax_old = INF;
                PGmin_old = -INF;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = INF;
        if (PGmin_old >= 0)
            PGmin_old = -INF;
    }

    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("Warning: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for(i=0; i<n; i++)
        v += w[i]*w[i];
    for(i=0; i<l; i++)
    {
        if (y[i] == 1)
            v += alpha[i]*(alpha[i]*diag_p - 2);
        else
            v += alpha[i]*(alpha[i]*diag_n - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    info("Objective value = %lf\n",v/2);
    info("nSV = %d\n",nSV);

    delete [] QD;
    delete [] alpha;
    delete [] y;
    delete [] index;
}

// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
    int l = prob->l;
    int max_nr_class = 16;
    int nr_class = 0;
    int *label = Malloc(int,max_nr_class);
    int *count = Malloc(int,max_nr_class);
    int *data_label = Malloc(int,l);
    int i;

    for(i=0;i<l;i++)
    {
        int this_label = prob->y[i];
        int j;
        for(j=0;j<nr_class;j++)
        {
            if(this_label == label[j])
            {
                ++count[j];
                break;
            }
        }
        data_label[i] = j;
        if(j == nr_class)
        {
            if(nr_class == max_nr_class)
            {
                max_nr_class *= 2;
                label = (int *)realloc(label,max_nr_class*sizeof(int));
                count = (int *)realloc(count,max_nr_class*sizeof(int));
            }
            label[nr_class] = this_label;
            count[nr_class] = 1;
            ++nr_class;
        }
    }

    int *start = Malloc(int,nr_class);
    start[0] = 0;
    for(i=1;i<nr_class;i++)
        start[i] = start[i-1]+count[i-1];
    for(i=0;i<l;i++)
    {
        perm[start[data_label[i]]] = i;
        ++start[data_label[i]];
    }
    start[0] = 0;
    for(i=1;i<nr_class;i++)
        start[i] = start[i-1]+count[i-1];

    *nr_class_ret = nr_class;
    *label_ret = label;
    *start_ret = start;
    *count_ret = count;
    free(data_label);
}

void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
    double eps=param->eps;
    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob->l;i++)
        if(prob->y[i]==+1)
            pos++;
    neg = prob->l - pos;

    function *fun_obj=NULL;
    switch(param->solver_type)
    {
        case L2_LR:
        {
            fun_obj=new l2_lr_fun(prob, Cp, Cn);
            TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
            tron_obj.tron(w);
            delete fun_obj;
            break;
        }
        case L2LOSS_SVM:
        {
            fun_obj=new l2loss_svm_fun(prob, Cp, Cn);
            TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
            tron_obj.tron(w);
            delete fun_obj;
            break;
        }
        case L2LOSS_SVM_DUAL:
            solve_linear_c_svc(prob, w, eps, Cp, Cn, L2LOSS_SVM_DUAL);
            break;
        case L1LOSS_SVM_DUAL:
            solve_linear_c_svc(prob, w, eps, Cp, Cn, L1LOSS_SVM_DUAL);
            break;
        default:
            fprintf(stderr, "Error: unknown solver_type\n");
            break;
    }
}

//
// Interface functions
//
model* train(const problem *prob, const parameter *param)
{
    int i,j;
    int l = prob->l;
    int n = prob->n;
    model *model_ = Malloc(model,1);

    if(prob->bias>=0)
        model_->nr_feature=n-1;
    else
        model_->nr_feature=n;
    model_->param = *param;
    model_->bias = prob->bias;

    int nr_class;
    int *label = NULL;
    int *start = NULL;
    int *count = NULL;
    int *perm = Malloc(int,l);

    // group training data of the same class
    group_classes(prob,&nr_class,&label,&start,&count,perm);

    model_->nr_class=nr_class;
    model_->label = Malloc(int,nr_class);
    for(i=0;i<nr_class;i++)
        model_->label[i] = label[i];

    // calculate weighted C
    double *weighted_C = Malloc(double, nr_class);
    for(i=0;i<nr_class;i++)
        weighted_C[i] = param->C;
    for(i=0;i<param->nr_weight;i++)
    {
        for(j=0;j<nr_class;j++)
            if(param->weight_label[i] == label[j])
                break;
        if(j == nr_class)
            fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
        else
            weighted_C[j] *= param->weight[i];
    }

    // constructing the subproblem
    feature_node **x = Malloc(feature_node *,l);
    for(i=0;i<l;i++)
        x[i] = prob->x[perm[i]];

    int k;
    problem sub_prob;
    sub_prob.l = l;
    sub_prob.n = n;
    sub_prob.x = Malloc(feature_node *,sub_prob.l);
    sub_prob.y = Malloc(int,sub_prob.l);

    for(k=0; k<sub_prob.l; k++)
        sub_prob.x[k] = x[k];

    // multi-class svm by Crammer and Singer
    if(param->solver_type == MCSVM_CS)
    {
        model_->w=Malloc(double, n*nr_class);
        for(i=0;i<nr_class;i++)
            for(j=start[i];j<start[i]+count[i];j++)
                sub_prob.y[j] = i;
        Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
        Solver.Solve(model_->w);
    }
    else
    {
        if(nr_class == 2)
        {
            model_->w=Malloc(double, n);

            int e0 = start[0]+count[0];
            k=0;
            for(; k<e0; k++)
                sub_prob.y[k] = +1;
            for(; k<sub_prob.l; k++)
                sub_prob.y[k] = -1;

            train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
        }
        else
        {
            model_->w=Malloc(double, n*nr_class);
            double *w=Malloc(double, n);
            for(i=0;i<nr_class;i++)
            {
                int si = start[i];
                int ei = si+count[i];

                k=0;
                for(; k<si; k++)
                    sub_prob.y[k] = -1;
                for(; k<ei; k++)
                    sub_prob.y[k] = +1;
                for(; k<sub_prob.l; k++)
                    sub_prob.y[k] = -1;

                train_one(&sub_prob, param, w, weighted_C[i], param->C);

                for(int j=0;j<n;j++)
                    model_->w[j*nr_class+i] = w[j];
            }
            free(w);
        }

    }

    free(x);
    free(label);
    free(start);
    free(count);
    free(perm);
    free(sub_prob.x);
    free(sub_prob.y);
    free(weighted_C);
    return model_;
}

void destroy_model(struct model *model_)
{
    if(model_->w != NULL)
        free(model_->w);
    if(model_->label != NULL)
        free(model_->label);
    free(model_);
}

const char *solver_type_table[]=
{
    "L2_LR", "L2LOSS_SVM_DUAL", "L2LOSS_SVM", "L1LOSS_SVM_DUAL", "MCSVM_CS", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
{
    int i;
    int nr_feature=model_->nr_feature;
    int n;
    const parameter& param = model_->param;

    if(model_->bias>=0)
        n=nr_feature+1;
    else
        n=nr_feature;
    FILE *fp = fopen(model_file_name,"w");
    if(fp==NULL) return -1;

    int nr_w;
    if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
        nr_w=1;
    else
        nr_w=model_->nr_class;

    fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
    fprintf(fp, "nr_class %d\n", model_->nr_class);
    fprintf(fp, "label");
    for(i=0; i<model_->nr_class; i++)
        fprintf(fp, " %d", model_->label[i]);
    fprintf(fp, "\n");

    fprintf(fp, "nr_feature %d\n", nr_feature);

    fprintf(fp, "bias %.16g\n", model_->bias);

    fprintf(fp, "w\n");
    for(i=0; i<n; i++)
    {
        int j;
        for(j=0; j<nr_w; j++)
            fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
        fprintf(fp, "\n");
    }

    if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
    else return 0;
}

struct model *load_model(const char *model_file_name)
{
    FILE *fp = fopen(model_file_name,"r");
    if(fp==NULL) return NULL;

    int i;
    int nr_feature;
    int n;
    int nr_class;
    double bias;
    model *model_ = Malloc(model,1);
    parameter& param = model_->param;

    model_->label = NULL;

    char cmd[81];
    while(1)
    {
        fscanf(fp,"%80s",cmd);
        if(strcmp(cmd,"solver_type")==0)
        {
            fscanf(fp,"%80s",cmd);
            int i;
            for(i=0;solver_type_table[i];i++)
            {
                if(strcmp(solver_type_table[i],cmd)==0)
                {
                    param.solver_type=i;
                    break;
                }
            }
            if(solver_type_table[i] == NULL)
            {
                fprintf(stderr,"unknown solver type.\n");
                free(model_->label);
                free(model_);
                return NULL;
            }
        }
        else if(strcmp(cmd,"nr_class")==0)
        {
            fscanf(fp,"%d",&nr_class);
            model_->nr_class=nr_class;
        }
        else if(strcmp(cmd,"nr_feature")==0)
        {
            fscanf(fp,"%d",&nr_feature);
            model_->nr_feature=nr_feature;
        }
        else if(strcmp(cmd,"bias")==0)
        {
            fscanf(fp,"%lf",&bias);
            model_->bias=bias;
        }
        else if(strcmp(cmd,"w")==0)
        {
            break;
        }
        else if(strcmp(cmd,"label")==0)
        {
            int nr_class = model_->nr_class;
            model_->label = Malloc(int,nr_class);
            for(int i=0;i<nr_class;i++)
                fscanf(fp,"%d",&model_->label[i]);
        }
        else
        {
            fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
            free(model_);
            return NULL;
        }
    }

    nr_feature=model_->nr_feature;
    if(model_->bias>=0)
        n=nr_feature+1;
    else
        n=nr_feature;

    int nr_w;
    if(nr_class==2 && param.solver_type != MCSVM_CS)
        nr_w = 1;
    else
        nr_w = nr_class;

    model_->w=Malloc(double, n*nr_w);
    for(i=0; i<n; i++)
    {
        int j;
        for(j=0; j<nr_w; j++)
            fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
        fscanf(fp, "\n");
    }
    if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;

    return model_;
}

int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
    int idx;
    int n;
    if(model_->bias>=0)
        n=model_->nr_feature+1;
    else
        n=model_->nr_feature;
    double *w=model_->w;
    int nr_class=model_->nr_class;
    int i;
    int nr_w;
    if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
        nr_w = 1;
    else
        nr_w = nr_class;

    const feature_node *lx=x;
    for(i=0;i<nr_w;i++)
        dec_values[i] = 0;
    for(; (idx=lx->index)!=-1; lx++)
    {
        // the dimension of testing data may exceed that of training
        if(idx<=n)
            for(i=0;i<nr_w;i++)
                dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
    }

    if(nr_class==2)
        return (dec_values[0]>0)?model_->label[0]:model_->label[1];
    else
    {
        int dec_max_idx = 0;
        for(i=1;i<nr_class;i++)
        {
            if(dec_values[i] > dec_values[dec_max_idx])
                dec_max_idx = i;
        }
        return model_->label[dec_max_idx];
    }
}

int predict(const model *model_, const feature_node *x)
{
    double *dec_values = Malloc(double, model_->nr_class);
    int label=predict_values(model_, x, dec_values);
    free(dec_values);
    return label;
}

int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
{
    if(model_->param.solver_type==L2_LR)
    {
        int i;
        int nr_class=model_->nr_class;
        int nr_w;
        if(nr_class==2)
            nr_w = 1;
        else
            nr_w = nr_class;

        int label=predict_values(model_, x, prob_estimates);
        for(i=0;i<nr_w;i++)
            prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

        if(nr_class==2) // for binary classification
            prob_estimates[1]=1.-prob_estimates[0];
        else
        {
            double sum=0;
            for(i=0; i<nr_class; i++)
                sum+=prob_estimates[i];

            for(i=0; i<nr_class; i++)
                prob_estimates[i]=prob_estimates[i]/sum;
        }

        return label;
    }
    else
        return 0;
}

void destroy_param(parameter* param)
{
    if(param->weight_label != NULL)
        free(param->weight_label);
    if(param->weight != NULL)
        free(param->weight);
}

const char *check_parameter(const problem *prob, const parameter *param)
{
    if(param->eps <= 0)
        return "eps <= 0";

    if(param->C <= 0)
        return "C <= 0";

    if(param->solver_type != L2_LR
        && param->solver_type != L2LOSS_SVM_DUAL
        && param->solver_type != L2LOSS_SVM
        && param->solver_type != L1LOSS_SVM_DUAL
        && param->solver_type != MCSVM_CS)
        return "unknown solver type";

    return NULL;
}

void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
{
    int i;
    int *fold_start = Malloc(int,nr_fold+1);
    int l = prob->l;
    int *perm = Malloc(int,l);

    for(i=0;i<l;i++) perm[i]=i;
    for(i=0;i<l;i++)
    {
        int j = i+rand()%(l-i);
        swap(perm[i],perm[j]);
    }
    for(i=0;i<=nr_fold;i++)
        fold_start[i]=i*l/nr_fold;

    for(i=0;i<nr_fold;i++)
    {
        int begin = fold_start[i];
        int end = fold_start[i+1];
        int j,k;
        struct problem subprob;

        subprob.bias = prob->bias;
        subprob.n = prob->n;
        subprob.l = l-(end-begin);
        subprob.x = Malloc(struct feature_node*,subprob.l);
        subprob.y = Malloc(int,subprob.l);

        k=0;
        for(j=0;j<begin;j++)
        {
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];
            ++k;
        }
        for(j=end;j<l;j++)
        {
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];
            ++k;
        }
        struct model *submodel = train(&subprob,param);
        for(j=begin;j<end;j++)
            target[perm[j]] = predict(submodel,prob->x[perm[j]]);
        destroy_model(submodel);
        free(subprob.x);
        free(subprob.y);
    }
    free(fold_start);
    free(perm);
}

int get_nr_feature(const model *model_)
{
    return model_->nr_feature;
}

int get_nr_class(const model *model_)
{
    return model_->nr_class;
}

void get_labels(const model *model_, int* label)
{
    if (model_->label != NULL)
        for(int i=0;i<model_->nr_class;i++)
            label[i] = model_->label[i];
}
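
For orientation, the following is a minimal sketch of how the C API defined above (check_parameter, train, predict, destroy_model) is typically driven from C++. It is illustrative only and not part of the packaged sources; the feature values, solver choice, and parameter settings are assumptions, and it assumes feature_node declares index before value, as in LIBLINEAR's linear.h.

#include <cstdio>
#include "linear.h"

int main()
{
    // Two sparse training examples over three features; index -1 ends a row.
    feature_node row1[] = {{1, 1.0}, {3, 0.5}, {-1, 0.0}};
    feature_node row2[] = {{2, 1.0}, {-1, 0.0}};
    feature_node *rows[] = {row1, row2};
    int labels[] = {+1, -1};

    problem prob;
    prob.l = 2;     // number of training examples
    prob.n = 3;     // number of features
    prob.x = rows;
    prob.y = labels;
    prob.bias = -1; // a negative bias means no bias term is appended

    parameter param;
    param.solver_type = L2LOSS_SVM_DUAL;
    param.C = 1;
    param.eps = 0.1;
    param.nr_weight = 0;      // no per-class weight overrides
    param.weight_label = NULL;
    param.weight = NULL;

    const char *err = check_parameter(&prob, &param);
    if (err) { fprintf(stderr, "parameter error: %s\n", err); return 1; }

    model *model_ = train(&prob, &param);
    printf("predicted label: %d\n", predict(model_, row1));
    destroy_model(model_);
    return 0;
}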