eluka 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/DOCUMENTATION_STANDARDS +39 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +20 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +69 -0
- data/VERSION +1 -0
- data/examples/example.rb +59 -0
- data/ext/libsvm/COPYRIGHT +31 -0
- data/ext/libsvm/FAQ.html +1749 -0
- data/ext/libsvm/Makefile +25 -0
- data/ext/libsvm/Makefile.win +33 -0
- data/ext/libsvm/README +733 -0
- data/ext/libsvm/extconf.rb +1 -0
- data/ext/libsvm/heart_scale +270 -0
- data/ext/libsvm/java/Makefile +25 -0
- data/ext/libsvm/java/libsvm.jar +0 -0
- data/ext/libsvm/java/libsvm/svm.java +2776 -0
- data/ext/libsvm/java/libsvm/svm.m4 +2776 -0
- data/ext/libsvm/java/libsvm/svm_model.java +21 -0
- data/ext/libsvm/java/libsvm/svm_node.java +6 -0
- data/ext/libsvm/java/libsvm/svm_parameter.java +47 -0
- data/ext/libsvm/java/libsvm/svm_print_interface.java +5 -0
- data/ext/libsvm/java/libsvm/svm_problem.java +7 -0
- data/ext/libsvm/java/svm_predict.java +163 -0
- data/ext/libsvm/java/svm_scale.java +350 -0
- data/ext/libsvm/java/svm_toy.java +471 -0
- data/ext/libsvm/java/svm_train.java +318 -0
- data/ext/libsvm/java/test_applet.html +1 -0
- data/ext/libsvm/python/Makefile +4 -0
- data/ext/libsvm/python/README +331 -0
- data/ext/libsvm/python/svm.py +259 -0
- data/ext/libsvm/python/svmutil.py +242 -0
- data/ext/libsvm/svm-predict.c +226 -0
- data/ext/libsvm/svm-scale.c +353 -0
- data/ext/libsvm/svm-toy/gtk/Makefile +22 -0
- data/ext/libsvm/svm-toy/gtk/callbacks.cpp +423 -0
- data/ext/libsvm/svm-toy/gtk/callbacks.h +54 -0
- data/ext/libsvm/svm-toy/gtk/interface.c +164 -0
- data/ext/libsvm/svm-toy/gtk/interface.h +14 -0
- data/ext/libsvm/svm-toy/gtk/main.c +23 -0
- data/ext/libsvm/svm-toy/gtk/svm-toy.glade +238 -0
- data/ext/libsvm/svm-toy/qt/Makefile +17 -0
- data/ext/libsvm/svm-toy/qt/svm-toy.cpp +413 -0
- data/ext/libsvm/svm-toy/windows/svm-toy.cpp +456 -0
- data/ext/libsvm/svm-train.c +376 -0
- data/ext/libsvm/svm.cpp +3060 -0
- data/ext/libsvm/svm.def +19 -0
- data/ext/libsvm/svm.h +105 -0
- data/ext/libsvm/svm.o +0 -0
- data/ext/libsvm/tools/README +149 -0
- data/ext/libsvm/tools/checkdata.py +108 -0
- data/ext/libsvm/tools/easy.py +79 -0
- data/ext/libsvm/tools/grid.py +359 -0
- data/ext/libsvm/tools/subset.py +146 -0
- data/ext/libsvm/windows/libsvm.dll +0 -0
- data/ext/libsvm/windows/svm-predict.exe +0 -0
- data/ext/libsvm/windows/svm-scale.exe +0 -0
- data/ext/libsvm/windows/svm-toy.exe +0 -0
- data/ext/libsvm/windows/svm-train.exe +0 -0
- data/lib/eluka.rb +10 -0
- data/lib/eluka/bijection.rb +23 -0
- data/lib/eluka/data_point.rb +36 -0
- data/lib/eluka/document.rb +47 -0
- data/lib/eluka/feature_vector.rb +86 -0
- data/lib/eluka/features.rb +31 -0
- data/lib/eluka/model.rb +129 -0
- data/lib/fselect.rb +321 -0
- data/lib/grid.rb +25 -0
- data/test/helper.rb +18 -0
- data/test/test_eluka.rb +7 -0
- metadata +214 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
//
|
2
|
+
// svm_model
|
3
|
+
//
|
4
|
+
package libsvm;
|
5
|
+
public class svm_model implements java.io.Serializable
|
6
|
+
{
|
7
|
+
public svm_parameter param; // parameter
|
8
|
+
public int nr_class; // number of classes, = 2 in regression/one class svm
|
9
|
+
public int l; // total #SV
|
10
|
+
public svm_node[][] SV; // SVs (SV[l])
|
11
|
+
public double[][] sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
|
12
|
+
public double[] rho; // constants in decision functions (rho[k*(k-1)/2])
|
13
|
+
public double[] probA; // pariwise probability information
|
14
|
+
public double[] probB;
|
15
|
+
|
16
|
+
// for classification only
|
17
|
+
|
18
|
+
public int[] label; // label of each class (label[k])
|
19
|
+
public int[] nSV; // number of SVs for each class (nSV[k])
|
20
|
+
// nSV[0] + nSV[1] + ... + nSV[k-1] = l
|
21
|
+
};
|
@@ -0,0 +1,47 @@
|
|
1
|
+
package libsvm;
|
2
|
+
/**
 * Parameter set for an SVM: the SVM formulation, the kernel and its
 * coefficients, and the settings that only matter while training.
 *
 * <p>All members are public fields. Instances can be copied with
 * {@link #clone()}; the copy owns its own {@link #weight_label} and
 * {@link #weight} arrays.
 */
public class svm_parameter implements Cloneable,java.io.Serializable
{
    /* svm_type */
    public static final int C_SVC = 0;
    public static final int NU_SVC = 1;
    public static final int ONE_CLASS = 2;
    public static final int EPSILON_SVR = 3;
    public static final int NU_SVR = 4;

    /* kernel_type */
    public static final int LINEAR = 0;
    public static final int POLY = 1;
    public static final int RBF = 2;
    public static final int SIGMOID = 3;
    public static final int PRECOMPUTED = 4;

    /** One of the {@code svm_type} constants above. */
    public int svm_type;
    /** One of the {@code kernel_type} constants above. */
    public int kernel_type;
    public int degree;      // for poly
    public double gamma;    // for poly/rbf/sigmoid
    public double coef0;    // for poly/sigmoid

    // these are for training only
    public double cache_size;   // in MB
    public double eps;          // stopping criteria
    public double C;            // for C_SVC, EPSILON_SVR and NU_SVR
    public int nr_weight;       // for C_SVC
    public int[] weight_label;  // for C_SVC
    public double[] weight;     // for C_SVC
    public double nu;           // for NU_SVC, ONE_CLASS, and NU_SVR
    public double p;            // for EPSILON_SVR
    public int shrinking;       // use the shrinking heuristics
    public int probability;     // do probability estimates

    /**
     * Returns an independent copy of this parameter set.
     *
     * <p>Primitive fields are copied by value. The {@link #weight_label} and
     * {@link #weight} arrays are duplicated (when non-null) so that changing
     * an element through the copy never alters the original, and vice versa.
     *
     * @return a new {@code svm_parameter} with the same field values
     */
    public Object clone()
    {
        try
        {
            svm_parameter copy = (svm_parameter) super.clone();
            // super.clone() only copies the array references; give the copy
            // its own arrays so the two objects do not share mutable state.
            if (weight_label != null)
                copy.weight_label = (int[]) weight_label.clone();
            if (weight != null)
                copy.weight = (double[]) weight.clone();
            return copy;
        }
        catch (CloneNotSupportedException e)
        {
            // Cannot happen: this class implements Cloneable. Fail loudly
            // rather than handing callers a null they will trip over later.
            throw new AssertionError(e);
        }
    }

}
|
@@ -0,0 +1,163 @@
|
|
1
|
+
import libsvm.*;
|
2
|
+
import java.io.*;
|
3
|
+
import java.util.*;
|
4
|
+
|
5
|
+
class svm_predict {
|
6
|
+
private static double atof(String s)
|
7
|
+
{
|
8
|
+
return Double.valueOf(s).doubleValue();
|
9
|
+
}
|
10
|
+
|
11
|
+
private static int atoi(String s)
|
12
|
+
{
|
13
|
+
return Integer.parseInt(s);
|
14
|
+
}
|
15
|
+
|
16
|
+
private static void predict(BufferedReader input, DataOutputStream output, svm_model model, int predict_probability) throws IOException
|
17
|
+
{
|
18
|
+
int correct = 0;
|
19
|
+
int total = 0;
|
20
|
+
double error = 0;
|
21
|
+
double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
|
22
|
+
|
23
|
+
int svm_type=svm.svm_get_svm_type(model);
|
24
|
+
int nr_class=svm.svm_get_nr_class(model);
|
25
|
+
double[] prob_estimates=null;
|
26
|
+
|
27
|
+
if(predict_probability == 1)
|
28
|
+
{
|
29
|
+
if(svm_type == svm_parameter.EPSILON_SVR ||
|
30
|
+
svm_type == svm_parameter.NU_SVR)
|
31
|
+
{
|
32
|
+
System.out.print("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+svm.svm_get_svr_probability(model)+"\n");
|
33
|
+
}
|
34
|
+
else
|
35
|
+
{
|
36
|
+
int[] labels=new int[nr_class];
|
37
|
+
svm.svm_get_labels(model,labels);
|
38
|
+
prob_estimates = new double[nr_class];
|
39
|
+
output.writeBytes("labels");
|
40
|
+
for(int j=0;j<nr_class;j++)
|
41
|
+
output.writeBytes(" "+labels[j]);
|
42
|
+
output.writeBytes("\n");
|
43
|
+
}
|
44
|
+
}
|
45
|
+
while(true)
|
46
|
+
{
|
47
|
+
String line = input.readLine();
|
48
|
+
if(line == null) break;
|
49
|
+
|
50
|
+
StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
|
51
|
+
|
52
|
+
double target = atof(st.nextToken());
|
53
|
+
int m = st.countTokens()/2;
|
54
|
+
svm_node[] x = new svm_node[m];
|
55
|
+
for(int j=0;j<m;j++)
|
56
|
+
{
|
57
|
+
x[j] = new svm_node();
|
58
|
+
x[j].index = atoi(st.nextToken());
|
59
|
+
x[j].value = atof(st.nextToken());
|
60
|
+
}
|
61
|
+
|
62
|
+
double v;
|
63
|
+
if (predict_probability==1 && (svm_type==svm_parameter.C_SVC || svm_type==svm_parameter.NU_SVC))
|
64
|
+
{
|
65
|
+
v = svm.svm_predict_probability(model,x,prob_estimates);
|
66
|
+
output.writeBytes(v+" ");
|
67
|
+
for(int j=0;j<nr_class;j++)
|
68
|
+
output.writeBytes(prob_estimates[j]+" ");
|
69
|
+
output.writeBytes("\n");
|
70
|
+
}
|
71
|
+
else
|
72
|
+
{
|
73
|
+
v = svm.svm_predict(model,x);
|
74
|
+
output.writeBytes(v+"\n");
|
75
|
+
}
|
76
|
+
|
77
|
+
if(v == target)
|
78
|
+
++correct;
|
79
|
+
error += (v-target)*(v-target);
|
80
|
+
sumv += v;
|
81
|
+
sumy += target;
|
82
|
+
sumvv += v*v;
|
83
|
+
sumyy += target*target;
|
84
|
+
sumvy += v*target;
|
85
|
+
++total;
|
86
|
+
}
|
87
|
+
if(svm_type == svm_parameter.EPSILON_SVR ||
|
88
|
+
svm_type == svm_parameter.NU_SVR)
|
89
|
+
{
|
90
|
+
System.out.print("Mean squared error = "+error/total+" (regression)\n");
|
91
|
+
System.out.print("Squared correlation coefficient = "+
|
92
|
+
((total*sumvy-sumv*sumy)*(total*sumvy-sumv*sumy))/
|
93
|
+
((total*sumvv-sumv*sumv)*(total*sumyy-sumy*sumy))+
|
94
|
+
" (regression)\n");
|
95
|
+
}
|
96
|
+
else
|
97
|
+
System.out.print("Accuracy = "+(double)correct/total*100+
|
98
|
+
"% ("+correct+"/"+total+") (classification)\n");
|
99
|
+
}
|
100
|
+
|
101
|
+
private static void exit_with_help()
|
102
|
+
{
|
103
|
+
System.err.print("usage: svm_predict [options] test_file model_file output_file\n"
|
104
|
+
+"options:\n"
|
105
|
+
+"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); one-class SVM not supported yet\n");
|
106
|
+
System.exit(1);
|
107
|
+
}
|
108
|
+
|
109
|
+
public static void main(String argv[]) throws IOException
|
110
|
+
{
|
111
|
+
int i, predict_probability=0;
|
112
|
+
|
113
|
+
// parse options
|
114
|
+
for(i=0;i<argv.length;i++)
|
115
|
+
{
|
116
|
+
if(argv[i].charAt(0) != '-') break;
|
117
|
+
++i;
|
118
|
+
switch(argv[i-1].charAt(1))
|
119
|
+
{
|
120
|
+
case 'b':
|
121
|
+
predict_probability = atoi(argv[i]);
|
122
|
+
break;
|
123
|
+
default:
|
124
|
+
System.err.print("Unknown option: " + argv[i-1] + "\n");
|
125
|
+
exit_with_help();
|
126
|
+
}
|
127
|
+
}
|
128
|
+
if(i>=argv.length-2)
|
129
|
+
exit_with_help();
|
130
|
+
try
|
131
|
+
{
|
132
|
+
BufferedReader input = new BufferedReader(new FileReader(argv[i]));
|
133
|
+
DataOutputStream output = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(argv[i+2])));
|
134
|
+
svm_model model = svm.svm_load_model(argv[i+1]);
|
135
|
+
if(predict_probability == 1)
|
136
|
+
{
|
137
|
+
if(svm.svm_check_probability_model(model)==0)
|
138
|
+
{
|
139
|
+
System.err.print("Model does not support probabiliy estimates\n");
|
140
|
+
System.exit(1);
|
141
|
+
}
|
142
|
+
}
|
143
|
+
else
|
144
|
+
{
|
145
|
+
if(svm.svm_check_probability_model(model)!=0)
|
146
|
+
{
|
147
|
+
System.out.print("Model supports probability estimates, but disabled in prediction.\n");
|
148
|
+
}
|
149
|
+
}
|
150
|
+
predict(input,output,model,predict_probability);
|
151
|
+
input.close();
|
152
|
+
output.close();
|
153
|
+
}
|
154
|
+
catch(FileNotFoundException e)
|
155
|
+
{
|
156
|
+
exit_with_help();
|
157
|
+
}
|
158
|
+
catch(ArrayIndexOutOfBoundsException e)
|
159
|
+
{
|
160
|
+
exit_with_help();
|
161
|
+
}
|
162
|
+
}
|
163
|
+
}
|
@@ -0,0 +1,350 @@
|
|
1
|
+
import libsvm.*;
|
2
|
+
import java.io.*;
|
3
|
+
import java.util.*;
|
4
|
+
import java.text.DecimalFormat;
|
5
|
+
|
6
|
+
/**
 * Command-line tool that linearly rescales the feature values (and, on
 * request, the target values) of a data file and prints the result to stdout.
 *
 * <p>Data lines are tokenized on whitespace and ':' as
 * {@code target index value index value ...}; attribute indices are assumed
 * to start at 1. The scaling ranges can be written to a file with {@code -s}
 * and read back with {@code -r}. That file has an optional section
 * {@code y / y_lower y_upper / y_min y_max}, followed by
 * {@code x / lower upper} and one {@code index min max} line per attribute
 * whose minimum and maximum differ.
 */
class svm_scale
{
    private String line = null;             // most recent line returned by readline()
    private double lower = -1.0;
    private double upper = 1.0;
    private double y_lower;
    private double y_upper;
    private boolean y_scaling = false;
    private double[] feature_max;
    private double[] feature_min;
    private double y_max = -Double.MAX_VALUE;
    private double y_min = Double.MAX_VALUE;
    private int max_index;
    private long num_nonzeros = 0;          // (index, value) pairs seen in the input
    private long new_num_nonzeros = 0;      // non-zero values written after scaling

    /** Prints the usage message to stdout and terminates the JVM with status 1. */
    private static void exit_with_help()
    {
        System.out.print(
         "Usage: svm-scale [options] data_filename\n"
        +"options:\n"
        +"-l lower : x scaling lower limit (default -1)\n"
        +"-u upper : x scaling upper limit (default +1)\n"
        +"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
        +"-s save_filename : save scaling parameters to save_filename\n"
        +"-r restore_filename : restore scaling parameters from restore_filename\n"
        );
        System.exit(1);
    }

    /**
     * Returns {@code argv[i]}, or shows the usage message and exits when the
     * command line ends before the value an option requires.
     */
    private static String option_value(String[] argv, int i)
    {
        if(i >= argv.length)
            exit_with_help();
        return argv[i];
    }

    /** Closes {@code fp} and returns a fresh reader positioned at the start of {@code filename}. */
    private BufferedReader rewind(BufferedReader fp, String filename) throws IOException
    {
        fp.close();
        return new BufferedReader(new FileReader(filename));
    }

    /** Prints the target value followed by a space, rescaled first when y scaling is on. */
    private void output_target(double value)
    {
        if(y_scaling)
        {
            // map the extremes exactly to the limits to avoid rounding error
            if(value == y_min)
                value = y_lower;
            else if(value == y_max)
                value = y_upper;
            else
                value = y_lower + (y_upper-y_lower) *
                        (value-y_min) / (y_max-y_min);
        }

        System.out.print(value + " ");
    }

    /**
     * Prints the scaled {@code index:value} pair for one attribute. Nothing
     * is printed for single-valued attributes or when the scaled value is 0.
     */
    private void output(int index, double value)
    {
        /* skip single-valued attribute */
        if(feature_max[index] == feature_min[index])
            return;

        if(value == feature_min[index])
            value = lower;
        else if(value == feature_max[index])
            value = upper;
        else
            value = lower + (upper-lower) *
                    (value-feature_min[index])/
                    (feature_max[index]-feature_min[index]);

        if(value != 0)
        {
            System.out.print(index + ":" + value + " ");
            new_num_nonzeros++;
        }
    }

    /** Reads the next line into {@link #line} and returns it; null at end of file. */
    private String readline(BufferedReader fp) throws IOException
    {
        line = fp.readLine();
        return line;
    }

    /**
     * Parses the command line, determines the value range of every attribute
     * (from the data, or from a restore file), optionally saves those ranges,
     * and prints the scaled data to stdout.
     *
     * @param argv command-line arguments
     * @throws IOException if reading the inputs or writing the save file fails
     */
    private void run(String []argv) throws IOException
    {
        int i,index;
        BufferedReader fp = null, fp_restore = null;
        String save_filename = null;
        String restore_filename = null;
        String data_filename = null;

        for(i=0;i<argv.length;i++)
        {
            // an empty argument cannot be an option; treat it as the data file name
            if (argv[i].length() == 0 || argv[i].charAt(0) != '-') break;
            ++i;
            String option = argv[i-1];
            // a bare "-" has no option letter and is reported as unknown
            char flag = option.length() > 1 ? option.charAt(1) : '\0';
            switch(flag)
            {
                case 'l': lower = Double.parseDouble(option_value(argv, i)); break;
                case 'u': upper = Double.parseDouble(option_value(argv, i)); break;
                case 'y':
                    y_lower = Double.parseDouble(option_value(argv, i));
                    ++i;
                    y_upper = Double.parseDouble(option_value(argv, i));
                    y_scaling = true;
                    break;
                case 's': save_filename = option_value(argv, i); break;
                case 'r': restore_filename = option_value(argv, i); break;
                default:
                    System.err.println("unknown option");
                    exit_with_help();
            }
        }

        if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
        {
            System.err.println("inconsistent lower/upper specification");
            System.exit(1);
        }
        if(restore_filename != null && save_filename != null)
        {
            System.err.println("cannot use -r and -s simultaneously");
            System.exit(1);
        }

        if(argv.length != i+1)
            exit_with_help();

        data_filename = argv[i];
        try {
            fp = new BufferedReader(new FileReader(data_filename));
        } catch (Exception e) {
            System.err.println("can't open file " + data_filename);
            System.exit(1);
        }

        /* assumption: min index of attributes is 1 */
        /* pass 1: find out max index of attributes */
        max_index = 0;

        if(restore_filename != null)
        {
            int idx;

            try {
                fp_restore = new BufferedReader(new FileReader(restore_filename));
            }
            catch (Exception e) {
                System.err.println("can't open file " + restore_filename);
                System.exit(1);
            }
            // skip the optional three-line "y" section ...
            if(fp_restore.read() == 'y')
            {
                fp_restore.readLine();
                fp_restore.readLine();
                fp_restore.readLine();
            }
            // ... and the two-line "x" header
            fp_restore.readLine();
            fp_restore.readLine();

            String restore_line = null;
            while((restore_line = fp_restore.readLine())!=null)
            {
                StringTokenizer st2 = new StringTokenizer(restore_line);
                idx = Integer.parseInt(st2.nextToken());
                max_index = Math.max(max_index, idx);
            }
            fp_restore = rewind(fp_restore, restore_filename);
        }

        while (readline(fp) != null)
        {
            StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
            st.nextToken();     // target value, not needed in this pass
            while(st.hasMoreTokens())
            {
                index = Integer.parseInt(st.nextToken());
                max_index = Math.max(max_index, index);
                st.nextToken();
                num_nonzeros++;
            }
        }

        try {
            feature_max = new double[(max_index+1)];
            feature_min = new double[(max_index+1)];
        } catch(OutOfMemoryError e) {
            System.err.println("can't allocate enough memory");
            System.exit(1);
        }

        for(i=0;i<=max_index;i++)
        {
            feature_max[i] = -Double.MAX_VALUE;
            feature_min[i] = Double.MAX_VALUE;
        }

        fp = rewind(fp, data_filename);

        /* pass 2: find out min/max value */
        while(readline(fp) != null)
        {
            int next_index = 1;
            double target;
            double value;

            StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
            target = Double.parseDouble(st.nextToken());
            y_max = Math.max(y_max, target);
            y_min = Math.min(y_min, target);

            while (st.hasMoreTokens())
            {
                index = Integer.parseInt(st.nextToken());
                value = Double.parseDouble(st.nextToken());

                // attributes omitted between two listed indices count as 0
                for (i = next_index; i<index; i++)
                {
                    feature_max[i] = Math.max(feature_max[i], 0);
                    feature_min[i] = Math.min(feature_min[i], 0);
                }

                feature_max[index] = Math.max(feature_max[index], value);
                feature_min[index] = Math.min(feature_min[index], value);
                next_index = index + 1;
            }

            // attributes omitted after the last listed index count as 0 as well
            for(i=next_index;i<=max_index;i++)
            {
                feature_max[i] = Math.max(feature_max[i], 0);
                feature_min[i] = Math.min(feature_min[i], 0);
            }
        }

        fp = rewind(fp, data_filename);

        /* pass 2.5: save/restore feature_min/feature_max */
        if(restore_filename != null)
        {
            // fp_restore rewinded in finding max_index
            int idx;
            double fmin, fmax;

            fp_restore.mark(2);             // for reset
            if(fp_restore.read() == 'y')
            {
                fp_restore.readLine();      // pass the '\n' after 'y'
                StringTokenizer st = new StringTokenizer(fp_restore.readLine());
                y_lower = Double.parseDouble(st.nextToken());
                y_upper = Double.parseDouble(st.nextToken());
                st = new StringTokenizer(fp_restore.readLine());
                y_min = Double.parseDouble(st.nextToken());
                y_max = Double.parseDouble(st.nextToken());
                y_scaling = true;
            }
            else
                fp_restore.reset();

            if(fp_restore.read() == 'x') {
                fp_restore.readLine();      // pass the '\n' after 'x'
                StringTokenizer st = new StringTokenizer(fp_restore.readLine());
                lower = Double.parseDouble(st.nextToken());
                upper = Double.parseDouble(st.nextToken());
                String restore_line = null;
                while((restore_line = fp_restore.readLine())!=null)
                {
                    StringTokenizer st2 = new StringTokenizer(restore_line);
                    idx = Integer.parseInt(st2.nextToken());
                    fmin = Double.parseDouble(st2.nextToken());
                    fmax = Double.parseDouble(st2.nextToken());
                    if (idx <= max_index)
                    {
                        feature_min[idx] = fmin;
                        feature_max[idx] = fmax;
                    }
                }
            }
            fp_restore.close();
        }

        if(save_filename != null)
        {
            // Pin the locale: the file is read back with Double.parseDouble,
            // which only accepts '.' as the decimal separator. A default-locale
            // Formatter would write ',' under e.g. a German locale and make the
            // saved ranges unreadable.
            Formatter formatter = new Formatter(new StringBuilder(), Locale.US);
            BufferedWriter fp_save = null;

            try {
                fp_save = new BufferedWriter(new FileWriter(save_filename));
            } catch(IOException e) {
                System.err.println("can't open file " + save_filename);
                System.exit(1);
            }

            if(y_scaling)
            {
                formatter.format("y\n");
                formatter.format("%.16g %.16g\n", y_lower, y_upper);
                formatter.format("%.16g %.16g\n", y_min, y_max);
            }
            formatter.format("x\n");
            formatter.format("%.16g %.16g\n", lower, upper);
            for(i=1;i<=max_index;i++)
            {
                // single-valued attributes are never output, so they are not saved
                if(feature_min[i] != feature_max[i])
                    formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
            }
            fp_save.write(formatter.toString());
            fp_save.close();
        }

        /* pass 3: scale */
        while(readline(fp) != null)
        {
            int next_index = 1;
            double target;
            double value;

            StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
            target = Double.parseDouble(st.nextToken());
            output_target(target);
            while(st.hasMoreTokens())
            {
                index = Integer.parseInt(st.nextToken());
                value = Double.parseDouble(st.nextToken());
                // omitted attributes are zeros and may scale to a non-zero value
                for (i = next_index; i<index; i++)
                    output(i, 0);
                output(index, value);
                next_index = index + 1;
            }

            for(i=next_index;i<= max_index;i++)
                output(i, 0);
            System.out.print("\n");
        }
        if (new_num_nonzeros > num_nonzeros)
            System.err.print(
             "Warning: original #nonzeros " + num_nonzeros+"\n"
            +"         new      #nonzeros " + new_num_nonzeros+"\n"
            +"Use -l 0 if many original feature values are zeros\n");

        fp.close();
    }

    /**
     * Entry point; see the usage message for the accepted arguments.
     *
     * @param argv command-line arguments
     * @throws IOException if reading the inputs or writing the save file fails
     */
    public static void main(String argv[]) throws IOException
    {
        svm_scale s = new svm_scale();
        s.run(argv);
    }
}
|