eluka 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/DOCUMENTATION_STANDARDS +39 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +20 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +69 -0
- data/VERSION +1 -0
- data/examples/example.rb +59 -0
- data/ext/libsvm/COPYRIGHT +31 -0
- data/ext/libsvm/FAQ.html +1749 -0
- data/ext/libsvm/Makefile +25 -0
- data/ext/libsvm/Makefile.win +33 -0
- data/ext/libsvm/README +733 -0
- data/ext/libsvm/extconf.rb +1 -0
- data/ext/libsvm/heart_scale +270 -0
- data/ext/libsvm/java/Makefile +25 -0
- data/ext/libsvm/java/libsvm.jar +0 -0
- data/ext/libsvm/java/libsvm/svm.java +2776 -0
- data/ext/libsvm/java/libsvm/svm.m4 +2776 -0
- data/ext/libsvm/java/libsvm/svm_model.java +21 -0
- data/ext/libsvm/java/libsvm/svm_node.java +6 -0
- data/ext/libsvm/java/libsvm/svm_parameter.java +47 -0
- data/ext/libsvm/java/libsvm/svm_print_interface.java +5 -0
- data/ext/libsvm/java/libsvm/svm_problem.java +7 -0
- data/ext/libsvm/java/svm_predict.java +163 -0
- data/ext/libsvm/java/svm_scale.java +350 -0
- data/ext/libsvm/java/svm_toy.java +471 -0
- data/ext/libsvm/java/svm_train.java +318 -0
- data/ext/libsvm/java/test_applet.html +1 -0
- data/ext/libsvm/python/Makefile +4 -0
- data/ext/libsvm/python/README +331 -0
- data/ext/libsvm/python/svm.py +259 -0
- data/ext/libsvm/python/svmutil.py +242 -0
- data/ext/libsvm/svm-predict.c +226 -0
- data/ext/libsvm/svm-scale.c +353 -0
- data/ext/libsvm/svm-toy/gtk/Makefile +22 -0
- data/ext/libsvm/svm-toy/gtk/callbacks.cpp +423 -0
- data/ext/libsvm/svm-toy/gtk/callbacks.h +54 -0
- data/ext/libsvm/svm-toy/gtk/interface.c +164 -0
- data/ext/libsvm/svm-toy/gtk/interface.h +14 -0
- data/ext/libsvm/svm-toy/gtk/main.c +23 -0
- data/ext/libsvm/svm-toy/gtk/svm-toy.glade +238 -0
- data/ext/libsvm/svm-toy/qt/Makefile +17 -0
- data/ext/libsvm/svm-toy/qt/svm-toy.cpp +413 -0
- data/ext/libsvm/svm-toy/windows/svm-toy.cpp +456 -0
- data/ext/libsvm/svm-train.c +376 -0
- data/ext/libsvm/svm.cpp +3060 -0
- data/ext/libsvm/svm.def +19 -0
- data/ext/libsvm/svm.h +105 -0
- data/ext/libsvm/svm.o +0 -0
- data/ext/libsvm/tools/README +149 -0
- data/ext/libsvm/tools/checkdata.py +108 -0
- data/ext/libsvm/tools/easy.py +79 -0
- data/ext/libsvm/tools/grid.py +359 -0
- data/ext/libsvm/tools/subset.py +146 -0
- data/ext/libsvm/windows/libsvm.dll +0 -0
- data/ext/libsvm/windows/svm-predict.exe +0 -0
- data/ext/libsvm/windows/svm-scale.exe +0 -0
- data/ext/libsvm/windows/svm-toy.exe +0 -0
- data/ext/libsvm/windows/svm-train.exe +0 -0
- data/lib/eluka.rb +10 -0
- data/lib/eluka/bijection.rb +23 -0
- data/lib/eluka/data_point.rb +36 -0
- data/lib/eluka/document.rb +47 -0
- data/lib/eluka/feature_vector.rb +86 -0
- data/lib/eluka/features.rb +31 -0
- data/lib/eluka/model.rb +129 -0
- data/lib/fselect.rb +321 -0
- data/lib/grid.rb +25 -0
- data/test/helper.rb +18 -0
- data/test/test_eluka.rb +7 -0
- metadata +214 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
//
|
2
|
+
// svm_model
|
3
|
+
//
|
4
|
+
package libsvm;
|
5
|
+
public class svm_model implements java.io.Serializable
|
6
|
+
{
|
7
|
+
public svm_parameter param; // parameter
|
8
|
+
public int nr_class; // number of classes, = 2 in regression/one class svm
|
9
|
+
public int l; // total #SV
|
10
|
+
public svm_node[][] SV; // SVs (SV[l])
|
11
|
+
public double[][] sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
|
12
|
+
public double[] rho; // constants in decision functions (rho[k*(k-1)/2])
|
13
|
+
public double[] probA; // pariwise probability information
|
14
|
+
public double[] probB;
|
15
|
+
|
16
|
+
// for classification only
|
17
|
+
|
18
|
+
public int[] label; // label of each class (label[k])
|
19
|
+
public int[] nSV; // number of SVs for each class (nSV[k])
|
20
|
+
// nSV[0] + nSV[1] + ... + nSV[k-1] = l
|
21
|
+
};
|
@@ -0,0 +1,47 @@
|
|
1
|
+
package libsvm;
|
2
|
+
/**
 * Settings that select the SVM formulation and kernel and tune training.
 *
 * All state is exposed as public fields. {@link #clone()} returns an
 * independent copy, including separate copies of the per-class weight arrays.
 */
public class svm_parameter implements Cloneable,java.io.Serializable
{
	/* svm_type */
	public static final int C_SVC = 0;
	public static final int NU_SVC = 1;
	public static final int ONE_CLASS = 2;
	public static final int EPSILON_SVR = 3;
	public static final int NU_SVR = 4;

	/* kernel_type */
	public static final int LINEAR = 0;
	public static final int POLY = 1;
	public static final int RBF = 2;
	public static final int SIGMOID = 3;
	public static final int PRECOMPUTED = 4;

	/** One of the svm_type constants above. */
	public int svm_type;
	/** One of the kernel_type constants above. */
	public int kernel_type;
	public int degree;	// for poly
	public double gamma;	// for poly/rbf/sigmoid
	public double coef0;	// for poly/sigmoid

	// these are for training only
	public double cache_size; // in MB
	public double eps;	// stopping criteria
	public double C;	// for C_SVC, EPSILON_SVR and NU_SVR
	public int nr_weight;		// for C_SVC
	public int[] weight_label;	// for C_SVC
	public double[] weight;		// for C_SVC
	public double nu;	// for NU_SVC, ONE_CLASS, and NU_SVR
	public double p;	// for EPSILON_SVR
	public int shrinking;	// use the shrinking heuristics
	public int probability; // do probability estimates

	/**
	 * Creates a copy of this parameter set.
	 *
	 * The {@link #weight_label} and {@link #weight} arrays are duplicated, so
	 * changing an element through the copy never alters this instance.
	 * Arrays that are {@code null} here stay {@code null} in the copy.
	 *
	 * @return a new {@code svm_parameter} with the same field values
	 */
	@Override
	public Object clone()
	{
		try
		{
			svm_parameter copy = (svm_parameter) super.clone();
			// super.clone() only copies the array references; duplicate the
			// arrays so the two objects do not share mutable state.
			if(weight_label != null)
				copy.weight_label = (int[]) weight_label.clone();
			if(weight != null)
				copy.weight = (double[]) weight.clone();
			return copy;
		}
		catch (CloneNotSupportedException e)
		{
			// Cannot happen: this class implements Cloneable.
			throw new AssertionError(e);
		}
	}
}
|
@@ -0,0 +1,163 @@
|
|
1
|
+
import libsvm.*;
|
2
|
+
import java.io.*;
|
3
|
+
import java.util.*;
|
4
|
+
|
5
|
+
class svm_predict {
|
6
|
+
private static double atof(String s)
|
7
|
+
{
|
8
|
+
return Double.valueOf(s).doubleValue();
|
9
|
+
}
|
10
|
+
|
11
|
+
private static int atoi(String s)
|
12
|
+
{
|
13
|
+
return Integer.parseInt(s);
|
14
|
+
}
|
15
|
+
|
16
|
+
private static void predict(BufferedReader input, DataOutputStream output, svm_model model, int predict_probability) throws IOException
|
17
|
+
{
|
18
|
+
int correct = 0;
|
19
|
+
int total = 0;
|
20
|
+
double error = 0;
|
21
|
+
double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
|
22
|
+
|
23
|
+
int svm_type=svm.svm_get_svm_type(model);
|
24
|
+
int nr_class=svm.svm_get_nr_class(model);
|
25
|
+
double[] prob_estimates=null;
|
26
|
+
|
27
|
+
if(predict_probability == 1)
|
28
|
+
{
|
29
|
+
if(svm_type == svm_parameter.EPSILON_SVR ||
|
30
|
+
svm_type == svm_parameter.NU_SVR)
|
31
|
+
{
|
32
|
+
System.out.print("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+svm.svm_get_svr_probability(model)+"\n");
|
33
|
+
}
|
34
|
+
else
|
35
|
+
{
|
36
|
+
int[] labels=new int[nr_class];
|
37
|
+
svm.svm_get_labels(model,labels);
|
38
|
+
prob_estimates = new double[nr_class];
|
39
|
+
output.writeBytes("labels");
|
40
|
+
for(int j=0;j<nr_class;j++)
|
41
|
+
output.writeBytes(" "+labels[j]);
|
42
|
+
output.writeBytes("\n");
|
43
|
+
}
|
44
|
+
}
|
45
|
+
while(true)
|
46
|
+
{
|
47
|
+
String line = input.readLine();
|
48
|
+
if(line == null) break;
|
49
|
+
|
50
|
+
StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
|
51
|
+
|
52
|
+
double target = atof(st.nextToken());
|
53
|
+
int m = st.countTokens()/2;
|
54
|
+
svm_node[] x = new svm_node[m];
|
55
|
+
for(int j=0;j<m;j++)
|
56
|
+
{
|
57
|
+
x[j] = new svm_node();
|
58
|
+
x[j].index = atoi(st.nextToken());
|
59
|
+
x[j].value = atof(st.nextToken());
|
60
|
+
}
|
61
|
+
|
62
|
+
double v;
|
63
|
+
if (predict_probability==1 && (svm_type==svm_parameter.C_SVC || svm_type==svm_parameter.NU_SVC))
|
64
|
+
{
|
65
|
+
v = svm.svm_predict_probability(model,x,prob_estimates);
|
66
|
+
output.writeBytes(v+" ");
|
67
|
+
for(int j=0;j<nr_class;j++)
|
68
|
+
output.writeBytes(prob_estimates[j]+" ");
|
69
|
+
output.writeBytes("\n");
|
70
|
+
}
|
71
|
+
else
|
72
|
+
{
|
73
|
+
v = svm.svm_predict(model,x);
|
74
|
+
output.writeBytes(v+"\n");
|
75
|
+
}
|
76
|
+
|
77
|
+
if(v == target)
|
78
|
+
++correct;
|
79
|
+
error += (v-target)*(v-target);
|
80
|
+
sumv += v;
|
81
|
+
sumy += target;
|
82
|
+
sumvv += v*v;
|
83
|
+
sumyy += target*target;
|
84
|
+
sumvy += v*target;
|
85
|
+
++total;
|
86
|
+
}
|
87
|
+
if(svm_type == svm_parameter.EPSILON_SVR ||
|
88
|
+
svm_type == svm_parameter.NU_SVR)
|
89
|
+
{
|
90
|
+
System.out.print("Mean squared error = "+error/total+" (regression)\n");
|
91
|
+
System.out.print("Squared correlation coefficient = "+
|
92
|
+
((total*sumvy-sumv*sumy)*(total*sumvy-sumv*sumy))/
|
93
|
+
((total*sumvv-sumv*sumv)*(total*sumyy-sumy*sumy))+
|
94
|
+
" (regression)\n");
|
95
|
+
}
|
96
|
+
else
|
97
|
+
System.out.print("Accuracy = "+(double)correct/total*100+
|
98
|
+
"% ("+correct+"/"+total+") (classification)\n");
|
99
|
+
}
|
100
|
+
|
101
|
+
private static void exit_with_help()
|
102
|
+
{
|
103
|
+
System.err.print("usage: svm_predict [options] test_file model_file output_file\n"
|
104
|
+
+"options:\n"
|
105
|
+
+"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); one-class SVM not supported yet\n");
|
106
|
+
System.exit(1);
|
107
|
+
}
|
108
|
+
|
109
|
+
public static void main(String argv[]) throws IOException
|
110
|
+
{
|
111
|
+
int i, predict_probability=0;
|
112
|
+
|
113
|
+
// parse options
|
114
|
+
for(i=0;i<argv.length;i++)
|
115
|
+
{
|
116
|
+
if(argv[i].charAt(0) != '-') break;
|
117
|
+
++i;
|
118
|
+
switch(argv[i-1].charAt(1))
|
119
|
+
{
|
120
|
+
case 'b':
|
121
|
+
predict_probability = atoi(argv[i]);
|
122
|
+
break;
|
123
|
+
default:
|
124
|
+
System.err.print("Unknown option: " + argv[i-1] + "\n");
|
125
|
+
exit_with_help();
|
126
|
+
}
|
127
|
+
}
|
128
|
+
if(i>=argv.length-2)
|
129
|
+
exit_with_help();
|
130
|
+
try
|
131
|
+
{
|
132
|
+
BufferedReader input = new BufferedReader(new FileReader(argv[i]));
|
133
|
+
DataOutputStream output = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(argv[i+2])));
|
134
|
+
svm_model model = svm.svm_load_model(argv[i+1]);
|
135
|
+
if(predict_probability == 1)
|
136
|
+
{
|
137
|
+
if(svm.svm_check_probability_model(model)==0)
|
138
|
+
{
|
139
|
+
System.err.print("Model does not support probabiliy estimates\n");
|
140
|
+
System.exit(1);
|
141
|
+
}
|
142
|
+
}
|
143
|
+
else
|
144
|
+
{
|
145
|
+
if(svm.svm_check_probability_model(model)!=0)
|
146
|
+
{
|
147
|
+
System.out.print("Model supports probability estimates, but disabled in prediction.\n");
|
148
|
+
}
|
149
|
+
}
|
150
|
+
predict(input,output,model,predict_probability);
|
151
|
+
input.close();
|
152
|
+
output.close();
|
153
|
+
}
|
154
|
+
catch(FileNotFoundException e)
|
155
|
+
{
|
156
|
+
exit_with_help();
|
157
|
+
}
|
158
|
+
catch(ArrayIndexOutOfBoundsException e)
|
159
|
+
{
|
160
|
+
exit_with_help();
|
161
|
+
}
|
162
|
+
}
|
163
|
+
}
|
@@ -0,0 +1,350 @@
|
|
1
|
+
import libsvm.*;
|
2
|
+
import java.io.*;
|
3
|
+
import java.util.*;
|
4
|
+
import java.text.DecimalFormat;
|
5
|
+
|
6
|
+
/**
 * Command-line tool that linearly rescales a data file in LIBSVM sparse
 * format ("target index:value index:value ...") and prints the scaled data
 * to standard output.
 *
 * Every feature is mapped from its observed [min, max] onto [lower, upper];
 * with -y the target is rescaled as well. The ranges can be saved with -s
 * and re-applied to another file with -r. Features whose minimum equals
 * their maximum are neither printed nor saved, and scaled values equal to
 * zero are omitted from the output.
 */
class svm_scale
{
	/** Separators of a data line: whitespace plus the ':' between index and value. */
	private static final String DATA_DELIMITERS = " \t\n\r\f:";

	private String line = null;		// most recent line returned by readline()
	private double lower = -1.0;		// x scaling lower limit
	private double upper = 1.0;		// x scaling upper limit
	private double y_lower;			// y scaling limits, meaningful when y_scaling is set
	private double y_upper;
	private boolean y_scaling = false;
	private double[] feature_max;		// per-feature maximum, indexed by attribute index
	private double[] feature_min;		// per-feature minimum, indexed by attribute index
	private double y_max = -Double.MAX_VALUE;
	private double y_min = Double.MAX_VALUE;
	private int max_index;			// largest attribute index seen
	private long num_nonzeros = 0;		// index:value pairs present in the input
	private long new_num_nonzeros = 0;	// index:value pairs written to the output

	/** Prints the usage text to standard output and terminates the JVM with status 1. */
	private static void exit_with_help()
	{
		System.out.print(
		 "Usage: svm-scale [options] data_filename\n"
		+"options:\n"
		+"-l lower : x scaling lower limit (default -1)\n"
		+"-u upper : x scaling upper limit (default +1)\n"
		+"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
		+"-s save_filename : save scaling parameters to save_filename\n"
		+"-r restore_filename : restore scaling parameters from restore_filename\n"
		);
		System.exit(1);
	}

	/**
	 * Returns the option value at position {@code i}, or shows the usage text
	 * and exits when the command line ends before the value.
	 */
	private static String option_value(String[] argv, int i)
	{
		if(i >= argv.length)
			exit_with_help();
		return argv[i];
	}

	/** Closes {@code fp} and returns a fresh reader positioned at the start of {@code filename}. */
	private BufferedReader rewind(BufferedReader fp, String filename) throws IOException
	{
		fp.close();
		return new BufferedReader(new FileReader(filename));
	}

	/** Prints the target value followed by a space, rescaled first when y scaling is active. */
	private void output_target(double value)
	{
		if(y_scaling)
		{
			// The end points are mapped exactly to avoid rounding drift.
			if(value == y_min)
				value = y_lower;
			else if(value == y_max)
				value = y_upper;
			else
				value = y_lower + (y_upper-y_lower) *
				(value-y_min) / (y_max-y_min);
		}

		System.out.print(value + " ");
	}

	/** Prints one scaled "index:value " pair unless the feature is constant or scales to zero. */
	private void output(int index, double value)
	{
		/* skip single-valued attribute */
		if(feature_max[index] == feature_min[index])
			return;

		if(value == feature_min[index])
			value = lower;
		else if(value == feature_max[index])
			value = upper;
		else
			value = lower + (upper-lower) *
				(value-feature_min[index])/
				(feature_max[index]-feature_min[index]);

		if(value != 0)
		{
			System.out.print(index + ":" + value + " ");
			new_num_nonzeros++;
		}
	}

	/** Reads the next line into {@link #line} and returns it ({@code null} at end of file). */
	private String readline(BufferedReader fp) throws IOException
	{
		line = fp.readLine();
		return line;
	}

	/** Pass 1 over the restore file: raises {@link #max_index} to the largest feature index listed. */
	private void scan_restore_file(BufferedReader fp_restore) throws IOException
	{
		if(fp_restore.read() == 'y')
		{
			// rest of the "y" line, the y limits line and the y min/max line
			fp_restore.readLine();
			fp_restore.readLine();
			fp_restore.readLine();
		}
		// the "x" line and the x limits line
		fp_restore.readLine();
		fp_restore.readLine();

		String restore_line;
		while((restore_line = fp_restore.readLine())!=null)
		{
			StringTokenizer st2 = new StringTokenizer(restore_line);
			int idx = Integer.parseInt(st2.nextToken());
			max_index = Math.max(max_index, idx);
		}
	}

	/**
	 * Pass 2.5 (restore): overrides the scaling limits and the per-feature
	 * ranges with the values stored in the restore file. The reader must be
	 * positioned at the start of that file.
	 */
	private void restore_parameters(BufferedReader fp_restore) throws IOException
	{
		fp_restore.mark(2);			// for reset
		if(fp_restore.read() == 'y')
		{
			fp_restore.readLine();		// pass the '\n' after 'y'
			StringTokenizer st = new StringTokenizer(fp_restore.readLine());
			y_lower = Double.parseDouble(st.nextToken());
			y_upper = Double.parseDouble(st.nextToken());
			st = new StringTokenizer(fp_restore.readLine());
			y_min = Double.parseDouble(st.nextToken());
			y_max = Double.parseDouble(st.nextToken());
			y_scaling = true;
		}
		else
			fp_restore.reset();

		if(fp_restore.read() == 'x')
		{
			fp_restore.readLine();		// pass the '\n' after 'x'
			StringTokenizer st = new StringTokenizer(fp_restore.readLine());
			lower = Double.parseDouble(st.nextToken());
			upper = Double.parseDouble(st.nextToken());
			String restore_line;
			while((restore_line = fp_restore.readLine())!=null)
			{
				StringTokenizer st2 = new StringTokenizer(restore_line);
				int idx = Integer.parseInt(st2.nextToken());
				double fmin = Double.parseDouble(st2.nextToken());
				double fmax = Double.parseDouble(st2.nextToken());
				if (idx <= max_index)
				{
					feature_min[idx] = fmin;
					feature_max[idx] = fmax;
				}
			}
		}
	}

	/**
	 * Pass 2.5 (save): writes the scaling limits and the range of every
	 * non-constant feature to {@code save_filename}.
	 *
	 * Numbers are formatted with {@link Locale#ROOT} so the decimal separator
	 * is always '.', which is what the restore pass parses with
	 * {@code Double.parseDouble}; the default locale may use ',' instead.
	 */
	private void save_parameters(String save_filename) throws IOException
	{
		Formatter formatter = new Formatter(new StringBuilder(), Locale.ROOT);
		BufferedWriter fp_save = null;

		try {
			fp_save = new BufferedWriter(new FileWriter(save_filename));
		} catch(IOException e) {
			System.err.println("can't open file " + save_filename);
			System.exit(1);
		}

		try
		{
			if(y_scaling)
			{
				formatter.format("y\n");
				formatter.format("%.16g %.16g\n", y_lower, y_upper);
				formatter.format("%.16g %.16g\n", y_min, y_max);
			}
			formatter.format("x\n");
			formatter.format("%.16g %.16g\n", lower, upper);
			for(int i=1;i<=max_index;i++)
			{
				if(feature_min[i] != feature_max[i])
					formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
			}
			fp_save.write(formatter.toString());
		}
		finally
		{
			fp_save.close();
		}
	}

	/**
	 * Parses the command line and runs the scaling passes over the data file.
	 *
	 * Invalid command lines (unknown option, missing option value, missing or
	 * surplus data file name) end with the usage text; inconsistent limits,
	 * combining -r with -s, and unopenable files end with an error message.
	 *
	 * @param argv command-line arguments
	 * @throws IOException if reading the data or parameter files fails
	 */
	private void run(String []argv) throws IOException
	{
		int i,index;
		BufferedReader fp = null, fp_restore = null;
		String save_filename = null;
		String restore_filename = null;
		String data_filename = null;

		for(i=0;i<argv.length;i++)
		{
			String option = argv[i];
			if(option.length() == 0 || option.charAt(0) != '-') break;
			// A lone "-" has no option letter to inspect.
			if(option.length() < 2)
				exit_with_help();
			++i;
			switch(option.charAt(1))
			{
				case 'l': lower = Double.parseDouble(option_value(argv, i));	break;
				case 'u': upper = Double.parseDouble(option_value(argv, i));	break;
				case 'y':
					  y_lower = Double.parseDouble(option_value(argv, i));
					  ++i;
					  y_upper = Double.parseDouble(option_value(argv, i));
					  y_scaling = true;
					  break;
				case 's': save_filename = option_value(argv, i);	break;
				case 'r': restore_filename = option_value(argv, i);	break;
				default:
					  System.err.println("unknown option");
					  exit_with_help();
			}
		}

		if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
		{
			System.err.println("inconsistent lower/upper specification");
			System.exit(1);
		}
		if(restore_filename != null && save_filename != null)
		{
			System.err.println("cannot use -r and -s simultaneously");
			System.exit(1);
		}

		if(argv.length != i+1)
			exit_with_help();

		data_filename = argv[i];
		try {
			fp = new BufferedReader(new FileReader(data_filename));
		} catch (Exception e) {
			System.err.println("can't open file " + data_filename);
			System.exit(1);
		}

		try
		{
			/* assumption: min index of attributes is 1 */
			/* pass 1: find out max index of attributes */
			max_index = 0;

			if(restore_filename != null)
			{
				try {
					fp_restore = new BufferedReader(new FileReader(restore_filename));
				}
				catch (Exception e) {
					System.err.println("can't open file " + restore_filename);
					System.exit(1);
				}
				scan_restore_file(fp_restore);
				fp_restore = rewind(fp_restore, restore_filename);
			}

			while (readline(fp) != null)
			{
				StringTokenizer st = new StringTokenizer(line,DATA_DELIMITERS);
				st.nextToken();		// target
				while(st.hasMoreTokens())
				{
					index = Integer.parseInt(st.nextToken());
					max_index = Math.max(max_index, index);
					st.nextToken();	// value
					num_nonzeros++;
				}
			}

			try {
				feature_max = new double[(max_index+1)];
				feature_min = new double[(max_index+1)];
			} catch(OutOfMemoryError e) {
				System.err.println("can't allocate enough memory");
				System.exit(1);
			}

			for(i=0;i<=max_index;i++)
			{
				feature_max[i] = -Double.MAX_VALUE;
				feature_min[i] = Double.MAX_VALUE;
			}

			fp = rewind(fp, data_filename);

			/* pass 2: find out min/max value */
			while(readline(fp) != null)
			{
				int next_index = 1;

				StringTokenizer st = new StringTokenizer(line,DATA_DELIMITERS);
				double target = Double.parseDouble(st.nextToken());
				y_max = Math.max(y_max, target);
				y_min = Math.min(y_min, target);

				while (st.hasMoreTokens())
				{
					index = Integer.parseInt(st.nextToken());
					double value = Double.parseDouble(st.nextToken());

					// Indices skipped on this line are implicit zeros.
					for (i = next_index; i<index; i++)
					{
						feature_max[i] = Math.max(feature_max[i], 0);
						feature_min[i] = Math.min(feature_min[i], 0);
					}

					feature_max[index] = Math.max(feature_max[index], value);
					feature_min[index] = Math.min(feature_min[index], value);
					next_index = index + 1;
				}

				for(i=next_index;i<=max_index;i++)
				{
					feature_max[i] = Math.max(feature_max[i], 0);
					feature_min[i] = Math.min(feature_min[i], 0);
				}
			}

			fp = rewind(fp, data_filename);

			/* pass 2.5: save/restore feature_min/feature_max */
			if(restore_filename != null)
				restore_parameters(fp_restore);	// fp_restore rewound after pass 1

			if(save_filename != null)
				save_parameters(save_filename);

			/* pass 3: scale */
			while(readline(fp) != null)
			{
				int next_index = 1;

				StringTokenizer st = new StringTokenizer(line,DATA_DELIMITERS);
				double target = Double.parseDouble(st.nextToken());
				output_target(target);
				while(st.hasMoreElements())
				{
					index = Integer.parseInt(st.nextToken());
					double value = Double.parseDouble(st.nextToken());
					for (i = next_index; i<index; i++)
						output(i, 0);
					output(index, value);
					next_index = index + 1;
				}

				for(i=next_index;i<= max_index;i++)
					output(i, 0);
				System.out.print("\n");
			}
			if (new_num_nonzeros > num_nonzeros)
				System.err.print(
				 "Warning: original #nonzeros " + num_nonzeros+"\n"
				+"         new      #nonzeros " + new_num_nonzeros+"\n"
				+"Use -l 0 if many original feature values are zeros\n");
		}
		finally
		{
			// Release both readers even when a pass fails on malformed input.
			try
			{
				if(fp_restore != null)
					fp_restore.close();
			}
			finally
			{
				fp.close();
			}
		}
	}

	/**
	 * Entry point: {@code svm-scale [options] data_filename}.
	 *
	 * @param argv command-line arguments
	 * @throws IOException if reading the data or parameter files fails
	 */
	public static void main(String argv[]) throws IOException
	{
		svm_scale s = new svm_scale();
		s.run(argv);
	}
}
|