svmlab 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/README +219 -0
- data/lib/arraymethods.rb +87 -0
- data/lib/irb.history +100 -0
- data/lib/libsvmdata.rb +122 -0
- data/lib/svmfeature.rb +337 -0
- data/lib/svmfeature2.rb +98 -0
- data/lib/svmlab-config.rb +215 -0
- data/lib/svmlab-irb.rb +98 -0
- data/lib/svmlab-optim.rb +556 -0
- data/lib/svmlab-plot.rb +170 -0
- data/lib/svmlab.rb +365 -0
- data/lib/svmprediction.rb +176 -0
- data/lib/test.cfg +12 -0
- data/lib/test.rb +5 -0
- data/lib/testdata +3 -0
- data/lib/texput.log +20 -0
- data/lib/tmp.irb.rc +81 -0
- data/lib/v6.cfg +124 -0
- metadata +102 -0
data/lib/svmlab-plot.rb
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
# ---------------------------------------------------------------------------------------
|
2
|
+
# Plot methods
|
3
|
+
|
4
|
+
require 'gnuplot'
|
5
|
+
|
6
|
+
|
7
|
+
# Each should be an array giving more than one plot
|
8
|
+
# Renders a list of Gnuplot data sets as one plot with a grid.
#
# plotdata - Array of Gnuplot::DataSet objects to draw.
# file     - Output file name. A name ending in "png" or "ps" selects the
#            matching terminal and redirects gnuplot's output to that file;
#            for any other value neither terminal nor output is set.
# title    - Plot title.
# xtitle   - Label of the X axis.
# ytitle   - Label of the Y axis.
#
# Returns whatever Gnuplot.open returns.
def genericplot(plotdata, file, title='Plot', xtitle='X', ytitle='Y')
  # Decide on the terminal from the END of the file name only. The previous
  # test, /(png)|(ps)$/, also matched "png" anywhere in the name, which
  # redirected the output without selecting a terminal.
  terminal =
    if file =~ /png$/
      # Remember to add the following line to your .bashrc file :
      # export GDFONTPATH=/usr/share/fonts/truetype/ttf-bitstream-vera/
      "png size 800,600 font Vera 16"
    elsif file =~ /ps$/
      "postscript color \"Helvetica\" 16"
    end

  Gnuplot.open do |gp| # This could be either a file or the gnuplot process that we pipe to
    Gnuplot::Plot.new( gp ) do |plot|
      plot.title title
      plot.xlabel xtitle
      plot.ylabel ytitle
      plot.set "grid"
      if terminal
        plot.terminal terminal
        plot.output file
      end
      plot.data = plotdata
    end
  end
end
|
27
|
+
|
28
|
+
# --- predplot ---
|
29
|
+
# PredictionPlot: Plots true value on the X axis vs. predicted value on the Y axis.
|
30
|
+
# Scatter plot of one or more prediction sets: true value on the X axis,
# predicted value on the Y axis, plus a diagonal marking perfect predictions.
#
# predarr - A prediction set, or an Array of them. Each set is iterated as
#           (example, value) pairs where value has 'truth' and 'pred' entries.
# legends - Array of legend strings, one per set; when empty, "Sample 1",
#           "Sample 2", ... are generated.
# title   - Plot title.
# err     - Optional String naming a method of the prediction set. It is
#           evaluated as Ruby ("pred.<err>") and the result is appended to the
#           legend, so it must only ever come from trusted input.
# file    - Output file name handed on to genericplot.
#
# Returns nil.
def predplot(predarr, legends = [], title = 'SVM Prediction', err = nil, file = '')
  predarr = [predarr] unless predarr.is_a?(Array)

  # One [truths, predictions] pair per prediction set.
  dataarr = predarr.map do |predictions|
    truths = []
    guesses = []
    predictions.each do |_example, val|
      truths << val['truth']
      guesses << val['pred']
    end
    [truths, guesses]
  end

  # Integer bounds enclosing every plotted value; they span the diagonal.
  seed = dataarr[0][0][0]
  from = dataarr.inject(seed) { |lowest, (xs, ys)| [lowest, xs.min, ys.min].min }.floor
  to = dataarr.inject(seed) { |highest, (xs, ys)| [highest, xs.max, ys.max].max }.ceil

  # Fiddling with legends
  legends = (1..dataarr.size).map { |k| "Sample #{k}" } if legends.size == 0
  if err
    # The block parameter must stay named `pred`: the evaluated string refers to it.
    legends = legends.zip(predarr).map do |legend, pred|
      legend + " (#{err} = ".upcase + "%.2f" % eval("pred.#{err}") + ')'
    end
  end

  # Setting plotdata
  point_series = lambda do |data, label|
    Gnuplot::DataSet.new(data) do |ds|
      ds.using = '1:2'
      ds.with = "points"
      ds.title = label
      ds.linewidth = 2
      ds.matrix = nil
    end
  end

  plotdata = [point_series.call(dataarr.first, legends.first)]
  plotdata << Gnuplot::DataSet.new([[from, to], [from, to]]) { |ds|
    ds.using = '1:2'
    ds.with = "lines"
    ds.title = "Correct diagonal"
    ds.linewidth = 1
    ds.matrix = nil
  }
  dataarr[1..-1].zip(legends[1..-1]).each do |(x, y), legend|
    plotdata << point_series.call([x, y], legend)
  end

  genericplot(plotdata, file, title, 'Experimental value', 'Predicted value')
  nil
end
|
83
|
+
|
84
|
+
class SVMLab
|
85
|
+
|
86
|
+
# --- featurecorrelationplot ---
|
87
|
+
# Plots target feature on the Y axis vs. selected feature on the X axis
|
88
|
+
# Plots the target feature (vector index 0) on the Y axis against the
# selected feature on the X axis, one point per example.
#
# feature - Integer index into the example vectors.
# file    - Output file name handed on to genericplot.
# title   - Plot title.
#
# Stored values are mapped back with value / @scale[k] + @center[k].
# NOTE(review): @scale and @center are not assigned anywhere in the visible
# part of this class - confirm where they are set.
#
# Raises RuntimeError when feature is not a valid index of an example vector.
def featurecorrelationplot( feature, file = '', title = 'Feature correlation')
  x = []
  y = []
  @examples.each do |_example, val|
    raise "#{feature} outside feature range" unless (0...val.size) === feature
    x << val[feature] / @scale[feature] + @center[feature]
    y << val[0] / @scale[0] + @center[0]
  end

  points = Gnuplot::DataSet.new( [x, y] ) do |ds|
    ds.using = '1:2'
    ds.with = "points"
    ds.title = "Feature #{feature} vs target feature"
    ds.linewidth = 1
    ds.matrix = nil
  end
  genericplot([points], file, title, "Feature #{feature}", "Target feature")
end
|
103
|
+
|
104
|
+
# --- predplotgroups ---
|
105
|
+
# Draws one prediction plot per example group.
#
# Examples are grouped by the slice of their name selected by the
# 'Groups' entry of the feature configuration; for every group, each
# prediction set is filtered down to that group and passed to predplot.
#
# predarr - Array of prediction sets (iterated as name/value pairs).
# file    - Output file name. When non-empty the group name is inserted
#           before the extension ("out.png" -> "out.<group>.png").
# legends - Legends forwarded to predplot.
# title   - Base title; " on <group>" is appended for each plot.
# err     - Error measure name forwarded to predplot.
#
# NOTE(review): the filtered sets are rebuilt as plain Hashes, so an err
# method that only exists on the original prediction class will not be
# available inside predplot - confirm whether that matters to callers.
#
# Returns the Array of group names.
def predplotgroups(predarr, file = '', legends = [], title = 'SVM Prediction', err = nil)
  # SECURITY: the 'Groups' string is evaluated as Ruby code. It is evaluated
  # once here instead of once per example; it must never hold untrusted input.
  groupslice = eval(@cfg['Feature']['Groups'])
  groups = @examples.map { |name, _vector| name[groupslice] }.uniq

  # For each group
  groups.each do |group|
    predarr2 = predarr.map do |preds|
      preds.find_all { |name, _val| name[groupslice] == group }.
        inject({}) { |subset, (name, val)| subset[name] = val; subset }
    end

    groupfile =
      if file.size == 0
        file
      else
        parts = file.split(/\./)
        [parts[0...-1], group, parts.last].join('.')
      end

    # predplot expects (predarr, legends, title, err, file); the previous call
    # passed these in the order of this method's own signature, which put the
    # file name into the legends slot and the title into the err slot.
    predplot(predarr2, legends, title + " on #{group}", err, groupfile)
  end
end
|
121
|
+
|
122
|
+
|
123
|
+
|
124
|
+
# --- onefeatureplot ---
|
125
|
+
# Plots the model's prediction as a curve over the value range of the
# feature stored at vector index 1, together with the examples themselves
# (index 1 on the X axis, index 0 on the Y axis).
#
# file  - Output file name. A name ending in "png" or "ps" selects the
#         matching terminal and redirects output to that file.
# title - Plot title.
#
# The curve is sampled at 1001 evenly spaced points between the smallest and
# the largest X value; each prediction is transformed with
# value / @scale[0] + @center[0].
# NOTE(review): @model, @scale and @center are not assigned in the visible
# part of this class - confirm where they are set.
#
# Returns whatever Gnuplot.open returns.
def onefeatureplot(file='', title = 'SVM Prediction')
  # Keep these as four separate arrays: x = y = [] would alias one array.
  xt = []
  yt = []
  @examples.each do |_example, features|
    xt.push(features[1].to_f)
    yt.push(features[0].to_f)
  end

  # The extremes are loop invariant; compute them once instead of per sample.
  xmin = xt.min
  span = xt.max - xmin
  xp = []
  yp = []
  (0..1000).each do |i|
    x = (i * span / 1000 + xmin).to_f
    xp.push(x)
    yp.push(@model.predict([x]) / @scale[0] + @center[0])
  end

  # Look at the END of the file name only. The previous test, /(png)|(ps)$/,
  # also matched "png" anywhere in the name and then redirected the output
  # without selecting a terminal.
  terminal =
    if file =~ /png$/
      "png size 800,600 small"
    elsif file =~ /ps$/
      "postscript"
    end

  Gnuplot.open do |gp| # This could be either a file or the gnuplot process that we pipe to
    Gnuplot::Plot.new( gp ) do |plot|
      plot.title title
      plot.xlabel "Truth"
      plot.ylabel "Prediction"
      plot.set "grid"
      if terminal
        plot.terminal terminal
        plot.output file
      end
      plot.data = [
        Gnuplot::DataSet.new( [xp, yp] ) { |ds|
          ds.using = '1:2'
          ds.with = "lines"
          ds.title = "SVM prediction"
          ds.linewidth = 1
          ds.matrix = nil
        },
        Gnuplot::DataSet.new( [xt, yt] ) { |ds|
          ds.using = '1:2'
          ds.with = "points"
          ds.title = "Correct prediction"
          ds.linewidth = 1
          ds.matrix = nil
        }
      ]
    end
  end
end
|
169
|
+
|
170
|
+
end
|
data/lib/svmlab.rb
ADDED
@@ -0,0 +1,365 @@
|
|
1
|
+
require 'date'
require 'fileutils'
require 'rubygems'
require 'tempfile'
require 'yaml'

require 'SVM'
#require 'forkoff'

require 'svmfeature.rb'
require 'svmlab-optim.rb'
require 'svmlab-plot.rb'
require 'svmprediction.rb'
require 'svmlab-config.rb'
|
12
|
+
|
13
|
+
# An SVMLab object is created giving the configuration either as a file
|
14
|
+
# object or as a string. The configuration is in YAML format:
|
15
|
+
#
|
16
|
+
# ---
|
17
|
+
# Feature:
|
18
|
+
# <See SVMFeature class documentation>
|
19
|
+
# SVM:
|
20
|
+
# C: <parameter C>
|
21
|
+
# g: <RBF kernel's gamma>
|
22
|
+
# e: <epsilon for regression>
|
23
|
+
# Scale:
|
24
|
+
# <Feature1>:
|
25
|
+
# - <Scale1>
|
26
|
+
# - <Scale2>
|
27
|
+
# - ...
|
28
|
+
# - <ScaleN>
|
29
|
+
# <Feature2>: <Scale>
|
30
|
+
#
|
31
|
+
# The Scale setup has to match the features given in Feature configuration
|
32
|
+
# and each scale can be given as scalar or as array.
|
33
|
+
#
|
34
|
+
|
35
|
+
class SVMLab
|
36
|
+
|
37
|
+
attr_reader :cfg, :pslog, :features
|
38
|
+
|
39
|
+
# All examples are centered and scaled and the centered/scaled examples are stored
|
40
|
+
# in the object variable @examples. Information about the centering/scaling
|
41
|
+
# is stored in the @cfg['SVM'] part of the configuration hash
|
42
|
+
# There are three ways to initialize.
|
43
|
+
# 1) With an SVMLabConfig object
|
44
|
+
# 2) With a configuration file File object
|
45
|
+
# 3) With a string giving the configuration
|
46
|
+
# Builds the lab from a configuration.
#
# cfg - Either an SVMLabConfig, which is used as is, or anything else,
#       which is handed to SVMLabConfig.new.
#
# Loads every example through SVMFeature, completes the centering/scaling
# settings, runs checkOptimization, scales the examples in place and
# finally computes the groups from the configuration.
def initialize(cfg)
  @cfg = cfg.is_a?(SVMLabConfig) ? cfg : SVMLabConfig.new(cfg)
  @features = SVMFeature.new(@cfg['Feature'].to_yaml)
  @examples = @features.getAllFeatures
  # Filled in lazily by the first call to train.
  @ndimensions = nil

  checkScales(@cfg)
  checkOptimization(@cfg)
  scaleExamples
  @groups = setGroups
end
|
59
|
+
|
60
|
+
# --- setGroups ---
|
61
|
+
# Return value:
|
62
|
+
# groups hash:
|
63
|
+
# key : group name
|
64
|
+
# value : array of example names
|
65
|
+
# Returns nil if @cfg['Feature']['Groups'] is set but matches neither group syntax
|
66
|
+
# Partitions the examples into groups according to @cfg['Feature']['Groups'].
#
# Three cases:
# * Groups not set      - every example forms its own group (leave-one-out).
# * "(n1..n2)" or
#   "(n1...n2)"         - examples are grouped by that slice of their name.
# * any other string    - treated as a file name prefix: every file in
#                         @cfg['Feature']['BaseDir'] named <prefix><digits>
#                         defines the group <digits>; its lines are the
#                         member names.
#
# Returns a Hash of group name => Array of example names, or nil when
# Groups is set but matches neither syntax.
def setGroups()
  groups = @cfg['Feature']['Groups']

  # If no groups set, use leave-one-out crossvalidation
  unless groups
    return @examples.inject({}) { |hash, (key, _value)|
      hash[key] = [ key ]
      hash
    }
  end

  # If using the (n1..n2) syntax. The bounds are parsed instead of being
  # eval'ed, the whole string is anchored, and multi-digit bounds are accepted.
  if groups =~ /\A\((\d+)(\.{2,3})(\d+)\)\z/
    bounds = Regexp.last_match
    slice = Range.new(bounds[1].to_i, bounds[3].to_i, bounds[2] == '...')
    return @examples.map { |name, _value| name }.group_by { |name| name[slice] }
  end

  # If using the file prefix syntax. The prefix is matched literally, which
  # is also how it is used to build the path below.
  basedir = @cfg['Feature']['BaseDir']
  pattern = /\A#{Regexp.escape(groups)}(\d+)\z/
  hashkeys = Dir.entries(basedir).map { |entry| entry[pattern, 1] }.compact
  return nil if hashkeys.empty?

  hashkeys.inject({}) { |hash, key|
    # File.read rather than Kernel#open: the latter would spawn a process
    # for a path starting with "|".
    hash[key] = File.read(basedir + groups + key).split(/\n/)
    hash
  }
end
|
89
|
+
|
90
|
+
# Set the penalty factor C.
|
91
|
+
# Stores the penalty factor C, converted with to_f, in the SVM section
# of the configuration.
def C=(arg)
  svm_settings = @cfg['SVM']
  svm_settings['C'] = arg.to_f
end
|
94
|
+
|
95
|
+
# Set epsilon for Support Vector Regression.
|
96
|
+
# Stores epsilon for Support Vector Regression, converted with to_f,
# in the SVM section of the configuration.
def e=(arg)
  svm_settings = @cfg['SVM']
  svm_settings['e'] = arg.to_f
end
|
99
|
+
|
100
|
+
# Set gamma for the RBF kernel.
|
101
|
+
# Stores gamma for the RBF kernel, converted with to_f, in the SVM
# section of the configuration.
def g=(arg)
  svm_settings = @cfg['SVM']
  svm_settings['g'] = arg.to_f
end
|
104
|
+
|
105
|
+
# Returns the n closest neighbors of the @example hash to example
|
106
|
+
# Possibly broken - check the @features hash if an example looks erroneous
|
107
|
+
# Describes the examples lying closest to the given one.
#
# example - Name of the reference example (a key of @examples).
# n       - Upper index into the ranking; entries 0..n are reported, i.e.
#           n + 1 entries.
#
# Distances are Euclidean over the stored vectors without their first
# element. For every reported entry the result holds its name, its overall
# distance and, per configured feature, the distance restricted to that
# feature's slice of the vector followed by the feature values returned by
# @features.getExFeature. The first configured feature is marked with
# ' *** ', all others with ' --- '.
#
# Returns an Array of Strings, one per reported entry.
def getNeighbors(example, n = 1)
  reference = @examples[example]
  ranked = @examples.sort_by do |(_name, vector)|
    dist(reference[1...reference.size], vector[1...vector.size])
  end

  ranked[0..n].map do |name, vector|
    offset = 0
    # Name and overall distance
    header = name + ' : ' +
             "%.3f \n" % dist(reference[1...reference.size], vector[1...vector.size])

    details = @cfg['Feature']['Features'].inject('') do |report, feature|
      nvector = @features.getExFeature(name, feature)
      width = nvector.size
      featdist = dist(reference[offset...offset + width],
                      @examples[name][offset...offset + width])
      offset += width
      marker = feature == @cfg['Feature']['Features'][0] ? ' *** ' : ' --- '
      report + marker + "(%.2f)" % featdist + marker +
        feature + " : " +
        nvector.join(' ') + "\n"
    end

    header + details
  end
end
|
128
|
+
|
129
|
+
# Euclidean distance between two equally long numeric arrays.
#
# Raises RuntimeError when the sizes differ.
def dist(a,b)
  unless a.size == b.size
    raise "Cannot calculate distance"
  end

  squared = 0
  a.each_with_index do |ai, idx|
    squared += (ai - b[idx]).abs**2
  end
  Math.sqrt(squared)
end
|
133
|
+
|
134
|
+
# Finds those examples that have been predicted most far off
|
135
|
+
# the correct value. Returns a string consisting of those
|
136
|
+
# examples along with the closest neighbors.
|
137
|
+
# Reports the examples whose prediction is furthest from the truth.
#
# n           - Rank or Range of ranks to report; rank 1 is the largest
#               absolute error.
# n2          - Neighbor count handed to getNeighbors for each outlier.
# predictions - Prediction set (name => {'truth' => .., 'pred' => ..});
#               when omitted, crossvalidate is run to obtain one.
#
# Returns a String with one section per requested rank, sections joined by
# a newline: a header line, the example with its predicted and true value,
# and the text produced by getNeighbors.
def getOutliers(n = (1..1), n2 = 3, predictions = nil)
  predictions ||= crossvalidate

  # Largest absolute error first.
  sortedpred = predictions.sort_by { |(_name, v)| -(v['pred'] - v['truth']).abs }

  # Integer rather than Fixnum: the Fixnum constant no longer exists in
  # current Ruby, while Integer covers the same values on every version.
  ranks = n.is_a?(Integer) ? (n..n) : n

  ranks.map do |i|
    entry = sortedpred[i - 1]
    "OUTLIER %d : \n" % i +
      entry[0] + " was predicted %.3f" % entry[1]['pred'] +
      " but the truth is %.3f :\n" % entry[1]['truth'] +
      getNeighbors(entry[0], n2).join('')
  end.join("\n")
end
|
151
|
+
|
152
|
+
# Returns a String of all examples with features.
|
153
|
+
# Renders every stored example vector as one line of space separated
# values; example names are not included.
#
# Returns the lines concatenated into a single String.
def printExamples
  lines = @examples.map do |_exname, vector|
    vector.map { |v| v.to_s }.join(' ') + "\n"
  end
  lines.join
end
|
158
|
+
|
159
|
+
# An outer binding for the RubySVM predict function. This binding introduces
|
160
|
+
# inverse centering and scaling of the predicted value. This in order
|
161
|
+
# to give a real prediction value.
|
162
|
+
# Predicts the target value of one or more examples.
#
# examples - An example name (String) or a list of example names.
# model    - Model answering predict(vector); when omitted, a model is
#            trained on all examples first.
#
# A known example uses its stored, already scaled vector. An unknown one is
# loaded through a fresh SVMFeature built from a deep copy of the feature
# configuration (without 'DataSet', with 'Example' set) and then scaled.
#
# When 'PosClassFrom' is configured the model output is rounded; otherwise
# it is divided by the scale and shifted by the center of the first
# configured feature, undoing the centering/scaling of the target.
#
# A failure while predicting an example does not raise: the exception
# object takes the place of that example's prediction.
#
# Returns the single prediction when exactly one example was processed,
# otherwise an Array of predictions.
def predict(examples, model = nil)
  model ||= train
  names = examples.is_a?(String) ? [ examples ] : examples

  predictions = names.map do |example|
    begin
      vector = @examples[example]
      unless vector
        fcfg = Marshal.load(Marshal.dump(@cfg['Feature']))
        fcfg.delete('DataSet')
        fcfg['Example'] = example
        vector = scaleExample(SVMFeature.new(fcfg.to_yaml).getExAllFeatures(example))
      end

      # Index 0 holds the target; the model only sees the remaining values.
      if @cfg['Feature']['PosClassFrom']
        model.predict(vector[1..-1]).round
      else
        model.predict(vector[1..-1]) /
          @cfg['SVM']['Scale'][ @cfg['Feature']['Features'][0] ][0] +
          @cfg['SVM']['Center'][ @cfg['Feature']['Features'][0] ][0]
      end
    rescue => failure
      failure
    end
  end

  predictions.size == 1 ? predictions[0] : predictions
end
|
190
|
+
|
191
|
+
# An outer binding for the RubySVM training function.
|
192
|
+
# If no training examples given, it will train on all data in the dataset.
|
193
|
+
# Trains an SVM model.
#
# examples - Optional list of example names to train on; names missing from
#            @examples are skipped. Without it, every stored example is used.
#
# For each vector, element 0 is the label and the remaining elements are the
# features. @ndimensions is set from the first vector used, unless it is
# already set. The parameter object is kept in @par:
# * svm_type is 0 when 'PosClassFrom' is configured, otherwise 3
#   (presumably libsvm's C-SVC and epsilon-SVR - confirm against the binding),
# * C and eps are taken from the 'SVM' configuration when present,
# * gamma comes from the configuration or defaults to 1.0 / @ndimensions.
# NOTE(review): the configured 'e' is documented as the regression epsilon
# but is assigned to `eps` - confirm this is the intended parameter.
#
# STDOUT and STDERR are pointed at the null device while the model is built
# and restored afterwards, even if building fails.
#
# Returns the new SVM::Model.
def train(examples = nil)
  svm = SVM::Problem.new
  if examples
    examples.each do |exname|
      vector = @examples[exname]
      # Skip unknown names BEFORE touching the vector; previously its size
      # was read first, which failed on an unknown name.
      next unless vector
      @ndimensions = vector.size - 1 if !@ndimensions
      svm.addExample( vector[0], vector[1..-1] )
    end
  else
    @examples.each do |_name, vector|
      @ndimensions = vector.size - 1 if !@ndimensions
      svm.addExample( vector[0], vector[1..-1] )
    end
  end

  # Copies of the original streams, taken before entering the protected
  # region so that the ensure clause always has something to restore.
  errout = STDERR.clone
  out = STDOUT.clone
  begin
    # The block form closes the null device handle again; the reopened
    # standard streams keep their own descriptor.
    File.open(File::NULL, 'w') do |devnull|
      STDERR.reopen(devnull)
      STDOUT.reopen(devnull)
      @par = SVM::Parameter.new
      @par.svm_type = @cfg['Feature']['PosClassFrom'] ? 0 : 3
      if c = @cfg['SVM']['C'] then @par.C = c.to_f end
      if e = @cfg['SVM']['e'] then @par.eps = e.to_f end
      @par.gamma = if g = @cfg['SVM']['g'] then g.to_f
                   else 1.0 / @ndimensions end
      SVM::Model.new(svm, @par)
    end
  ensure
    STDERR.reopen(errout)
    STDOUT.reopen(out)
    errout.close
    out.close
  end
end
|
221
|
+
|
222
|
+
# crossvalidation on a grouping made from "Groups" in cfg
|
223
|
+
# Return values:
|
224
|
+
# - Predictions hash :
|
225
|
+
# key : example name
|
226
|
+
# value : 'truth' => the true value
|
227
|
+
# 'pred' => the predicted value
|
228
|
+
#--
|
229
|
+
# Remaining issues:
|
230
|
+
# 2) No of parallel computations should be in cfg
|
231
|
+
#++
|
232
|
+
# Cross-validates over the groups in @groups: each group in turn is left
# out, a model is trained on the members of all other groups, and the
# members of the left-out group are predicted with that model.
#
# Members that are not present in @examples are skipped.
#
# Returns an SVMPrediction filled with, per example name,
#   'truth' => the true target value (rounded when 'PosClassFrom' is
#              configured, otherwise unscaled and uncentered with the
#              settings of the first configured feature)
#   'pred'  => the value returned by predict
def crossvalidate()
  #parr = @groups.keys.forkoff do |group|
  parr = @groups.keys.map do |group|
    members = @groups[group]

    # Everything outside the current group is training data.
    trainingex = []
    @groups.each do |trgroup, trmem|
      trainingex += trmem unless trgroup == group
    end
    model = train(trainingex)

    # Predict each member of the group left out of training
    fold = {}
    members.each do |predname|
      next unless @examples[predname]
      truth =
        if @cfg['Feature']['PosClassFrom']
          @examples[predname][0].round
        else
          @examples[predname][0] /
            @cfg['SVM']['Scale'][@cfg['Feature']['Features'][0]][0] +
            @cfg['SVM']['Center'][@cfg['Feature']['Features'][0]][0]
        end
      fold[predname] = { 'truth' => truth,
                         'pred' => predict(predname, model) }
    end
    fold
  end

  # Merge the per-group results into one prediction object.
  parr.inject(SVMPrediction.new) do |merged, predhash|
    predhash.each { |exname, phash| merged[exname] = phash }
    merged
  end
end
|
256
|
+
|
257
|
+
# Same as crossvalidate, but also outputs configuration and result
|
258
|
+
# to a file.
|
259
|
+
# Same as crossvalidate, but also writes the run to a YAML file.
#
# path - Destination file. Missing parent directories are created.
#
# The file holds the current time, the RMSD and CC of the predictions,
# the configuration and the predictions themselves.
#
# Returns the prediction object produced by crossvalidate.
def publish_crossvalidate(path)
  predictions = crossvalidate
  info = {
    'Time' => DateTime.now,
    'Evaluation' => {
      'RMSD' => predictions.rmsd,
      #'MeanErr' => predictions.meanerr,
      'CC' => predictions.cc },
      #'AUC' => auc(predictions,1),
      #'PBRMSD' => pbrmsd(predictions, @cfg['Feature']['Groups']),
      #'WRMSD' => wrmsd(predictions,1),
      #'F1' => f1(predictions,2) },
    'Configuration' => @cfg,
    'Predictions' => predictions.predictions
  }

  # Create the whole directory chain. The previous hand-written loop never
  # created the first component of a relative path, and relied on
  # File.exists?, which current Ruby no longer provides.
  FileUtils.mkdir_p(File.dirname(path))
  File.open(path, 'w') { |f| YAML.dump(info, f) }
  predictions
end
|
283
|
+
|
284
|
+
private
|
285
|
+
|
286
|
+
# Go through each feature and check its centering
|
287
|
+
# and scaling instructions.
|
288
|
+
# Go through each feature and complete its centering and scaling
# instructions in cfg['SVM']['Center'] and cfg['SVM']['Scale'].
#
# cfg - The configuration to complete (modified in place).
#
# For every dimension of every configured feature:
# * The target (first feature) of a classification setup ('PosClassFrom'
#   configured) always gets center 0 and scale 1.
# * A missing center becomes the mean of that column over all examples.
# * A missing scale, or a String scale containing "max", uses
#   1 / (largest absolute deviation from the center).
# * A String scale containing "avg" uses 1 / (mean absolute deviation).
# * A String scale containing "std" uses 1 / (root mean squared deviation).
# * Any other scale is left as given.
# A column without any deviation gets the factor 1 in all three cases.
# In a String scale only the keyword is replaced by the computed number; the
# String becomes a Float when the result is a plain decimal number and is
# otherwise kept as a String.
#
# A feature without 'Dimensions' counts as one dimensional.
#
# Returns the list of configured features.
def checkScales(cfg)
  cfg['SVM']['Center'] ||= {}
  cfg['SVM']['Scale'] ||= {}
  dim0 = 0 # Column of the current feature's first dimension
  cfg['Feature']['Features'].each_with_index do |feature, index|
    dim = @cfg['Feature'][feature]['Dimensions'] || 1
    center = (cfg['SVM']['Center'][feature] ||= [])
    sc = (cfg['SVM']['Scale'][feature] ||= [])
    classtarget = index == 0 && @cfg['Feature']['PosClassFrom']

    (0...dim).each do |i|
      col = dim0 + i

      if classtarget
        center[i] = 0
        sc[i] = 1
        next
      end

      #Check centering
      unless center[i]
        sum = @examples.inject(0) { |s, (_exname, vector)| s + vector[col] }
        # to_f: integer valued columns must not get a truncated mean
        center[i] = sum.to_f / @examples.size
      end

      #Check scaling
      keyword = %w[max avg std].find { |k| sc[i].include?(k) } if sc[i].is_a?(String)
      if !sc[i] || keyword == 'max'
        absmax = @examples.inject(0) { |m, (_exname, vector)|
          [m, (vector[col] - center[i]).abs].max }
        # 1.0: with an integer deviation, 1/absmax would truncate to 0
        substituteScaleKeyword(sc, i, 'max', absmax != 0 ? 1.0 / absmax : 1)
      elsif keyword == 'avg'
        abssum = @examples.inject(0) { |s, (_exname, vector)|
          s + (vector[col] - center[i]).abs }
        substituteScaleKeyword(sc, i, 'avg',
                               abssum != 0 ? @examples.size.to_f / abssum : 1)
      elsif keyword == 'std'
        sqsum = @examples.inject(0) { |s, (_exname, vector)|
          s + (vector[col] - center[i])**2 }
        substituteScaleKeyword(sc, i, 'std',
                               sqsum != 0 ? Math::sqrt(@examples.size.to_f / sqsum) : 1)
      end
    end
    dim0 += dim
  end
end

# Puts a computed scale factor into sc[i]: replaces the keyword inside a
# String scale, or stores the number itself when no such String is present.
def substituteScaleKeyword(sc, i, keyword, scale)
  if sc[i].is_a?(String) && sc[i].include?(keyword)
    sc[i].sub!(keyword, scale.to_s)
  else
    sc[i] = scale
  end
  # Only Strings are matched against the pattern: matching a number with =~
  # is not possible in current Ruby.
  sc[i] = sc[i].to_f if sc[i].is_a?(String) && sc[i] =~ /^\d+\.*\d*$/
end
|
340
|
+
|
341
|
+
# The object variable @examples's features are centered around zero and scaled
|
342
|
+
# according to the configuration. I.e. each feature has a center term and a scaling
|
343
|
+
# factor.
|
344
|
+
# Replaces every vector stored in @examples by its centered and scaled
# version, as computed by scaleExample.
#
# Returns @examples.
def scaleExamples()
  @examples.each_key do |name|
    @examples[name] = scaleExample(@examples[name])
  end
end
|
349
|
+
|
350
|
+
# Centers and scales one raw example vector.
#
# vector - Array with the values of all configured features in
#          configuration order; a feature occupies as many consecutive
#          positions as its 'Dimensions' setting (1 when not given).
#
# Each value becomes (value - center) * scale, using the center and scale
# configured for its feature and dimension; the scale is converted with to_f.
#
# Returns a new Array; the argument is not modified.
def scaleExample(vector)
  scaledvector = []
  offset = 0
  @cfg['Feature']['Features'].each do |feature|
    dim = @cfg['Feature'][feature]['Dimensions'] || 1
    dim.times do |j|
      shifted = vector[offset + j] - @cfg['SVM']['Center'][feature][j]
      scaledvector[offset + j] = shifted * @cfg['SVM']['Scale'][feature][j].to_f
    end
    offset += dim
  end
  scaledvector
end
|
363
|
+
|
364
|
+
end
|
365
|
+
|