svmlab 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/README +219 -0
- data/lib/arraymethods.rb +87 -0
- data/lib/irb.history +100 -0
- data/lib/libsvmdata.rb +122 -0
- data/lib/svmfeature.rb +337 -0
- data/lib/svmfeature2.rb +98 -0
- data/lib/svmlab-config.rb +215 -0
- data/lib/svmlab-irb.rb +98 -0
- data/lib/svmlab-optim.rb +556 -0
- data/lib/svmlab-plot.rb +170 -0
- data/lib/svmlab.rb +365 -0
- data/lib/svmprediction.rb +176 -0
- data/lib/test.cfg +12 -0
- data/lib/test.rb +5 -0
- data/lib/testdata +3 -0
- data/lib/texput.log +20 -0
- data/lib/tmp.irb.rc +81 -0
- data/lib/v6.cfg +124 -0
- metadata +102 -0
data/lib/svmlab-plot.rb
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
# ---------------------------------------------------------------------------------------
|
2
|
+
# Plot methods
|
3
|
+
|
4
|
+
require 'gnuplot'
|
5
|
+
|
6
|
+
|
7
|
+
# Each should be an array giving more than one plot
|
8
|
+
# Sends +plotdata+ to gnuplot with a title, axis labels and a grid.
#
# plotdata:: array of Gnuplot::DataSet objects, assigned unchanged to plot.data
# file::     output file name. A name ending in "png" or "ps" selects the
#            matching terminal and writes the plot to that file; for any other
#            name neither terminal nor output is set.
# title::    plot title
# xtitle::   X axis label
# ytitle::   Y axis label
#
# Returns whatever Gnuplot.open returns.
def genericplot(plotdata, file, title='Plot', xtitle='X', ytitle='Y')
  Gnuplot.open do |gp| # This could be either a file or the gnuplot process that we pipe to
    Gnuplot::Plot.new(gp) do |plot|
      plot.title title
      plot.xlabel xtitle
      plot.ylabel ytitle
      plot.set "grid"
      # Each suffix is anchored on its own. The former /(png)|(ps)$/ only
      # anchored "ps", so any name merely containing "png" was sent to
      # plot.output without a terminal being chosen.
      terminal =
        case file
        when /png\z/ then "png size 800,600 font Vera 16"
        when /ps\z/  then "postscript color \"Helvetica\" 16"
        end
      if terminal
        # The png terminal above asks for the Vera font. Remember to add the
        # following line to your .bashrc file:
        #   export GDFONTPATH=/usr/share/fonts/truetype/ttf-bitstream-vera/
        plot.terminal terminal
        plot.output file
      end
      plot.data = plotdata
    end
  end
end
|
27
|
+
|
28
|
+
# --- predplot ---
|
29
|
+
# PredictionPlot: Plots true value on the X axis vs. predicted value on the Y axis.
|
30
|
+
# Scatter plot of predicted value (Y axis) against true value (X axis) for one
# or more prediction sets, together with the diagonal on which perfect
# predictions would lie.
#
# predarr:: one prediction set or an Array of them. Each set is iterated as
#           (example name, value) pairs where value['truth'] and value['pred']
#           are the numbers to plot.
# legends:: one legend per prediction set. Missing entries default to
#           "Sample <k>", surplus entries are ignored.
# title::   plot title
# err::     optional expression evaluated on each prediction set (for example
#           the name of an error-measure method); its value is appended to the
#           legend with two decimals.
# file::    output file name, passed on to genericplot
#
# Raises ArgumentError if there is no value at all to plot.
# Always returns nil.
def predplot(predarr, legends = [], title = 'SVM Prediction', err = nil, file = '')
  predarr = [predarr] unless predarr.is_a? Array

  # One [truths, predictions] pair of parallel arrays per prediction set.
  dataarr = predarr.map do |predictions|
    truths = []
    guesses = []
    predictions.each do |_example, val|
      truths << val['truth']
      guesses << val['pred']
    end
    [truths, guesses]
  end

  # The diagonal spans the integer range enclosing every plotted value.
  values = dataarr.flatten
  raise ArgumentError, 'predplot needs at least one prediction to plot' if values.empty?
  from = values.min.floor
  to = values.max.ceil

  # Exactly one legend per prediction set, so that neither the error suffix
  # below nor the data set titles ever see a nil legend.
  legends = [] if legends.nil? || legends.empty?
  legends = (0...predarr.size).map { |k| legends[k] || "Sample #{k + 1}" }
  if err
    legends = legends.zip(predarr).map do |legend, pred|
      # NOTE(review): err is spliced into eval'd code. Only pass trusted
      # expressions here, never user-supplied text.
      legend + " (#{err} = ".upcase + "%.2f" % eval("pred.#{err}") + ')'
    end
  end

  clouds = dataarr.zip(legends).map do |xy, legend|
    Gnuplot::DataSet.new(xy) do |ds|
      ds.using = '1:2'
      ds.with = "points"
      ds.title = legend
      ds.linewidth = 2
      ds.matrix = nil
    end
  end
  diagonal = Gnuplot::DataSet.new([[from, to], [from, to]]) do |ds|
    ds.using = '1:2'
    ds.with = "lines"
    ds.title = "Correct diagonal"
    ds.linewidth = 1
    ds.matrix = nil
  end

  # The diagonal stays in second position, as before, so the order in which
  # gnuplot receives the data sets is unchanged.
  plotdata = [clouds.first, diagonal] + clouds[1..-1]
  genericplot(plotdata, file, title, 'Experimental value', 'Predicted value')
  nil
end
|
83
|
+
|
84
|
+
class SVMLab
|
85
|
+
|
86
|
+
# --- featurecorrelationplot ---
|
87
|
+
# Plots target feature on the Y axis vs. selected feature on the X axis
|
88
|
+
# Scatter plot of the target feature (vector index 0, Y axis) against the
# feature at vector index +feature+ (X axis). Every stored value is divided
# by its @scale entry and shifted by its @center entry before plotting.
#
# feature:: index into the example vectors
# file::    output file name, passed on to genericplot
# title::   plot title
#
# Raises RuntimeError if +feature+ lies outside an example's vector.
# Returns the result of genericplot.
#
# NOTE(review): @scale and @center are not assigned in the code visible
# here - confirm where they are set before relying on this method.
def featurecorrelationplot( feature, file = '', title = 'Feature correlation')
  xs = []
  ys = []
  @examples.each do |_name, vector|
    raise "#{feature} outside feature range" unless (0...vector.size) === feature
    xs << vector[feature] / @scale[feature] + @center[feature]
    ys << vector[0] / @scale[0] + @center[0]
  end

  cloud = Gnuplot::DataSet.new([xs, ys]) do |ds|
    ds.using = '1:2'
    ds.with = "points"
    ds.title = "Feature #{feature} vs target feature"
    ds.linewidth = 1
    ds.matrix = nil
  end
  genericplot([cloud], file, title, "Feature #{feature}", "Target feature")
end
|
103
|
+
|
104
|
+
# --- predplotgroups ---
|
105
|
+
# Draws one prediction plot per example group by calling predplot once for
# every group.
#
# The group of an example is the part of its name selected by the
# @cfg['Feature']['Groups'] setting (for example "(0..3)").
#
# predarr:: Array of prediction sets (example name => prediction hash)
# file::    output file name. When non-empty, the group name is inserted
#           before the last dot-separated part ("out.png" -> "out.<group>.png").
# legends:: legends passed on to predplot
# title::   plot title; " on <group>" is appended for each plot
# err::     error measure passed on to predplot
#
# Returns the array of group names.
def predplotgroups(predarr, file = '', legends = [], title = 'SVM Prediction', err = nil)
  # NOTE(review): the Groups setting is eval'd as Ruby code, so the
  # configuration must come from a trusted source. It is evaluated once
  # here instead of once per example.
  selector = eval(@cfg['Feature']['Groups'])
  groups = @examples.map { |name, _vector| name[selector] }.uniq
  name_parts = file.split(/\./)

  groups.each do |group|
    # Restrict every prediction set to the members of this group.
    predarr2 = predarr.map do |preds|
      preds.find_all { |name, _val| name[selector] == group }.
        inject({}) { |subset, (name, val)| subset[name] = val; subset }
    end
    groupfile =
      if file.size == 0 then file
      else [name_parts[0...-1], group, name_parts.last].join('.') end
    # predplot's parameter order is (predarr, legends, title, err, file).
    # The previous call passed the file name in the legends position and
    # shifted every later argument by one.
    predplot(predarr2, legends, title + " on #{group}", err, groupfile)
  end
end
|
121
|
+
|
122
|
+
|
123
|
+
|
124
|
+
# --- onefeatureplot ---
|
125
|
+
# Plots the model's prediction curve over the range of vector index 1 of the
# stored examples, together with the examples themselves.
#
# The curve is sampled at 1001 evenly spaced X positions between the smallest
# and the largest stored value at index 1. Each sample is predicted with
# @model.predict([x]), divided by @scale[0] and shifted by @center[0].
#
# file::  output file name. A name ending in "png" or "ps" selects the
#         matching terminal and writes the plot to that file; for any other
#         name neither terminal nor output is set.
# title:: plot title
#
# Returns whatever Gnuplot.open returns.
#
# NOTE(review): @model, @scale and @center are not assigned in the code
# visible here - confirm where they are set. The axes are labelled
# "Truth"/"Prediction" although X carries vector index 1 - confirm intent.
def onefeatureplot(file='', title = 'SVM Prediction')
  xt = []
  yt = []
  @examples.each do |_example, features|
    xt.push(features[1].to_f)
    yt.push(features[0].to_f)
  end

  # Range of the sampled X positions, computed once rather than per sample.
  lowest = xt.min
  span = xt.max - lowest
  xp = []
  yp = []
  (0..1000).each do |i|
    x = (i * span / 1000 + lowest).to_f
    xp.push(x)
    yp.push(@model.predict([x]) / @scale[0] + @center[0])
  end

  Gnuplot.open do |gp| # This could be either a file or the gnuplot process that we pipe to
    Gnuplot::Plot.new(gp) do |plot|
      plot.title title
      plot.xlabel "Truth"
      plot.ylabel "Prediction"
      plot.set "grid"
      # Each suffix is anchored on its own. The former /(png)|(ps)$/ only
      # anchored "ps", so any name merely containing "png" was sent to
      # plot.output without a terminal being chosen.
      terminal =
        case file
        when /png\z/ then "png size 800,600 small"
        when /ps\z/  then "postscript"
        end
      if terminal
        plot.terminal terminal
        plot.output file
      end
      plot.data = [
        Gnuplot::DataSet.new([xp, yp]) { |ds|
          ds.using = '1:2'
          ds.with = "lines"
          ds.title = "SVM prediction"
          ds.linewidth = 1
          ds.matrix = nil
        },
        Gnuplot::DataSet.new([xt, yt]) { |ds|
          ds.using = '1:2'
          ds.with = "points"
          ds.title = "Correct prediction"
          ds.linewidth = 1
          ds.matrix = nil
        }
      ]
    end
  end
end
|
169
|
+
|
170
|
+
end
|
data/lib/svmlab.rb
ADDED
@@ -0,0 +1,365 @@
|
|
1
|
+
require 'SVM'
require 'yaml'
require 'rubygems'
require 'tempfile'
require 'date'
require 'fileutils'
|
5
|
+
#require 'forkoff'
|
6
|
+
|
7
|
+
require 'svmfeature.rb'
|
8
|
+
require 'svmlab-optim.rb'
|
9
|
+
require 'svmlab-plot.rb'
|
10
|
+
require 'svmprediction.rb'
|
11
|
+
require 'svmlab-config.rb'
|
12
|
+
|
13
|
+
# An SVMLab object is created giving the configuration either as a file
|
14
|
+
# object or as a string. The configuration is in YAML format:
|
15
|
+
#
|
16
|
+
# ---
|
17
|
+
# Feature:
|
18
|
+
# <See SVMFeature class documentation>
|
19
|
+
# SVM:
|
20
|
+
# C: <parameter C>
|
21
|
+
# g: <RBF kernel's gamma>
|
22
|
+
# e: <epsilon for regression>
|
23
|
+
# Scale:
|
24
|
+
# <Feature1>:
|
25
|
+
# - <Scale1>
|
26
|
+
# - <Scale2>
|
27
|
+
# - ...
|
28
|
+
# - <ScaleN>
|
29
|
+
# <Feature2>: <Scale>
|
30
|
+
#
|
31
|
+
# The Scale setup has to match the features given in Feature configuration
|
32
|
+
# and each scale can be given as scalar or as array.
|
33
|
+
#
|
34
|
+
|
35
|
+
class SVMLab
|
36
|
+
|
37
|
+
attr_reader :cfg, :pslog, :features
|
38
|
+
|
39
|
+
# All examples are centered and scaled and the centered/scaled examples are stored
|
40
|
+
# in the object variable @examples. Information about the centering/scaling
|
41
|
+
# is stored in the @cfg['SVM'] part of the configuration hash
|
42
|
+
# There are three ways to initialize.
|
43
|
+
# 1) With an SVMLabConfig object
|
44
|
+
# 2) With a configuration file File object
|
45
|
+
# 3) With a string giving the configuration
|
46
|
+
# Builds the lab from a configuration.
#
# cfg:: an SVMLabConfig, or anything SVMLabConfig.new accepts
#
# The features are loaded through SVMFeature, the centering/scaling and
# optimization settings are checked, every example is scaled, and the
# example groups are stored in @groups.
def initialize(cfg)
  @cfg =
    case cfg
    when SVMLabConfig then cfg
    else SVMLabConfig.new(cfg)
    end
  @features = SVMFeature.new(@cfg['Feature'].to_yaml)
  @examples = @features.getAllFeatures
  @ndimensions = nil

  # The checks run before scaling: checkScales fills in the center and
  # scale values that scaleExamples then applies.
  checkScales(@cfg)
  checkOptimization(@cfg)
  scaleExamples
  @groups = setGroups()
  #puts 'Groups:', @groups.map{ |group,members| {group => members.map{|name,feat| name}.size} }.to_yaml
end
|
59
|
+
|
60
|
+
# --- setGroups ---
|
61
|
+
# Return value:
|
62
|
+
# groups hash:
|
63
|
+
# key : group name
|
64
|
+
# value : array of example names
|
65
|
+
# Uses one group per example (leave-one-out) if @cfg['Feature']['Groups'] is not set;
# returns nil if it is set but matches neither supported syntax
|
66
|
+
# Partitions the examples into groups according to @cfg['Feature']['Groups'].
#
# Three cases:
# * not set - every example forms its own group (leave-one-out)
# * "(n1..n2)" or "(n1...n2)" - examples are grouped by that substring of
#   their name; bounds may have any number of digits
# * anything else is taken as a file name prefix: every file in
#   @cfg['Feature']['BaseDir'] named <prefix><digits> is one group, keyed by
#   the digits, holding one example name per line
#
# Returns a hash mapping group name to an array of example names, or nil if
# Groups is set but no group file is found.
def setGroups()
  groups = @cfg['Feature']['Groups']
  names = @examples.map { |name, _vector| name }

  # No grouping configured: leave-one-out.
  return names.inject({}) { |hash, name| hash[name] = [name]; hash } unless groups

  # Range syntax. The range is built from the captured numbers instead of
  # eval'ing the configuration string; \A and \z make sure the whole
  # setting, not just one of its lines, has this form.
  if (range_spec = /\A\((\d+)(\.{2,3})(\d+)\)\z/.match(groups.to_s))
    range = Range.new(range_spec[1].to_i, range_spec[3].to_i, range_spec[2].size == 3)
    return names.group_by { |name| name[range] }
  end

  # File prefix syntax. The prefix is matched literally.
  basedir = @cfg['Feature']['BaseDir']
  gfiles = Dir.entries(basedir).grep(/\A#{Regexp.escape(groups)}\d+\z/)
  return nil if gfiles.empty?
  gfiles.inject({}) do |hash, file|
    key = file[groups.size..-1]
    # File.join also works when BaseDir has no trailing slash.
    hash[key] = File.read(File.join(basedir, file)).split(/\n/)
    hash
  end
end
|
89
|
+
|
90
|
+
# Set the penalty factor C.
|
91
|
+
# Stores +arg+, converted with to_f, as @cfg['SVM']['C'].
def C=(arg)
  penalty = arg.to_f
  @cfg['SVM']['C'] = penalty
end
|
94
|
+
|
95
|
+
# Set epsilon for Support Vector Regression.
|
96
|
+
# Stores +arg+, converted with to_f, as @cfg['SVM']['e'].
def e=(arg)
  epsilon = arg.to_f
  @cfg['SVM']['e'] = epsilon
end
|
99
|
+
|
100
|
+
# Set gamma for the RBF kernel.
|
101
|
+
# Stores +arg+, converted with to_f, as @cfg['SVM']['g'].
def g=(arg)
  gamma = arg.to_f
  @cfg['SVM']['g'] = gamma
end
|
104
|
+
|
105
|
+
# Returns the n closest neighbors of the @example hash to example
|
106
|
+
# Possibly broken - check @feature hash if an erroneous example
|
107
|
+
# Describes the stored examples closest to +example+.
#
# Distance is measured with dist over everything but the first (target)
# element of the stored vectors. The n+1 closest entries are reported, which
# includes +example+ itself at distance zero when it is stored.
#
# example:: name of a stored example
# n::       index of the last ranked entry to report (0 = closest only)
#
# Returns an Array of Strings, one per reported entry: a header line with
# the name and the overall distance, followed by one line per configured
# feature giving the per-feature distance and the feature's values as
# returned by @features.getExFeature. The first configured feature is
# marked with '***', the others with '---'.
def getNeighbors(example, n = 1)
  target = @examples[example]
  feature_names = @cfg['Feature']['Features']
  ranked = @examples.sort_by do |_name, vector|
    dist(target[1...target.size], vector[1...vector.size])
  end

  ranked[0..n].map do |name, vector|
    report = name + ' : ' + ("%.3f \n" % dist(target[1...target.size], vector[1...vector.size]))
    offset = 0 # position of the current feature inside the vectors
    feature_names.each do |feature|
      nvector = @features.getExFeature(name, feature)
      width = nvector.size
      featdist = dist(target[offset...offset + width], vector[offset...offset + width])
      offset += width
      marker = feature == feature_names[0] ? ' *** ' : ' --- '
      report += marker + ("(%.2f)" % featdist) + marker + feature + " : " + nvector.join(' ') + "\n"
    end
    report
  end
end
|
128
|
+
|
129
|
+
# Euclidean distance between two equally long numeric arrays.
#
# Raises RuntimeError if the arrays differ in size.
def dist(a,b)
  raise "Cannot calculate distance" unless a.size == b.size
  squared = 0
  a.each_with_index { |ai, idx| squared += (ai - b[idx]).abs**2 }
  Math.sqrt(squared)
end
|
133
|
+
|
134
|
+
# Finds those examples that have been predicted most far off
|
135
|
+
# the correct value. Returns a string consisting of those
|
136
|
+
# examples along with the closest neighbors.
|
137
|
+
# Reports the examples whose prediction is furthest from the truth, each
# followed by its closest neighbors.
#
# n::           which outliers to report, counted from 1 (worst prediction):
#               either a single Integer or a Range of ranks
# n2::          passed to getNeighbors as the neighbor count
# predictions:: example name => {'pred' => ..., 'truth' => ...}; a fresh
#               crossvalidation is run when omitted
#
# Ranks beyond the number of predictions are skipped.
# Returns one String with the individual reports joined by newlines.
def getOutliers(n = (1..1), n2 = 3, predictions = nil)
  predictions ||= crossvalidate
  # Largest absolute error first.
  sortedpred = predictions.sort_by { |(_name, v)| -(v['pred'] - v['truth']).abs }
  # Integer rather than Fixnum: Fixnum no longer exists in current Ruby.
  ranks = n.is_a?(Integer) ? (n..n) : n
  reports = ranks.map do |rank|
    entry = sortedpred[rank - 1]
    next unless entry
    name, outcome = entry
    ("OUTLIER %d : \n" % rank) +
      name + (" was predicted %.3f" % outcome['pred']) +
      (" but the truth is %.3f :\n" % outcome['truth']) +
      getNeighbors(name, n2).join('')
  end
  reports.compact.join("\n")
end
|
151
|
+
|
152
|
+
# Returns a String of all examples with features.
|
153
|
+
# Returns a String with one line per stored example: the elements of its
# vector separated by single spaces. Example names are not included.
def printExamples
  lines = @examples.map do |_exname, vector|
    vector.map { |v| v.to_s }.join(' ') + "\n"
  end
  lines.join
end
|
158
|
+
|
159
|
+
# An outer binding for the RubySVM predict function. This binding introduces
|
160
|
+
# inverse centering and scaling of the predicted value. This in order
|
161
|
+
# to give a real prediction value.
|
162
|
+
# Predicts one or several examples with +model+ and undoes the centering and
# scaling of the target, so that the result is on the original scale.
#
# examples:: a single example name (String) or a collection of names
# model::    a trained model; a model trained on all data is built when omitted
#
# A name that is not among the stored examples has its features computed
# through a fresh SVMFeature object and scaled with scaleExample. When
# @cfg['Feature']['PosClassFrom'] is set the raw model output is rounded
# instead of being unscaled.
#
# Returns the prediction itself for a single example, otherwise an Array of
# predictions. If predicting an example raises, the exception object takes
# the place of that example's prediction instead of propagating.
def predict(examples, model = nil)
  model ||= train
  names = examples.is_a?(String) ? [examples] : examples

  predictions = names.map do |example|
    begin
      vector = @examples[example]
      unless vector
        # Unknown example: derive a single-example feature configuration
        # from a deep copy of the current one.
        fcfg = Marshal.load(Marshal.dump(@cfg['Feature']))
        fcfg.delete('DataSet')
        fcfg['Example'] = example
        vector = scaleExample(SVMFeature.new(fcfg.to_yaml).getExAllFeatures(example))
      end

      # Element 0 of the vector is the target; the rest is the model input.
      if @cfg['Feature']['PosClassFrom']
        model.predict(vector[1..-1]).round
      else
        target = @cfg['Feature']['Features'][0]
        model.predict(vector[1..-1]) /
          @cfg['SVM']['Scale'][target][0] +
          @cfg['SVM']['Center'][target][0]
      end
    rescue => error
      error
    end
  end

  predictions.size == 1 ? predictions[0] : predictions
end
|
190
|
+
|
191
|
+
# An outer binding for the RubySVM training function.
|
192
|
+
# If no training examples given, it will train on all data in the dataset.
|
193
|
+
# Trains a model on the stored (scaled) examples.
#
# examples:: names of the examples to train on; all stored examples are used
#            when omitted. Names without a stored vector are skipped.
#
# Element 0 of every vector is passed to the SVM problem as the label, the
# remaining elements as the feature vector. The parameters come from
# @cfg['SVM'] ('C', 'e', 'g'); without 'g', gamma is 1 / number of feature
# dimensions. Standard output and standard error are redirected to the null
# device while the model is built and restored afterwards.
#
# Returns the SVM::Model.
#
# NOTE(review): svm_type 0 / 3 are presumably the binding's codes for
# classification / epsilon regression, and 'e' is mapped to the parameter
# named eps - confirm both against the SVM binding.
def train(examples = nil)
  svm = SVM::Problem.new
  vectors =
    if examples
      # Drop unknown names before touching their vectors; the previous
      # code read the vector size first and failed on an unknown name.
      examples.map { |exname| @examples[exname] }.compact
    else
      @examples.map { |_name, vector| vector }
    end
  vectors.each do |vector|
    @ndimensions ||= vector.size - 1
    svm.addExample(vector[0], vector[1..-1])
  end

  saved_err = STDERR.dup
  saved_out = STDOUT.dup
  begin
    # reopen duplicates the descriptor, so the null handle can be closed
    # again as soon as the block ends.
    File.open(defined?(File::NULL) ? File::NULL : '/dev/null', 'w') do |null|
      STDERR.reopen(null)
      STDOUT.reopen(null)
    end
    @par = SVM::Parameter.new
    @par.svm_type = @cfg['Feature']['PosClassFrom'] ? 0 : 3
    if (c = @cfg['SVM']['C']) then @par.C = c.to_f end
    if (e = @cfg['SVM']['e']) then @par.eps = e.to_f end
    g = @cfg['SVM']['g']
    @par.gamma = g ? g.to_f : 1.0 / @ndimensions
    SVM::Model.new(svm, @par)
  ensure
    STDERR.reopen(saved_err)
    STDOUT.reopen(saved_out)
    # The duplicates are only needed for restoring; close them so that
    # repeated training does not accumulate open descriptors.
    saved_err.close
    saved_out.close
  end
end
|
221
|
+
|
222
|
+
# crossvalidation on a grouping made from "Groups" in cfg
|
223
|
+
# Return values:
|
224
|
+
# - Predictions hash :
|
225
|
+
# key : example name
|
226
|
+
# value : 'truth' => the true value
|
227
|
+
# 'pred' => the predicted value
|
228
|
+
#--
|
229
|
+
# Remaining issues:
|
230
|
+
# 2) No of parallel computations should be in cfg
|
231
|
+
#++
|
232
|
+
# Cross-validation over the groups in @groups: each group in turn is left
# out, a model is trained on the members of all other groups, and the
# members of the left-out group are predicted with it.
#
# Returns an SVMPrediction holding, per example name,
#   'truth' => the true target value (unscaled, or rounded when
#              @cfg['Feature']['PosClassFrom'] is set)
#   'pred'  => the value returned by predict
# Group members without a stored example are left out of the result.
def crossvalidate()
  per_group = @groups.keys.map do |group|
    # Everything outside the current group is training data.
    trainingex = []
    @groups.each do |trgroup, trmem|
      trainingex += trmem unless trgroup == group
    end
    model = train(trainingex)

    heldout = {}
    @groups[group].each do |predname|
      next unless @examples[predname]
      truth =
        if @cfg['Feature']['PosClassFrom']
          @examples[predname][0].round
        else
          target = @cfg['Feature']['Features'][0]
          @examples[predname][0] /
            @cfg['SVM']['Scale'][target][0] +
            @cfg['SVM']['Center'][target][0]
        end
      heldout[predname] = { 'truth' => truth, 'pred' => predict(predname, model) }
    end
    heldout
  end

  merged = SVMPrediction.new
  per_group.each do |predhash|
    predhash.each { |exname, phash| merged[exname] = phash }
  end
  merged
end
|
256
|
+
|
257
|
+
# Same as crossvalidate, but also outputs configuration and result
|
258
|
+
# to a file.
|
259
|
+
# Runs crossvalidate and writes the outcome to +path+ as YAML.
#
# The file holds the current time, the RMSD and CC of the predictions, the
# configuration and the predictions themselves. The directory that is to
# contain the file is created if necessary, including all of its parents.
#
# path:: file to write; overwritten if it already exists
#
# Returns the predictions object produced by crossvalidate.
def publish_crossvalidate(path)
  predictions = crossvalidate
  info = {
    'Time'          => DateTime.now,
    'Evaluation'    => {
      'RMSD' => predictions.rmsd,
      #'MeanErr' => predictions.meanerr,
      'CC'   => predictions.cc },
      #'AUC' => auc(predictions,1),
      #'PBRMSD' => pbrmsd(predictions, @cfg['Feature']['Groups']),
      #'WRMSD' => wrmsd(predictions,1),
      #'F1' => f1(predictions,2) },
    'Configuration' => @cfg,
    'Predictions'   => predictions.predictions
  }
  # mkdir_p creates every missing level. The former hand-written loop never
  # created the first component of a relative path and used File.exists?,
  # which current Ruby no longer provides.
  FileUtils.mkdir_p(File.dirname(path))
  File.open(path, 'w') { |f| YAML.dump(info, f) }
  predictions
end
|
283
|
+
|
284
|
+
private
|
285
|
+
|
286
|
+
# Go through each feature and check its centering
|
287
|
+
# and scaling instructions.
|
288
|
+
# Completes the centering and scaling instructions in cfg['SVM'] so that
# every dimension of every configured feature has a center and a scale.
#
# For each dimension:
# * the center is kept if configured, otherwise set to the mean over all
#   examples
# * the scale is resolved by resolveScaleSpec from the configured entry
#   and the absolute deviations of the examples from the center
# * for the first feature, when @cfg['Feature']['PosClassFrom'] is set, the
#   center is forced to 0 and the scale to 1
#
# cfg:: configuration to complete; cfg['SVM']['Center'] and
#       cfg['SVM']['Scale'] are created if missing and modified in place
#
# Must run before the examples are scaled, since it reads raw values from
# @examples. Returns the feature list it iterated over.
def checkScales(cfg)
  cfg['SVM']['Center'] ||= {}
  cfg['SVM']['Scale'] ||= {}
  dim0 = 0 # index of the current feature's first dimension in the vectors
  cfg['Feature']['Features'].each_with_index do |feature, index|
    dim = @cfg['Feature'][feature]['Dimensions'] || 1
    centers = (cfg['SVM']['Center'][feature] ||= [])
    sc = (cfg['SVM']['Scale'][feature] ||= [])
    fixed = index == 0 && @cfg['Feature']['PosClassFrom']
    (0...dim).each do |i|
      if fixed
        centers[i] = 0
        sc[i] = 1
        next
      end
      column = @examples.map { |_exname, vector| vector[dim0 + i] }
      # Float division, so that integer-valued features get an exact mean.
      centers[i] ||=
        column.empty? ? 0.0 : column.inject(0) { |s, v| s + v }.to_f / column.size
      deviations = column.map { |v| (v - centers[i]).abs }
      sc[i] = resolveScaleSpec(sc[i], deviations)
    end
    dim0 += dim
  end
  #puts 'CENTER',@cfg['SVM']['Center'].to_yaml
  #puts 'SCALE',@cfg['SVM']['Scale'].to_yaml
end

# Resolves one configured scale entry.
#
# spec::       nil, a number, or a String that may contain one of the
#              keywords 'max', 'avg' or 'std'
# deviations:: absolute deviations of the examples from the center
#
# nil yields the automatic 'max' scale. Anything that is not a String, and
# any String without a keyword, is returned unchanged. In a String with a
# keyword the first occurrence is replaced by the automatic scale; if the
# result is a plain decimal number it is converted to a Float, otherwise the
# String is returned as it is.
def resolveScaleSpec(spec, deviations)
  return automaticScale('max', deviations) unless spec
  # Only Strings are pattern-matched: matching a number against a Regexp
  # raises in current Ruby.
  return spec unless spec.is_a?(String)
  keyword = %w[max avg std].find { |word| spec.include?(word) }
  return spec unless keyword
  # Non-destructive sub: the configured String may be shared between entries.
  resolved = spec.sub(keyword, automaticScale(keyword, deviations).to_s)
  resolved =~ /^\d+\.*\d*$/ ? resolved.to_f : resolved
end

# Automatic scale factor for the given absolute deviations:
# 'max' -> 1 / largest deviation, 'avg' -> 1 / mean deviation,
# 'std' -> 1 / root mean squared deviation. Yields 1.0 when the deviations
# are all zero (or absent), so that a constant feature is left unscaled.
def automaticScale(keyword, deviations)
  spread =
    case keyword
    when 'max' then deviations.max || 0
    when 'avg' then deviations.inject(0) { |s, d| s + d }
    when 'std' then deviations.inject(0) { |s, d| s + d**2 }
    end
  return 1.0 if spread == 0
  case keyword
  when 'max' then 1.0 / spread
  when 'avg' then deviations.size / spread.to_f
  when 'std' then Math.sqrt(deviations.size / spread.to_f)
  end
end
|
340
|
+
|
341
|
+
# The object variable @examples's features are centered around zero and scaled
|
342
|
+
# according to the configuration. I.e. each feature has a center term and a scaling
|
343
|
+
# factor.
|
344
|
+
# Replaces every vector stored in @examples by its centered and scaled
# version as computed by scaleExample. Returns @examples.
def scaleExamples()
  @examples.keys.each do |name|
    @examples[name] = scaleExample(@examples[name])
  end
  @examples
end
|
349
|
+
|
350
|
+
# Centers and scales one example vector.
#
# The configured features are walked in order; each covers 'Dimensions'
# consecutive elements of the vector (1 if not configured). Every element
# has its configured center subtracted and is then multiplied by its
# configured scale, converted with to_f.
#
# vector:: raw feature values, target first
#
# Returns a new Array; elements of +vector+ beyond the configured
# dimensions are not copied.
def scaleExample(vector)
  scaledvector = []
  offset = 0
  @cfg['Feature']['Features'].each do |feature|
    dim = @cfg['Feature'][feature]['Dimensions'] || 1
    dim.times do |j|
      centered = vector[offset + j] - @cfg['SVM']['Center'][feature][j]
      scaledvector << centered * @cfg['SVM']['Scale'][feature][j].to_f
    end
    offset += dim
  end
  scaledvector
end
|
363
|
+
|
364
|
+
end
|
365
|
+
|