svmlab 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,337 @@
1
+ require 'yaml'
2
+ #require 'forkoff'
3
+ require 'svmlab-config.rb'
4
+
5
+ # An SVMFeature object is initialized with either a string in YAML format
6
+ # or a file object pointing to a file with configuration given in YAML format.
7
+ #
8
+ # The SVMFeature class is the 'middleware' between software that calculates
9
+ # features and the SVMLab class. SVMFeature's features are :
10
+ # - Maintenance of a database of calculated features. This is done to
11
+ # minimize CPU time needed when developing an SVM experiment. The motto
12
+ # is that a calculation should be done only once and then never again.
13
+ # - Meta features can be defined that use other features to calculate
14
+ # a feature . In these cases, a structure similar to the entire configuration
15
+ # should be given for this feature.
16
+ #
17
+ # ---CONFIGURATION---
18
+ #
19
+ # SVMFeature is initiated by a configuration file in YAML format.
20
+ # Required fields in configuration are:
21
+ # (all paths can be given either absolute or relative to BaseDir)
22
+ #
23
+ # ---
24
+ # Features:
25
+ # - <targetfeature>
26
+ # - <feature1>
27
+ # - <feature2>
28
+ # ...
29
+ # BaseDir: <base directory>
30
+ # DataSet: <path of file giving the dataset>
31
+ # OR
32
+ # <prefix of a set of files giving the dataset>
33
+ # Example: If Dataset is not given, this gives name(s) of examples
34
+ # to use.
35
+ # Groups: <range (n1..n2) or (n1...n2) in example name to use for grouping>
36
+ # OR
37
+ # <file prefix relative to BaseDir for files giving groups>
38
+ # Methods: <path of .rb file holding all feature calculation methods>
39
+ # --------------------------------------------------------------
40
+ # targetfeature:
41
+ # HomeDir: <home directory>
42
+ # If HomeDir is not given, it will be set to BaseDir/featurename
43
+ # Method: <the method calculating this feature>
44
+ # If Method is not given, an attempt is made to acquire
45
+ # the feature from the database. If it fails, ERROR is reported.
46
+ # Dimensions: <the number of dimensions in this feature>
47
+ # If Dimensions is not given, it will be assumed to be 1
48
+ # and only the first value for each example will be used
49
+ # <Further specific configuration of this feature>
50
+ # feature1:
51
+ # HomeDir: <home directory>
52
+ # Method: <the method calculating this feature>
53
+ # Dimensions: <the number of dimensions in this feature>
54
+ # <Further specific configuration of this feature>
55
+ # feature2:
56
+ # ...
57
+ # featureN:
58
+ # If featureN configuration is not given, then all default settings
59
+ # will apply to this feature.
60
+ # ...
61
+ #
62
+ #
63
+
64
# Middleware between feature-calculation methods and the SVM layer.
#
# An SVMFeature is a Hash mapping example name => array of feature values
# (the target feature first). Calculated values are cached on disk, one YAML
# file per feature configuration, indexed by <HomeDir>/filemap.yml.
class SVMFeature < Hash

  attr_reader :dim

  # config is either a file object or a string object holding YAML.
  #
  # Every example of the data set is evaluated immediately. When evaluation
  # of an example fails, the exception is printed to STDERR and stored as
  # that example's value instead of an array.
  def initialize(config)
    @config = SVMFeaturesConfig.new(config)
    @examples = load_examples
    # In-memory copy of the on-disk databases: feature => {example => string}
    @feature = {}
    @examples.each do |ex|
      self[ex] =
        begin
          getExAllFeatures(ex)
        rescue StandardError => e
          STDERR.puts e
          e
        end
    end
  end

  # Returns a string giving the path to the file holding
  # the feature for the current settings.
  # Returns nil if the current settings do not match any file.
  def getDataFile(feature)
    home = @config[feature]['HomeDir']
    filemap_path = File.join(home, 'filemap.yml')
    return nil unless File.file?(filemap_path)
    entry = File.open(filemap_path, 'r') do |f|
      f.flock(File::LOCK_SH)
      begin
        # An empty filemap loads as false/nil; treat it as "no entries".
        (YAML.load(f) || []).find { |c| c['Config'] == @config[feature] }
      ensure
        f.flock(File::LOCK_UN)
      end
    end
    entry ? File.join(home, entry['FeatureFile']) : nil
  end

  # Returns an array of floats giving the selected feature of the selected
  # example.
  # Raises RuntimeError if the value is missing, is a stored ERROR string, or
  # does not have the configured number of dimensions.
  def getExFeature(example, feature)
    raw = getExFeatureInternal(example, feature)
    raise "ERROR: #{feature} is nil." unless raw
    raw = raw.to_s
    if raw =~ /^ERROR/
      raise "ERROR (#{feature}): #{raw.split[1..-1].join(' ')}"
    end
    values = raw.split
    if (dim = values.size) != @config[feature]['Dimensions']
      raise "ERROR (#{feature}): Number of dimensions (#{dim})" +
            " for #{example} is not correct"
    end
    values.map { |v| Float(v) }
  end

  # Returns a string giving the selected feature of the selected example.
  # Lookup order: in-memory hash, on-disk database, calculation (whose whole
  # outcome is then appended to the database).
  def getExFeatureInternal(example, feature)
    # 0. Check the hash
    cached = @feature[feature]
    return cached[example] if cached && cached[example]

    # 1. Look in database if value is available
    stored = read_stored_value(example, feature)
    return stored if stored

    # 2. Calculate the value and add all outcome to the database
    calhash = calculate_feature(example, feature)
    val = calhash[example]
    store_calculated(feature, calhash)
    # Same ISO-8601 layout as DateTime#to_s, without needing 'date' loaded.
    stamp = Time.now.strftime('%Y-%m-%dT%H:%M:%S%:z')
    puts "#{stamp}, Calculated #{example}, #{feature} = #{val}"
    val
  end

  # Returns an array of floats containing all features for given example.
  # If PosClassFrom is configured, the first (target) feature is replaced by
  # the class label 1 (value >= PosClassFrom) or -1.
  def getExAllFeatures(example)
    threshold = @config['PosClassFrom']
    @config['Features'].inject([]) do |row, feature|
      values = getExFeature(example, feature)
      if row.empty? && threshold
        [values.first >= threshold ? 1 : -1]
      else
        row + values
      end
    end
  end

  # Returns a hash for all examples in the data set
  #   key   : example name
  #   value : array of values (float)
  # Examples whose features cannot be obtained are left out.
  def getAllFeatures()
    @examples.each_with_object({}) do |example, output|
      begin
        output[example] = getExAllFeatures(example)
      rescue StandardError
        # Deliberately best-effort: failing examples are excluded.
      end
    end
  end

  # Prints all examples and their features.
  # Prints to a file if given, otherwise to standard output.
  def printFeatures(file = nil)
    lines = getAllFeatures.map { |name, values| name + ' ' + values.join(' ') }
    if file
      File.open(file, 'w') { |f| lines.each { |line| f.puts line } }
    else
      lines.each { |line| puts line }
    end
    nil
  end

  # Returns a string of the n examples with highest target feature, one
  # "name<TAB>value" per line. With n = 0 (default) all examples are listed.
  # Entries holding an exception (failed examples) are skipped.
  def getTopRanking(n = 0)
    rows = select { |_, v| v.is_a?(Array) }.to_a
    rows.sort { |(_, v1), (_, v2)| v1[0] <=> v2[0] }.
      reverse[0..(n - 1)].map { |name, v| "#{name}\t#{v.first}" }.
      join("\n")
  end

  # Returns the column name for the given overall dimension index:
  # the feature name for 1-dimensional features, "<feature>_<k>" otherwise.
  # Returns '' if index is beyond the last dimension.
  def featname(index)
    offset = 0
    @config['Features'].each do |feature|
      dims = @config[feature]['Dimensions']
      if index < offset + dims
        return dims == 1 ? feature : "#{feature}_#{index - offset}"
      end
      offset += dims
    end
    ''
  end

  # Header line of column names followed by one "name v1 v2 ..." line per
  # example, sorted by name. Entries holding an exception are skipped.
  def to_s
    header = (0...@config.dim).map { |i| featname(i) }.join(' ')
    rows = keys.sort.
      select { |key| self[key].is_a?(Array) }.
      map { |key| "#{key} #{self[key].join(' ')}" }
    header + "\n" + rows.join("\n")
  end

  # --- [] ---
  # If indexing with a regular expression, a new SVMPrediction object is
  # created containing all elements with matching keys.
  # NOTE(review): SVMPrediction is not defined in this file - confirm it is
  # loaded wherever regexp indexing is used.
  def [](expr)
    return super(expr) unless expr.is_a?(Regexp)
    subs = SVMPrediction.new
    each { |k, v| subs[k] = v if k =~ expr }
    subs
  end

  private

  # Names of the examples, from the DataSet file (or all files sharing the
  # DataSet prefix) or else from the Example setting. Never returns nil.
  def load_examples
    if (dataset = @config['DataSet'])
      return File.read(dataset).split if File.file?(dataset)
      dir = File.dirname(dataset)
      prefix = File.basename(dataset)
      return [] unless File.directory?(dir)
      Dir.entries(dir).sort.
        select { |name| name.start_with?(prefix) && File.file?(File.join(dir, name)) }.
        inject([]) { |names, name| names + File.read(File.join(dir, name)).split }
    elsif (example = @config['Example'])
      example.is_a?(Array) ? example : [example]
    else
      []
    end
  end

  # Reloads the feature's database into @feature and returns the stored
  # string for example, or nil if there is none.
  def read_stored_value(example, feature)
    dfile = getDataFile(feature)
    return nil unless dfile && File.file?(dfile)
    File.open(dfile, 'r') do |f|
      f.flock(File::LOCK_SH)
      begin
        @feature[feature] = YAML.load(f)
      ensure
        f.flock(File::LOCK_UN)
      end
    end
    @feature[feature] ? @feature[feature][example] : nil
  end

  # Runs the configured method and returns its {example => string} hash.
  # ArgumentError and NameError propagate; any other failure is converted to
  # an 'ERROR: ...' string stored for the example.
  def calculate_feature(example, feature)
    calc = @config[feature]['Method']
    begin
      raise ArgumentError, "No method given for calculation" unless calc
      # The method name comes from the configuration file. It is dispatched
      # with send rather than eval so that a configuration string can only
      # name a method, never inject arbitrary code.
      result = send(calc, @config[feature], example)
      raise "Incorrect output format from #{calc}" unless result.is_a?(Hash)
      result.each_value do |v|
        unless v.is_a?(String)
          raise "Incorrect output class (#{v.class}) from #{calc}"
        end
      end
    rescue ArgumentError
      raise
    rescue NameError
      raise NameError, "Method #{calc} not found."
    rescue StandardError => e
      result = { example => error_string(e) }
    end
    result[example] ||= 'ERROR: No output from method'
    result
  end

  # 'ERROR: <text>' built from the last colon-separated part of the first
  # line of the exception message. The blank after the colon matters:
  # getExFeature drops the first whitespace-separated word when reporting.
  def error_string(exception)
    first_line = exception.to_s.split(/\n/).first.to_s
    text = first_line.split(/\:/).last.to_s.strip
    text = exception.class.name if text.empty?
    "ERROR: #{text}"
  end

  # Appends every not yet stored key of calhash to the feature's database.
  def store_calculated(feature, calhash)
    dfile = getDataFile(feature) || register_data_file(feature)
    File.open(dfile, 'a+') do |f|
      f.flock(File::LOCK_EX)
      begin
        oldhash = YAML.load(f)
        f.puts '--- ' if f.stat.size.zero?
        calhash.each do |k, v|
          next if oldhash && oldhash[k]
          # Strip the document header whatever its exact form ("---\n" or
          # "--- \n") instead of assuming a fixed width.
          f.puts({ k => v }.to_yaml.sub(/\A---[ \t]*\n/, ''))
        end
      ensure
        f.flock(File::LOCK_UN)
      end
    end
  end

  # Adds an entry for the current configuration to filemap.yml (creating the
  # file if needed) and returns the path of the data file.
  def register_data_file(feature)
    home = @config[feature]['HomeDir']
    filemap_path = File.join(home, 'filemap.yml')
    File.open(filemap_path, File::RDWR | File::CREAT, 0644) do |f|
      f.flock(File::LOCK_EX)
      begin
        filemap = YAML.load(f) || []
        entry = filemap.find { |c| c['Config'] == @config[feature] }
        unless entry
          # Store a plain Hash so the filemap contains no Ruby-specific tags.
          entry = { 'FeatureFile' => feature + filemap.size.to_s + '.yml',
                    'Config' => Hash[@config[feature]] }
          filemap.push(entry)
          f.rewind
          YAML.dump(filemap, f)
          f.flush
          f.truncate(f.pos)
        end
        File.join(home, entry['FeatureFile'])
      ensure
        f.flock(File::LOCK_UN)
      end
    end
  end

end
@@ -0,0 +1,98 @@
1
# Read-only view of the feature databases: a Hash mapping example name to
# the stored value strings of all configured features. Only examples that
# have a non-ERROR value for every feature are included.
class SVMFeature < Hash

  attr_reader :cfg

  # config is either an SVMFeaturesConfig, a file object or a string object.
  def initialize(config)
    @cfg = config.is_a?(SVMFeaturesConfig) ? config : SVMFeaturesConfig.new(config)
    updateFromDatabases!
  end

  # Returns the example's feature strings joined by a blank, or the string
  # "I don't have it!" when the example is not present.
  def [](key)
    values = super
    values ? values.join(' ') : "I don't have it!"
  end

  #def []=(key,val)
  #  STDERR.puts "You can't just PUT a value in SVMFeature..."
  #  nil
  #end

  # Reloads every feature's database and stores, for each example that has
  # a usable value in all of them, the array of its feature strings.
  def updateFromDatabases!
    @features = @cfg['Features'].map { |feature| readDatabase(feature) }
    # Only keys of the smallest database can be present in all databases.
    smallest = @features.min_by { |h| h.size }
    return nil unless smallest
    smallest.each_key do |k|
      next if @features.any? { |h| !h[k] || h[k].to_s =~ /ERROR/ }
      self[k] = @features.map { |h| h[k] }
    end
  end

  # Returns the stored {example => string} hash of the feature; {} when
  # there is no database for the current settings or it holds no mapping.
  def readDatabase(feature)
    file = getDataFileToRead(feature)
    return {} unless file && File.file?(file)
    data = File.open(file, 'r') do |f|
      f.flock(File::LOCK_SH)
      begin
        YAML.load(f)
      ensure
        f.flock(File::LOCK_UN)
      end
    end
    data.is_a?(Hash) ? data : {}
  end

  # Returns a string giving the path to the file holding
  # the feature for the current settings.
  # Returns nil if the current settings do not match any file.
  def getDataFileToRead(feature)
    home = @cfg[feature]['HomeDir']
    filemapfname = File.join(home, 'filemap.yml')
    return nil unless File.file?(filemapfname)
    entry = File.open(filemapfname, 'r') do |f|
      f.flock(File::LOCK_SH)
      begin
        # An empty filemap loads as false/nil; treat it as "no entries".
        (YAML.load(f) || []).find { |c| c['Config'] == @cfg[feature] }
      ensure
        f.flock(File::LOCK_UN)
      end
    end
    entry ? File.join(home, entry['FeatureFile']) : nil
  end

  # Returns the path of the data file for the current settings, registering
  # a new one in filemap.yml (created if missing) when none exists yet.
  def getDataFileToWrite(feature)
    existing = getDataFileToRead(feature)
    return existing if existing
    home = @cfg[feature]['HomeDir']
    filemapfname = File.join(home, 'filemap.yml')
    File.open(filemapfname, File::RDWR | File::CREAT, 0644) do |f|
      f.flock(File::LOCK_EX)
      begin
        filemap = YAML.load(f) || []
        newdatafile = feature + filemap.size.to_s + '.yml'
        # Store a plain Hash so the filemap contains no Ruby-specific tags.
        filemap.push({ 'FeatureFile' => newdatafile,
                       'Config' => Hash[@cfg[feature]] })
        f.rewind
        YAML.dump(filemap, f)
        f.flush
        f.truncate(f.pos)
        File.join(home, newdatafile)
      ensure
        f.flock(File::LOCK_UN)
      end
    end
  end

end
@@ -0,0 +1,215 @@
1
+ # SVMLabConfig
2
+ # is created by giving a text string as argument.
3
+
4
+ require 'yaml'
5
+
6
# Tells whether the receiver has a method called arg.
# Returns true when Kernel#method can look it up, false when the lookup
# raises (unknown name, or an argument that is not a valid method name).
def method?(arg)
  method(arg)
rescue
  false
else
  true
end
14
+
15
+ # ---SVMFeature configuration---
16
+ #
17
+ # SVMFeature is initiated by a configuration file in YAML format.
18
+ # Required fields in configuration are:
19
+ # (all paths can be given either absolute or relative to BaseDir)
20
+ #
21
+ # ---
22
+ # Features:
23
+ # - <targetfeature>
24
+ # - <feature1>
25
+ # - <feature2>
26
+ # ...
27
+ # BaseDir: <base directory>
28
+ # DataSet: <path of file giving the dataset>
29
+ # OR
30
+ # <prefix of a set of files giving the dataset>
31
+ # Example: If Dataset is not given, this gives name(s) of examples
32
+ # to use.
33
+ # Groups: <range (n1..n2) or (n1...n2) in example name to use for grouping>
34
+ # OR
35
+ # <file prefix relative to BaseDir for files giving groups>
36
+ # Methods: <path of .rb file holding all feature calculation methods>
37
+ # --------------------------------------------------------------
38
+ # targetfeature:
39
+ # HomeDir: <home directory>
40
+ # If HomeDir is not given, it will be set to BaseDir/featurename
41
+ # Method: <the method calculating this feature>
42
+ # If Method is not given, an attempt is made to acquire
43
+ # the feature from the database. If it fails, ERROR is reported.
44
+ # Dimensions: <the number of dimensions in this feature>
45
+ # If Dimensions is not given, it will be assumed to be 1
46
+ # and only the first value for each example will be used
47
+ # <Further specific configuration of this feature>
48
+ # feature1:
49
+ # HomeDir: <home directory>
50
+ # Method: <the method calculating this feature>
51
+ # Dimensions: <the number of dimensions in this feature>
52
+ # <Further specific configuration of this feature>
53
+ # feature2:
54
+ # ...
55
+ # featureN:
56
+ # If featureN configuration is not given, then all default settings
57
+ # will apply to this feature.
58
+ # ...
59
+ #
60
+ #
61
# Parsed and validated SVMFeature configuration: a Hash holding the YAML
# settings, in which every listed feature's entry is replaced by an
# SVMFeatureConfig and relative paths are resolved against BaseDir.
class SVMFeaturesConfig < Hash

  # dim is the sum of the Dimensions of all listed features.
  attr_reader :cfg, :dim

  #----------------SVMFeatureConfig--------------------------------------------
  # Configuration of a single feature, with defaults filled in.
  class SVMFeatureConfig < Hash
    # maincfg is the enclosing SVMFeaturesConfig, feature the feature name.
    # Raises ArgumentError if the feature has no configuration, no Method,
    # or a Method that method? does not find.
    # Creates the feature's HomeDir if it does not exist.
    def initialize(maincfg, feature)
      settings = maincfg[feature]
      raise ArgumentError, "#{feature} configuration not given." unless settings
      settings.each { |k, v| self[k] = v }

      # Method
      calc = self['Method']
      raise ArgumentError, "Method for #{feature} not given." unless calc
      unless method?(calc)
        raise ArgumentError, "Method #{calc} for #{feature} not found."
      end

      # HomeDir (defaults to the feature name; always ends with '/').
      # Work on a copy so the configured string is not modified in place.
      dir = (self['HomeDir'] || feature).dup
      dir << '/' unless dir.end_with?('/')
      self['HomeDir'] = maincfg.setupDirInfo(dir)
      Dir.mkdir(self['HomeDir']) unless File.directory?(self['HomeDir'])

      # Dimensions
      self['Dimensions'] ||= 1

      # Meta-feature
      self['BaseDir'] = maincfg['BaseDir'] if self['Features']
    end

    def to_s
      self.to_yaml
    end
  end
  #----------------------------------------------------------------------------

  # arg is a YAML string or an open file holding the configuration.
  # Raises ArgumentError for an invalid configuration; a LoadError from
  # requiring a Methods file propagates unchanged.
  def initialize(arg)
    cfg = YAML.load(arg)
    unless cfg.is_a?(Hash)
      raise ArgumentError, "Configuration must be a YAML mapping."
    end
    cfg.each { |k, v| self[k] = v }

    # Set up dataset file
    if self['DataSet']
      self['DataSet'] = setupDirInfo(self['DataSet'])
      unless dataset_available?(self['DataSet'])
        raise ArgumentError, "Cannot find dataset file #{self['DataSet']}."
      end
    end

    # Get feature methods (the resolved paths are kept in the configuration)
    case (sources = self['Methods'])
    when Array
      self['Methods'] = sources.map { |src| setupDirInfo(src) }
      self['Methods'].each { |src| require src }
    when String
      self['Methods'] = setupDirInfo(sources)
      require self['Methods']
    end

    # Set up each feature's configuration
    raise ArgumentError, "Features not given." unless self['Features']
    @dim = 0
    self['Features'].each do |feature|
      self[feature] = SVMFeatureConfig.new(self, feature)
      @dim += self[feature]['Dimensions']
    end
  end

  # Returns path unchanged if it is absolute, otherwise path joined onto
  # BaseDir. The argument itself is not modified.
  # Raises ArgumentError if a relative path is given and BaseDir is not set.
  def setupDirInfo(path)
    return path if path.start_with?('/')
    base = self['BaseDir']
    raise ArgumentError, "BaseDir not given." unless base
    File.join(base, path)
  end

  private

  # True if path exists, or if path is the prefix of at least one file in
  # its directory (the "prefix of a set of files" form of DataSet).
  def dataset_available?(path)
    return true if File.exist?(path)
    dir = File.dirname(path)
    prefix = File.basename(path)
    File.directory?(dir) &&
      Dir.entries(dir).any? do |name|
        name.start_with?(prefix) && File.file?(File.join(dir, name))
      end
  end

end
147
+
148
# Holds the complete SVMLab configuration (feature and SVM settings) as
# loaded from YAML, with setters for the SVM parameters.
class SVMLabConfig

  # cfg is either the YAML text itself or, when it consists of a single
  # line, the path of a file holding the YAML text.
  def initialize(cfg)
    @cfg = if cfg.split("\n").size == 1
             # Block form so the file is closed again after loading.
             File.open(cfg) { |f| YAML.load(f) }
           else
             YAML.load(cfg)
           end
  end

  # Returns the top-level configuration entry a.
  def [](a)
    @cfg[a]
  end

  # Sets every scale value of every feature in SVM/Scale to scale.
  # A String is duplicated for each position so entries do not share one
  # object.
  def scales=(scale)
    @cfg['SVM']['Scale'].each_value do |values|
      values.map! { scale.is_a?(String) ? scale.dup : scale }
    end
  end

  # Sets SVM/C.
  def C=(arg)
    @cfg['SVM']['C'] = arg
  end

  # Sets SVM/e.
  def e=(arg)
    @cfg['SVM']['e'] = arg
  end

  # The configuration as YAML text.
  def to_s
    @cfg.to_yaml
  end

  def sayhi
    puts "Hello! I'm a SVMLabConfig object!"
  end

  #  SMVLabConfigStructure =
  #  '---
  #Feature:
  #  Features:
  #    - exA
  #  exA:
  #    Method: exB
  #    Dimensions: exC
  #  Methods: exD
  #  BaseDir: exE
  #  Groups: ExF
  #  DataSet: exG
  #SVM:
  #  PosClassFrom: Float
  #  PShalf: Integer
  #  C: Float
  #  ScaleMethod:
  #    Name: String
  #  Nhalf: Integer
  #  StepMethod: String
  #  e: Float
  #  Scale:
  #    exA:
  #      - exH
  #'
end