svmlab 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,176 @@
1
+ require 'arraymethods.rb'
2
+
3
# A Hash of SVM predictions keyed by example name. The methods below read
# each value through value['truth'] and value['pred'].
class SVMPrediction < Hash

  # --- difficultFeature ---
  # For every feature index i in 1...features.dim, correlates the absolute
  # prediction error (|truth - pred|) with the value of feature i over all
  # examples in self. Returns an array of strings "<feature name> <corr>",
  # ordered by decreasing absolute correlation (as printed with 2 decimals).
  # Raises a RuntimeError unless features is an SVMFeature.
  # Relies on a `correlation(a, b)` helper that is not defined in this class
  # (presumably provided by arraymethods.rb - confirm).
  def difficultFeature(features)
    if !features.is_a? SVMFeature
      raise "Please give a SVMFeature object as argument."
    end
    examples = keys
    indices = (1...features.dim).to_a
    names = indices.map { |i| features.featname(i) }
    maxlen = names.map { |n| n.size }.max
    lines = indices.map do |i|
      predmiss = examples.map { |k| (self[k]['truth'] - self[k]['pred']).abs }
      feat = examples.map { |k| features[k][i] }
      "#{names[i - 1].rjust(maxlen)} %.2f" % correlation(predmiss, feat)
    end
    # The sort key is parsed back from the formatted line, so ordering is
    # based on the rounded value.
    lines.sort_by { |line| line.split.last.to_f.abs }.reverse
  end

  # --- [] ---
  # If indexing with a regular expression, a new SVMPrediction object is
  # created containing all elements with matching keys. Any other index is
  # an ordinary Hash lookup.
  def [](expr)
    return super(expr) unless expr.is_a? Regexp
    subs = SVMPrediction.new
    each { |k, v| subs[k] = v if k =~ expr }
    subs
  end

  # --- (minus) ---
  # Returns a new SVMPrediction holding the predictions of self whose key
  # has no (truthy) entry in arg, i.e. self with the predictions found in
  # arg removed.
  def -(arg)
    inject(SVMPrediction.new) do |subs, (k, v)|
      subs[k] = v unless arg[k]
      subs
    end
  end

  # --- genericplot ---
  # Sends plotdata (an array of data sets, one per curve) to gnuplot with the
  # given title and axis labels. When file ends in "png" or "ps" the plot is
  # written to that file using the png or postscript terminal; otherwise no
  # terminal/output is set. Always returns nil.
  def genericplot(plotdata, file, title='Plot', xtitle='X', ytitle='Y')
    Gnuplot.open do |gp| # This could be either a file or the gnuplot process that we pipe to
      Gnuplot::Plot.new( gp ) do |plot|
        plot.title title
        plot.xlabel xtitle
        plot.ylabel ytitle
        plot.set "grid"
        # Both alternatives must be anchored at the end of the name;
        # /(png)|(ps)$/ also matched names merely containing "png".
        if file =~ /(png|ps)$/
          plot.terminal "png size 1024,800 font '/usr/share/fonts/truetype/ttf-bitstream-vera/VeraBd.ttf' 20" if file =~ /png$/
          plot.terminal "postscript eps color" if file =~ /ps$/
          plot.output file
        end
        plot.data = plotdata
      end
    end
    nil
  end

  # --- plot ---
  # Plots true value on the X axis vs. predicted value on the Y axis.
  #   legends - one legend per sample; generated ("Sample 1", ...) if empty
  #   title   - plot title
  #   err     - a measure name or array of names appended to each legend;
  #             defaults to ['rmsd','cc']. "name,arg1,..." calls the measure
  #             with arguments, e.g. 'f1,2'.
  #   file    - output file, see genericplot
  # NOTE(security): each err entry is evaluated as Ruby code through eval;
  # never pass strings from an untrusted source.
  def plot(legends = [], title = 'SVM Prediction', err = nil, file = '')
    # For historical reasons
    predarr = [ self ]
    # One [truths, predictions] pair per prediction set
    dataarr = predarr.map do |predictions|
      predictions.inject([[], []]) do |data, (_example, val)|
        data[0].push(val['truth'])
        data[1].push(val['pred'])
        data
      end
    end

    # Integer bounds enclosing every plotted value; used for the diagonal
    from = dataarr.inject(dataarr[0][0][0]) { |m, a|
      [m, a[0].min, a[1].min].min }.floor
    to = dataarr.inject(dataarr[0][0][0]) { |m, a|
      [m, a[0].max, a[1].max].max }.ceil

    if legends.size == 0
      sampleindex = 0
      legends = dataarr.map { |_d| "Sample #{sampleindex += 1}" }
    end
    err = ['rmsd','cc'] if !err # Default behaviour
    err = [err] if err.is_a? String
    legends = legends.map do |legend|
      measures = err.map do |e|
        begin
          parts = e.split(/,/)
          args = if parts.size == 1 then nil
                 else '(' + parts[1..-1].join(',') + ')' end
          "#{e} = ".upcase + ("%.2f" % eval("self.#{parts.first.downcase}#{args}"))
        rescue
          # A failing measure shows up as its error message in the legend
          $!
        end
      end
      legend + ' (' + measures.join(', ') + ')'
    end

    # Setting plotdata
    diagonal = Gnuplot::DataSet.new( [[from,to], [from,to]] ) { |ds|
      ds.using = '1:2'
      ds.with = "lines"
      ds.title = "Correct diagonal"
      ds.linewidth = 1
      ds.matrix = nil }
    samples = dataarr.zip(legends).map do |(x, y), legend|
      Gnuplot::DataSet.new( [x,y] ) { |ds|
        ds.using = '1:2'
        ds.with = "points"
        ds.title = legend
        ds.linewidth = 2
        ds.matrix = nil }
    end
    genericplot([diagonal] + samples, file, title, 'Experimental value', 'Predicted value')
  end

  # --- rmsd ---
  # Root mean square deviation between truth and prediction.
  # Returns 0 when the mean squared deviation is not positive, which
  # includes the empty prediction set (0.0/0 is NaN, and NaN > 0 is false).
  def rmsd
    squared = values.inject(0.0) do |sum, val|
      sum + (val['truth'].to_f - val['pred'].to_f)**2
    end
    mean = squared / size
    if mean > 0
      Math.sqrt(mean)
    else
      0
    end
  end

  # --- cc ---
  # Correlation coefficient between truth and prediction.
  # Returns 0 whenever the covariance term is not positive, so negative
  # correlations (and empty or constant data) are reported as 0.
  def cc
    a = values.map { |val| val['truth'].to_f }
    b = values.map { |val| val['pred'].to_f }
    amean = a.inject(0) { |sum, i| sum + i } / a.size.to_f
    bmean = b.inject(0) { |sum, i| sum + i } / b.size.to_f
    ssa = a.inject(0.0) { |ss, i| ss + (i - amean)**2 }
    ssb = b.inject(0) { |ss, i| ss + (i - bmean)**2 }
    ssab = a.zip(b).inject(0) { |ss, (ai, bi)| ss + (ai - amean) * (bi - bmean) }
    if ssab > 0
      Math.sqrt(ssab**2 / (ssa * ssb))
    else
      0
    end
  end

  # --- f1 ---
  # Precision Recall measure (F1 score). An example counts as positive when
  # its value is >= border. Returns 0 when precision + recall is 0.
  def f1(border = 0)
    tp = select { |_k, v| v['truth'] >= border && v['pred'] >= border }.size.to_f
    fp = select { |_k, v| v['truth'] < border && v['pred'] >= border }.size.to_f
    fn = select { |_k, v| v['truth'] >= border && v['pred'] < border }.size.to_f
    precision = (tp + fp) > 0 ? tp / (tp + fp) : 0
    recall = (tp + fn) > 0 ? tp / (tp + fn) : 0
    denom = precision + recall
    if denom > 0
      2 * precision * recall / denom
    else
      0
    end
  end

end
@@ -0,0 +1,12 @@
1
+ SVM:
2
+ C: 1 #step exp1.0
3
+ e: 3 #step exp1.0
4
+ g: 0.4 #step exp0.5
5
+ Optimization:
6
+ Method: patternsearch
7
+ Nhalf: 3
8
+ Measure: cc
9
+ Scale:
10
+ Default: max
11
+ LibSVMDataset: /home/fred/SVMLab/trunk/lib/testdata
12
+ #PosClassFrom: 1.7
@@ -0,0 +1,5 @@
1
+ require 'wirble'
2
+ Wirble.init
3
+ Wirble.colorize
4
+
5
+ puts "Welcome!"
@@ -0,0 +1,3 @@
1
+ 1 1:2.0 2:6.0
2
+ -1 1:2.0 2:10.0
3
+ -1 1:2.0 2:14.0
@@ -0,0 +1,20 @@
1
+ This is pdfTeXk, Version 3.141592-1.40.3 (Web2C 7.5.6) (format=latex 2008.11.29) 16 MAR 2009 10:48
2
+ entering extended mode
3
+ %&-line parsing enabled.
4
+ **main.tex
5
+
6
+ ! Emergency stop.
7
+ <*> main.tex
8
+
9
+ *** (job aborted, file error in nonstop mode)
10
+
11
+
12
+ Here is how much of TeX's memory you used:
13
+ 3 strings out of 94102
14
+ 111 string characters out of 1165832
15
+ 47703 words of memory out of 1500000
16
+ 3383 multiletter control sequences out of 10000+50000
17
+ 3640 words of font info for 14 fonts, out of 1200000 for 2000
18
+ 637 hyphenation exceptions out of 8191
19
+ 0i,0n,0p,1b,6s stack positions out of 5000i,500n,6000p,200000b,5000s
20
+ No pages of output.
@@ -0,0 +1,81 @@
1
+ require 'irb/completion'
2
+ require 'svmlab'
3
+
4
+ IRB.conf[:PROMPT][:SVMLab] = { # name of prompt mode
5
+ :PROMPT_I => "SVMLab:%03n:%i> ", # normal prompt
6
+ :PROMPT_S => "SVMLab:%03n:%i%l ", # prompt for continuing strings
7
+ :PROMPT_C => "SVMLab:%03n:%i* ", # prompt for continuing statement
8
+ :RETURN => " ==>%s\n" # format to return value
9
+ }
10
+
11
+ IRB.conf[:PROMPT_MODE] = :SVMLab
12
+
13
+
14
+ # Sebastian Delmont
15
+ # Pretty print methods
16
+ ANSI_BOLD = "\033[1m"
17
+ ANSI_RESET = "\033[0m"
18
+ ANSI_LGRAY = "\033[0;37m"
19
+ ANSI_GRAY = "\033[1;30m"
20
+
21
# Pretty-prints the methods of obj, one per line: name, a placeholder
# argument list derived from the method's arity, and the defining class as
# parsed from Method#inspect.
#   options - may contain :more (do not hide the methods listed by
#             Object.methods) and/or a Regexp (only names matching it).
# Uses the ANSI_* colour constants defined alongside this method.
# Returns the number of methods printed.
def pm(obj, *options) # Print methods
  methods = obj.methods
  methods -= Object.methods unless options.include? :more
  filter = options.select { |opt| opt.kind_of? Regexp }.first
  methods = methods.select { |name| name =~ filter } if filter

  # Method names are Symbols on Ruby >= 1.9; work on Strings so that
  # rjust/ljust below are available.
  data = methods.map { |name| name.to_s }.sort.collect do |name|
    meth = obj.method(name)
    arity = meth.arity
    args =
      if arity >= 0
        "(#{(1..arity).collect { |i| "arg#{i}" }.join(", ")})"
      else
        # A negative arity of -n-1 means n required arguments followed by
        # optional ones.
        required = (1..(-arity - 1)).collect { |i| "arg#{i}" }
        "(#{(required + ['...']).join(", ")})"
      end
    klass = meth.inspect[/Method: (.*?)#/, 1]
    [name, args, klass]
  end
  max_name = data.collect { |item| item[0].size }.max
  max_args = data.collect { |item| item[1].size }.max
  data.each do |item|
    print " #{ANSI_BOLD}#{item[0].rjust(max_name)}#{ANSI_RESET}"
    print "#{ANSI_GRAY}#{item[1].ljust(max_args)}#{ANSI_RESET}"
    print " #{ANSI_LGRAY}#{item[2]}#{ANSI_RESET}\n"
  end
  data.size
end
50
+
51
+ # To enable history saving between sessions
52
+ IRB.conf[:SAVE_HISTORY] = 100
53
+
54
+ # Stian Haklev / Joel VanderWerf
55
+ # To reduce lengthy output
56
# Extends IRB::Context with a max_output_size attribute that limits how
# much of a return value is echoed.
class IRB::Context
  attr_accessor :max_output_size

  # Keep the stock constructor reachable under another name.
  alias_method :initialize_before_max_output_size, :initialize

  # Runs the stock constructor, then takes the limit from
  # IRB.conf[:MAX_OUTPUT_SIZE], falling back to 80.
  def initialize(*args)
    initialize_before_max_output_size(*args)
    configured = IRB.conf[:MAX_OUTPUT_SIZE]
    @max_output_size = configured || 80
  end
end
65
+
66
class IRB::Irb
  # Prints the last evaluated value using the context's return format.
  # The inspected form is used when the context is in inspect mode.
  # Text longer than @context.max_output_size is cut to that many
  # characters, followed by "..." and the last two characters of the full
  # text (presumably a closing delimiter plus the newline of the return
  # format - confirm).
  def output_value
    value = @context.last_value
    value = value.inspect if @context.inspect?
    text = sprintf(@context.return_format, value)
    max = @context.max_output_size
    # <= : text of exactly max characters fits and must not be abbreviated.
    if text.size <= max
      puts text
    else
      puts text[0..max-1] + "..." + text[-2..-1]
    end
  end
end
@@ -0,0 +1,124 @@
1
+ ---
2
+ #------------------------------------------------------------
3
+ # CONFIGURATION OF SVM PARAMETERS
4
+ #------------------------------------------------------------
5
+ SVM:
6
+ C: 1 step exp1.0
7
+ e: 0.3 step exp1.0
8
+ g: 0.04 step exp1.0
9
+ #C: 1 #step exp1.0
10
+ #e: 0.17 #step exp1.0
11
+ #g: 0.4 #step exp1.0
12
+ Optimization:
13
+ Method: "patternsearch"
14
+ Nhalf: 3
15
+ Scale:
16
+ ddG:
17
+ - max #step exp0.1
18
+ KFCprop:
19
+ - max #step exp0.1 # size
20
+ - max #step exp0.1 # chemical properties
21
+ - max #step exp0.1
22
+ - max #step exp0.1
23
+ KFCN:
24
+ - max #step exp0.1
25
+ - max #step exp0.1
26
+ - max #step exp0.1
27
+ - max #step exp0.1
28
+ - max #step exp0.1
29
+ - max #step exp0.1
30
+ - max #step exp0.1
31
+ - max #step exp0.1
32
+ - max #step exp0.1
33
+ - max #step exp0.1
34
+ KFCS:
35
+ - max #step exp0.1
36
+ - max #step exp0.1
37
+ - max #step exp0.1
38
+ - max #step exp0.1
39
+ - max #step exp0.1
40
+ - max #step exp0.1
41
+ - max #step exp0.1
42
+ - max #step exp0.1
43
+ - max #step exp0.1
44
+ - max #step exp0.1
45
+ KFCPolarBonds:
46
+ - max
47
+ KFCGenericBonds:
48
+ - max
49
+ KFCHbonds:
50
+ - max #step exp0.1
51
+ HBPlusBonds:
52
+ - max
53
+ zdscore:
54
+ - max
55
+ - max
56
+ - max
57
+ #------------------------------------------------------------
58
+ # CONFIGURATION OF FEATURE PARAMETERS
59
+ #------------------------------------------------------------
60
+ Feature:
61
+ #PosClassFrom: 2.0
62
+ # Features is a list of all features used
63
+ # Each feature needs to have its configuration given
64
+ Features:
65
+ - ddG
66
+ - KFCN
67
+ - KFCS
68
+ - KFCPolarBonds
69
+ - KFCGenericBonds
70
+ - KFCHbonds
71
+ - KFCprop
72
+ #- HBPlusBonds
73
+ #- zdscore
74
+ # "BaseDir" is the directory from which all paths are derived
75
+ BaseDir: /home/fred/DeltaDeltaG/Features/
76
+ # "DataSet" is the file (relative to BaseDir or absolute)
77
+ # that holds names of examples
78
+ DataSet: KFCdatasettest
79
+ # "Methods" lists the .rb files that hold all feature methods
80
+ Methods:
81
+ - KFCfeatures.rb
82
+ - fredrik-hbplus.rb
83
+ - /home/fred/DeltaDeltaG/ZDOCK-Features/zdock-svm.rb
84
+ Groups: (0..3)
85
+ #---SEPARATE FEATURE CONFIGURATIONS FROM HERE--------------
86
+ #---------------------------ddG----------------------------
87
+ ddG:
88
+ Dimensions: 1
89
+ # "HomeDir" is optional. If not given, it will be set to <BaseDir>/<Feature name>
90
+ HomeDir: ddG/
91
+ # "Method" is mandatory and is the name of the method calculating the feature
92
+ Method: kfc_ddG
93
+ #----------------------------KFCprop-----------------------
94
+ KFCprop:
95
+ Dimensions: 4
96
+ Method: kfcProperties
97
+ #----------------------------KFCN--------------------------
98
+ KFCN:
99
+ Dimensions: 10
100
+ Method: kfcN
101
+ #----------------------------KFCS--------------------------
102
+ KFCS:
103
+ Dimensions: 10
104
+ Method: kfcS
105
+ #----------------------------KFCPolarBonds-----------------
106
+ KFCPolarBonds:
107
+ Method: kfcPolarBonds
108
+ #----------------------------KFCGenericBonds---------------
109
+ KFCGenericBonds:
110
+ Method: kfcGenericBonds
111
+ #----------------------------KFCHbonds---------------------
112
+ KFCHbonds:
113
+ Method: kfcHbonds
114
+ #----------------------------HBPlusBonds-------------------
115
+ HBPlusBonds:
116
+ Dimensions: 1
117
+ Method: hbonds
118
+ MaxH2O: 2
119
+ OnlySideChain: yes
120
+ #----------------------------zdscore-----------------------
121
+ zdscore:
122
+ Dimensions: 3
123
+ Method: zdscoreSimple
124
+ HomeDir: /home/fred/DeltaDeltaG/ZDOCK-Features/zdscoreSimple/
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: svmlab
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 0
8
+ - 0
9
+ version: 1.0.0
10
+ platform: ruby
11
+ authors:
12
+ - Fredrik Johansson
13
+ autorequire: svmlab
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-05-26 00:00:00 +09:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: forkoff
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :runtime
31
+ version_requirements: *id001
32
+ - !ruby/object:Gem::Dependency
33
+ name: gnuplot
34
+ prerelease: false
35
+ requirement: &id002 !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ segments:
40
+ - 0
41
+ version: "0"
42
+ type: :runtime
43
+ version_requirements: *id002
44
+ description:
45
+ email: fredjoha@gmail.com
46
+ executables: []
47
+
48
+ extensions: []
49
+
50
+ extra_rdoc_files: []
51
+
52
+ files:
53
+ - lib/svmlab-optim.rb
54
+ - lib/v6.cfg
55
+ - lib/tmp.irb.rc
56
+ - lib/libsvmdata.rb
57
+ - lib/testdata
58
+ - lib/test.cfg
59
+ - lib/svmfeature2.rb
60
+ - lib/test.rb
61
+ - lib/svmlab-config.rb
62
+ - lib/svmlab-plot.rb
63
+ - lib/svmlab.rb
64
+ - lib/irb.history
65
+ - lib/arraymethods.rb
66
+ - lib/svmfeature.rb
67
+ - lib/svmlab-irb.rb
68
+ - lib/texput.log
69
+ - lib/svmprediction.rb
70
+ - lib/README
71
+ has_rdoc: true
72
+ homepage: http://ediacara.bmr.kyushu-u.ac.jp/fredrik
73
+ licenses: []
74
+
75
+ post_install_message:
76
+ rdoc_options: []
77
+
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ segments:
92
+ - 0
93
+ version: "0"
94
+ requirements: []
95
+
96
+ rubyforge_project:
97
+ rubygems_version: 1.3.6
98
+ signing_key:
99
+ specification_version: 3
100
+ summary: A tool for experimenting with Support Vector Machines.
101
+ test_files: []
102
+