rsvm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
+ #ifndef _LIBSVM_H
2
+ #define _LIBSVM_H
3
+
4
+ #define LIBSVM_VERSION 312
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ extern int libsvm_version;
11
+
12
+ struct svm_node /* one (index, value) entry in a sample's feature array */
13
+ {
14
+ int index; /* feature index; the Ruby binding in this diff stores -1 in the terminating entry */
15
+ double value; /* feature value */
16
+ };
17
+
18
+ struct svm_problem /* training data handed to svm_train and svm_cross_validation */
19
+ {
20
+ int l; /* number of samples (the Ruby binding sets it to num_samples) */
21
+ double *y; /* one target value per sample */
22
+ struct svm_node **x; /* one pointer to a node array per sample */
23
+ double *W; /* instance weight */
24
+ };
25
+
26
+ enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */
27
+ enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
28
+
29
+ struct svm_parameter
30
+ {
31
+ int svm_type;
32
+ int kernel_type;
33
+ int degree; /* for poly */
34
+ double gamma; /* for poly/rbf/sigmoid */
35
+ double coef0; /* for poly/sigmoid */
36
+
37
+ /* these are for training only */
38
+ double cache_size; /* in MB */
39
+ double eps; /* stopping criteria */
40
+ double C; /* for C_SVC, EPSILON_SVR and NU_SVR */
41
+ int nr_weight; /* for C_SVC */
42
+ int *weight_label; /* for C_SVC */
43
+ double* weight; /* for C_SVC */
44
+ double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
45
+ double p; /* for EPSILON_SVR */
46
+ int shrinking; /* use the shrinking heuristics */
47
+ int probability; /* do probability estimates */
48
+ };
49
+
50
+ //
51
+ // svm_model
52
+ //
53
+ struct svm_model /* trained model; mirrored field-for-field by ModelStruct in the Ruby binding */
54
+ {
55
+ struct svm_parameter param; /* parameter */
56
+ int nr_class; /* number of classes, = 2 in regression/one class svm */
57
+ int l; /* total #SV */
58
+ struct svm_node **SV; /* SVs (SV[l]) */
59
+ double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
60
+ double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
61
+ double *probA; /* pairwise probability information */
62
+ double *probB; /* NOTE(review): presumably the companion array to probA - confirm */
63
+
64
+ /* for classification only */
65
+
66
+ int *label; /* label of each class (label[k]) */
67
+ int *nSV; /* number of SVs for each class (nSV[k]) */
68
+ /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
69
+ /* XXX */
70
+ int free_sv; /* 1 if svm_model is created by svm_load_model */
71
+ /* 0 if svm_model is created by svm_train */
72
+ };
73
+
74
+ struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
75
+ void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
76
+
77
+ int svm_save_model(const char *model_file_name, const struct svm_model *model);
78
+ struct svm_model *svm_load_model(const char *model_file_name);
79
+
80
+ int svm_get_svm_type(const struct svm_model *model);
81
+ int svm_get_nr_class(const struct svm_model *model);
82
+ void svm_get_labels(const struct svm_model *model, int *label);
83
+ double svm_get_svr_probability(const struct svm_model *model);
84
+
85
+ double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
86
+ double svm_predict(const struct svm_model *model, const struct svm_node *x);
87
+ double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
88
+
89
+ void svm_free_model_content(struct svm_model *model_ptr);
90
+ void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
91
+ void svm_destroy_param(struct svm_parameter *param);
92
+
93
+ const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
94
+ int svm_check_probability_model(const struct svm_model *model);
95
+
96
+ void svm_set_print_string_function(void (*print_func)(const char *));
97
+
98
+ #ifdef __cplusplus
99
+ }
100
+ #endif
101
+
102
+ #endif /* _LIBSVM_H */
@@ -0,0 +1,119 @@
1
+ require "svm/version"
2
+ require 'ffi'
3
+
4
+ require_relative 'svm/debug'
5
+
6
+ module Svm
7
+ extend FFI::Library
8
+ extend Svm::Debug
9
+
10
+ ffi_lib File.join(File.dirname(__FILE__), "libsvm/libsvm.#{RbConfig::CONFIG['DLEXT']}")
11
+
12
+ enum :svm_type, [:c_svc, :nu_svc, :one_class, :epsilon_svr, :nu_svr]
13
+ enum :kernel_type, [:linear, :poly, :rbf, :sigmoid, :precomputed]
14
+
15
+ class NodeStruct < FFI::Struct
16
+ layout :index, :int,
17
+ :value, :double
18
+
19
+ def self.node_array_from(sample_xs)
20
+ num_features = sample_xs.size
21
+
22
+ nodes_ptr = FFI::MemoryPointer.new(NodeStruct, num_features + 1)
23
+
24
+ num_features.times.each do |j|
25
+ node = NodeStruct.new(nodes_ptr + j * NodeStruct.size)
26
+ node[:index] = j
27
+ node[:value] = sample_xs[j].to_f
28
+ end
29
+
30
+ # Last node is a terminator. See libsvm README.
31
+ node = NodeStruct.new(nodes_ptr + num_features * NodeStruct.size)
32
+ node[:index] = -1
33
+ node[:value] = 0
34
+
35
+ nodes_ptr
36
+ end
37
+ end
38
+
39
+ class ProblemStruct < FFI::Struct
40
+ layout :l, :int,
41
+ :y, :pointer,
42
+ :svm_node, :pointer,
43
+ :W, :pointer
44
+ end
45
+
46
+ class ParameterStruct < FFI::Struct
47
+ layout :svm_type, :svm_type,
48
+ :kernel_type, :kernel_type,
49
+ :degree, :int,
50
+ :gamma, :double,
51
+ :coef0, :double,
52
+ :cache_size, :double,
53
+ :eps, :double,
54
+ :c, :double,
55
+ :nr_weight, :int,
56
+ :weight_label, :pointer,
57
+ :weight, :pointer,
58
+ :nu, :double,
59
+ :p, :double,
60
+ :shrinking, :int,
61
+ :probability, :int
62
+ end
63
+
64
+ class ModelStruct < FFI::ManagedStruct # Ruby-side view of struct svm_model from the bundled header
65
+ layout :param, ParameterStruct,
66
+ :nr_class, :int,
67
+ :l, :int,
68
+ :svm_node, :pointer, # corresponds to the SV member of the C struct
69
+ :sv_coef, :pointer,
70
+ :rho, :pointer,
71
+ :probA, :pointer,
72
+ :probB, :pointer,
73
+ :label, :pointer,
74
+ :nSV, :pointer,
75
+ :free_sv, :int
76
+ # Release hook required by FFI::ManagedStruct; receives the wrapped native pointer.
77
+ def self.release(ptr)
78
+ Svm.svm_free_model_content(ptr) # NOTE(review): frees the contents only; the header also declares svm_free_and_destroy_model - confirm the svm_model struct itself is not leaked
79
+ end
80
+ end
81
+
82
+ attach_function 'svm_train', [:pointer, :pointer], :pointer
83
+
84
+ attach_function 'svm_cross_validation', [:pointer, :pointer, :int, :pointer], :void
85
+ attach_function 'svm_save_model', [:string, :pointer], :int
86
+ attach_function 'svm_load_model', [:string], :pointer
87
+ attach_function 'svm_get_svm_type', [:pointer], :int
88
+ attach_function 'svm_get_nr_class', [ :pointer], :int
89
+ attach_function 'svm_get_labels', [:pointer, :pointer], :void
90
+ attach_function 'svm_get_svr_probability', [:pointer], :double
91
+
92
+ attach_function 'svm_predict_values', [:pointer, :pointer, :pointer], :double
93
+ attach_function 'svm_predict', [:pointer, :pointer], :double
94
+ attach_function 'svm_predict_probability', [:pointer, :pointer, :pointer], :double
95
+
96
+ attach_function 'svm_free_model_content', [:pointer], :void
97
+ attach_function 'svm_free_and_destroy_model', [:pointer], :void
98
+ attach_function 'svm_destroy_param', [:pointer], :void
99
+
100
+ attach_function 'svm_check_parameter', [:pointer, :pointer ], :string
101
+ attach_function 'svm_check_probability_model', [:pointer,], :int
102
+ attach_function 'svm_set_print_string_function', [:pointer,], :void
103
+
104
+
105
+ DebugCallback = FFI::Function.new(:void, [:string]) do |message|
106
+ print message if Svm.debug
107
+ end
108
+
109
+ Svm.svm_set_print_string_function(DebugCallback)
110
+ Svm.debug = false
111
+ end
112
+
113
+
114
+
115
+ require_relative 'svm/cross_validation'
116
+ require_relative 'svm/options'
117
+ require_relative 'svm/problem'
118
+ require_relative 'svm/model'
119
+ require_relative 'svm/scaler'
@@ -0,0 +1,39 @@
1
+ module Svm
2
+ module CrossValidation
3
+
4
+ def results_for_cross_validation(n_folds = 5, custom_options = nil)
5
+ results = cross_validate(n_folds, custom_options)
6
+
7
+ num_samples.times.collect { |i| value(i) == results[i] ? weight_for(i) : 0 }.inject(:+)
8
+ end
9
+
10
+ def cross_validate(n_folds = 5, more_options = nil)
11
+ set(more_options) if more_options
12
+
13
+ predicted_results_pointer = FFI::MemoryPointer.new(:double, num_samples)
14
+
15
+ Svm.svm_cross_validation(problem_struct, options.parameter_struct, n_folds, predicted_results_pointer)
16
+
17
+ predicted_results_pointer.read_array_of_double(num_samples)
18
+ end
19
+
20
+ def find_best_parameters(n_folds = 5)
21
+ c_exponents = (-1..14).to_a
22
+ gamma_exponents = (-13..-1).to_a
23
+
24
+ combinations = c_exponents.product(gamma_exponents)
25
+
26
+ max = combinations.max_by do |comb|
27
+ c = 2 ** comb[0]
28
+ gamma = 2 ** comb[1]
29
+
30
+ results_for_cross_validation(n_folds, :c => c, :gamma => gamma)
31
+ end
32
+
33
+ c = 2**max[0]
34
+ gamma = 2**max[1]
35
+
36
+ {:c => c, :gamma => gamma}
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,12 @@
1
+ module Svm
2
+ module Debug
3
+
4
+ def debug
5
+ defined?(@debug) && @debug
6
+ end
7
+
8
+ def debug=(do_debug)
9
+ @debug = do_debug
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,68 @@
1
+ module Svm
2
+ class ModelSerializationError < StandardError; end
3
+ class ModelError < StandardError; end
4
+
5
+ class Model
6
+ attr_reader :model_struct
7
+ attr_accessor :scaler
8
+
9
+ def initialize(model_struct)
10
+ @model_struct = model_struct
11
+ end
12
+
13
+ def save(path)
14
+ result = Svm.svm_save_model(path, model_struct.pointer)
15
+ raise ModelSerializationError.new("Unable to save model to file. Error: #{result}") unless result == 0
16
+ end
17
+
18
+ def self.load(path)
19
+ model_struct_pointer = Svm.svm_load_model(path)
20
+ raise ModelSerializationError.new("Unable to load model from file. Error: #{result}") unless model_struct_pointer != FFI::Pointer::NULL
21
+
22
+ model_struct = ModelStruct.new(model_struct_pointer)
23
+ self.new(model_struct)
24
+ end
25
+
26
+ def number_of_classes
27
+ Svm.svm_get_nr_class(model_struct)
28
+ end
29
+
30
+ def labels
31
+ labels_array = FFI::MemoryPointer.new(:int, number_of_classes)
32
+
33
+ Svm.svm_get_labels(model_struct, labels_array)
34
+
35
+ labels_array.read_array_of_int(number_of_classes)
36
+ end
37
+
38
+ def predict(sample) # returns the value produced by svm_predict for +sample+
39
+ scaler.scale(sample) if scaler # NOTE(review): return value is discarded - presumably scale mutates +sample+ in place; confirm in Scaler
40
+ # Convert the feature values into a terminated native node array.
41
+ nodes_ptr = NodeStruct.node_array_from(sample)
42
+ Svm.svm_predict(model_struct, nodes_ptr)
43
+ end
44
+
45
+ def predict_probabilities(sample)
46
+ unless Svm.svm_check_probability_model(model_struct) == 1
47
+ raise ModelError.new("Model doesn't have probability info")
48
+ end
49
+
50
+ scaler.scale(sample) if scaler
51
+
52
+ nodes_ptr = NodeStruct.node_array_from(sample)
53
+
54
+ prob_array = FFI::MemoryPointer.new(:double, number_of_classes)
55
+
56
+ Svm.svm_predict_probability(model_struct, nodes_ptr, prob_array)
57
+ probabilities = prob_array.read_array_of_double(number_of_classes)
58
+
59
+ number_of_classes.times.inject({}) do |hash, index|
60
+ label = labels[index]
61
+ prob = probabilities[index]
62
+
63
+ hash[label] = prob
64
+ hash
65
+ end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,69 @@
1
+ module Svm
2
+ class ParameterError < StandardError; end
3
+
4
+ class Options
5
+ attr_reader :parameter_struct
6
+
7
+ DEFAULT_OPTIONS = {
8
+ :svm_type => :c_svc,
9
+ :kernel_type => :rbf,
10
+ :degree => 3,
11
+ :gamma => 0,
12
+ :coef0 => 0,
13
+ :nu => 0.5,
14
+ :cache_size => 100.0,
15
+ :c => 1,
16
+ :eps => 0.001,
17
+ :p => 0.1,
18
+ :shrinking => 1,
19
+ :probability => 0,
20
+ :nr_weight => 0,
21
+ :cross_validation => 0,
22
+ :nr_fold => 0,
23
+ :scale => true
24
+ }
25
+
26
+ def initialize(user_options = {})
27
+ @parameter_struct = ParameterStruct.new
28
+ add(DEFAULT_OPTIONS.merge(user_options))
29
+ end
30
+
31
+ def add(more_options)
32
+ options_hash.merge!(more_options)
33
+
34
+ more_options.each do |key, value|
35
+ parameter_struct[key] = value if parameter_struct.members.include?(key)
36
+ end
37
+ end
38
+
39
+ def label_weights=(weights)
40
+ @weights = weights
41
+
42
+ num_labels = weights.keys.size
43
+
44
+ parameter_struct[:nr_weight] = num_labels
45
+
46
+ parameter_struct[:weight_label] = FFI::MemoryPointer.new(:int, num_labels)
47
+ parameter_struct[:weight] = FFI::MemoryPointer.new(:double, num_labels)
48
+
49
+ labels_array = weights.keys.collect(&:to_i)
50
+
51
+ parameter_struct[:weight_label].write_array_of_int(labels_array)
52
+ parameter_struct[:weight].write_array_of_double(weights.values)
53
+ end
54
+
55
+ def weights
56
+ @weights ||= Hash.new(1.0)
57
+ end
58
+
59
+ def [](option)
60
+ options_hash[option]
61
+ end
62
+
63
+ private
64
+
65
+ def options_hash
66
+ @options_hash ||= {}
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,151 @@
1
+ require 'csv'
2
+
3
+ module Svm
4
+ class Problem
5
+ include CrossValidation
6
+
7
+ attr_reader :num_samples
8
+ attr_reader :num_features
9
+ attr_reader :options
10
+
11
+ attr_accessor :scaler
12
+
13
+ def self.load_from_csv(csv_path, options = {})
14
+ data = CSV.read(csv_path).collect do |row|
15
+ row.collect { |field| field.to_f }
16
+ end
17
+
18
+ instance = self.new(options)
19
+ instance.data = data
20
+
21
+ instance
22
+ end
23
+
24
+ def initialize(user_options = {})
25
+ @nodes_pointers = []
26
+ @options = Options.new(user_options)
27
+ end
28
+
29
+ def data=(samples, weights = nil)
30
+ @num_samples = samples.size
31
+ @num_features = samples.first.size - 1
32
+ @sample_weights = weights if weights
33
+
34
+ if options[:scale]
35
+ self.scaler = Scaler.scale(samples)
36
+ scaler.release_data!
37
+ end
38
+
39
+ problem_struct[:l] = num_samples
40
+ problem_struct[:svm_node] = FFI::MemoryPointer.new(FFI::Pointer, num_samples)
41
+ problem_struct[:y] = FFI::MemoryPointer.new(FFI::Type::DOUBLE, num_samples)
42
+ problem_struct[:W] = FFI::MemoryPointer.new(FFI::Type::DOUBLE, num_samples)
43
+
44
+ # Allocate memory for the samples
45
+ # There are num_samples each with num_features nodes
46
+
47
+ num_samples.times.each do |i|
48
+ sample = samples[i].collect(&:to_f)
49
+
50
+ sample_value = sample.first
51
+ sample_xs = sample[1..sample.size-1]
52
+ sample_weight = sample_weights[i]
53
+
54
+ problem_struct[:y].put_double(FFI::Type::DOUBLE.size * i, sample_value)
55
+ problem_struct[:W].put_double(FFI::Type::DOUBLE.size * i, sample_weight)
56
+
57
+ # Allocate memory for the sample
58
+ nodes_ptr = NodeStruct.node_array_from(sample_xs)
59
+ problem_struct[:svm_node].put_pointer(FFI::Pointer.size*i, nodes_ptr)
60
+
61
+ # We have to keep a reference to the pointer so it is not gargabe collected
62
+ @nodes_pointers << nodes_ptr
63
+ end
64
+ end
65
+
66
+ def sample(index)
67
+ sample_ptr = @nodes_pointers[index]
68
+
69
+ num_features.times.collect do |j|
70
+ node = NodeStruct.new(sample_ptr + NodeStruct.size * j)
71
+ node[:value]
72
+ end
73
+ end
74
+
75
+ def value(index)
76
+ problem_struct[:y].get_double(FFI::Type::DOUBLE.size * index)
77
+ end
78
+
79
+ def length
80
+ problem_struct[:l]
81
+ end
82
+
83
+ def generate_model(more_options = {})
84
+ set(more_options)
85
+
86
+ model_pointer = Svm.svm_train(problem_struct.pointer, options.parameter_struct.pointer)
87
+ model_struct = ModelStruct.new(model_pointer)
88
+
89
+ model = Model.new(model_struct)
90
+ model.scaler = scaler
91
+
92
+ model
93
+ end
94
+
95
+ def suggested_labels_weights
96
+ labels.inject({}) do |hash, label|
97
+ num = num_samples_for(label).to_f
98
+ hash[label.to_i] = num/num_samples
99
+ hash
100
+ end
101
+ end
102
+
103
+ def num_samples_for(label)
104
+ num_samples.times.count { |i| value(i) == label }
105
+ end
106
+
107
+ def labels
108
+ num_samples.times.collect { |i| value(i) }.uniq
109
+ end
110
+
111
+ def label_weights=(weights)
112
+ options.label_weights = weights
113
+ check_parameters!
114
+ end
115
+
116
+ def weight_for(i)
117
+ sample_weights[i] || 1.0
118
+ end
119
+
120
+ def sample_weights=(weights)
121
+ @sample_weights = weights
122
+ end
123
+
124
+ def sample_weights
125
+ @sample_weights ||= Array.new(num_samples, 1.0)
126
+ end
127
+
128
+ def estimate_probabilities=(option)
129
+ value = option ? 1 : 0
130
+
131
+ options.parameter_struct[:probability] = value
132
+ end
133
+
134
+ def set(custom_options)
135
+ options.add(custom_options)
136
+ check_parameters!
137
+ end
138
+
139
+ private
140
+
141
+ def problem_struct
142
+ @problem_struct ||= ProblemStruct.new
143
+ end
144
+
145
+ def check_parameters!
146
+ error = Svm.svm_check_parameter(problem_struct, options.parameter_struct)
147
+ raise ParameterError.new("The provided options are not valid: #{error}") if error
148
+ end
149
+
150
+ end
151
+ end