ml4r 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. data/ext/ml4r/LinearRegression/LinearRegression.cpp +305 -0
  2. data/ext/ml4r/LinearRegression/OLSLinearRegression.cpp +75 -0
  3. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.cpp +50 -0
  4. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.cpp +195 -0
  5. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.cpp +551 -0
  6. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.cpp +22 -0
  7. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.cpp +21 -0
  8. data/ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.cpp +142 -0
  9. data/ext/ml4r/MachineLearning/GBM/BernoulliCalculator.cpp +95 -0
  10. data/ext/ml4r/MachineLearning/GBM/GBMEstimator.cpp +601 -0
  11. data/ext/ml4r/MachineLearning/GBM/GBMOutput.cpp +86 -0
  12. data/ext/ml4r/MachineLearning/GBM/GBMRunner.cpp +117 -0
  13. data/ext/ml4r/MachineLearning/GBM/GaussianCalculator.cpp +94 -0
  14. data/ext/ml4r/MachineLearning/GBM/ZenithGBM.cpp +317 -0
  15. data/ext/ml4r/MachineLearning/MLData/MLData.cpp +232 -0
  16. data/ext/ml4r/MachineLearning/MLData/MLDataFields.cpp +1 -0
  17. data/ext/ml4r/MachineLearning/MLData/MLDataReader.cpp +139 -0
  18. data/ext/ml4r/MachineLearning/MLData/ZenithMLData.cpp +96 -0
  19. data/ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.cpp +113 -0
  20. data/ext/ml4r/MachineLearning/MLExperiment.cpp +69 -0
  21. data/ext/ml4r/MachineLearning/MLRunner.cpp +183 -0
  22. data/ext/ml4r/MachineLearning/MLUtils.cpp +15 -0
  23. data/ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.cpp +172 -0
  24. data/ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.cpp +66 -0
  25. data/ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.cpp +84 -0
  26. data/ext/ml4r/MachineLearning/RandomForest/ZenithRandomForest.cpp +184 -0
  27. data/ext/ml4r/ml4r.cpp +34 -0
  28. data/ext/ml4r/ml4r_wrap.cpp +15727 -0
  29. data/ext/ml4r/utils/MathUtils.cpp +204 -0
  30. data/ext/ml4r/utils/StochasticUtils.cpp +73 -0
  31. data/ext/ml4r/utils/Utils.cpp +14 -0
  32. data/ext/ml4r/utils/VlcMessage.cpp +3 -0
  33. metadata +33 -1
@@ -0,0 +1,232 @@
1
+ #include "MachineLearning/MLData/MLData.h"
2
+ #include "MachineLearning/MLExperiment.h"
3
+ #include "utils/Utils.h"
4
+
5
+ #include <boost/foreach.hpp>
6
+ #include <boost/lexical_cast.hpp>
7
+ using boost::lexical_cast;
8
+
9
+ MLData::MLData() : m_missingValueDefined(false)
10
+ {}
11
+
12
+ MLData::~MLData()
13
+ {}
14
+
15
+ vector<shared_ptr<MLExperiment> >& MLData::getExperiments()
16
+ {
17
+ return m_experiments;
18
+ }
19
+
20
+ vector<shared_ptr<MLExperiment> >& MLData::getTrainingExperiments(int fold)
21
+ {
22
+ return m_trainingExperiments[fold];
23
+ }
24
+
25
+ shared_ptr<MLExperiment> MLData::getExperimentWithId(int experimentId)
26
+ {
27
+ if (m_experimentsById.find(experimentId) == m_experimentsById.end())
28
+ throw std::runtime_error(string("Could not find experiment with id: " + lexical_cast<string>(experimentId)).c_str());
29
+
30
+ return m_experimentsById[experimentId];
31
+ }
32
+
33
+ void MLData::setExperiments(vector<shared_ptr<MLExperiment> > experiments)
34
+ {
35
+ m_experiments = experiments;
36
+ BOOST_FOREACH(shared_ptr<MLExperiment>& experiment, m_experiments)
37
+ m_experimentsById[experiment->getExperimentId()] = experiment;
38
+
39
+ createFolds(1, 0);
40
+ }
41
+
42
+ vector<string>& MLData::getFeatures()
43
+ {
44
+ return m_featureNames;
45
+ }
46
+
47
+ void MLData::setFeatures(vector<string> features)
48
+ {
49
+ m_featureNames = features;
50
+ int index = -1;
51
+ BOOST_FOREACH(string& e, m_featureNames)
52
+ {
53
+ ++index;
54
+ m_featureIndices[e] = index;
55
+ }
56
+ }
57
+
58
+ void MLData::constructCategories(vector<string> categoricalFeatures)
59
+ {
60
+ BOOST_FOREACH(string& categoricalFeature, categoricalFeatures)
61
+ {
62
+ if (m_featureIndices.find(categoricalFeature) == m_featureIndices.end())
63
+ throw std::runtime_error("Could not find categorical feature '" + categoricalFeature + "' in list of features to load or run");
64
+
65
+ int featureIndex = m_featureIndices[categoricalFeature];
66
+ m_categoricalFeatureIndices.insert(featureIndex);
67
+
68
+ }
69
+ }
70
+
71
+ int MLData::getFeatureIndex(string& feature)
72
+ {
73
+ const map<string,int>::iterator it = m_featureIndices.find(feature);
74
+ if (it == m_featureIndices.end())
75
+ throw std::runtime_error("Cannot get feature index for feature '" + feature + "'. Feature not loaded.");
76
+
77
+ return it->second;
78
+ }
79
+
80
+ set<int>& MLData::getCategoricalFeatureIndices()
81
+ {
82
+ return m_categoricalFeatureIndices;
83
+ }
84
+
85
+ void MLData::setInitialPredictionsDefined(bool defined)
86
+ {
87
+ m_initialPredictionsDefined = defined;
88
+ }
89
+
90
+ bool MLData::initialPredictionsDefined()
91
+ {
92
+ return m_initialPredictionsDefined;
93
+ }
94
+
95
+ void MLData::createFolds(int numFolds, int randomSeed)
96
+ {
97
+ m_foldNumbers.clear();
98
+ for (int i = 0; i < numFolds; ++i)
99
+ m_foldNumbers.push_back(i);
100
+
101
+ m_cvExperiments.clear();
102
+ m_trainingExperiments.clear();
103
+
104
+ if (m_foldNumbers.size() == 1)
105
+ {
106
+ m_trainingExperiments[0] = m_experiments;
107
+ return;
108
+ }
109
+
110
+ srand(randomSeed);
111
+ vector<int> folds = Utils::vectorRange<int>(0,numFolds-1);
112
+ vector<int> repeatedFolds = Utils::vectorRepeat(folds, m_experiments.size());
113
+ vector<int> randomlySortedFolds = Utils::vectorShuffle(repeatedFolds);
114
+
115
+
116
+ int index = -1;
117
+ BOOST_FOREACH(shared_ptr<MLExperiment> experiment, m_experiments)
118
+ {
119
+ ++index;
120
+ for (int fold = 0; fold < numFolds; ++fold)
121
+ {
122
+ if (randomlySortedFolds.at(index) == fold)
123
+ m_cvExperiments[fold].push_back(experiment);
124
+ else
125
+ m_trainingExperiments[fold].push_back(experiment);
126
+ }
127
+ }
128
+ }
129
+
130
+
131
+ void MLData::setFolds( vector<int> experimentFolds )
132
+ {
133
+ m_cvExperiments.clear();
134
+ m_trainingExperiments.clear();
135
+ m_foldNumbers.clear();
136
+
137
+ if (experimentFolds.size() != m_experiments.size())
138
+ throw std::runtime_error("[MLData::setFolds] to use this method, the list of folds must have the same length as the list of training experiments!");
139
+
140
+ BOOST_FOREACH(int fold, experimentFolds)
141
+ m_foldNumbers.push_back(fold);
142
+
143
+ int index = -1;
144
+ BOOST_FOREACH(shared_ptr<MLExperiment> experiment, m_experiments)
145
+ {
146
+ ++index;
147
+ BOOST_FOREACH(int fold, m_foldNumbers)
148
+ {
149
+ int experimentFold = experimentFolds.at(index);
150
+ if (fold == experimentFold)
151
+ m_cvExperiments[fold].push_back(experiment);
152
+ else
153
+ m_trainingExperiments[fold].push_back(experiment);
154
+ }
155
+ }
156
+ }
157
+
158
+ int MLData::getNumFolds()
159
+ {
160
+ return (int) m_foldNumbers.size();
161
+ }
162
+
163
+ vector<shared_ptr<MLExperiment> >& MLData::getCrossValidationExperiments(int fold)
164
+ {
165
+ return m_cvExperiments[fold];
166
+ }
167
+
168
+ void MLData::setMissingValue(double missingValue)
169
+ {
170
+ m_missingValueDefined = true;
171
+ m_missingValue = missingValue;
172
+ }
173
+
174
+ bool MLData::missingValueDefined()
175
+ {
176
+ return m_missingValueDefined;
177
+ }
178
+
179
+ double MLData::getMissingValue()
180
+ {
181
+ if (!m_missingValueDefined)
182
+ throw std::runtime_error("Cannot ask for missing value when it is not defined!");
183
+
184
+ return m_missingValue;
185
+ }
186
+
187
+ void MLData::setInitialPredictions( vector<double> initialPredictions )
188
+ {
189
+ if (m_experiments.size() != initialPredictions.size())
190
+ throw std::runtime_error("Initial predictions are not of the same length as experiments. " +
191
+ lexical_cast<string>(initialPredictions.size()) + " versus " + lexical_cast<string>(m_experiments.size()));
192
+
193
+ int index = -1;
194
+ BOOST_FOREACH(shared_ptr<MLExperiment> experiment, m_experiments)
195
+ {
196
+ ++index;
197
+ experiment->setPrediction(initialPredictions.at(index));
198
+ }
199
+ m_initialPredictionsDefined = true;
200
+ }
201
+
202
+ shared_ptr<MLExperiment> MLData::getExperiment( int experimentIndex )
203
+ {
204
+ return m_experiments.at(experimentIndex);
205
+ }
206
+
207
+ vector<int>& MLData::getFoldNumbers()
208
+ {
209
+ return m_foldNumbers;
210
+ }
211
+
212
+ vector<int> MLData::getFolds()
213
+ {
214
+ vector<int> folds(m_experiments.size());
215
+
216
+ typedef pair<int, vector<shared_ptr<MLExperiment> > > ElementType;
217
+ BOOST_FOREACH(ElementType p, m_cvExperiments)
218
+ {
219
+ int fold = p.first;
220
+ vector<shared_ptr<MLExperiment> > experiments = p.second;
221
+
222
+ BOOST_FOREACH(shared_ptr<MLExperiment>& experiment, experiments)
223
+ {
224
+ folds.at(experiment->getExperimentIndex()) = fold;
225
+ }
226
+ }
227
+ return folds;
228
+ }
229
+
230
+
231
+
232
+
@@ -0,0 +1 @@
1
+ #include "MachineLearning/MLData/MLDataFields.h"
@@ -0,0 +1,139 @@
1
+ #include "MachineLearning/MLData/MLDataReader.h"
2
+ #include "MachineLearning/MLData/MLDataFields.h"
3
+ #include "MachineLearning/MLData/MLData.h"
4
+ #include "MachineLearning/MLExperiment.h"
5
+
6
+ #include <sstream>
7
+ #include <boost/foreach.hpp>
8
+ #include <boost/make_shared.hpp>
9
+ #include <stdexcept>
10
+ #include <boost/lexical_cast.hpp>
11
+ using std::runtime_error;
12
+ using boost::make_shared;
13
+ using std::ostringstream;
14
+ using boost::lexical_cast;
15
+ #include "sqlite3.h"
16
+ #include "utils/VlcMessage.h"
17
+
18
+ MLDataReader::MLDataReader()
19
+ {
20
+ fieldsSpec = make_shared<MLDataFields>();
21
+ missingValueDefined = false;
22
+ }
23
+
24
+ MLDataReader::~MLDataReader()
25
+ {
26
+
27
+ }
28
+
29
+ void MLDataReader::execute(MLData* mlData)
30
+ {
31
+ vlcMessage.Begin("Input (sqlite3)");
32
+ if (tableName.empty())
33
+ throw runtime_error("tableName not specified for sqlite3 database!");
34
+
35
+ sqlite3 *database;
36
+ sqlite3_open(databaseName.c_str(), &database);
37
+
38
+ int experimentIndex = -1;
39
+
40
+ sqlite3_stmt *statement;
41
+
42
+ vector<string> featuresToLoad = fieldsSpec->featuresFields;
43
+
44
+ vector<shared_ptr<MLExperiment> > experiments;
45
+
46
+ int attributesStartIndex = 1;
47
+ int weightIndex = -1;
48
+ int initialPredictionsIndex = -1;
49
+ int responseIndex = -1;
50
+
51
+ if (!fieldsSpec->actualYField.empty()) responseIndex = attributesStartIndex++;
52
+ if (!fieldsSpec->weightsField.empty()) weightIndex = attributesStartIndex++;
53
+ if (!fieldsSpec->initialPredictionsField.empty()) initialPredictionsIndex = attributesStartIndex++;
54
+
55
+ string sql = getSelectSql();
56
+
57
+ vector<double> features(featuresToLoad.size());
58
+
59
+ if(sqlite3_prepare_v2(database, sql.c_str(), -1, &statement, 0) != SQLITE_OK)
60
+ throw runtime_error("Couldn't prepare sql: " + sql);
61
+
62
+ while(true)
63
+ {
64
+ if (sqlite3_step(statement) != SQLITE_ROW) break;
65
+
66
+ int experimentId = sqlite3_column_int(statement, 0);
67
+
68
+ double yValue = responseIndex != -1 ? sqlite3_column_double(statement, 1) : 0.0;
69
+ double weight = weightIndex != -1 ? sqlite3_column_double(statement, weightIndex) : 1.0;
70
+ double initialPrediction = initialPredictionsIndex != -1 ? sqlite3_column_double(statement, initialPredictionsIndex) : -1;
71
+
72
+ if (responseIndex != -1 && missingValueDefined && yValue == missingValue)
73
+ continue;
74
+
75
+ int index = attributesStartIndex;
76
+ BOOST_FOREACH(double& f, features)
77
+ {
78
+ f = sqlite3_column_double(statement, index++);
79
+ // attributes.at(i) = q.GetDouble(f_attributes.at(i));
80
+ }
81
+
82
+ // create an MLExperiment
83
+ ++experimentIndex;
84
+ shared_ptr<MLExperiment> experiment =
85
+ shared_ptr<MLExperiment>(new MLExperiment(experimentId, experimentIndex, yValue, initialPrediction, weight, features));
86
+
87
+ experiments.push_back(experiment);
88
+ }
89
+
90
+ sqlite3_finalize(statement);
91
+ string error = sqlite3_errmsg(database);
92
+ if (error != "not an error")
93
+ vlcMessage.Write("Error: " + sql + "\n" + error, 1);
94
+
95
+ // finally, set up our MLData object
96
+ mlData->setExperiments(experiments);
97
+ mlData->setFeatures(featuresToLoad);
98
+ mlData->constructCategories(categoricalFeatures);
99
+ mlData->setInitialPredictionsDefined(initialPredictionsIndex != -1);
100
+
101
+ if (missingValueDefined)
102
+ mlData->setMissingValue(missingValue);
103
+
104
+ reportOnData(mlData, fieldsSpec);
105
+
106
+ vlcMessage.End();
107
+ }
108
+
109
+ string MLDataReader::getSelectSql()
110
+ {
111
+ ostringstream str;
112
+ str << "SELECT " << "tbl.'" << fieldsSpec->experimentIdField << "'";
113
+
114
+ if (fieldsSpec->actualYField != "")
115
+ str << ", tbl.'" << fieldsSpec->actualYField << "'";
116
+
117
+ if (fieldsSpec->weightsField != "")
118
+ str << ", tbl.'" << fieldsSpec->weightsField << "'";
119
+
120
+ if (fieldsSpec->initialPredictionsField != "")
121
+ str << ", tbl.'" << fieldsSpec->initialPredictionsField << "'";
122
+
123
+ vector<string> featuresToLoad = fieldsSpec->featuresFields;
124
+
125
+ for (unsigned int i = 0; i < featuresToLoad.size(); ++i)
126
+ str << ", tbl.'" << featuresToLoad.at(i) << "'";
127
+
128
+ str << " FROM '" << tableName << "' tbl";
129
+ str << " ORDER BY tbl.'" << fieldsSpec->experimentIdField << "'";
130
+ return str.str();
131
+ }
132
+
133
+ void MLDataReader::reportOnData(MLData* data, shared_ptr<MLDataFields> fieldsSpec)
134
+ {
135
+ vector<shared_ptr<MLExperiment> > experiments = data->getExperiments();
136
+ vlcMessage.Write("Successfully read " + lexical_cast<string>(experiments.size()) + " experiments");
137
+ vlcMessage.Write("Loaded " + lexical_cast<string>(fieldsSpec->featuresFields.size()) + " fields");
138
+ }
139
+
@@ -0,0 +1,96 @@
1
+ // #include "MachineLearning/MLData/ZenithMLData.h"
2
+ // #include "MachineLearning/MLData/MLData.h"
3
+ // #include "MachineLearning/MLExperiment.h"
4
+
5
+ // #include "RubyUtils.h"
6
+ // using namespace RubyUtils;
7
+
8
+ // void zenith_mldata_Free(void* v)
9
+ // {
10
+ // delete (reinterpret_cast<MLData*>(v));
11
+ // }
12
+
13
+ // OtInterface::VALUE zenith_mldata_New(int argc, VALUE* argv, VALUE klass)
14
+ // {
15
+ // VALUE obj = otRuby->DataWrapStruct(klass, 0, zenith_mldata_Free, 0);
16
+ // otRuby->rb_obj_call_init(obj, argc, argv);
17
+ // return obj;
18
+ // }
19
+
20
+ // OtInterface::VALUE zenith_mldata_Initialize(VALUE self)
21
+ // {
22
+ // if (otRuby->GetDataPtr(self)) zenith_mldata_Free(otRuby->GetDataPtr(self));
23
+ // otRuby->SetDataPtr(self, NULL);
24
+
25
+ // MLData* mlData = new MLData();
26
+ // if (mlData == NULL) otRuby->rb_sys_fail("MLData class could not be created");
27
+ // otRuby->SetDataPtr(self, mlData);
28
+ // return self;
29
+ // }
30
+
31
+ // OtInterface::VALUE zenith_mldata_createFolds(VALUE self, VALUE rb_numFolds, VALUE rb_randomSeed)
32
+ // {
33
+ // MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
34
+ // int numFolds;
35
+ // int randomSeed;
36
+ // RubyUtils::fromValue(rb_numFolds, numFolds);
37
+ // RubyUtils::fromValue(rb_randomSeed, randomSeed);
38
+ // mlData->createFolds(numFolds, randomSeed);
39
+ // return TOtRubyInterface::Qnil;
40
+ // }
41
+
42
+ // OtInterface::VALUE zenith_mldata_getResponse(VALUE self)
43
+ // {
44
+ // vector<double> response;
45
+ // MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
46
+
47
+ // vector<shared_ptr<MLExperiment> >& experiments = mlData->getExperiments();
48
+ // response.reserve(experiments.size());
49
+
50
+ // BOOST_FOREACH(auto& experiment, experiments)
51
+ // {
52
+ // response.push_back(experiment->getY());
53
+ // }
54
+ // return RubyUtils::toValue(response);
55
+ // }
56
+
57
+ // OtInterface::VALUE zenith_mldata_getExperimentIds( VALUE self )
58
+ // {
59
+ // vector<double> response;
60
+ // MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
61
+
62
+ // vector<shared_ptr<MLExperiment> >& experiments = mlData->getExperiments();
63
+ // response.reserve(experiments.size());
64
+
65
+ // BOOST_FOREACH(auto& experiment, experiments)
66
+ // {
67
+ // response.push_back(experiment->getExperimentId());
68
+ // }
69
+ // return RubyUtils::toValue(response);
70
+ // }
71
+
72
+ // OtInterface::VALUE zenith_mldata_setInitialPredictions( VALUE self, VALUE rb_predictions )
73
+ // {
74
+ // MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
75
+ // vector<double> predictions;
76
+ // vlcMessage.Write("Setting predictions");
77
+ // RubyUtils::fromValue(rb_predictions, predictions);
78
+ // vlcMessage.Write("Successfully converted to vector");
79
+ // mlData->setInitialPredictions(predictions);
80
+ // vlcMessage.Write("Successfully applied to MLData");
81
+ // return TOtRubyInterface::Qnil;
82
+ // }
83
+
84
+ // OtInterface::VALUE zenith_mldata_setFolds( VALUE self, VALUE rb_folds )
85
+ // {
86
+ // MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
87
+ // vector<int> folds = RubyUtils::fromValue<vector<int> >(rb_folds);
88
+ // mlData->setFolds(folds);
89
+ // return TOtRubyInterface::Qnil;
90
+ // }
91
+
92
+ // OtInterface::VALUE zenith_mldata_getFolds( VALUE self )
93
+ // {
94
+ // MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
95
+ // return RubyUtils::toValue(mlData->getFolds());
96
+ // }
@@ -0,0 +1,113 @@
1
+ // #include "MachineLearning/MLData/ZenithMLDataReader.h"
2
+ // #include "MachineLearning/MLData/MLDataReader.h"
3
+ // #include "MachineLearning/MLData/MLDataFields.h"
4
+ // #include "MachineLearning/MLData/ZenithMLData.h"
5
+
6
+ // #include "RubyUtils.h"
7
+ // using namespace RubyUtils;
8
+
9
+ // void zenith_mldatareader_Free(void* v)
10
+ // {
11
+ // delete (reinterpret_cast<MLDataReader*>(v));
12
+ // }
13
+
14
+ // OtInterface::VALUE zenith_mldatareader_New(int argc, VALUE* argv, VALUE klass)
15
+ // {
16
+ // VALUE obj = otRuby->DataWrapStruct(klass, 0, zenith_mldatareader_Free, 0);
17
+ // otRuby->rb_obj_call_init(obj, argc, argv);
18
+ // return obj;
19
+ // }
20
+
21
+ // OtInterface::VALUE zenith_mldatareader_Initialize(VALUE self)
22
+ // {
23
+ // if (otRuby->GetDataPtr(self)) zenith_mldatareader_Free(otRuby->GetDataPtr(self));
24
+ // otRuby->SetDataPtr(self, NULL);
25
+
26
+ // MLDataReader* mlDataReader = new MLDataReader();
27
+ // if (mlDataReader == NULL) otRuby->rb_sys_fail("ZenithMLDataReader class could not be created");
28
+ // otRuby->SetDataPtr(self, mlDataReader);
29
+ // return self;
30
+ // }
31
+
32
+ // OtInterface::VALUE zenith_mldatareader_execute(VALUE self)
33
+ // {
34
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
35
+ // VALUE obj = zenith_mldata_New(0, 0, rb_cMLData);
36
+ // MLData* mlData = (MLData*)otRuby->GetDataPtr(obj);
37
+
38
+ // try
39
+ // {
40
+ // mlDataReader->execute(mlData);
41
+ // }
42
+ // catch (std::exception e)
43
+ // {
44
+ // vlcMessage.Raise((string("Caught error: ") + e.what()).c_str());
45
+ // }
46
+
47
+ // return obj;
48
+ // }
49
+
50
+ // OtInterface::VALUE zenith_mldatareader_setDatabaseName(VALUE self, VALUE databaseNameValue)
51
+ // {
52
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
53
+ // mlDataReader->databaseName = RubyUtils::fromValue<string>(databaseNameValue);
54
+ // return TOtRubyInterface::Qnil;
55
+ // }
56
+
57
+ // OtInterface::VALUE zenith_mldatareader_setTableName(VALUE self, VALUE tableNameValue)
58
+ // {
59
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
60
+ // mlDataReader->tableName = RubyUtils::fromValue<string>(tableNameValue);
61
+ // return TOtRubyInterface::Qnil;
62
+ // }
63
+
64
+ // OtInterface::VALUE zenith_mldatareader_setFeaturesToLoad(VALUE self, VALUE featuresValue)
65
+ // {
66
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
67
+ // mlDataReader->fieldsSpec->featuresFields = RubyUtils::fromValue<vector<string> >(featuresValue);
68
+ // return TOtRubyInterface::Qnil;
69
+ // }
70
+
71
+ // OtInterface::VALUE zenith_mldatareader_setCategoricalFeatures(VALUE self, VALUE categoricalFeaturesValue)
72
+ // {
73
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
74
+ // mlDataReader->categoricalFeatures = RubyUtils::fromValue<vector<string> >(categoricalFeaturesValue);
75
+ // return TOtRubyInterface::Qnil;
76
+ // }
77
+
78
+ // OtInterface::VALUE zenith_mldatareader_setActualYField(VALUE self, VALUE yFieldValue)
79
+ // {
80
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
81
+ // mlDataReader->fieldsSpec->actualYField = RubyUtils::fromValue<string>(yFieldValue);
82
+ // return TOtRubyInterface::Qnil;
83
+ // }
84
+
85
+ // OtInterface::VALUE zenith_mldatareader_setExperimentIdField(VALUE self, VALUE experimentIdFieldValue)
86
+ // {
87
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
88
+ // mlDataReader->fieldsSpec->experimentIdField = RubyUtils::fromValue<string>(experimentIdFieldValue);
89
+ // return TOtRubyInterface::Qnil;
90
+ // }
91
+
92
+ // OtInterface::VALUE zenith_mldatareader_setWeightsField(VALUE self, VALUE weightsFieldValue)
93
+ // {
94
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
95
+ // mlDataReader->fieldsSpec->weightsField = RubyUtils::fromValue<string>(weightsFieldValue);
96
+ // return TOtRubyInterface::Qnil;
97
+
98
+ // }
99
+
100
+ // OtInterface::VALUE zenith_mldatareader_setInitialPredictionsField(VALUE self, VALUE initialEstimatesFieldValue)
101
+ // {
102
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
103
+ // mlDataReader->fieldsSpec->initialPredictionsField = RubyUtils::fromValue<string>(initialEstimatesFieldValue);
104
+ // return TOtRubyInterface::Qnil;
105
+ // }
106
+
107
+ // OtInterface::VALUE zenith_mldatareader_setMissingValue(VALUE self, VALUE missingValue)
108
+ // {
109
+ // MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
110
+ // mlDataReader->missingValue = RubyUtils::fromValue<double>(missingValue);
111
+ // mlDataReader->missingValueDefined = true;
112
+ // return TOtRubyInterface::Qnil;
113
+ // }
@@ -0,0 +1,69 @@
1
+ #include "MachineLearning/MLExperiment.h"
2
+
3
+ MLExperiment::MLExperiment(int experimentId, int experimentIndex, double y, double initialPrediction,
4
+ double weight, vector<double> features)
5
+ : m_experimentId(experimentId), m_experimentIndex(experimentIndex), m_yValue(y), m_prediction(initialPrediction),
6
+ m_weight(weight), m_features(features)
7
+ {
8
+
9
+ }
10
+
11
+ MLExperiment::MLExperiment(shared_ptr<MLExperiment> mlExperiment)
12
+ : m_yValue(mlExperiment->getY()),
13
+ m_experimentId(mlExperiment->getExperimentId()),
14
+ m_experimentIndex(mlExperiment->getExperimentIndex()),
15
+ m_prediction(mlExperiment->getPrediction()),
16
+ m_weight(mlExperiment->getWeight()),
17
+ m_features(mlExperiment->getFeatureValues())
18
+ {
19
+ }
20
+
21
+ MLExperiment::MLExperiment()
22
+ {
23
+
24
+ }
25
+
26
+ MLExperiment::~MLExperiment()
27
+ {
28
+
29
+ }
30
+
31
+ double MLExperiment::getY()
32
+ {
33
+ return m_yValue;
34
+ }
35
+
36
+ int MLExperiment::getExperimentId()
37
+ {
38
+ return m_experimentId;
39
+ }
40
+
41
+ int MLExperiment::getExperimentIndex()
42
+ {
43
+ return m_experimentIndex;
44
+ }
45
+
46
+ double MLExperiment::getPrediction()
47
+ {
48
+ return m_prediction;
49
+ }
50
+
51
+ double MLExperiment::getWeight()
52
+ {
53
+ return m_weight;
54
+ }
55
+
56
+ vector<double>& MLExperiment::getFeatureValues()
57
+ {
58
+ return m_features;
59
+ }
60
+
61
+ double MLExperiment::getFeatureValue(int featureIndex)
62
+ {
63
+ return m_features.at(featureIndex);
64
+ }
65
+
66
+ void MLExperiment::setPrediction(double prediction)
67
+ {
68
+ m_prediction = prediction;
69
+ }