ml4r 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/ml4r/LinearRegression/LinearRegression.cpp +305 -0
- data/ext/ml4r/LinearRegression/OLSLinearRegression.cpp +75 -0
- data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.cpp +50 -0
- data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.cpp +195 -0
- data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.cpp +551 -0
- data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.cpp +22 -0
- data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.cpp +21 -0
- data/ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.cpp +142 -0
- data/ext/ml4r/MachineLearning/GBM/BernoulliCalculator.cpp +95 -0
- data/ext/ml4r/MachineLearning/GBM/GBMEstimator.cpp +601 -0
- data/ext/ml4r/MachineLearning/GBM/GBMOutput.cpp +86 -0
- data/ext/ml4r/MachineLearning/GBM/GBMRunner.cpp +117 -0
- data/ext/ml4r/MachineLearning/GBM/GaussianCalculator.cpp +94 -0
- data/ext/ml4r/MachineLearning/GBM/ZenithGBM.cpp +317 -0
- data/ext/ml4r/MachineLearning/MLData/MLData.cpp +232 -0
- data/ext/ml4r/MachineLearning/MLData/MLDataFields.cpp +1 -0
- data/ext/ml4r/MachineLearning/MLData/MLDataReader.cpp +139 -0
- data/ext/ml4r/MachineLearning/MLData/ZenithMLData.cpp +96 -0
- data/ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.cpp +113 -0
- data/ext/ml4r/MachineLearning/MLExperiment.cpp +69 -0
- data/ext/ml4r/MachineLearning/MLRunner.cpp +183 -0
- data/ext/ml4r/MachineLearning/MLUtils.cpp +15 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.cpp +172 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.cpp +66 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.cpp +84 -0
- data/ext/ml4r/MachineLearning/RandomForest/ZenithRandomForest.cpp +184 -0
- data/ext/ml4r/ml4r.cpp +34 -0
- data/ext/ml4r/ml4r_wrap.cpp +15727 -0
- data/ext/ml4r/utils/MathUtils.cpp +204 -0
- data/ext/ml4r/utils/StochasticUtils.cpp +73 -0
- data/ext/ml4r/utils/Utils.cpp +14 -0
- data/ext/ml4r/utils/VlcMessage.cpp +3 -0
- metadata +33 -1
@@ -0,0 +1,232 @@
|
|
1
|
+
#include "MachineLearning/MLData/MLData.h"
|
2
|
+
#include "MachineLearning/MLExperiment.h"
|
3
|
+
#include "utils/Utils.h"
|
4
|
+
|
5
|
+
#include <boost/foreach.hpp>
|
6
|
+
#include <boost/lexical_cast.hpp>
|
7
|
+
using boost::lexical_cast;
|
8
|
+
|
9
|
+
// Constructs an empty data set: no experiments, no features, no folds.
//
// Both status flags start out false so that missingValueDefined() and
// initialPredictionsDefined() return a determinate value even when the
// corresponding setter is never called.
MLData::MLData() : m_missingValueDefined(false)
{
    // Assigned in the body (not the initialiser list) because the member
    // declaration order is not visible from this file.
    m_initialPredictionsDefined = false;
}
|
11
|
+
|
12
|
+
// Destructor. Performs no explicit cleanup.
MLData::~MLData()
{
}
|
14
|
+
|
15
|
+
// Returns a mutable reference to the full list of experiments held by this
// object (the list installed by setExperiments()).
vector<shared_ptr<MLExperiment> >& MLData::getExperiments()
{
    return this->m_experiments;
}
|
19
|
+
|
20
|
+
// Returns the training experiments stored for the given fold.
//
// The lookup uses operator[] on m_trainingExperiments.
// NOTE(review): the container looks like a map keyed by fold, in which case an
// unknown fold silently creates (and returns) an empty list - confirm.
vector<shared_ptr<MLExperiment> >& MLData::getTrainingExperiments(int fold)
{
    vector<shared_ptr<MLExperiment> >& trainingSet = m_trainingExperiments[fold];
    return trainingSet;
}
|
24
|
+
|
25
|
+
// Looks up an experiment by its experiment id.
//
// Throws std::runtime_error when no experiment with that id has been
// registered through setExperiments().
shared_ptr<MLExperiment> MLData::getExperimentWithId(int experimentId)
{
    const bool known = m_experimentsById.find(experimentId) != m_experimentsById.end();
    if (!known)
    {
        const string message = "Could not find experiment with id: " + lexical_cast<string>(experimentId);
        throw std::runtime_error(message.c_str());
    }

    return m_experimentsById[experimentId];
}
|
32
|
+
|
33
|
+
// Replaces the experiments held by this object.
//
// Rebuilds the id -> experiment lookup used by getExperimentWithId() and
// resets the fold structure to a single fold via createFolds(1, 0).
void MLData::setExperiments(vector<shared_ptr<MLExperiment> > experiments)
{
    m_experiments = experiments;

    // Start from an empty lookup: without this, ids from a previous call that
    // are absent from the new list would still resolve to stale experiments.
    m_experimentsById.clear();
    BOOST_FOREACH(shared_ptr<MLExperiment>& experiment, m_experiments)
        m_experimentsById[experiment->getExperimentId()] = experiment;

    createFolds(1, 0);
}
|
41
|
+
|
42
|
+
// Returns a mutable reference to the feature names, in the order they were
// passed to setFeatures().
vector<string>& MLData::getFeatures()
{
    return this->m_featureNames;
}
|
46
|
+
|
47
|
+
// Replaces the feature names and rebuilds the name -> position lookup.
//
// Each name is mapped to its zero-based position in `features`. If a name
// occurs more than once, the last position wins.
void MLData::setFeatures(vector<string> features)
{
    m_featureNames = features;

    // Drop entries from any previous feature list; otherwise a name that is no
    // longer present would still resolve in getFeatureIndex().
    m_featureIndices.clear();

    int index = -1;
    BOOST_FOREACH(string& e, m_featureNames)
    {
        ++index;
        m_featureIndices[e] = index;
    }
}
|
57
|
+
|
58
|
+
// Marks the named features as categorical by recording their feature indices
// in m_categoricalFeatureIndices.
//
// Throws std::runtime_error for the first name that is not a known feature;
// names processed before it remain recorded.
void MLData::constructCategories(vector<string> categoricalFeatures)
{
    for (vector<string>::iterator name = categoricalFeatures.begin();
         name != categoricalFeatures.end(); ++name)
    {
        const map<string,int>::iterator entry = m_featureIndices.find(*name);
        if (entry == m_featureIndices.end())
            throw std::runtime_error("Could not find categorical feature '" + *name + "' in list of features to load or run");

        m_categoricalFeatureIndices.insert(entry->second);
    }
}
|
70
|
+
|
71
|
+
// Returns the position of `feature` as recorded by setFeatures().
//
// Throws std::runtime_error when the feature name is unknown.
int MLData::getFeatureIndex(string& feature)
{
    const map<string,int>::iterator found = m_featureIndices.find(feature);
    if (found != m_featureIndices.end())
        return found->second;

    throw std::runtime_error("Cannot get feature index for feature '" + feature + "'. Feature not loaded.");
}
|
79
|
+
|
80
|
+
// Returns a mutable reference to the set of feature indices that were marked
// categorical by constructCategories().
set<int>& MLData::getCategoricalFeatureIndices()
{
    return this->m_categoricalFeatureIndices;
}
|
84
|
+
|
85
|
+
void MLData::setInitialPredictionsDefined(bool defined)
|
86
|
+
{
|
87
|
+
m_initialPredictionsDefined = defined;
|
88
|
+
}
|
89
|
+
|
90
|
+
bool MLData::initialPredictionsDefined()
|
91
|
+
{
|
92
|
+
return m_initialPredictionsDefined;
|
93
|
+
}
|
94
|
+
|
95
|
+
void MLData::createFolds(int numFolds, int randomSeed)
|
96
|
+
{
|
97
|
+
m_foldNumbers.clear();
|
98
|
+
for (int i = 0; i < numFolds; ++i)
|
99
|
+
m_foldNumbers.push_back(i);
|
100
|
+
|
101
|
+
m_cvExperiments.clear();
|
102
|
+
m_trainingExperiments.clear();
|
103
|
+
|
104
|
+
if (m_foldNumbers.size() == 1)
|
105
|
+
{
|
106
|
+
m_trainingExperiments[0] = m_experiments;
|
107
|
+
return;
|
108
|
+
}
|
109
|
+
|
110
|
+
srand(randomSeed);
|
111
|
+
vector<int> folds = Utils::vectorRange<int>(0,numFolds-1);
|
112
|
+
vector<int> repeatedFolds = Utils::vectorRepeat(folds, m_experiments.size());
|
113
|
+
vector<int> randomlySortedFolds = Utils::vectorShuffle(repeatedFolds);
|
114
|
+
|
115
|
+
|
116
|
+
int index = -1;
|
117
|
+
BOOST_FOREACH(shared_ptr<MLExperiment> experiment, m_experiments)
|
118
|
+
{
|
119
|
+
++index;
|
120
|
+
for (int fold = 0; fold < numFolds; ++fold)
|
121
|
+
{
|
122
|
+
if (randomlySortedFolds.at(index) == fold)
|
123
|
+
m_cvExperiments[fold].push_back(experiment);
|
124
|
+
else
|
125
|
+
m_trainingExperiments[fold].push_back(experiment);
|
126
|
+
}
|
127
|
+
}
|
128
|
+
}
|
129
|
+
|
130
|
+
|
131
|
+
// Installs a caller-supplied fold assignment.
//
// experimentFolds[i] is the fold of the i-th experiment. For every distinct
// fold value, experiments labelled with that value form the fold's
// cross-validation set and all other experiments form its training set.
//
// Throws std::runtime_error when the number of labels differs from the number
// of experiments; in that case the existing fold state is left untouched.
void MLData::setFolds( vector<int> experimentFolds )
{
    // Validate before mutating so a rejected call does not wipe current folds.
    if (experimentFolds.size() != m_experiments.size())
        throw std::runtime_error("[MLData::setFolds] to use this method, the list of folds must have the same length as the list of training experiments!");

    m_cvExperiments.clear();
    m_trainingExperiments.clear();

    // Keep each fold number once (ascending). Storing one entry per experiment
    // made getNumFolds() report the experiment count and pushed every
    // experiment into the cv/training lists once per duplicate label.
    const set<int> distinctFolds(experimentFolds.begin(), experimentFolds.end());
    m_foldNumbers.assign(distinctFolds.begin(), distinctFolds.end());

    for (vector<int>::size_type i = 0; i < experimentFolds.size(); ++i)
    {
        const int experimentFold = experimentFolds[i];
        const shared_ptr<MLExperiment>& experiment = m_experiments[i];

        BOOST_FOREACH(int fold, m_foldNumbers)
        {
            if (fold == experimentFold)
                m_cvExperiments[fold].push_back(experiment);
            else
                m_trainingExperiments[fold].push_back(experiment);
        }
    }
}
|
157
|
+
|
158
|
+
int MLData::getNumFolds()
|
159
|
+
{
|
160
|
+
return (int) m_foldNumbers.size();
|
161
|
+
}
|
162
|
+
|
163
|
+
// Returns the cross-validation (held-out) experiments stored for `fold`.
//
// The lookup uses operator[] on m_cvExperiments.
// NOTE(review): the container looks like a map keyed by fold, in which case an
// unknown fold silently creates (and returns) an empty list - confirm.
vector<shared_ptr<MLExperiment> >& MLData::getCrossValidationExperiments(int fold)
{
    vector<shared_ptr<MLExperiment> >& heldOut = m_cvExperiments[fold];
    return heldOut;
}
|
167
|
+
|
168
|
+
void MLData::setMissingValue(double missingValue)
|
169
|
+
{
|
170
|
+
m_missingValueDefined = true;
|
171
|
+
m_missingValue = missingValue;
|
172
|
+
}
|
173
|
+
|
174
|
+
bool MLData::missingValueDefined()
|
175
|
+
{
|
176
|
+
return m_missingValueDefined;
|
177
|
+
}
|
178
|
+
|
179
|
+
double MLData::getMissingValue()
|
180
|
+
{
|
181
|
+
if (!m_missingValueDefined)
|
182
|
+
throw std::runtime_error("Cannot ask for missing value when it is not defined!");
|
183
|
+
|
184
|
+
return m_missingValue;
|
185
|
+
}
|
186
|
+
|
187
|
+
// Applies one initial prediction per experiment, in experiment order, and
// marks initial predictions as defined.
//
// Throws std::runtime_error when the number of predictions differs from the
// number of experiments; no experiment is modified in that case.
void MLData::setInitialPredictions( vector<double> initialPredictions )
{
    const bool sameLength = (m_experiments.size() == initialPredictions.size());
    if (!sameLength)
        throw std::runtime_error("Initial predictions are not of the same length as experiments. " +
            lexical_cast<string>(initialPredictions.size()) + " versus " + lexical_cast<string>(m_experiments.size()));

    for (vector<double>::size_type i = 0; i < initialPredictions.size(); ++i)
        m_experiments[i]->setPrediction(initialPredictions.at(i));

    m_initialPredictionsDefined = true;
}
|
201
|
+
|
202
|
+
// Returns the experiment stored at position `experimentIndex`.
// The access is bounds-checked via at(), which throws on an invalid position.
shared_ptr<MLExperiment> MLData::getExperiment( int experimentIndex )
{
    shared_ptr<MLExperiment> selected = m_experiments.at(experimentIndex);
    return selected;
}
|
206
|
+
|
207
|
+
// Returns a mutable reference to the fold numbers recorded by createFolds()
// or setFolds().
vector<int>& MLData::getFoldNumbers()
{
    return this->m_foldNumbers;
}
|
211
|
+
|
212
|
+
// Returns, for every experiment, the fold in which it is held out for
// cross-validation. The result is indexed by MLExperiment::getExperimentIndex().
//
// Entries for experiments that appear in no cross-validation set keep the
// value-initialised 0; in particular, after createFolds() with a single fold
// (which populates no cross-validation sets) every entry is 0.
// Throws (via at()) if an experiment index lies outside the experiment list.
vector<int> MLData::getFolds()
{
    vector<int> folds(m_experiments.size());

    // Iterate by const reference: the by-value form deep-copied each fold's
    // experiment vector twice per iteration.
    typedef pair<const int, vector<shared_ptr<MLExperiment> > > ElementType;
    BOOST_FOREACH(const ElementType& p, m_cvExperiments)
    {
        const int fold = p.first;
        const vector<shared_ptr<MLExperiment> >& experiments = p.second;

        BOOST_FOREACH(const shared_ptr<MLExperiment>& experiment, experiments)
        {
            folds.at(experiment->getExperimentIndex()) = fold;
        }
    }
    return folds;
}
|
229
|
+
|
230
|
+
|
231
|
+
|
232
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "MachineLearning/MLData/MLDataFields.h"
|
@@ -0,0 +1,139 @@
|
|
1
|
+
#include "MachineLearning/MLData/MLDataReader.h"
|
2
|
+
#include "MachineLearning/MLData/MLDataFields.h"
|
3
|
+
#include "MachineLearning/MLData/MLData.h"
|
4
|
+
#include "MachineLearning/MLExperiment.h"
|
5
|
+
|
6
|
+
#include <sstream>
|
7
|
+
#include <boost/foreach.hpp>
|
8
|
+
#include <boost/make_shared.hpp>
|
9
|
+
#include <stdexcept>
|
10
|
+
#include <boost/lexical_cast.hpp>
|
11
|
+
using std::runtime_error;
|
12
|
+
using boost::make_shared;
|
13
|
+
using std::ostringstream;
|
14
|
+
using boost::lexical_cast;
|
15
|
+
#include "sqlite3.h"
|
16
|
+
#include "utils/VlcMessage.h"
|
17
|
+
|
18
|
+
// Creates a reader with an empty field specification and no missing-value
// sentinel defined.
MLDataReader::MLDataReader()
{
    missingValueDefined = false;
    fieldsSpec          = make_shared<MLDataFields>();
}
|
23
|
+
|
24
|
+
// Destructor. Performs no explicit cleanup.
MLDataReader::~MLDataReader()
{
}
|
28
|
+
|
29
|
+
void MLDataReader::execute(MLData* mlData)
|
30
|
+
{
|
31
|
+
vlcMessage.Begin("Input (sqlite3)");
|
32
|
+
if (tableName.empty())
|
33
|
+
throw runtime_error("tableName not specified for sqlite3 database!");
|
34
|
+
|
35
|
+
sqlite3 *database;
|
36
|
+
sqlite3_open(databaseName.c_str(), &database);
|
37
|
+
|
38
|
+
int experimentIndex = -1;
|
39
|
+
|
40
|
+
sqlite3_stmt *statement;
|
41
|
+
|
42
|
+
vector<string> featuresToLoad = fieldsSpec->featuresFields;
|
43
|
+
|
44
|
+
vector<shared_ptr<MLExperiment> > experiments;
|
45
|
+
|
46
|
+
int attributesStartIndex = 1;
|
47
|
+
int weightIndex = -1;
|
48
|
+
int initialPredictionsIndex = -1;
|
49
|
+
int responseIndex = -1;
|
50
|
+
|
51
|
+
if (!fieldsSpec->actualYField.empty()) responseIndex = attributesStartIndex++;
|
52
|
+
if (!fieldsSpec->weightsField.empty()) weightIndex = attributesStartIndex++;
|
53
|
+
if (!fieldsSpec->initialPredictionsField.empty()) initialPredictionsIndex = attributesStartIndex++;
|
54
|
+
|
55
|
+
string sql = getSelectSql();
|
56
|
+
|
57
|
+
vector<double> features(featuresToLoad.size());
|
58
|
+
|
59
|
+
if(sqlite3_prepare_v2(database, sql.c_str(), -1, &statement, 0) != SQLITE_OK)
|
60
|
+
throw runtime_error("Couldn't prepare sql: " + sql);
|
61
|
+
|
62
|
+
while(true)
|
63
|
+
{
|
64
|
+
if (sqlite3_step(statement) != SQLITE_ROW) break;
|
65
|
+
|
66
|
+
int experimentId = sqlite3_column_int(statement, 0);
|
67
|
+
|
68
|
+
double yValue = responseIndex != -1 ? sqlite3_column_double(statement, 1) : 0.0;
|
69
|
+
double weight = weightIndex != -1 ? sqlite3_column_double(statement, weightIndex) : 1.0;
|
70
|
+
double initialPrediction = initialPredictionsIndex != -1 ? sqlite3_column_double(statement, initialPredictionsIndex) : -1;
|
71
|
+
|
72
|
+
if (responseIndex != -1 && missingValueDefined && yValue == missingValue)
|
73
|
+
continue;
|
74
|
+
|
75
|
+
int index = attributesStartIndex;
|
76
|
+
BOOST_FOREACH(double& f, features)
|
77
|
+
{
|
78
|
+
f = sqlite3_column_double(statement, index++);
|
79
|
+
// attributes.at(i) = q.GetDouble(f_attributes.at(i));
|
80
|
+
}
|
81
|
+
|
82
|
+
// create an MLExperiment
|
83
|
+
++experimentIndex;
|
84
|
+
shared_ptr<MLExperiment> experiment =
|
85
|
+
shared_ptr<MLExperiment>(new MLExperiment(experimentId, experimentIndex, yValue, initialPrediction, weight, features));
|
86
|
+
|
87
|
+
experiments.push_back(experiment);
|
88
|
+
}
|
89
|
+
|
90
|
+
sqlite3_finalize(statement);
|
91
|
+
string error = sqlite3_errmsg(database);
|
92
|
+
if (error != "not an error")
|
93
|
+
vlcMessage.Write("Error: " + sql + "\n" + error, 1);
|
94
|
+
|
95
|
+
// finally, set up our MLData object
|
96
|
+
mlData->setExperiments(experiments);
|
97
|
+
mlData->setFeatures(featuresToLoad);
|
98
|
+
mlData->constructCategories(categoricalFeatures);
|
99
|
+
mlData->setInitialPredictionsDefined(initialPredictionsIndex != -1);
|
100
|
+
|
101
|
+
if (missingValueDefined)
|
102
|
+
mlData->setMissingValue(missingValue);
|
103
|
+
|
104
|
+
reportOnData(mlData, fieldsSpec);
|
105
|
+
|
106
|
+
vlcMessage.End();
|
107
|
+
}
|
108
|
+
|
109
|
+
// Builds the SELECT statement that execute() runs.
//
// Column order: experiment id, then the response, weights and
// initial-predictions fields (each only when configured), then every feature
// field. Rows are ordered by experiment id. Field and table names are wrapped
// in single quotes and inserted verbatim, without escaping.
string MLDataReader::getSelectSql()
{
    ostringstream sql;
    sql << "SELECT tbl.'" << fieldsSpec->experimentIdField << "'";

    if (!fieldsSpec->actualYField.empty())
        sql << ", tbl.'" << fieldsSpec->actualYField << "'";

    if (!fieldsSpec->weightsField.empty())
        sql << ", tbl.'" << fieldsSpec->weightsField << "'";

    if (!fieldsSpec->initialPredictionsField.empty())
        sql << ", tbl.'" << fieldsSpec->initialPredictionsField << "'";

    const vector<string> featureColumns = fieldsSpec->featuresFields;
    for (vector<string>::const_iterator column = featureColumns.begin();
         column != featureColumns.end(); ++column)
    {
        sql << ", tbl.'" << *column << "'";
    }

    sql << " FROM '" << tableName << "' tbl"
        << " ORDER BY tbl.'" << fieldsSpec->experimentIdField << "'";
    return sql.str();
}
|
132
|
+
|
133
|
+
// Writes a short load summary to vlcMessage: the number of experiments now
// held by `data` and the number of feature fields in `fieldsSpec`.
void MLDataReader::reportOnData(MLData* data, shared_ptr<MLDataFields> fieldsSpec)
{
    // Bind by reference: only the size is needed, so copying the whole
    // experiment list (one shared_ptr copy per experiment) was wasted work.
    const vector<shared_ptr<MLExperiment> >& experiments = data->getExperiments();
    vlcMessage.Write("Successfully read " + lexical_cast<string>(experiments.size()) + " experiments");
    vlcMessage.Write("Loaded " + lexical_cast<string>(fieldsSpec->featuresFields.size()) + " fields");
}
|
139
|
+
|
@@ -0,0 +1,96 @@
|
|
1
|
+
// #include "MachineLearning/MLData/ZenithMLData.h"
|
2
|
+
// #include "MachineLearning/MLData/MLData.h"
|
3
|
+
// #include "MachineLearning/MLExperiment.h"
|
4
|
+
|
5
|
+
// #include "RubyUtils.h"
|
6
|
+
// using namespace RubyUtils;
|
7
|
+
|
8
|
+
// void zenith_mldata_Free(void* v)
|
9
|
+
// {
|
10
|
+
// delete (reinterpret_cast<MLData*>(v));
|
11
|
+
// }
|
12
|
+
|
13
|
+
// OtInterface::VALUE zenith_mldata_New(int argc, VALUE* argv, VALUE klass)
|
14
|
+
// {
|
15
|
+
// VALUE obj = otRuby->DataWrapStruct(klass, 0, zenith_mldata_Free, 0);
|
16
|
+
// otRuby->rb_obj_call_init(obj, argc, argv);
|
17
|
+
// return obj;
|
18
|
+
// }
|
19
|
+
|
20
|
+
// OtInterface::VALUE zenith_mldata_Initialize(VALUE self)
|
21
|
+
// {
|
22
|
+
// if (otRuby->GetDataPtr(self)) zenith_mldata_Free(otRuby->GetDataPtr(self));
|
23
|
+
// otRuby->SetDataPtr(self, NULL);
|
24
|
+
|
25
|
+
// MLData* mlData = new MLData();
|
26
|
+
// if (mlData == NULL) otRuby->rb_sys_fail("MLData class could not be created");
|
27
|
+
// otRuby->SetDataPtr(self, mlData);
|
28
|
+
// return self;
|
29
|
+
// }
|
30
|
+
|
31
|
+
// OtInterface::VALUE zenith_mldata_createFolds(VALUE self, VALUE rb_numFolds, VALUE rb_randomSeed)
|
32
|
+
// {
|
33
|
+
// MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
|
34
|
+
// int numFolds;
|
35
|
+
// int randomSeed;
|
36
|
+
// RubyUtils::fromValue(rb_numFolds, numFolds);
|
37
|
+
// RubyUtils::fromValue(rb_randomSeed, randomSeed);
|
38
|
+
// mlData->createFolds(numFolds, randomSeed);
|
39
|
+
// return TOtRubyInterface::Qnil;
|
40
|
+
// }
|
41
|
+
|
42
|
+
// OtInterface::VALUE zenith_mldata_getResponse(VALUE self)
|
43
|
+
// {
|
44
|
+
// vector<double> response;
|
45
|
+
// MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
|
46
|
+
|
47
|
+
// vector<shared_ptr<MLExperiment> >& experiments = mlData->getExperiments();
|
48
|
+
// response.reserve(experiments.size());
|
49
|
+
|
50
|
+
// BOOST_FOREACH(auto& experiment, experiments)
|
51
|
+
// {
|
52
|
+
// response.push_back(experiment->getY());
|
53
|
+
// }
|
54
|
+
// return RubyUtils::toValue(response);
|
55
|
+
// }
|
56
|
+
|
57
|
+
// OtInterface::VALUE zenith_mldata_getExperimentIds( VALUE self )
|
58
|
+
// {
|
59
|
+
// vector<double> response;
|
60
|
+
// MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
|
61
|
+
|
62
|
+
// vector<shared_ptr<MLExperiment> >& experiments = mlData->getExperiments();
|
63
|
+
// response.reserve(experiments.size());
|
64
|
+
|
65
|
+
// BOOST_FOREACH(auto& experiment, experiments)
|
66
|
+
// {
|
67
|
+
// response.push_back(experiment->getExperimentId());
|
68
|
+
// }
|
69
|
+
// return RubyUtils::toValue(response);
|
70
|
+
// }
|
71
|
+
|
72
|
+
// OtInterface::VALUE zenith_mldata_setInitialPredictions( VALUE self, VALUE rb_predictions )
|
73
|
+
// {
|
74
|
+
// MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
|
75
|
+
// vector<double> predictions;
|
76
|
+
// vlcMessage.Write("Setting predictions");
|
77
|
+
// RubyUtils::fromValue(rb_predictions, predictions);
|
78
|
+
// vlcMessage.Write("Successfully converted to vector");
|
79
|
+
// mlData->setInitialPredictions(predictions);
|
80
|
+
// vlcMessage.Write("Successfully applied to MLData");
|
81
|
+
// return TOtRubyInterface::Qnil;
|
82
|
+
// }
|
83
|
+
|
84
|
+
// OtInterface::VALUE zenith_mldata_setFolds( VALUE self, VALUE rb_folds )
|
85
|
+
// {
|
86
|
+
// MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
|
87
|
+
// vector<int> folds = RubyUtils::fromValue<vector<int> >(rb_folds);
|
88
|
+
// mlData->setFolds(folds);
|
89
|
+
// return TOtRubyInterface::Qnil;
|
90
|
+
// }
|
91
|
+
|
92
|
+
// OtInterface::VALUE zenith_mldata_getFolds( VALUE self )
|
93
|
+
// {
|
94
|
+
// MLData* mlData = (MLData*)otRuby->GetDataPtr(self);
|
95
|
+
// return RubyUtils::toValue(mlData->getFolds());
|
96
|
+
// }
|
@@ -0,0 +1,113 @@
|
|
1
|
+
// #include "MachineLearning/MLData/ZenithMLDataReader.h"
|
2
|
+
// #include "MachineLearning/MLData/MLDataReader.h"
|
3
|
+
// #include "MachineLearning/MLData/MLDataFields.h"
|
4
|
+
// #include "MachineLearning/MLData/ZenithMLData.h"
|
5
|
+
|
6
|
+
// #include "RubyUtils.h"
|
7
|
+
// using namespace RubyUtils;
|
8
|
+
|
9
|
+
// void zenith_mldatareader_Free(void* v)
|
10
|
+
// {
|
11
|
+
// delete (reinterpret_cast<MLDataReader*>(v));
|
12
|
+
// }
|
13
|
+
|
14
|
+
// OtInterface::VALUE zenith_mldatareader_New(int argc, VALUE* argv, VALUE klass)
|
15
|
+
// {
|
16
|
+
// VALUE obj = otRuby->DataWrapStruct(klass, 0, zenith_mldatareader_Free, 0);
|
17
|
+
// otRuby->rb_obj_call_init(obj, argc, argv);
|
18
|
+
// return obj;
|
19
|
+
// }
|
20
|
+
|
21
|
+
// OtInterface::VALUE zenith_mldatareader_Initialize(VALUE self)
|
22
|
+
// {
|
23
|
+
// if (otRuby->GetDataPtr(self)) zenith_mldatareader_Free(otRuby->GetDataPtr(self));
|
24
|
+
// otRuby->SetDataPtr(self, NULL);
|
25
|
+
|
26
|
+
// MLDataReader* mlDataReader = new MLDataReader();
|
27
|
+
// if (mlDataReader == NULL) otRuby->rb_sys_fail("ZenithMLDataReader class could not be created");
|
28
|
+
// otRuby->SetDataPtr(self, mlDataReader);
|
29
|
+
// return self;
|
30
|
+
// }
|
31
|
+
|
32
|
+
// OtInterface::VALUE zenith_mldatareader_execute(VALUE self)
|
33
|
+
// {
|
34
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
35
|
+
// VALUE obj = zenith_mldata_New(0, 0, rb_cMLData);
|
36
|
+
// MLData* mlData = (MLData*)otRuby->GetDataPtr(obj);
|
37
|
+
|
38
|
+
// try
|
39
|
+
// {
|
40
|
+
// mlDataReader->execute(mlData);
|
41
|
+
// }
|
42
|
+
// catch (std::exception e)
|
43
|
+
// {
|
44
|
+
// vlcMessage.Raise((string("Caught error: ") + e.what()).c_str());
|
45
|
+
// }
|
46
|
+
|
47
|
+
// return obj;
|
48
|
+
// }
|
49
|
+
|
50
|
+
// OtInterface::VALUE zenith_mldatareader_setDatabaseName(VALUE self, VALUE databaseNameValue)
|
51
|
+
// {
|
52
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
53
|
+
// mlDataReader->databaseName = RubyUtils::fromValue<string>(databaseNameValue);
|
54
|
+
// return TOtRubyInterface::Qnil;
|
55
|
+
// }
|
56
|
+
|
57
|
+
// OtInterface::VALUE zenith_mldatareader_setTableName(VALUE self, VALUE tableNameValue)
|
58
|
+
// {
|
59
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
60
|
+
// mlDataReader->tableName = RubyUtils::fromValue<string>(tableNameValue);
|
61
|
+
// return TOtRubyInterface::Qnil;
|
62
|
+
// }
|
63
|
+
|
64
|
+
// OtInterface::VALUE zenith_mldatareader_setFeaturesToLoad(VALUE self, VALUE featuresValue)
|
65
|
+
// {
|
66
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
67
|
+
// mlDataReader->fieldsSpec->featuresFields = RubyUtils::fromValue<vector<string> >(featuresValue);
|
68
|
+
// return TOtRubyInterface::Qnil;
|
69
|
+
// }
|
70
|
+
|
71
|
+
// OtInterface::VALUE zenith_mldatareader_setCategoricalFeatures(VALUE self, VALUE categoricalFeaturesValue)
|
72
|
+
// {
|
73
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
74
|
+
// mlDataReader->categoricalFeatures = RubyUtils::fromValue<vector<string> >(categoricalFeaturesValue);
|
75
|
+
// return TOtRubyInterface::Qnil;
|
76
|
+
// }
|
77
|
+
|
78
|
+
// OtInterface::VALUE zenith_mldatareader_setActualYField(VALUE self, VALUE yFieldValue)
|
79
|
+
// {
|
80
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
81
|
+
// mlDataReader->fieldsSpec->actualYField = RubyUtils::fromValue<string>(yFieldValue);
|
82
|
+
// return TOtRubyInterface::Qnil;
|
83
|
+
// }
|
84
|
+
|
85
|
+
// OtInterface::VALUE zenith_mldatareader_setExperimentIdField(VALUE self, VALUE experimentIdFieldValue)
|
86
|
+
// {
|
87
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
88
|
+
// mlDataReader->fieldsSpec->experimentIdField = RubyUtils::fromValue<string>(experimentIdFieldValue);
|
89
|
+
// return TOtRubyInterface::Qnil;
|
90
|
+
// }
|
91
|
+
|
92
|
+
// OtInterface::VALUE zenith_mldatareader_setWeightsField(VALUE self, VALUE weightsFieldValue)
|
93
|
+
// {
|
94
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
95
|
+
// mlDataReader->fieldsSpec->weightsField = RubyUtils::fromValue<string>(weightsFieldValue);
|
96
|
+
// return TOtRubyInterface::Qnil;
|
97
|
+
|
98
|
+
// }
|
99
|
+
|
100
|
+
// OtInterface::VALUE zenith_mldatareader_setInitialPredictionsField(VALUE self, VALUE initialEstimatesFieldValue)
|
101
|
+
// {
|
102
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
103
|
+
// mlDataReader->fieldsSpec->initialPredictionsField = RubyUtils::fromValue<string>(initialEstimatesFieldValue);
|
104
|
+
// return TOtRubyInterface::Qnil;
|
105
|
+
// }
|
106
|
+
|
107
|
+
// OtInterface::VALUE zenith_mldatareader_setMissingValue(VALUE self, VALUE missingValue)
|
108
|
+
// {
|
109
|
+
// MLDataReader* mlDataReader = (MLDataReader*)otRuby->GetDataPtr(self);
|
110
|
+
// mlDataReader->missingValue = RubyUtils::fromValue<double>(missingValue);
|
111
|
+
// mlDataReader->missingValueDefined = true;
|
112
|
+
// return TOtRubyInterface::Qnil;
|
113
|
+
// }
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#include "MachineLearning/MLExperiment.h"
|
2
|
+
|
3
|
+
// Constructs an experiment from explicit values.
//
//   experimentId       id stored and returned by getExperimentId()
//   experimentIndex    index stored and returned by getExperimentIndex()
//   y                  response value
//   initialPrediction  starting prediction (can be changed via setPrediction())
//   weight             experiment weight
//   features           feature values; copied into the experiment
MLExperiment::MLExperiment(int experimentId, int experimentIndex, double y, double initialPrediction,
                           double weight, vector<double> features)
    : m_experimentId(experimentId),
      m_experimentIndex(experimentIndex),
      m_yValue(y),
      m_prediction(initialPrediction),
      m_weight(weight),
      m_features(features)
{
}
|
10
|
+
|
11
|
+
// Constructs a copy of the experiment that `mlExperiment` points to, reading
// every value through its getters (the feature vector is copied).
// `mlExperiment` is dereferenced unconditionally, so it must not be null.
MLExperiment::MLExperiment(shared_ptr<MLExperiment> mlExperiment)
    : m_experimentId(mlExperiment->getExperimentId()),
      m_experimentIndex(mlExperiment->getExperimentIndex()),
      m_yValue(mlExperiment->getY()),
      m_prediction(mlExperiment->getPrediction()),
      m_weight(mlExperiment->getWeight()),
      m_features(mlExperiment->getFeatureValues())
{
}
|
20
|
+
|
21
|
+
// Constructs a placeholder experiment with no features.
//
// All scalar members are given determinate values so that the getters never
// read uninitialised memory: id and index are -1 (not a valid position),
// response and prediction are 0.0 and the weight is 1.0.
MLExperiment::MLExperiment()
    : m_experimentId(-1),
      m_experimentIndex(-1),
      m_yValue(0.0),
      m_prediction(0.0),
      m_weight(1.0),
      m_features()
{
}
|
25
|
+
|
26
|
+
// Destructor. Performs no explicit cleanup.
MLExperiment::~MLExperiment()
{
}
|
30
|
+
|
31
|
+
double MLExperiment::getY()
|
32
|
+
{
|
33
|
+
return m_yValue;
|
34
|
+
}
|
35
|
+
|
36
|
+
int MLExperiment::getExperimentId()
|
37
|
+
{
|
38
|
+
return m_experimentId;
|
39
|
+
}
|
40
|
+
|
41
|
+
int MLExperiment::getExperimentIndex()
|
42
|
+
{
|
43
|
+
return m_experimentIndex;
|
44
|
+
}
|
45
|
+
|
46
|
+
double MLExperiment::getPrediction()
|
47
|
+
{
|
48
|
+
return m_prediction;
|
49
|
+
}
|
50
|
+
|
51
|
+
double MLExperiment::getWeight()
|
52
|
+
{
|
53
|
+
return m_weight;
|
54
|
+
}
|
55
|
+
|
56
|
+
// Returns a mutable reference to this experiment's feature values.
vector<double>& MLExperiment::getFeatureValues()
{
    return this->m_features;
}
|
60
|
+
|
61
|
+
double MLExperiment::getFeatureValue(int featureIndex)
|
62
|
+
{
|
63
|
+
return m_features.at(featureIndex);
|
64
|
+
}
|
65
|
+
|
66
|
+
void MLExperiment::setPrediction(double prediction)
|
67
|
+
{
|
68
|
+
m_prediction = prediction;
|
69
|
+
}
|