ml4r 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. data/ext/ml4r/LinearRegression/LinearRegression.cpp +305 -0
  2. data/ext/ml4r/LinearRegression/OLSLinearRegression.cpp +75 -0
  3. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.cpp +50 -0
  4. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.cpp +195 -0
  5. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.cpp +551 -0
  6. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.cpp +22 -0
  7. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.cpp +21 -0
  8. data/ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.cpp +142 -0
  9. data/ext/ml4r/MachineLearning/GBM/BernoulliCalculator.cpp +95 -0
  10. data/ext/ml4r/MachineLearning/GBM/GBMEstimator.cpp +601 -0
  11. data/ext/ml4r/MachineLearning/GBM/GBMOutput.cpp +86 -0
  12. data/ext/ml4r/MachineLearning/GBM/GBMRunner.cpp +117 -0
  13. data/ext/ml4r/MachineLearning/GBM/GaussianCalculator.cpp +94 -0
  14. data/ext/ml4r/MachineLearning/GBM/ZenithGBM.cpp +317 -0
  15. data/ext/ml4r/MachineLearning/MLData/MLData.cpp +232 -0
  16. data/ext/ml4r/MachineLearning/MLData/MLDataFields.cpp +1 -0
  17. data/ext/ml4r/MachineLearning/MLData/MLDataReader.cpp +139 -0
  18. data/ext/ml4r/MachineLearning/MLData/ZenithMLData.cpp +96 -0
  19. data/ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.cpp +113 -0
  20. data/ext/ml4r/MachineLearning/MLExperiment.cpp +69 -0
  21. data/ext/ml4r/MachineLearning/MLRunner.cpp +183 -0
  22. data/ext/ml4r/MachineLearning/MLUtils.cpp +15 -0
  23. data/ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.cpp +172 -0
  24. data/ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.cpp +66 -0
  25. data/ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.cpp +84 -0
  26. data/ext/ml4r/MachineLearning/RandomForest/ZenithRandomForest.cpp +184 -0
  27. data/ext/ml4r/ml4r.cpp +34 -0
  28. data/ext/ml4r/ml4r_wrap.cpp +15727 -0
  29. data/ext/ml4r/utils/MathUtils.cpp +204 -0
  30. data/ext/ml4r/utils/StochasticUtils.cpp +73 -0
  31. data/ext/ml4r/utils/Utils.cpp +14 -0
  32. data/ext/ml4r/utils/VlcMessage.cpp +3 -0
  33. metadata +33 -1
@@ -0,0 +1,183 @@
1
+ #include "MachineLearning/MLRunner.h"
2
+ #include "MachineLearning/MLData/MLData.h"
3
+ #include "MachineLearning/MLOutput.h"
4
+ #include "MachineLearning/MLExperiment.h"
5
+ #include "MachineLearning/MLParameters.h"
6
+ #include "MachineLearning/MLEstimator.h"
7
+ #include "MachineLearning/MLEstimatorFactory.h"
8
+
9
+ #include "utils/VlcMessage.h"
10
+
11
+ // #ifdef TBB_USE_THREADING_TOOLS
12
+ // #undef TBB_USE_THREADING_TOOLS
13
+ // #endif
14
+ // #define TBB_USE_THREADING_TOOLS 1
15
+ // #include "tbb/task_scheduler_init.h"
16
+ // #include "tbb/parallel_for.h"
17
+ // #include "tbb/blocked_range.h"
18
+ // #include "tbb/explicit_range.h"
19
+
20
+ #include <boost/foreach.hpp>
21
+
22
+ MLRunner::MLRunner()
23
+ : m_data(0)
24
+ {
25
+
26
+ }
27
+
28
+ MLRunner::~MLRunner()
29
+ {
30
+
31
+ }
32
+
33
+ void MLRunner::execute()
34
+ {
35
+ checks();
36
+ config();
37
+ input();
38
+ estimate();
39
+ output();
40
+ }
41
+
42
+ void MLRunner::checks()
43
+ {
44
+ if (m_data == 0)
45
+ throw std::runtime_error("[MLRunner::checks()] - MLData is not defined - have you provided input data?");
46
+ }
47
+
48
+ void MLRunner::config()
49
+ {
50
+
51
+ }
52
+
53
+ void MLRunner::input()
54
+ {
55
+
56
+ }
57
+
58
+ void MLRunner::estimate()
59
+ {
60
+ vector<int>& foldNumbers = m_data->getFoldNumbers();
61
+ long numFolds = foldNumbers.size();
62
+ long numThreads = numFolds; // TODO: change this!
63
+
64
+ // tbb::task_scheduler_init init(numFolds);
65
+ // static tbb::simple_partitioner sp;
66
+
67
+ //int grainSize = numFolds / numThreads;
68
+
69
+ m_outputObjects.resize(numFolds);
70
+ m_estimators.resize(numFolds);
71
+
72
+ //tbb::parallel_for(explicit_range<size_t>(0, numFolds, grainSize),
73
+ // [&](const explicit_range<size_t>& r) {
74
+ // int threadNumber = r.begin() / grainSize;
75
+ // for(size_t foldIndex=r.begin(); foldIndex!=r.end(); ++foldIndex)
76
+ for (long foldIndex = 0; foldIndex < numFolds; ++foldIndex)
77
+ {
78
+ vlcMessage.Begin("Estimating");
79
+ int foldNumber = foldNumbers.at(foldIndex);
80
+
81
+ shared_ptr<MLEstimator> estimator = createEstimator(m_data, m_data->getTrainingExperiments(foldNumber));
82
+ m_estimators.at(foldIndex) = estimator;
83
+ m_outputObjects.at(foldIndex) = estimator->estimate();
84
+
85
+ vlcMessage.End();
86
+ }
87
+ //}, sp);
88
+ }
89
+
90
+ void MLRunner::output()
91
+ {
92
+
93
+ }
94
+
95
+ void MLRunner::setData( MLData* data )
96
+ {
97
+ m_data = data;
98
+ }
99
+
100
+ MLData* MLRunner::getData()
101
+ {
102
+ return m_data;
103
+ }
104
+
105
+ vector<double> MLRunner::getPredictions( MLData* newData )
106
+ {
107
+ if (m_data->initialPredictionsDefined() && !newData->initialPredictionsDefined())
108
+ throw std::runtime_error("Cannot apply model to new data as initial predictions are not defined (but were in initial data).");
109
+
110
+ if (m_data->getFeatures() != newData->getFeatures())
111
+ throw std::runtime_error("Features in prediction dataset do not match those in the estimation dataset (order is important)");
112
+
113
+ return getPredictions(newData->getExperiments());
114
+ }
115
+
116
+ vector<double> MLRunner::getPredictions( vector<shared_ptr<MLExperiment> > experiments )
117
+ {
118
+ // we can get a prediction from each of our outputs, when then need to be averaged.
119
+ vector<double> predictions;
120
+ predictions.reserve(experiments.size());
121
+
122
+ BOOST_FOREACH(shared_ptr<MLExperiment> experiment, experiments)
123
+ {
124
+ vector<double> experimentPredictions;
125
+ experimentPredictions.reserve(m_outputObjects.size());
126
+
127
+ BOOST_FOREACH(shared_ptr<MLOutput> outputObject, m_outputObjects)
128
+ {
129
+ experimentPredictions.push_back(outputObject->predictForExperiment(experiment));
130
+ }
131
+ predictions.push_back(m_outputObjects.front()->calculateAveragePredictions(experimentPredictions));
132
+ }
133
+ return predictions;
134
+ }
135
+
136
+ vector<double> MLRunner::getMeanTrainingPredictions()
137
+ {
138
+ long experimentCount = m_data->getExperiments().size();
139
+ vector<double> meanPredictions;
140
+ meanPredictions.reserve(experimentCount);
141
+
142
+ vector<vector<double> > predictionsForEachFold(experimentCount);
143
+
144
+ BOOST_FOREACH(vector<double>& experimentPredictions, predictionsForEachFold)
145
+ experimentPredictions.reserve(m_outputObjects.size() - 1);
146
+
147
+ BOOST_FOREACH(shared_ptr<MLOutput> outputObject, m_outputObjects)
148
+ {
149
+ BOOST_FOREACH(int experimentIndex, outputObject->getTrainingExperimentIndicies())
150
+ {
151
+ double prediction = outputObject->predictForExperiment(m_data->getExperiment(experimentIndex));
152
+ predictionsForEachFold.at(experimentIndex).push_back(prediction);
153
+ }
154
+ }
155
+
156
+ // take the mean of our predictions
157
+ BOOST_FOREACH(vector<double>& experimentPredictions, predictionsForEachFold)
158
+ meanPredictions.push_back(m_outputObjects.front()->calculateAveragePredictions(experimentPredictions));
159
+
160
+ return meanPredictions;
161
+ }
162
+
163
+ vector<double> MLRunner::getCrossValidationPredictions()
164
+ {
165
+ int experimentCount = (int) m_data->getExperiments().size();
166
+ vector<double> predictions(experimentCount);
167
+
168
+ int foldIndex = -1;
169
+ vector<int> foldNumbers = m_data->getFoldNumbers();
170
+
171
+ BOOST_FOREACH(shared_ptr<MLOutput> outputObject, m_outputObjects)
172
+ {
173
+ ++foldIndex;
174
+ int foldNumber = foldNumbers.at(foldIndex);
175
+
176
+ BOOST_FOREACH(shared_ptr<MLExperiment> experiment, m_data->getCrossValidationExperiments(foldNumber))
177
+ {
178
+ double prediction = outputObject->predictForExperiment(experiment);
179
+ predictions.at(experiment->getExperimentIndex()) = prediction;
180
+ }
181
+ }
182
+ return predictions;
183
+ }
@@ -0,0 +1,15 @@
1
+ #include "MachineLearning/MLUtils.h"
2
+
3
+ #include <boost/foreach.hpp>
4
+
5
+ double MLUtils::getMeanY(vector<shared_ptr<MLExperiment> > experiments)
6
+ {
7
+ double sumY = 0.0, sumWeight = 0.0;
8
+ BOOST_FOREACH(shared_ptr<MLExperiment>& e, experiments)
9
+ {
10
+ sumY += e->getY() * e->getWeight();
11
+ sumWeight += e->getWeight();
12
+ }
13
+
14
+ return sumY / sumWeight;
15
+ }
@@ -0,0 +1,172 @@
1
+ #include "MachineLearning/RandomForest/RandomForestEstimator.h"
2
+ #include "MachineLearning/MLData/MLData.h"
3
+ #include "MachineLearning/DecisionTree/DecisionTreeExperiment.h"
4
+ #include "MachineLearning/DecisionTree/DecisionTreeNode.h"
5
+ #include "MachineLearning/DecisionTree/NodeSplitterCategorical.h"
6
+ #include "MachineLearning/DecisionTree/NodeSplitterContinuous.h"
7
+ #include "MachineLearning/DecisionTree/SplitDefinition.h"
8
+ #include "MachineLearning/RandomForest/RandomForestOutput.h"
9
+ #include "MachineLearning/MLUtils.h"
10
+
11
+ #include "utils/VlcMessage.h"
12
+
13
+ #include <boost/make_shared.hpp>
14
+ using boost::make_shared;
15
+
16
+ RandomForestEstimator::RandomForestEstimator(MLData* data,
17
+ vector<shared_ptr<MLExperiment> > experiments,
18
+ shared_ptr<RandomForestParameters> parameters)
19
+ : MLEstimator(data, experiments), m_parameters(parameters)
20
+ {
21
+ m_decisionTreeExperiments.reserve(experiments.size());
22
+ BOOST_FOREACH(shared_ptr<MLExperiment>& experiment, experiments)
23
+ m_decisionTreeExperiments.push_back(make_shared<DecisionTreeExperiment>(experiment));
24
+
25
+ vector<int> experimentIndicies;
26
+ experimentIndicies.reserve(experiments.size());
27
+ BOOST_FOREACH(shared_ptr<MLExperiment>& experiment, experiments)
28
+ experimentIndicies.push_back(experiment->getExperimentIndex());
29
+
30
+ m_output = shared_ptr<RandomForestOutput>(new RandomForestOutput(m_data, experimentIndicies, m_parameters));
31
+ }
32
+
33
+ RandomForestEstimator::~RandomForestEstimator()
34
+ {
35
+
36
+ }
37
+
38
+ shared_ptr<MLOutput> RandomForestEstimator::estimate()
39
+ {
40
+ initializeEstimator();
41
+ updateZ();
42
+
43
+ for (int iteration = 0; iteration < m_parameters->numIterations; ++iteration)
44
+ {
45
+ if (m_parameters->verbose)
46
+ vlcMessage.Begin((string("Iteration ") + boost::lexical_cast<string>(iteration + 1)).c_str());
47
+
48
+ performIteration();
49
+
50
+ if (m_parameters->verbose)
51
+ vlcMessage.End();
52
+ }
53
+ return shared_ptr<MLOutput>(m_output);
54
+ }
55
+
56
+ shared_ptr<MLOutput> RandomForestEstimator::estimateMore(int numTrees)
57
+ {
58
+ initializeEstimator();
59
+ updateZ();
60
+ int numberOfExistingTrees = m_output->getNumTrees();
61
+
62
+ for (int iteration = 0; iteration < numTrees; ++iteration)
63
+ {
64
+ if (m_parameters->verbose)
65
+ vlcMessage.Begin((string("Iteration ") + boost::lexical_cast<string>(numberOfExistingTrees + iteration + 1)).c_str());
66
+
67
+ performIteration();
68
+
69
+ if (m_parameters->verbose)
70
+ vlcMessage.End();
71
+ }
72
+ return shared_ptr<MLOutput>(m_output);
73
+ }
74
+
75
+ void RandomForestEstimator::updateZ()
76
+ {
77
+ BOOST_FOREACH(shared_ptr<DecisionTreeExperiment> e, m_decisionTreeExperiments)
78
+ e->setZ(e->getY());
79
+ }
80
+
81
+ void RandomForestEstimator::performIteration()
82
+ {
83
+ vector<shared_ptr<DecisionTreeExperiment> > experiments;
84
+ size_t bagSize = m_decisionTreeExperiments.size() * m_parameters->bagFraction;
85
+
86
+ if (m_parameters->withReplacement)
87
+ experiments = MLUtils::bagObjectsWithReplacement<shared_ptr<DecisionTreeExperiment> >(m_decisionTreeExperiments, (int) bagSize);
88
+ else
89
+ {
90
+ pair<vector<shared_ptr<DecisionTreeExperiment> >,vector<shared_ptr<DecisionTreeExperiment> > > inAndOutOfBag =
91
+ MLUtils::bagObjectsWithoutReplacement<shared_ptr<DecisionTreeExperiment> >(m_decisionTreeExperiments, (int) std::min(m_decisionTreeExperiments.size(), bagSize));
92
+ experiments = inAndOutOfBag.first;
93
+ }
94
+
95
+ if (m_parameters->verbose)
96
+ vlcMessage.Begin("Constructing decision tree");
97
+
98
+ constructDecisionTree(experiments);
99
+
100
+ m_output->addHeadDecisionTreeNode(m_decisionTreeHead);
101
+ m_decisionTreeHead->clearExperimentsWithinTree();
102
+
103
+ if (m_parameters->verbose)
104
+ vlcMessage.End();
105
+ }
106
+
107
+ void RandomForestEstimator::constructDecisionTree(vector<shared_ptr<DecisionTreeExperiment> >& experiments)
108
+ {
109
+ vector<shared_ptr<DecisionTreeNode> > currentGeneration;
110
+ vector<shared_ptr<DecisionTreeNode> > nextGeneration;
111
+
112
+ // create a head DecisionTreeNode
113
+ double sumZ = 0.0, sumW = 0.0;
114
+ BOOST_FOREACH(shared_ptr<DecisionTreeExperiment>& e, experiments)
115
+ {
116
+ double w = e->getWeight();
117
+ sumW += w;
118
+ sumZ += w * e->getZ();
119
+ }
120
+
121
+ m_decisionTreeHead = shared_ptr<DecisionTreeNode>(new DecisionTreeNode(experiments, sumZ, sumW, ROOT, shared_ptr<SplitDefinition>()));
122
+ currentGeneration.push_back(m_decisionTreeHead);
123
+
124
+ NodeSplitter splitter(m_data, m_parameters->minObservations, m_parameters->scale);
125
+
126
+ while (!currentGeneration.empty())
127
+ {
128
+ BOOST_FOREACH(shared_ptr<DecisionTreeNode> nodeToSplit, currentGeneration)
129
+ {
130
+ if (nodeToSplit->getSumW() == 0)
131
+ continue;
132
+
133
+ // choose M variables to test splitting on
134
+ // find terminal node with best improvement for any of those variables
135
+ pair<vector<int>,vector<int> > inAndOut = MLUtils::bagObjectsWithoutReplacement<int>(m_featureIndices, std::min((int)m_featureIndices.size(), m_parameters->tryMVariables));
136
+ vector<int> featuresToConsider = inAndOut.first;
137
+
138
+ double bestImprovement = 0.0;
139
+ shared_ptr<SplitDefinition> bestSplit;
140
+
141
+ vector<shared_ptr<DecisionTreeNode> > children = splitter.splitNode(nodeToSplit, featuresToConsider);
142
+ BOOST_FOREACH(shared_ptr<DecisionTreeNode>& child, children)
143
+ {
144
+ nextGeneration.push_back(child);
145
+ }
146
+ }
147
+ currentGeneration = nextGeneration;
148
+ nextGeneration.clear();
149
+ }
150
+
151
+
152
+ }
153
+
154
+ void RandomForestEstimator::initializeEstimator()
155
+ {
156
+ m_missingValueDefined = m_data->missingValueDefined();
157
+ if (m_missingValueDefined)
158
+ m_missingValue = m_data->getMissingValue();
159
+
160
+
161
+ constructFeatureIndices();
162
+ // sortTrainingExperiments();
163
+ }
164
+
165
+ void RandomForestEstimator::constructFeatureIndices()
166
+ {
167
+ BOOST_FOREACH(string feature, m_parameters->featuresToRun)
168
+ {
169
+ // note that in a given run, we may not "run" with all loaded variables.
170
+ m_featureIndices.push_back(m_data->getFeatureIndex(feature));
171
+ }
172
+ }
@@ -0,0 +1,66 @@
1
#include "MachineLearning/RandomForest/RandomForestOutput.h"
#include "MachineLearning/RandomForest/RandomForestParameters.h"
#include "MachineLearning/DecisionTree/DecisionTreeNode.h"
#include "MachineLearning/DecisionTree/DecisionTreeExperiment.h"

#include "utils/VlcMessage.h"

#include <stdexcept>

#include <boost/make_shared.hpp>
using boost::make_shared;
10
+
11
+ RandomForestOutput::RandomForestOutput( MLData* trainingData, vector<int> trainingExperimentIndicies, shared_ptr<RandomForestParameters> parameters )
12
+ : MLOutput(trainingData, trainingExperimentIndicies), m_parameters(parameters)
13
+ {
14
+
15
+ }
16
+
17
+ RandomForestOutput::~RandomForestOutput()
18
+ {
19
+
20
+ }
21
+
22
+ shared_ptr<RandomForestParameters> RandomForestOutput::getParameters()
23
+ {
24
+ return m_parameters;
25
+ }
26
+
27
+ double RandomForestOutput::predictForExperiment( shared_ptr<MLExperiment> experiment )
28
+ {
29
+ shared_ptr<DecisionTreeExperiment> dtExperiment = make_shared<DecisionTreeExperiment>(experiment);
30
+
31
+ double sumPrediction = 0.0;
32
+ int count = 0;
33
+ BOOST_FOREACH(shared_ptr<DecisionTreeNode>& head, m_headNodes)
34
+ {
35
+ shared_ptr<DecisionTreeNode> node = head->getTerminalNodeForExperiment(dtExperiment);
36
+ if (node.get() == 0)
37
+ node = head;
38
+
39
+ if (node->getSumW() == 0)
40
+ {
41
+
42
+ vlcMessage.Write("Zero weight!! WTF!!");
43
+ vlcMessage.Write("SumZ: "+ boost::lexical_cast<string>(node->getSumZ()));
44
+ vlcMessage.Write("exp.size() " + boost::lexical_cast<string>(node->getExperiments().size()));
45
+ vlcMessage.Write("Node is head: " + boost::lexical_cast<string>(node == head));
46
+ }
47
+
48
+ if (node->isTerminalNode())
49
+ {
50
+ sumPrediction += node->getSumZ() / node->getSumW();
51
+ count++;
52
+ }
53
+
54
+ }
55
+ return sumPrediction / count;
56
+ }
57
+
58
+ void RandomForestOutput::addHeadDecisionTreeNode( shared_ptr<DecisionTreeNode> node )
59
+ {
60
+ m_headNodes.push_back(node);
61
+ }
62
+
63
+ int RandomForestOutput::getNumTrees()
64
+ {
65
+ return (int) m_headNodes.size();
66
+ }
@@ -0,0 +1,84 @@
1
+ #include "MachineLearning/RandomForest/RandomForestRunner.h"
2
+ #include "MachineLearning/RandomForest/RandomForestOutput.h"
3
+ #include "MachineLearning/RandomForest/RandomForestEstimator.h"
4
+ #include "MachineLearning/DecisionTree/DecisionTreeNode.h"
5
+ #include "MachineLearning/MLData/MLData.h"
6
+
7
+ #include "utils/VlcMessage.h"
8
+
9
+ // #ifdef TBB_USE_THREADING_TOOLS
10
+ // #undef TBB_USE_THREADING_TOOLS
11
+ // #endif
12
+ // #define TBB_USE_THREADING_TOOLS 1
13
+ // #include "tbb/task_scheduler_init.h"
14
+ // #include "tbb/parallel_for.h"
15
+ // #include "tbb/blocked_range.h"
16
+ // #include "tbb/explicit_range.h"
17
+
18
+ #include <boost/pointer_cast.hpp>
19
+ using boost::dynamic_pointer_cast;
20
+ #include <math.h>
21
+
22
+ RandomForestRunner::RandomForestRunner()
23
+ {
24
+
25
+ }
26
+
27
+ RandomForestRunner::~RandomForestRunner()
28
+ {
29
+
30
+ }
31
+
32
+ void RandomForestRunner::estimateMore(int numTrees)
33
+ {
34
+ int numFolds = m_data->getNumFolds();
35
+ int numThreads = m_data->getNumFolds();
36
+
37
+ // tbb::task_scheduler_init init(numFolds);
38
+ // static tbb::simple_partitioner sp;
39
+
40
+ int grainSize = numFolds / numThreads;
41
+
42
+ // tbb::parallel_for(explicit_range<size_t>(0, numFolds, grainSize),
43
+ // [&](const explicit_range<size_t>& r) {
44
+ // int threadNumber = r.begin() / grainSize;
45
+ // for(size_t foldIndex=r.begin(); foldIndex!=r.end(); ++foldIndex)
46
+ for(int foldIndex=numFolds; foldIndex<numFolds; ++foldIndex)
47
+ {
48
+ vlcMessage.Begin("Estimating more...");
49
+
50
+ shared_ptr<RandomForestEstimator> estimator = dynamic_pointer_cast<RandomForestEstimator>(m_estimators.at(foldIndex));
51
+ estimator->estimateMore(numTrees);
52
+
53
+ vlcMessage.End();
54
+ }
55
+ // }, sp);
56
+ }
57
+
58
+ void RandomForestRunner::config()
59
+ {
60
+ vector<string>& dataFeatures = m_data->getFeatures();
61
+
62
+ if (parameters->featuresToRun.empty())
63
+ parameters->featuresToRun = dataFeatures;
64
+ else
65
+ {
66
+ BOOST_FOREACH(string& feature, parameters->featuresToRun)
67
+ {
68
+ if (Utils::vectorIndex(dataFeatures, feature) == -1)
69
+ throw std::runtime_error("Feature '" + feature + "' specified as part of parameter 'featuresToRun', but feature not found in data");
70
+ }
71
+ }
72
+ if (parameters->featuresToRun.empty())
73
+ throw std::runtime_error("There are no features to run!");
74
+
75
+ if (m_data->missingValueDefined())
76
+ DecisionTreeNode::setMissingValue(m_data->getMissingValue());
77
+ }
78
+
79
+
80
+
81
+ shared_ptr<MLEstimator> RandomForestRunner::createEstimator( MLData* data, vector<shared_ptr<MLExperiment> > trainingExperiments )
82
+ {
83
+ return shared_ptr<MLEstimator>(shared_ptr<RandomForestEstimator>(new RandomForestEstimator(data, trainingExperiments, parameters)));
84
+ }