ml4r 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. data/ext/ml4r/LinearRegression/LinearRegression.cpp +305 -0
  2. data/ext/ml4r/LinearRegression/OLSLinearRegression.cpp +75 -0
  3. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.cpp +50 -0
  4. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.cpp +195 -0
  5. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.cpp +551 -0
  6. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.cpp +22 -0
  7. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.cpp +21 -0
  8. data/ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.cpp +142 -0
  9. data/ext/ml4r/MachineLearning/GBM/BernoulliCalculator.cpp +95 -0
  10. data/ext/ml4r/MachineLearning/GBM/GBMEstimator.cpp +601 -0
  11. data/ext/ml4r/MachineLearning/GBM/GBMOutput.cpp +86 -0
  12. data/ext/ml4r/MachineLearning/GBM/GBMRunner.cpp +117 -0
  13. data/ext/ml4r/MachineLearning/GBM/GaussianCalculator.cpp +94 -0
  14. data/ext/ml4r/MachineLearning/GBM/ZenithGBM.cpp +317 -0
  15. data/ext/ml4r/MachineLearning/MLData/MLData.cpp +232 -0
  16. data/ext/ml4r/MachineLearning/MLData/MLDataFields.cpp +1 -0
  17. data/ext/ml4r/MachineLearning/MLData/MLDataReader.cpp +139 -0
  18. data/ext/ml4r/MachineLearning/MLData/ZenithMLData.cpp +96 -0
  19. data/ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.cpp +113 -0
  20. data/ext/ml4r/MachineLearning/MLExperiment.cpp +69 -0
  21. data/ext/ml4r/MachineLearning/MLRunner.cpp +183 -0
  22. data/ext/ml4r/MachineLearning/MLUtils.cpp +15 -0
  23. data/ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.cpp +172 -0
  24. data/ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.cpp +66 -0
  25. data/ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.cpp +84 -0
  26. data/ext/ml4r/MachineLearning/RandomForest/ZenithRandomForest.cpp +184 -0
  27. data/ext/ml4r/ml4r.cpp +34 -0
  28. data/ext/ml4r/ml4r_wrap.cpp +15727 -0
  29. data/ext/ml4r/utils/MathUtils.cpp +204 -0
  30. data/ext/ml4r/utils/StochasticUtils.cpp +73 -0
  31. data/ext/ml4r/utils/Utils.cpp +14 -0
  32. data/ext/ml4r/utils/VlcMessage.cpp +3 -0
  33. metadata +33 -1
@@ -0,0 +1,183 @@
1
+ #include "MachineLearning/MLRunner.h"
2
+ #include "MachineLearning/MLData/MLData.h"
3
+ #include "MachineLearning/MLOutput.h"
4
+ #include "MachineLearning/MLExperiment.h"
5
+ #include "MachineLearning/MLParameters.h"
6
+ #include "MachineLearning/MLEstimator.h"
7
+ #include "MachineLearning/MLEstimatorFactory.h"
8
+
9
+ #include "utils/VlcMessage.h"
10
+
11
+ // #ifdef TBB_USE_THREADING_TOOLS
12
+ // #undef TBB_USE_THREADING_TOOLS
13
+ // #endif
14
+ // #define TBB_USE_THREADING_TOOLS 1
15
+ // #include "tbb/task_scheduler_init.h"
16
+ // #include "tbb/parallel_for.h"
17
+ // #include "tbb/blocked_range.h"
18
+ // #include "tbb/explicit_range.h"
19
+
20
+ #include <boost/foreach.hpp>
21
+
22
+ MLRunner::MLRunner()
23
+ : m_data(0)
24
+ {
25
+
26
+ }
27
+
28
+ MLRunner::~MLRunner()
29
+ {
30
+
31
+ }
32
+
33
// Template method defining the full run lifecycle. The sequence is
// fixed; subclasses customise the individual steps (config/input/output
// are empty hooks in this base class).
void MLRunner::execute()
{
    checks();   // fail fast if no data has been attached
    config();   // subclass hook: parameter / feature setup
    input();    // subclass hook: data ingestion
    estimate(); // per-fold model fitting (see estimate())
    output();   // subclass hook: result emission
}
41
+
42
+ void MLRunner::checks()
43
+ {
44
+ if (m_data == 0)
45
+ throw std::runtime_error("[MLRunner::checks()] - MLData is not defined - have you provided input data?");
46
+ }
47
+
48
+ void MLRunner::config()
49
+ {
50
+
51
+ }
52
+
53
+ void MLRunner::input()
54
+ {
55
+
56
+ }
57
+
58
+ void MLRunner::estimate()
59
+ {
60
+ vector<int>& foldNumbers = m_data->getFoldNumbers();
61
+ long numFolds = foldNumbers.size();
62
+ long numThreads = numFolds; // TODO: change this!
63
+
64
+ // tbb::task_scheduler_init init(numFolds);
65
+ // static tbb::simple_partitioner sp;
66
+
67
+ //int grainSize = numFolds / numThreads;
68
+
69
+ m_outputObjects.resize(numFolds);
70
+ m_estimators.resize(numFolds);
71
+
72
+ //tbb::parallel_for(explicit_range<size_t>(0, numFolds, grainSize),
73
+ // [&](const explicit_range<size_t>& r) {
74
+ // int threadNumber = r.begin() / grainSize;
75
+ // for(size_t foldIndex=r.begin(); foldIndex!=r.end(); ++foldIndex)
76
+ for (long foldIndex = 0; foldIndex < numFolds; ++foldIndex)
77
+ {
78
+ vlcMessage.Begin("Estimating");
79
+ int foldNumber = foldNumbers.at(foldIndex);
80
+
81
+ shared_ptr<MLEstimator> estimator = createEstimator(m_data, m_data->getTrainingExperiments(foldNumber));
82
+ m_estimators.at(foldIndex) = estimator;
83
+ m_outputObjects.at(foldIndex) = estimator->estimate();
84
+
85
+ vlcMessage.End();
86
+ }
87
+ //}, sp);
88
+ }
89
+
90
+ void MLRunner::output()
91
+ {
92
+
93
+ }
94
+
95
+ void MLRunner::setData( MLData* data )
96
+ {
97
+ m_data = data;
98
+ }
99
+
100
+ MLData* MLRunner::getData()
101
+ {
102
+ return m_data;
103
+ }
104
+
105
+ vector<double> MLRunner::getPredictions( MLData* newData )
106
+ {
107
+ if (m_data->initialPredictionsDefined() && !newData->initialPredictionsDefined())
108
+ throw std::runtime_error("Cannot apply model to new data as initial predictions are not defined (but were in initial data).");
109
+
110
+ if (m_data->getFeatures() != newData->getFeatures())
111
+ throw std::runtime_error("Features in prediction dataset do not match those in the estimation dataset (order is important)");
112
+
113
+ return getPredictions(newData->getExperiments());
114
+ }
115
+
116
+ vector<double> MLRunner::getPredictions( vector<shared_ptr<MLExperiment> > experiments )
117
+ {
118
+ // we can get a prediction from each of our outputs, when then need to be averaged.
119
+ vector<double> predictions;
120
+ predictions.reserve(experiments.size());
121
+
122
+ BOOST_FOREACH(shared_ptr<MLExperiment> experiment, experiments)
123
+ {
124
+ vector<double> experimentPredictions;
125
+ experimentPredictions.reserve(m_outputObjects.size());
126
+
127
+ BOOST_FOREACH(shared_ptr<MLOutput> outputObject, m_outputObjects)
128
+ {
129
+ experimentPredictions.push_back(outputObject->predictForExperiment(experiment));
130
+ }
131
+ predictions.push_back(m_outputObjects.front()->calculateAveragePredictions(experimentPredictions));
132
+ }
133
+ return predictions;
134
+ }
135
+
136
// For every experiment, averages the predictions of each fold model that
// TRAINED on it (models iterate their own training indices, so a model
// never contributes to experiments outside its training set).
// The reserve of size()-1 suggests each experiment is expected to be in
// the training set of all but one fold — TODO confirm against the fold
// assignment logic.
vector<double> MLRunner::getMeanTrainingPredictions()
{
    long experimentCount = m_data->getExperiments().size();
    vector<double> meanPredictions;
    meanPredictions.reserve(experimentCount);

    // Indexed by experiment: predictionsForEachFold[i] accumulates the
    // predictions made for experiment i by each model trained on it.
    vector<vector<double> > predictionsForEachFold(experimentCount);

    // NOTE(review): if estimate() was never run, m_outputObjects.size()-1
    // underflows (size_t); callers are assumed to estimate first — verify.
    BOOST_FOREACH(vector<double>& experimentPredictions, predictionsForEachFold)
        experimentPredictions.reserve(m_outputObjects.size() - 1);

    BOOST_FOREACH(shared_ptr<MLOutput> outputObject, m_outputObjects)
    {
        BOOST_FOREACH(int experimentIndex, outputObject->getTrainingExperimentIndicies())
        {
            double prediction = outputObject->predictForExperiment(m_data->getExperiment(experimentIndex));
            predictionsForEachFold.at(experimentIndex).push_back(prediction);
        }
    }

    // take the mean of our predictions
    BOOST_FOREACH(vector<double>& experimentPredictions, predictionsForEachFold)
        meanPredictions.push_back(m_outputObjects.front()->calculateAveragePredictions(experimentPredictions));

    return meanPredictions;
}
162
+
163
+ vector<double> MLRunner::getCrossValidationPredictions()
164
+ {
165
+ int experimentCount = (int) m_data->getExperiments().size();
166
+ vector<double> predictions(experimentCount);
167
+
168
+ int foldIndex = -1;
169
+ vector<int> foldNumbers = m_data->getFoldNumbers();
170
+
171
+ BOOST_FOREACH(shared_ptr<MLOutput> outputObject, m_outputObjects)
172
+ {
173
+ ++foldIndex;
174
+ int foldNumber = foldNumbers.at(foldIndex);
175
+
176
+ BOOST_FOREACH(shared_ptr<MLExperiment> experiment, m_data->getCrossValidationExperiments(foldNumber))
177
+ {
178
+ double prediction = outputObject->predictForExperiment(experiment);
179
+ predictions.at(experiment->getExperimentIndex()) = prediction;
180
+ }
181
+ }
182
+ return predictions;
183
+ }
@@ -0,0 +1,15 @@
1
+ #include "MachineLearning/MLUtils.h"
2
+
3
+ #include <boost/foreach.hpp>
4
+
5
+ double MLUtils::getMeanY(vector<shared_ptr<MLExperiment> > experiments)
6
+ {
7
+ double sumY = 0.0, sumWeight = 0.0;
8
+ BOOST_FOREACH(shared_ptr<MLExperiment>& e, experiments)
9
+ {
10
+ sumY += e->getY() * e->getWeight();
11
+ sumWeight += e->getWeight();
12
+ }
13
+
14
+ return sumY / sumWeight;
15
+ }
@@ -0,0 +1,172 @@
1
+ #include "MachineLearning/RandomForest/RandomForestEstimator.h"
2
+ #include "MachineLearning/MLData/MLData.h"
3
+ #include "MachineLearning/DecisionTree/DecisionTreeExperiment.h"
4
+ #include "MachineLearning/DecisionTree/DecisionTreeNode.h"
5
+ #include "MachineLearning/DecisionTree/NodeSplitterCategorical.h"
6
+ #include "MachineLearning/DecisionTree/NodeSplitterContinuous.h"
7
+ #include "MachineLearning/DecisionTree/SplitDefinition.h"
8
+ #include "MachineLearning/RandomForest/RandomForestOutput.h"
9
+ #include "MachineLearning/MLUtils.h"
10
+
11
+ #include "utils/VlcMessage.h"
12
+
13
+ #include <boost/make_shared.hpp>
14
+ using boost::make_shared;
15
+
16
+ RandomForestEstimator::RandomForestEstimator(MLData* data,
17
+ vector<shared_ptr<MLExperiment> > experiments,
18
+ shared_ptr<RandomForestParameters> parameters)
19
+ : MLEstimator(data, experiments), m_parameters(parameters)
20
+ {
21
+ m_decisionTreeExperiments.reserve(experiments.size());
22
+ BOOST_FOREACH(shared_ptr<MLExperiment>& experiment, experiments)
23
+ m_decisionTreeExperiments.push_back(make_shared<DecisionTreeExperiment>(experiment));
24
+
25
+ vector<int> experimentIndicies;
26
+ experimentIndicies.reserve(experiments.size());
27
+ BOOST_FOREACH(shared_ptr<MLExperiment>& experiment, experiments)
28
+ experimentIndicies.push_back(experiment->getExperimentIndex());
29
+
30
+ m_output = shared_ptr<RandomForestOutput>(new RandomForestOutput(m_data, experimentIndicies, m_parameters));
31
+ }
32
+
33
+ RandomForestEstimator::~RandomForestEstimator()
34
+ {
35
+
36
+ }
37
+
38
+ shared_ptr<MLOutput> RandomForestEstimator::estimate()
39
+ {
40
+ initializeEstimator();
41
+ updateZ();
42
+
43
+ for (int iteration = 0; iteration < m_parameters->numIterations; ++iteration)
44
+ {
45
+ if (m_parameters->verbose)
46
+ vlcMessage.Begin((string("Iteration ") + boost::lexical_cast<string>(iteration + 1)).c_str());
47
+
48
+ performIteration();
49
+
50
+ if (m_parameters->verbose)
51
+ vlcMessage.End();
52
+ }
53
+ return shared_ptr<MLOutput>(m_output);
54
+ }
55
+
56
+ shared_ptr<MLOutput> RandomForestEstimator::estimateMore(int numTrees)
57
+ {
58
+ initializeEstimator();
59
+ updateZ();
60
+ int numberOfExistingTrees = m_output->getNumTrees();
61
+
62
+ for (int iteration = 0; iteration < numTrees; ++iteration)
63
+ {
64
+ if (m_parameters->verbose)
65
+ vlcMessage.Begin((string("Iteration ") + boost::lexical_cast<string>(numberOfExistingTrees + iteration + 1)).c_str());
66
+
67
+ performIteration();
68
+
69
+ if (m_parameters->verbose)
70
+ vlcMessage.End();
71
+ }
72
+ return shared_ptr<MLOutput>(m_output);
73
+ }
74
+
75
+ void RandomForestEstimator::updateZ()
76
+ {
77
+ BOOST_FOREACH(shared_ptr<DecisionTreeExperiment> e, m_decisionTreeExperiments)
78
+ e->setZ(e->getY());
79
+ }
80
+
81
// Grows one tree on a bagged sample of the training experiments and
// registers it with the output object.
void RandomForestEstimator::performIteration()
{
    vector<shared_ptr<DecisionTreeExperiment> > experiments;
    // Fractional bag size, truncated toward zero.
    size_t bagSize = m_decisionTreeExperiments.size() * m_parameters->bagFraction;

    if (m_parameters->withReplacement)
        experiments = MLUtils::bagObjectsWithReplacement<shared_ptr<DecisionTreeExperiment> >(m_decisionTreeExperiments, (int) bagSize);
    else
    {
        // Without replacement the bag cannot exceed the population size,
        // hence the min(). Only the in-bag half of the pair is used here.
        pair<vector<shared_ptr<DecisionTreeExperiment> >,vector<shared_ptr<DecisionTreeExperiment> > > inAndOutOfBag =
            MLUtils::bagObjectsWithoutReplacement<shared_ptr<DecisionTreeExperiment> >(m_decisionTreeExperiments, (int) std::min(m_decisionTreeExperiments.size(), bagSize));
        experiments = inAndOutOfBag.first;
    }

    if (m_parameters->verbose)
        vlcMessage.Begin("Constructing decision tree");

    constructDecisionTree(experiments);

    // Hand the finished tree to the output object, then drop the
    // per-node experiment lists (presumably to reclaim memory — the
    // terminal statistics needed for prediction are kept on the nodes).
    m_output->addHeadDecisionTreeNode(m_decisionTreeHead);
    m_decisionTreeHead->clearExperimentsWithinTree();

    if (m_parameters->verbose)
        vlcMessage.End();
}
106
+
107
+ void RandomForestEstimator::constructDecisionTree(vector<shared_ptr<DecisionTreeExperiment> >& experiments)
108
+ {
109
+ vector<shared_ptr<DecisionTreeNode> > currentGeneration;
110
+ vector<shared_ptr<DecisionTreeNode> > nextGeneration;
111
+
112
+ // create a head DecisionTreeNode
113
+ double sumZ = 0.0, sumW = 0.0;
114
+ BOOST_FOREACH(shared_ptr<DecisionTreeExperiment>& e, experiments)
115
+ {
116
+ double w = e->getWeight();
117
+ sumW += w;
118
+ sumZ += w * e->getZ();
119
+ }
120
+
121
+ m_decisionTreeHead = shared_ptr<DecisionTreeNode>(new DecisionTreeNode(experiments, sumZ, sumW, ROOT, shared_ptr<SplitDefinition>()));
122
+ currentGeneration.push_back(m_decisionTreeHead);
123
+
124
+ NodeSplitter splitter(m_data, m_parameters->minObservations, m_parameters->scale);
125
+
126
+ while (!currentGeneration.empty())
127
+ {
128
+ BOOST_FOREACH(shared_ptr<DecisionTreeNode> nodeToSplit, currentGeneration)
129
+ {
130
+ if (nodeToSplit->getSumW() == 0)
131
+ continue;
132
+
133
+ // choose M variables to test splitting on
134
+ // find terminal node with best improvement for any of those variables
135
+ pair<vector<int>,vector<int> > inAndOut = MLUtils::bagObjectsWithoutReplacement<int>(m_featureIndices, std::min((int)m_featureIndices.size(), m_parameters->tryMVariables));
136
+ vector<int> featuresToConsider = inAndOut.first;
137
+
138
+ double bestImprovement = 0.0;
139
+ shared_ptr<SplitDefinition> bestSplit;
140
+
141
+ vector<shared_ptr<DecisionTreeNode> > children = splitter.splitNode(nodeToSplit, featuresToConsider);
142
+ BOOST_FOREACH(shared_ptr<DecisionTreeNode>& child, children)
143
+ {
144
+ nextGeneration.push_back(child);
145
+ }
146
+ }
147
+ currentGeneration = nextGeneration;
148
+ nextGeneration.clear();
149
+ }
150
+
151
+
152
+ }
153
+
154
+ void RandomForestEstimator::initializeEstimator()
155
+ {
156
+ m_missingValueDefined = m_data->missingValueDefined();
157
+ if (m_missingValueDefined)
158
+ m_missingValue = m_data->getMissingValue();
159
+
160
+
161
+ constructFeatureIndices();
162
+ // sortTrainingExperiments();
163
+ }
164
+
165
+ void RandomForestEstimator::constructFeatureIndices()
166
+ {
167
+ BOOST_FOREACH(string feature, m_parameters->featuresToRun)
168
+ {
169
+ // note that in a given run, we may not "run" with all loaded variables.
170
+ m_featureIndices.push_back(m_data->getFeatureIndex(feature));
171
+ }
172
+ }
@@ -0,0 +1,66 @@
1
+ #include "MachineLearning/RandomForest/RandomForestOutput.h"
2
+ #include "MachineLearning/RandomForest/RandomForestParameters.h"
3
+ #include "MachineLearning/DecisionTree/DecisionTreeNode.h"
4
+ #include "MachineLearning/DecisionTree/DecisionTreeExperiment.h"
5
+
6
+ #include "utils/VlcMessage.h"
7
+
8
+ #include <boost/make_shared.hpp>
9
+ using boost::make_shared;
10
+
11
+ RandomForestOutput::RandomForestOutput( MLData* trainingData, vector<int> trainingExperimentIndicies, shared_ptr<RandomForestParameters> parameters )
12
+ : MLOutput(trainingData, trainingExperimentIndicies), m_parameters(parameters)
13
+ {
14
+
15
+ }
16
+
17
+ RandomForestOutput::~RandomForestOutput()
18
+ {
19
+
20
+ }
21
+
22
+ shared_ptr<RandomForestParameters> RandomForestOutput::getParameters()
23
+ {
24
+ return m_parameters;
25
+ }
26
+
27
+ double RandomForestOutput::predictForExperiment( shared_ptr<MLExperiment> experiment )
28
+ {
29
+ shared_ptr<DecisionTreeExperiment> dtExperiment = make_shared<DecisionTreeExperiment>(experiment);
30
+
31
+ double sumPrediction = 0.0;
32
+ int count = 0;
33
+ BOOST_FOREACH(shared_ptr<DecisionTreeNode>& head, m_headNodes)
34
+ {
35
+ shared_ptr<DecisionTreeNode> node = head->getTerminalNodeForExperiment(dtExperiment);
36
+ if (node.get() == 0)
37
+ node = head;
38
+
39
+ if (node->getSumW() == 0)
40
+ {
41
+
42
+ vlcMessage.Write("Zero weight!! WTF!!");
43
+ vlcMessage.Write("SumZ: "+ boost::lexical_cast<string>(node->getSumZ()));
44
+ vlcMessage.Write("exp.size() " + boost::lexical_cast<string>(node->getExperiments().size()));
45
+ vlcMessage.Write("Node is head: " + boost::lexical_cast<string>(node == head));
46
+ }
47
+
48
+ if (node->isTerminalNode())
49
+ {
50
+ sumPrediction += node->getSumZ() / node->getSumW();
51
+ count++;
52
+ }
53
+
54
+ }
55
+ return sumPrediction / count;
56
+ }
57
+
58
+ void RandomForestOutput::addHeadDecisionTreeNode( shared_ptr<DecisionTreeNode> node )
59
+ {
60
+ m_headNodes.push_back(node);
61
+ }
62
+
63
+ int RandomForestOutput::getNumTrees()
64
+ {
65
+ return (int) m_headNodes.size();
66
+ }
@@ -0,0 +1,84 @@
1
+ #include "MachineLearning/RandomForest/RandomForestRunner.h"
2
+ #include "MachineLearning/RandomForest/RandomForestOutput.h"
3
+ #include "MachineLearning/RandomForest/RandomForestEstimator.h"
4
+ #include "MachineLearning/DecisionTree/DecisionTreeNode.h"
5
+ #include "MachineLearning/MLData/MLData.h"
6
+
7
+ #include "utils/VlcMessage.h"
8
+
9
+ // #ifdef TBB_USE_THREADING_TOOLS
10
+ // #undef TBB_USE_THREADING_TOOLS
11
+ // #endif
12
+ // #define TBB_USE_THREADING_TOOLS 1
13
+ // #include "tbb/task_scheduler_init.h"
14
+ // #include "tbb/parallel_for.h"
15
+ // #include "tbb/blocked_range.h"
16
+ // #include "tbb/explicit_range.h"
17
+
18
+ #include <boost/pointer_cast.hpp>
19
+ using boost::dynamic_pointer_cast;
20
+ #include <math.h>
21
+
22
+ RandomForestRunner::RandomForestRunner()
23
+ {
24
+
25
+ }
26
+
27
+ RandomForestRunner::~RandomForestRunner()
28
+ {
29
+
30
+ }
31
+
32
+ void RandomForestRunner::estimateMore(int numTrees)
33
+ {
34
+ int numFolds = m_data->getNumFolds();
35
+ int numThreads = m_data->getNumFolds();
36
+
37
+ // tbb::task_scheduler_init init(numFolds);
38
+ // static tbb::simple_partitioner sp;
39
+
40
+ int grainSize = numFolds / numThreads;
41
+
42
+ // tbb::parallel_for(explicit_range<size_t>(0, numFolds, grainSize),
43
+ // [&](const explicit_range<size_t>& r) {
44
+ // int threadNumber = r.begin() / grainSize;
45
+ // for(size_t foldIndex=r.begin(); foldIndex!=r.end(); ++foldIndex)
46
+ for(int foldIndex=numFolds; foldIndex<numFolds; ++foldIndex)
47
+ {
48
+ vlcMessage.Begin("Estimating more...");
49
+
50
+ shared_ptr<RandomForestEstimator> estimator = dynamic_pointer_cast<RandomForestEstimator>(m_estimators.at(foldIndex));
51
+ estimator->estimateMore(numTrees);
52
+
53
+ vlcMessage.End();
54
+ }
55
+ // }, sp);
56
+ }
57
+
58
+ void RandomForestRunner::config()
59
+ {
60
+ vector<string>& dataFeatures = m_data->getFeatures();
61
+
62
+ if (parameters->featuresToRun.empty())
63
+ parameters->featuresToRun = dataFeatures;
64
+ else
65
+ {
66
+ BOOST_FOREACH(string& feature, parameters->featuresToRun)
67
+ {
68
+ if (Utils::vectorIndex(dataFeatures, feature) == -1)
69
+ throw std::runtime_error("Feature '" + feature + "' specified as part of parameter 'featuresToRun', but feature not found in data");
70
+ }
71
+ }
72
+ if (parameters->featuresToRun.empty())
73
+ throw std::runtime_error("There are no features to run!");
74
+
75
+ if (m_data->missingValueDefined())
76
+ DecisionTreeNode::setMissingValue(m_data->getMissingValue());
77
+ }
78
+
79
+
80
+
81
+ shared_ptr<MLEstimator> RandomForestRunner::createEstimator( MLData* data, vector<shared_ptr<MLExperiment> > trainingExperiments )
82
+ {
83
+ return shared_ptr<MLEstimator>(shared_ptr<RandomForestEstimator>(new RandomForestEstimator(data, trainingExperiments, parameters)));
84
+ }