ml4r 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/ml4r/{LinearRegression.h → LinearRegression/LinearRegression.h} +25 -19
- data/ext/ml4r/LinearRegression/OLSLinearRegression.h +29 -0
- data/ext/ml4r/MachineLearning/DecisionTree/CategoryInfo.h +32 -0
- data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.h +30 -0
- data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.h +86 -0
- data/ext/ml4r/MachineLearning/DecisionTree/FeatureInteraction.h +31 -0
- data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.h +45 -0
- data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.h +17 -0
- data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.h +16 -0
- data/ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.h +81 -0
- data/ext/ml4r/MachineLearning/GBM/BernoulliCalculator.h +29 -0
- data/ext/ml4r/MachineLearning/GBM/GBM.h +50 -0
- data/ext/ml4r/MachineLearning/GBM/GBMCalculator.h +31 -0
- data/ext/ml4r/MachineLearning/GBM/GBMData.h +0 -0
- data/ext/ml4r/MachineLearning/GBM/GBMEstimator.h +79 -0
- data/ext/ml4r/MachineLearning/GBM/GBMOutput.h +53 -0
- data/ext/ml4r/MachineLearning/GBM/GBMParameters.h +50 -0
- data/ext/ml4r/MachineLearning/GBM/GBMRunner.h +35 -0
- data/ext/ml4r/MachineLearning/GBM/GaussianCalculator.h +29 -0
- data/ext/ml4r/MachineLearning/GBM/ZenithGBM.h +27 -0
- data/ext/ml4r/MachineLearning/MLData/MLData.h +77 -0
- data/ext/ml4r/MachineLearning/MLData/MLDataFields.h +25 -0
- data/ext/ml4r/MachineLearning/MLData/MLDataReader.h +37 -0
- data/ext/ml4r/MachineLearning/MLData/ZenithMLData.h +13 -0
- data/ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.h +20 -0
- data/ext/ml4r/MachineLearning/MLEstimator.h +30 -0
- data/ext/ml4r/MachineLearning/MLEstimatorFactory.h +25 -0
- data/ext/ml4r/MachineLearning/MLExperiment.h +41 -0
- data/ext/ml4r/MachineLearning/MLOutput.h +45 -0
- data/ext/ml4r/MachineLearning/MLParameters.h +16 -0
- data/ext/ml4r/MachineLearning/MLRunner.h +47 -0
- data/ext/ml4r/MachineLearning/MLUtils.h +75 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.h +47 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.h +33 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestParameters.h +32 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.h +34 -0
- data/ext/ml4r/extconf.rb +16 -3
- data/ext/ml4r/{MathUtils.h → utils/MathUtils.h} +0 -0
- data/ext/ml4r/{MatrixInversion.h → utils/MatrixInversion.h} +0 -0
- data/ext/ml4r/utils/StochasticUtils.h +33 -0
- data/ext/ml4r/utils/Utils.h +147 -0
- data/ext/ml4r/utils/VlcMessage.h +44 -0
- data/lib/ml4r/linear_regression.rb +7 -0
- metadata +45 -13
- data/ext/ml4r/LinearRegression/ZenithRegression.h +0 -17
- data/ext/ml4r/OLSLinearRegression.h +0 -23
- data/ext/ml4r/Utils.h +0 -53
- data/ext/ml4r/example.h +0 -18
- data/ext/ml4r/swig/example.h +0 -13
- data/ext/ml4r/swig/example_wrap.c +0 -2093
- data/ext/ml4r/utils/RubyUtils.h +0 -174
@@ -8,37 +8,43 @@ using boost::numeric::ublas::matrix;
|
|
8
8
|
#include <utility>
|
9
9
|
using std::pair;
|
10
10
|
|
11
|
-
//#include "ZenithBase.h"
|
12
|
-
//#include "boost/MatrixInverse.h"
|
13
|
-
|
14
11
|
class LinearRegression
|
15
12
|
{
|
16
13
|
public:
|
17
|
-
|
14
|
+
LinearRegression(std::vector<std::vector<double> > xs, std::vector<double> ys,
|
15
|
+
std::vector<double> weights = std::vector<double>())
|
16
|
+
: m_xs(xs), m_ys(ys), m_ws(weights), m_constantIsFixed(false), m_paramsAreValid(false) {}
|
17
|
+
|
18
|
+
LinearRegression(std::vector<double> xs, std::vector<double> ys,
|
19
|
+
std::vector<double> weights = std::vector<double>())
|
20
|
+
: m_ys(ys), m_ws(weights), m_constantIsFixed(false), m_paramsAreValid(false)
|
21
|
+
{
|
22
|
+
m_xs.resize(xs.size());
|
23
|
+
for (unsigned int i=0; i<xs.size(); ++i) m_xs.at(i).resize(1, xs.at(i));
|
24
|
+
}
|
25
|
+
|
26
|
+
LinearRegression(std::vector<std::vector<double> > xs, std::vector<double> ys, double fixedConstant,
|
27
|
+
std::vector<double> weights = std::vector<double>())
|
28
|
+
: m_xs(xs), m_ys(ys), m_ws(weights), m_constantIsFixed(true), m_constant(fixedConstant), m_paramsAreValid(false) {}
|
18
29
|
~LinearRegression() {}
|
19
30
|
|
20
|
-
|
21
|
-
void setYs(std::vector<double> ys);
|
22
|
-
void setWeights(std::vector<double> weights);
|
31
|
+
|
23
32
|
void setFixedConstant(double val);
|
24
33
|
|
25
34
|
|
26
35
|
pair<std::vector<double>,double> getParameterEstimates();
|
27
36
|
std::vector<double>& getFittedYs();
|
28
37
|
std::vector<double>& getPredictedYs();
|
29
|
-
// double
|
30
|
-
double
|
31
|
-
double
|
32
|
-
double
|
38
|
+
// double GetConstant();
|
39
|
+
double getRSquared();
|
40
|
+
double getSSquared();
|
41
|
+
double getFstatistic();
|
33
42
|
std::vector<double>& getTstatistics();
|
34
43
|
std::vector<double>& getStandardErrors();
|
35
|
-
double
|
36
|
-
double
|
37
|
-
double
|
38
|
-
double
|
39
|
-
|
40
|
-
// BOOM THIS IS THE PROBLEM HERE - CAN'T INSTANTIATE A PURE VIRTUAL CLASS
|
41
|
-
virtual void Execute() = 0;
|
44
|
+
double getPressStatistic();
|
45
|
+
double getPresarStatistic();
|
46
|
+
double getAdjustedRSquared();
|
47
|
+
double getRSquaredPrediction();
|
42
48
|
|
43
49
|
protected:
|
44
50
|
|
@@ -75,7 +81,7 @@ protected:
|
|
75
81
|
boost::numeric::ublas::matrix<double> m_Y;
|
76
82
|
boost::numeric::ublas::matrix<double> m_B; // m_B = prod(m_A, m_Y)
|
77
83
|
boost::numeric::ublas::matrix<double> m_A; // m_A = (X'WX)-1 X'W
|
78
|
-
std::vector<double>
|
84
|
+
std::vector<double> m_h_diagonal; // hat ublas::matrix = XA. This is the diagonal of it.
|
79
85
|
boost::numeric::ublas::matrix<double> m_Xtranspose;
|
80
86
|
boost::numeric::ublas::matrix<double> m_Xtranspose_W;
|
81
87
|
boost::numeric::ublas::matrix<double> m_Xtranspose_W_X;
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#ifndef OLSLinearRegression_h__
|
2
|
+
#define OLSLinearRegression_h__
|
3
|
+
|
4
|
+
#include "LinearRegression.h"
|
5
|
+
|
6
|
+
class OLSLinearRegression : public LinearRegression
|
7
|
+
{
|
8
|
+
public:
|
9
|
+
|
10
|
+
OLSLinearRegression(std::vector<std::vector<double> > xs, std::vector<double> ys,
|
11
|
+
std::vector<double> weights = std::vector<double>());
|
12
|
+
OLSLinearRegression(std::vector<double> xs, std::vector<double> ys,
|
13
|
+
std::vector<double> weights = std::vector<double>());
|
14
|
+
OLSLinearRegression(std::vector<std::vector<double> > xs, std::vector<double> ys, double fixedConstant,
|
15
|
+
std::vector<double> weights = std::vector<double>());
|
16
|
+
~OLSLinearRegression();
|
17
|
+
|
18
|
+
void calculate();
|
19
|
+
|
20
|
+
void EstimateBs();
|
21
|
+
|
22
|
+
protected:
|
23
|
+
|
24
|
+
private:
|
25
|
+
};
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
#endif // OLSLinearRegression_h__
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#ifndef __CATEGORY_INFO_H__
|
2
|
+
#define __CATEGORY_INFO_H__
|
3
|
+
|
4
|
+
struct CategoryInfo
|
5
|
+
{
|
6
|
+
CategoryInfo()
|
7
|
+
: sumZ(0), sumW(0), countN(0), meanZ(0), category(-1)
|
8
|
+
{
|
9
|
+
|
10
|
+
}
|
11
|
+
|
12
|
+
vector<shared_ptr<DecisionTreeExperiment> > experiments;
|
13
|
+
double sumW;
|
14
|
+
double sumZ;
|
15
|
+
int countN;
|
16
|
+
double meanZ;
|
17
|
+
|
18
|
+
double category;
|
19
|
+
void addExperiment(shared_ptr<DecisionTreeExperiment> experiment)
|
20
|
+
{
|
21
|
+
experiments.push_back(experiment);
|
22
|
+
sumW += experiment->getWeight();
|
23
|
+
sumZ += experiment->getWeight() * experiment->getZ();
|
24
|
+
countN += 1;
|
25
|
+
meanZ = sumZ / sumW;
|
26
|
+
}
|
27
|
+
|
28
|
+
// define operator to sort on meanZ
|
29
|
+
bool operator<(const CategoryInfo& rhs) const { return this->meanZ < rhs.meanZ; }
|
30
|
+
};
|
31
|
+
|
32
|
+
#endif // __CATEGORY_INFO_H__
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#ifndef DecisionTreeExperiment_h__
|
2
|
+
#define DecisionTreeExperiment_h__
|
3
|
+
|
4
|
+
#include "MachineLearning/MLExperiment.h"
|
5
|
+
#include <boost/shared_ptr.hpp>
|
6
|
+
using boost::shared_ptr;
|
7
|
+
|
8
|
+
class DecisionTreeExperiment : public MLExperiment
|
9
|
+
{
|
10
|
+
public:
|
11
|
+
DecisionTreeExperiment();
|
12
|
+
DecisionTreeExperiment(shared_ptr<MLExperiment> mlExperiment);
|
13
|
+
~DecisionTreeExperiment();
|
14
|
+
|
15
|
+
void setF(double f);
|
16
|
+
void setZ(double z);
|
17
|
+
double getF();
|
18
|
+
double getY();
|
19
|
+
double getZ();
|
20
|
+
void incrementF(double increment);
|
21
|
+
|
22
|
+
protected:
|
23
|
+
double m_F; // modelled estimate (utility in the case of bernoulli)
|
24
|
+
double m_Z; // the thing which decision trees attempt to partition
|
25
|
+
private:
|
26
|
+
};
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
#endif // DecisionTreeExperiment_h__
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#ifndef __DecisionTreeNode_h__
|
2
|
+
#define __DecisionTreeNode_h__
|
3
|
+
|
4
|
+
#include <vector>
|
5
|
+
#include <boost/shared_ptr.hpp>
|
6
|
+
#include <set>
|
7
|
+
#include <map>
|
8
|
+
using std::map;
|
9
|
+
using std::set;
|
10
|
+
using boost::shared_ptr;
|
11
|
+
using std::vector;
|
12
|
+
|
13
|
+
class DecisionTreeExperiment;
|
14
|
+
class SplitDefinition;
|
15
|
+
|
16
|
+
// Identifies which side of its parent's split a tree node sits on.
// Enumerators keep their implicit values 0..3. The list deliberately has no
// trailing comma, because pre-C++11 compilers reject one after the last
// enumerator.
enum Partition
{
    ROOT,    // node has no parent split
    LHS,     // left-hand side of the parent split
    RHS,     // right-hand side of the parent split
    MISSING  // branch for the parent's "missing" child
};
|
23
|
+
|
24
|
+
class DecisionTreeNode
|
25
|
+
{
|
26
|
+
public:
|
27
|
+
DecisionTreeNode( vector<shared_ptr<DecisionTreeExperiment> > experiments,
|
28
|
+
double sumZ,
|
29
|
+
double sumW,
|
30
|
+
Partition partition,
|
31
|
+
shared_ptr<SplitDefinition> parentSplitDefinition);
|
32
|
+
~DecisionTreeNode();
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
shared_ptr<DecisionTreeNode> getTerminalNodeForExperiment(shared_ptr<DecisionTreeExperiment> experiment);
|
37
|
+
|
38
|
+
void defineSplit(shared_ptr<SplitDefinition> splitDefinition,
|
39
|
+
shared_ptr<DecisionTreeNode> lhsChild,
|
40
|
+
shared_ptr<DecisionTreeNode> rhsChild,
|
41
|
+
shared_ptr<DecisionTreeNode> missingChild);
|
42
|
+
|
43
|
+
vector<shared_ptr<DecisionTreeExperiment> > getExperiments();
|
44
|
+
|
45
|
+
bool isTerminalNode();
|
46
|
+
void clearExperimentsWithinTree();
|
47
|
+
|
48
|
+
double getSumZ();
|
49
|
+
double getSumW();
|
50
|
+
void setSumZ(double sumZ);
|
51
|
+
void setSumW(double sumW);
|
52
|
+
void updateSums();
|
53
|
+
|
54
|
+
shared_ptr<SplitDefinition> getSplitDefinition();
|
55
|
+
shared_ptr<SplitDefinition> getParentSplitDefinition();
|
56
|
+
Partition getPartition();
|
57
|
+
|
58
|
+
static void setMissingValue(double missingValue);
|
59
|
+
protected:
|
60
|
+
shared_ptr<DecisionTreeNode> chooseChild(shared_ptr<DecisionTreeExperiment> experiment);
|
61
|
+
void setChildren(shared_ptr<DecisionTreeNode> lhsChild,
|
62
|
+
shared_ptr<DecisionTreeNode> rhsChild,
|
63
|
+
shared_ptr<DecisionTreeNode> missingChild);
|
64
|
+
|
65
|
+
// if this decision tree node has been further split, the following variables will be populated, otherwise they will be null!
|
66
|
+
bool m_nodeHasChildren;
|
67
|
+
|
68
|
+
shared_ptr<DecisionTreeNode> m_lhsChild;
|
69
|
+
shared_ptr<DecisionTreeNode> m_rhsChild;
|
70
|
+
shared_ptr<DecisionTreeNode> m_missingChild;
|
71
|
+
|
72
|
+
vector<shared_ptr<DecisionTreeExperiment> > m_experiments;
|
73
|
+
|
74
|
+
static bool m_missingValueDefined;
|
75
|
+
static double m_missingValue;
|
76
|
+
|
77
|
+
double m_sumZ;
|
78
|
+
double m_sumW;
|
79
|
+
Partition m_whichPartitionAmI;
|
80
|
+
shared_ptr<SplitDefinition> m_parentSplitDefinition;
|
81
|
+
shared_ptr<SplitDefinition> m_splitDefinition;
|
82
|
+
};
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
#endif // __DecisionTreeNode_h__
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#ifndef FeatureInteraction_h__
|
2
|
+
#define FeatureInteraction_h__
|
3
|
+
|
4
|
+
#include <boost/shared_ptr.hpp>
|
5
|
+
using boost::shared_ptr;
|
6
|
+
|
7
|
+
#include "DecisionTreeNode.h"
|
8
|
+
|
9
|
+
class SplitDefinition;
|
10
|
+
|
11
|
+
class FeatureInteraction
|
12
|
+
{
|
13
|
+
public:
|
14
|
+
FeatureInteraction(shared_ptr<SplitDefinition> primarySplitDefinition,shared_ptr<SplitDefinition> secondarySplitDefinition,Partition primaryPartition)
|
15
|
+
: primarySplitDefinition(primarySplitDefinition), secondarySplitDefinition(secondarySplitDefinition),
|
16
|
+
primaryPartition(primaryPartition)
|
17
|
+
{
|
18
|
+
|
19
|
+
}
|
20
|
+
~FeatureInteraction() {};
|
21
|
+
|
22
|
+
shared_ptr<SplitDefinition> primarySplitDefinition;
|
23
|
+
shared_ptr<SplitDefinition> secondarySplitDefinition;
|
24
|
+
Partition primaryPartition;
|
25
|
+
|
26
|
+
|
27
|
+
protected:
|
28
|
+
|
29
|
+
private:
|
30
|
+
};
|
31
|
+
#endif // FeatureInteraction_h__
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#ifndef __NodeSplitter_h__
|
2
|
+
#define __NodeSplitter_h__
|
3
|
+
|
4
|
+
#include "MachineLearning/DecisionTree/DecisionTreeNode.h"
|
5
|
+
|
6
|
+
#include <boost/shared_ptr.hpp>
|
7
|
+
using boost::shared_ptr;
|
8
|
+
|
9
|
+
class SplitDefinition;
|
10
|
+
class DecisionTreeNode;
|
11
|
+
class DecisionTreeExperiment;
|
12
|
+
class MLData;
|
13
|
+
|
14
|
+
// pure virtual base class for NodeSplitterContinuous and NodeSplitterCategorical
|
15
|
+
|
16
|
+
class NodeSplitter
|
17
|
+
{
|
18
|
+
public:
|
19
|
+
NodeSplitter(MLData* data, int minObservations, double scale);
|
20
|
+
~NodeSplitter();
|
21
|
+
|
22
|
+
shared_ptr<SplitDefinition> createSplitDefinition(shared_ptr<DecisionTreeNode> node, int featureIndex);
|
23
|
+
shared_ptr<SplitDefinition> createContinuousSplitDefinition(shared_ptr<DecisionTreeNode> node, int featureIndex);
|
24
|
+
shared_ptr<SplitDefinition> createCategoricalSplitDefinition(shared_ptr<DecisionTreeNode> node, int featureIndex);
|
25
|
+
|
26
|
+
double calculateImprovement(double lhsSumW, double lhsSumZ, double rhsSumW, double rhsSumZ, double missingSumW, double missingSumZ);
|
27
|
+
|
28
|
+
shared_ptr<DecisionTreeNode> createLhsChild(shared_ptr<SplitDefinition> splitDefinition);
|
29
|
+
shared_ptr<DecisionTreeNode> createRhsChild(shared_ptr<SplitDefinition> splitDefinition);
|
30
|
+
shared_ptr<DecisionTreeNode> createMissingChild(shared_ptr<SplitDefinition> splitDefinition);
|
31
|
+
shared_ptr<DecisionTreeNode> createChild(shared_ptr<SplitDefinition> splitDefinition, Partition partition);
|
32
|
+
|
33
|
+
vector<shared_ptr<DecisionTreeExperiment> > partitionExperiments(vector<shared_ptr<DecisionTreeExperiment> >& experiments,
|
34
|
+
shared_ptr<SplitDefinition> splitDefinition, Partition partition);
|
35
|
+
|
36
|
+
vector<shared_ptr<DecisionTreeNode> > splitNode(shared_ptr<DecisionTreeNode> nodeToSplit, vector<int> featuresToConsider);
|
37
|
+
protected:
|
38
|
+
MLData* m_data;
|
39
|
+
double m_missingValue;
|
40
|
+
int m_minObservations;
|
41
|
+
bool m_missingValueDefined;
|
42
|
+
double m_scale;
|
43
|
+
};
|
44
|
+
|
45
|
+
#endif // __NodeSplitter_h__
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#ifndef __NodeSplitterCategorical_h__
|
2
|
+
#define __NodeSplitterCategorical_h__
|
3
|
+
|
4
|
+
#include "MachineLearning/DecisionTree/NodeSplitter.h"
|
5
|
+
|
6
|
+
class NodeSplitterCategorical : public NodeSplitter
|
7
|
+
{
|
8
|
+
public:
|
9
|
+
NodeSplitterCategorical(MLData* data, int minObservations, double scale);
|
10
|
+
~NodeSplitterCategorical();
|
11
|
+
|
12
|
+
shared_ptr<SplitDefinition> createSplitDefinition(shared_ptr<DecisionTreeNode> node, int featureIndex);
|
13
|
+
protected:
|
14
|
+
|
15
|
+
};
|
16
|
+
|
17
|
+
#endif // __NodeSplitterCategorical_h__
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#ifndef __NodeSplitterContinuous_h__
|
2
|
+
#define __NodeSplitterContinuous_h__
|
3
|
+
|
4
|
+
#include "MachineLearning/DecisionTree/NodeSplitter.h"
|
5
|
+
|
6
|
+
class NodeSplitterContinuous : public NodeSplitter
|
7
|
+
{
|
8
|
+
public:
|
9
|
+
NodeSplitterContinuous(MLData* data, int minObservations, double scale);
|
10
|
+
~NodeSplitterContinuous();
|
11
|
+
|
12
|
+
shared_ptr<SplitDefinition> createSplitDefinition(shared_ptr<DecisionTreeNode> node, int featureIndex);
|
13
|
+
protected:
|
14
|
+
};
|
15
|
+
|
16
|
+
#endif // __NodeSplitterContinuous_h__
|
@@ -0,0 +1,81 @@
|
|
1
|
+
#ifndef __SplitDefinition_h__
|
2
|
+
#define __SplitDefinition_h__
|
3
|
+
|
4
|
+
#include <boost/shared_ptr.hpp>
|
5
|
+
#include <set>
|
6
|
+
using std::set;
|
7
|
+
using boost::shared_ptr;
|
8
|
+
|
9
|
+
class DecisionTreeExperiment;
|
10
|
+
class DecisionTreeNode;
|
11
|
+
|
12
|
+
class SplitDefinition
|
13
|
+
{
|
14
|
+
public:
|
15
|
+
SplitDefinition(shared_ptr<DecisionTreeNode> nodeToSplit,
|
16
|
+
int featureIndex,
|
17
|
+
set<double>& lhsCategories,
|
18
|
+
set<double>& rhsCategories,
|
19
|
+
double lhsSumZ,
|
20
|
+
double lhsSumW,
|
21
|
+
int lhsCount,
|
22
|
+
double rhsSumZ,
|
23
|
+
double rhsSumW,
|
24
|
+
int rhsCount,
|
25
|
+
double missingSumZ,
|
26
|
+
double missingSumW,
|
27
|
+
int missingCount,
|
28
|
+
double improvement);
|
29
|
+
|
30
|
+
SplitDefinition(shared_ptr<DecisionTreeNode> nodeToSplit,
|
31
|
+
int featureIndex,
|
32
|
+
double splitValue,
|
33
|
+
double lhsSumZ,
|
34
|
+
double lhsSumW,
|
35
|
+
int lhsCount,
|
36
|
+
double rhsSumZ,
|
37
|
+
double rhsSumW,
|
38
|
+
int rhsCount,
|
39
|
+
double missingSumZ,
|
40
|
+
double missingSumW,
|
41
|
+
int missingCount,
|
42
|
+
double improvement);
|
43
|
+
|
44
|
+
~SplitDefinition();
|
45
|
+
|
46
|
+
int getFeatureIndex();
|
47
|
+
double getImprovement();
|
48
|
+
shared_ptr<DecisionTreeNode> getNodeToSplit();
|
49
|
+
double getLhsSumZ();
|
50
|
+
double getLhsSumW();
|
51
|
+
int getLhsExperimentCount();
|
52
|
+
double getRhsSumZ();
|
53
|
+
double getRhsSumW();
|
54
|
+
int getRhsExperimentCount();
|
55
|
+
double getMissingSumZ();
|
56
|
+
double getMissingSumW();
|
57
|
+
int getMissingExperimentCount();
|
58
|
+
set<double>& getLhsCategories();
|
59
|
+
set<double>& getRhsCategories();
|
60
|
+
double getSplitValue();
|
61
|
+
bool isCategorical();
|
62
|
+
protected:
|
63
|
+
shared_ptr<DecisionTreeNode> m_nodeToSplit;
|
64
|
+
int m_splitFeatureIndex;
|
65
|
+
set<double> m_lhsCategories;
|
66
|
+
set<double> m_rhsCategories;
|
67
|
+
double m_splitValue;
|
68
|
+
double m_lhsSumZ;
|
69
|
+
double m_rhsSumZ;
|
70
|
+
double m_missingSumZ;
|
71
|
+
double m_lhsSumW;
|
72
|
+
double m_rhsSumW;
|
73
|
+
double m_missingSumW;
|
74
|
+
int m_lhsCount;
|
75
|
+
int m_rhsCount;
|
76
|
+
int m_missingCount;
|
77
|
+
double m_improvement;
|
78
|
+
bool m_featureIsCategorical;
|
79
|
+
};
|
80
|
+
|
81
|
+
#endif // __SplitDefinition_h__
|