ml4r 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/ml4r/{LinearRegression.h → LinearRegression/LinearRegression.h} +25 -19
- data/ext/ml4r/LinearRegression/OLSLinearRegression.h +29 -0
- data/ext/ml4r/MachineLearning/DecisionTree/CategoryInfo.h +32 -0
- data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.h +30 -0
- data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.h +86 -0
- data/ext/ml4r/MachineLearning/DecisionTree/FeatureInteraction.h +31 -0
- data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.h +45 -0
- data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.h +17 -0
- data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.h +16 -0
- data/ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.h +81 -0
- data/ext/ml4r/MachineLearning/GBM/BernoulliCalculator.h +29 -0
- data/ext/ml4r/MachineLearning/GBM/GBM.h +50 -0
- data/ext/ml4r/MachineLearning/GBM/GBMCalculator.h +31 -0
- data/ext/ml4r/MachineLearning/GBM/GBMData.h +0 -0
- data/ext/ml4r/MachineLearning/GBM/GBMEstimator.h +79 -0
- data/ext/ml4r/MachineLearning/GBM/GBMOutput.h +53 -0
- data/ext/ml4r/MachineLearning/GBM/GBMParameters.h +50 -0
- data/ext/ml4r/MachineLearning/GBM/GBMRunner.h +35 -0
- data/ext/ml4r/MachineLearning/GBM/GaussianCalculator.h +29 -0
- data/ext/ml4r/MachineLearning/GBM/ZenithGBM.h +27 -0
- data/ext/ml4r/MachineLearning/MLData/MLData.h +77 -0
- data/ext/ml4r/MachineLearning/MLData/MLDataFields.h +25 -0
- data/ext/ml4r/MachineLearning/MLData/MLDataReader.h +37 -0
- data/ext/ml4r/MachineLearning/MLData/ZenithMLData.h +13 -0
- data/ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.h +20 -0
- data/ext/ml4r/MachineLearning/MLEstimator.h +30 -0
- data/ext/ml4r/MachineLearning/MLEstimatorFactory.h +25 -0
- data/ext/ml4r/MachineLearning/MLExperiment.h +41 -0
- data/ext/ml4r/MachineLearning/MLOutput.h +45 -0
- data/ext/ml4r/MachineLearning/MLParameters.h +16 -0
- data/ext/ml4r/MachineLearning/MLRunner.h +47 -0
- data/ext/ml4r/MachineLearning/MLUtils.h +75 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.h +47 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.h +33 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestParameters.h +32 -0
- data/ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.h +34 -0
- data/ext/ml4r/extconf.rb +16 -3
- data/ext/ml4r/{MathUtils.h → utils/MathUtils.h} +0 -0
- data/ext/ml4r/{MatrixInversion.h → utils/MatrixInversion.h} +0 -0
- data/ext/ml4r/utils/StochasticUtils.h +33 -0
- data/ext/ml4r/utils/Utils.h +147 -0
- data/ext/ml4r/utils/VlcMessage.h +44 -0
- data/lib/ml4r/linear_regression.rb +7 -0
- metadata +45 -13
- data/ext/ml4r/LinearRegression/ZenithRegression.h +0 -17
- data/ext/ml4r/OLSLinearRegression.h +0 -23
- data/ext/ml4r/Utils.h +0 -53
- data/ext/ml4r/example.h +0 -18
- data/ext/ml4r/swig/example.h +0 -13
- data/ext/ml4r/swig/example_wrap.c +0 -2093
- data/ext/ml4r/utils/RubyUtils.h +0 -174
@@ -0,0 +1,29 @@
|
|
1
|
+
#ifndef BernoulliCalculator_h__
|
2
|
+
#define BernoulliCalculator_h__
|
3
|
+
|
4
|
+
#include "MachineLearning/GBM/GBMCalculator.h"
|
5
|
+
|
6
|
+
#include <boost/shared_ptr.hpp>
|
7
|
+
#include <vector>
|
8
|
+
using std::vector;
|
9
|
+
using boost::shared_ptr;
|
10
|
+
|
11
|
+
class BernoulliCalculator : public GBMCalculator
|
12
|
+
{
|
13
|
+
public:
|
14
|
+
BernoulliCalculator();
|
15
|
+
~BernoulliCalculator();
|
16
|
+
|
17
|
+
double calculateDeviance(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
18
|
+
void populateInitialF(vector<shared_ptr<DecisionTreeExperiment> >& experiments, bool useInitialPredictions);
|
19
|
+
void updateZ(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
20
|
+
double computeFIncrement(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
21
|
+
void updatePredictions(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
22
|
+
double calculatePrediction(double f);
|
23
|
+
double calculateF(double prediction);
|
24
|
+
protected:
|
25
|
+
|
26
|
+
private:
|
27
|
+
};
|
28
|
+
|
29
|
+
#endif // BernoulliCalculator_h__
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#ifndef __GBM_h__
|
2
|
+
#define __GBM_h__
|
3
|
+
|
4
|
+
#include "MachineLearning/GBM/GBMParameters.h"
|
5
|
+
|
6
|
+
#include <boost/shared_ptr.hpp>
|
7
|
+
#include <vector>
|
8
|
+
using std::vector;
|
9
|
+
using boost::shared_ptr;
|
10
|
+
|
11
|
+
class MLData;
|
12
|
+
class DecisionTreeExperiment;
|
13
|
+
class MLExperiment;
|
14
|
+
class MLDataFields;
|
15
|
+
class DecisionTreeNode;
|
16
|
+
class SplitDefinition;
|
17
|
+
class GBMOutput;
|
18
|
+
class GBMCalculator;
|
19
|
+
|
20
|
+
class GBM
|
21
|
+
{
|
22
|
+
public:
|
23
|
+
GBM();
|
24
|
+
~GBM();
|
25
|
+
void estimate();
|
26
|
+
void estimateMore(int numTrees);
|
27
|
+
|
28
|
+
void setData(MLData* mlData);
|
29
|
+
vector<double> getPredictions(MLData* newData);
|
30
|
+
vector<double> getPredictions(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
31
|
+
|
32
|
+
vector<double> getMeanTrainingPredictions();
|
33
|
+
vector<double> getCrossValidationPredictions();
|
34
|
+
|
35
|
+
GBMParameters parameters;
|
36
|
+
protected:
|
37
|
+
MLData* m_data;
|
38
|
+
vector<shared_ptr<GBMOutput> > m_outputObjects;
|
39
|
+
shared_ptr<GBMCalculator> m_gbmCalculator;
|
40
|
+
|
41
|
+
void config();
|
42
|
+
void input();
|
43
|
+
void goNuts();
|
44
|
+
void output();
|
45
|
+
vector<shared_ptr<DecisionTreeExperiment> > makeDecisionTreeExperiments(vector<shared_ptr<MLExperiment> >& experiments);
|
46
|
+
|
47
|
+
|
48
|
+
};
|
49
|
+
|
50
|
+
#endif // GBM_h__
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#ifndef GBMCalculator_h__
|
2
|
+
#define GBMCalculator_h__
|
3
|
+
|
4
|
+
#include <boost/shared_ptr.hpp>
|
5
|
+
#include <vector>
|
6
|
+
using std::vector;
|
7
|
+
using boost::shared_ptr;
|
8
|
+
|
9
|
+
class DecisionTreeExperiment;
|
10
|
+
|
11
|
+
class GBMCalculator
|
12
|
+
{
|
13
|
+
public:
|
14
|
+
GBMCalculator() {};
|
15
|
+
~GBMCalculator() {};
|
16
|
+
|
17
|
+
virtual double calculateDeviance(vector<shared_ptr<DecisionTreeExperiment> >& experiments) = 0;
|
18
|
+
virtual void populateInitialF(vector<shared_ptr<DecisionTreeExperiment> >& experiments, bool useInitialPredictions) = 0;
|
19
|
+
virtual void updateZ(vector<shared_ptr<DecisionTreeExperiment> >& experiments) = 0;
|
20
|
+
virtual double computeFIncrement(vector<shared_ptr<DecisionTreeExperiment> >& experiments) = 0;
|
21
|
+
virtual void updatePredictions(vector<shared_ptr<DecisionTreeExperiment> >& experiments) = 0;
|
22
|
+
virtual double calculatePrediction(double f) = 0;
|
23
|
+
virtual double calculateF(double prediction) = 0;
|
24
|
+
protected:
|
25
|
+
|
26
|
+
private:
|
27
|
+
};
|
28
|
+
|
29
|
+
#endif // GBMCalculator_h__
|
30
|
+
|
31
|
+
|
File without changes
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#ifndef GBMEstimator_h__
|
2
|
+
#define GBMEstimator_h__
|
3
|
+
|
4
|
+
#include "MachineLearning/DecisionTree/FeatureInteraction.h"
|
5
|
+
#include "MachineLearning/MLEstimator.h"
|
6
|
+
|
7
|
+
#include <boost/shared_ptr.hpp>
|
8
|
+
#include <vector>
|
9
|
+
#include <map>
|
10
|
+
#include <set>
|
11
|
+
#include <utility>
|
12
|
+
using std::pair;
|
13
|
+
using std::set;
|
14
|
+
using std::map;
|
15
|
+
using std::vector;
|
16
|
+
using boost::shared_ptr;
|
17
|
+
|
18
|
+
|
19
|
+
class GBMParameters;
|
20
|
+
class GBMOutput;
|
21
|
+
class DecisionTreeExperiment;
|
22
|
+
class DecisionTreeNode;
|
23
|
+
class SplitDefinition;
|
24
|
+
class MLData;
|
25
|
+
class GBMCalculator;
|
26
|
+
class MLExperiment;
|
27
|
+
|
28
|
+
class GBMEstimator : public MLEstimator
|
29
|
+
{
|
30
|
+
public:
|
31
|
+
GBMEstimator(MLData* data, vector<shared_ptr<MLExperiment> > experiments, shared_ptr<GBMParameters> parameters);
|
32
|
+
~GBMEstimator();
|
33
|
+
|
34
|
+
shared_ptr<MLOutput> estimate();
|
35
|
+
void estimateMore(int numTrees);
|
36
|
+
vector<FeatureInteraction> findInteractions(int howMany);
|
37
|
+
protected:
|
38
|
+
void initializeEstimator();
|
39
|
+
void performIteration();
|
40
|
+
void constructFeatureIndices();
|
41
|
+
void initialiseGBMExperimentData();
|
42
|
+
void populateInitialF();
|
43
|
+
void updateZ(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
44
|
+
void updatePredictions(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
45
|
+
|
46
|
+
void sortTrainingExperiments();
|
47
|
+
|
48
|
+
void constructDecisionTree(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
49
|
+
void constructGenerousDecisionTree(vector<shared_ptr<DecisionTreeExperiment> >& experiments, int rfToLevel);
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
// map<int, vector<shared_ptr<DecisionTreeExperiment> > > partitionSortedExperiments(shared_ptr<SplitDefinition> splitDefinition, Partition partition);
|
54
|
+
map<int, vector<shared_ptr<DecisionTreeExperiment> > > bagSortedExperiments(vector<shared_ptr<DecisionTreeExperiment> >& baggedExperiments);
|
55
|
+
|
56
|
+
vector<int> getRandomFeatureList();
|
57
|
+
|
58
|
+
void calculateFIncrementPerDecisionTreeNode();
|
59
|
+
void applyFIncrementToInBagExperiments();
|
60
|
+
pair<vector<shared_ptr<DecisionTreeExperiment> >, vector<shared_ptr<DecisionTreeExperiment> > > bagExperiments();
|
61
|
+
void applyFIncrementToExperiments(vector<shared_ptr<DecisionTreeExperiment> >& outOfBagExperiments);
|
62
|
+
void reportDeviance(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
63
|
+
void deleteRedundantData();
|
64
|
+
|
65
|
+
|
66
|
+
shared_ptr<GBMOutput> m_output;
|
67
|
+
vector<shared_ptr<DecisionTreeExperiment> > m_decisionTreeExperiments;
|
68
|
+
shared_ptr<GBMParameters> m_parameters;
|
69
|
+
vector<int> m_featureIndices;
|
70
|
+
shared_ptr<DecisionTreeNode> m_decisionTreeHead;
|
71
|
+
set<shared_ptr<DecisionTreeNode> > m_terminalNodes;
|
72
|
+
map<shared_ptr<DecisionTreeNode>, double> m_FIncrements;
|
73
|
+
shared_ptr<GBMCalculator> m_gbmCalculator;
|
74
|
+
bool m_missingValueDefined;
|
75
|
+
double m_missingValue;
|
76
|
+
private:
|
77
|
+
};
|
78
|
+
|
79
|
+
#endif // GBMEstimator_h__
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#ifndef GBMOutput_h__
|
2
|
+
#define GBMOutput_h__
|
3
|
+
|
4
|
+
#include "MachineLearning/GBM/GBMParameters.h"
|
5
|
+
#include "MachineLearning/MLOutput.h"
|
6
|
+
|
7
|
+
#include <boost/shared_ptr.hpp>
|
8
|
+
#include <vector>
|
9
|
+
#include <map>
|
10
|
+
using std::map;
|
11
|
+
using std::vector;
|
12
|
+
using boost::shared_ptr;
|
13
|
+
|
14
|
+
class DecisionTreeNode;
|
15
|
+
class DecisionTreeExperiment;
|
16
|
+
class MLData;
|
17
|
+
class GBMCalculator;
|
18
|
+
|
19
|
+
class GBMOutput : public MLOutput
|
20
|
+
{
|
21
|
+
public:
|
22
|
+
GBMOutput(MLData* trainingData, vector<int> trainingExperimentIndicies,shared_ptr<GBMParameters> parameters);
|
23
|
+
~GBMOutput();
|
24
|
+
|
25
|
+
// recording inputs of GBM estimation
|
26
|
+
shared_ptr<GBMParameters> getParameters();
|
27
|
+
|
28
|
+
// recording outputs of GBM estimation
|
29
|
+
void setMeanY(double y);
|
30
|
+
void addHeadDecisionTreeNode(shared_ptr<DecisionTreeNode> node);
|
31
|
+
void addFIncrements(map<shared_ptr<DecisionTreeNode>, double> m_FIncrements);
|
32
|
+
|
33
|
+
// applying model to new data
|
34
|
+
double predictForExperiment(shared_ptr<MLExperiment> experiment);
|
35
|
+
void setPredictionForDecisionTreeExperiment(shared_ptr<DecisionTreeExperiment> experiment);
|
36
|
+
|
37
|
+
// stats
|
38
|
+
int getNumTrees();
|
39
|
+
|
40
|
+
void capTrees(int numTrees);
|
41
|
+
|
42
|
+
protected:
|
43
|
+
|
44
|
+
vector<shared_ptr<DecisionTreeNode> > m_headNodes;
|
45
|
+
vector<map<shared_ptr<DecisionTreeNode>, double> > m_fIncrements;
|
46
|
+
double m_meanY;
|
47
|
+
bool m_useMeanY;
|
48
|
+
shared_ptr<GBMParameters> m_parameters;
|
49
|
+
shared_ptr<GBMCalculator> m_gbmCalculator;
|
50
|
+
private:
|
51
|
+
};
|
52
|
+
|
53
|
+
#endif // GBMOutput_h__
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#ifndef __GBMParameters_h__
|
2
|
+
#define __GBMParameters_h__
|
3
|
+
|
4
|
+
#include <string>
|
5
|
+
#include <vector>
|
6
|
+
#include <limits>
|
7
|
+
using std::vector;
|
8
|
+
using std::string;
|
9
|
+
|
10
|
+
// Loss/distribution choices available to the GBM code.
enum GBMDistribution {
    BERNOULLI,
    GAUSSIAN
};

// Plain bag of GBM tuning parameters. All fields are public and are given
// their defaults by the constructor; the two vectors start empty.
class GBMParameters
{
public:
    // Initializers are listed in member declaration order, which is the
    // order the compiler actually uses (avoids -Wreorder warnings).
    GBMParameters()
        : tryMVariables(-1),
          growKDecisionTreeNodes(5),
          bagFraction(1.0),
          shrinkageFactor(0.01),
          numIterations(100),
          minObservations(10),
          distribution(BERNOULLI),
          rfToLevel(0),
          greedy(true),
          scale(std::numeric_limits<double>::infinity()),
          verbose(false)
    {}
    ~GBMParameters() {}

    // parameters will be public
    std::vector<std::string> featuresToRun; // X's for this run

    int tryMVariables;           // default -1; NOTE(review): looks like an "unset" sentinel -- confirm
    int growKDecisionTreeNodes;  // default 5

    double bagFraction;          // default 1.0
    double shrinkageFactor;      // default 0.01
    int    numIterations;        // default 100

    int              minObservations;        // default 10
    std::vector<int> trainingExperimentIds;

    GBMDistribution distribution;  // default BERNOULLI

    int  rfToLevel;  // default 0
    bool greedy;     // default true

    double scale;    // default +infinity; NOTE(review): presumably means "no scale set" -- confirm

    bool verbose;    // default false
};
|
49
|
+
|
50
|
+
#endif // GBMParameters_h__
|
@@ -0,0 +1,35 @@
|
|
1
|
+
// The guard is specific to this header. It previously reused GBM.h's guard
// macro, so whichever of the two headers was included second in a
// translation unit was silently skipped.
#ifndef GBMRunner_h__
#define GBMRunner_h__

#include "MachineLearning/MLRunner.h"

#include <boost/shared_ptr.hpp>
#include <vector>
using std::vector;
using boost::shared_ptr;

class MLData;
class MLExperiment;
class FeatureInteraction;
class GBMParameters;

// MLRunner specialisation for gradient boosting. Only declarations are
// visible here; the definitions live outside this header.
class GBMRunner : public MLRunner
{
public:
    GBMRunner();
    ~GBMRunner();

    void estimateMore(int numTrees);
    void capTrees(int numTrees);

    vector<FeatureInteraction> getFeatureInteractions(int howMany);

    // NOTE(review): presumably the factory hook MLRunner calls to build the
    // estimator -- confirm against MLRunner.h.
    shared_ptr<MLEstimator> createEstimator(MLData* data, vector<shared_ptr<MLExperiment> > trainingExperiments);

    // Shared handle to the GBM parameters, exposed as a public member.
    shared_ptr<GBMParameters> parameters;

protected:
    void config();
};

#endif // GBMRunner_h__
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#ifndef GaussianCalculator_h__
|
2
|
+
#define GaussianCalculator_h__
|
3
|
+
|
4
|
+
#include "MachineLearning/GBM/GBMCalculator.h"
|
5
|
+
|
6
|
+
#include <boost/shared_ptr.hpp>
|
7
|
+
#include <vector>
|
8
|
+
using std::vector;
|
9
|
+
using boost::shared_ptr;
|
10
|
+
|
11
|
+
class GaussianCalculator : public GBMCalculator
|
12
|
+
{
|
13
|
+
public:
|
14
|
+
GaussianCalculator();
|
15
|
+
~GaussianCalculator();
|
16
|
+
|
17
|
+
double calculateDeviance(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
18
|
+
void populateInitialF(vector<shared_ptr<DecisionTreeExperiment> >& experiments, bool useInitialPredictions);
|
19
|
+
void updateZ(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
20
|
+
double computeFIncrement(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
21
|
+
void updatePredictions(vector<shared_ptr<DecisionTreeExperiment> >& experiments);
|
22
|
+
double calculatePrediction(double f);
|
23
|
+
double calculateF(double prediction);
|
24
|
+
protected:
|
25
|
+
|
26
|
+
private:
|
27
|
+
};
|
28
|
+
|
29
|
+
#endif // GaussianCalculator_h__
|
@@ -0,0 +1,27 @@
|
|
1
|
+
// #ifndef ZenithGBM_h__
|
2
|
+
// #define ZenithGBM_h__
|
3
|
+
|
4
|
+
// #include "stdafx.h"
|
5
|
+
|
6
|
+
// // ruby interface methods
|
7
|
+
// void zenith_gbm_Free(void* v);
|
8
|
+
// OtInterface::VALUE zenith_gbm_New(int argc, VALUE* argv, VALUE klass);
|
9
|
+
// OtInterface::VALUE zenith_gbm_Initialize(VALUE self);
|
10
|
+
// OtInterface::VALUE zenith_gbm_estimate(VALUE self);
|
11
|
+
// OtInterface::VALUE zenith_gbm_estimateMore(VALUE self, VALUE numTrees);
|
12
|
+
// OtInterface::VALUE zenith_gbm_setFeaturesToRun(VALUE self, VALUE featuresValue);
|
13
|
+
// OtInterface::VALUE zenith_gbm_setData(VALUE self, VALUE data);
|
14
|
+
// OtInterface::VALUE zenith_gbm_setDistribution(VALUE self, VALUE distribution);
|
15
|
+
// OtInterface::VALUE zenith_gbm_setTryMVariables(VALUE self, VALUE mVariablesValue);
|
16
|
+
// OtInterface::VALUE zenith_gbm_setKTerminalNodes(VALUE self, VALUE kNodesValue);
|
17
|
+
// OtInterface::VALUE zenith_gbm_setNumIterations(VALUE self, VALUE numIterationsValue);
|
18
|
+
// OtInterface::VALUE zenith_gbm_setShrinkageFactor(VALUE self, VALUE shrinkageFactorValue);
|
19
|
+
// OtInterface::VALUE zenith_gbm_setBagFraction(VALUE self, VALUE bagFractionValue);
|
20
|
+
// OtInterface::VALUE zenith_gbm_setTrainingExperimentIds(VALUE self, VALUE experimentIdsValue);
|
21
|
+
// OtInterface::VALUE zenith_gbm_minObservations(VALUE self, VALUE minObservations);
|
22
|
+
// OtInterface::VALUE zenith_gbm_verbose(VALUE self, VALUE verbose);
|
23
|
+
// OtInterface::VALUE zenith_gbm_predictions(VALUE self, VALUE newMlData);
|
24
|
+
// OtInterface::VALUE zenith_gbm_training_predictions(VALUE self);
|
25
|
+
// OtInterface::VALUE zenith_gbm_crossvalidation_predictions(VALUE self);
|
26
|
+
|
27
|
+
// #endif // ZenithGBM_h__
|
@@ -0,0 +1,77 @@
|
|
1
|
+
#ifndef __MLData_h__
|
2
|
+
#define __MLData_h__
|
3
|
+
|
4
|
+
#include <boost/shared_ptr.hpp>
|
5
|
+
#include <vector>
|
6
|
+
#include <set>
|
7
|
+
#include <map>
|
8
|
+
#include <utility>
|
9
|
+
#include <string>
|
10
|
+
using std::string;
|
11
|
+
using std::pair;
|
12
|
+
using std::set;
|
13
|
+
using std::map;
|
14
|
+
using std::vector;
|
15
|
+
using boost::shared_ptr;
|
16
|
+
|
17
|
+
class MLExperiment;
|
18
|
+
|
19
|
+
class MLData
|
20
|
+
{
|
21
|
+
public:
|
22
|
+
MLData();
|
23
|
+
~MLData();
|
24
|
+
|
25
|
+
void setExperiments(vector<shared_ptr<MLExperiment> > experiments);
|
26
|
+
vector<shared_ptr<MLExperiment> >& getExperiments();
|
27
|
+
shared_ptr<MLExperiment> getExperiment(int experimentIndex);
|
28
|
+
|
29
|
+
void createFolds(int numFolds, int randomSeed);
|
30
|
+
void setFolds(vector<int> folds);
|
31
|
+
|
32
|
+
void setTrainingExperimentIds(vector<int>& experimentIds);
|
33
|
+
void setTrainingExperiments(vector<shared_ptr<MLExperiment> > experiments);
|
34
|
+
|
35
|
+
vector<int>& getFoldNumbers();
|
36
|
+
vector<int> getFolds();
|
37
|
+
|
38
|
+
vector<shared_ptr<MLExperiment> >& getTrainingExperiments(int fold);
|
39
|
+
vector<shared_ptr<MLExperiment> >& getCrossValidationExperiments(int fold);
|
40
|
+
|
41
|
+
shared_ptr<MLExperiment> getExperimentWithId(int experimentId);
|
42
|
+
// vector<shared_ptr<MLExperiment> >& getExperimentsSortedOnFeature(int featureIndex);
|
43
|
+
|
44
|
+
vector<string>& getFeatures();
|
45
|
+
void setFeatures(vector<string> features);
|
46
|
+
int getFeatureIndex(string& feature);
|
47
|
+
|
48
|
+
void constructCategories(vector<string> categoricalFeatures);
|
49
|
+
set<int>& getCategoricalFeatureIndices();
|
50
|
+
|
51
|
+
void setInitialPredictions(vector<double> initialPredictions);
|
52
|
+
void setInitialPredictionsDefined(bool defined);
|
53
|
+
bool initialPredictionsDefined();
|
54
|
+
|
55
|
+
int getNumFolds();
|
56
|
+
|
57
|
+
void setMissingValue(double missingValue);
|
58
|
+
bool missingValueDefined();
|
59
|
+
double getMissingValue();
|
60
|
+
protected:
|
61
|
+
|
62
|
+
vector<shared_ptr<MLExperiment> > m_experiments;
|
63
|
+
map<int, vector<shared_ptr<MLExperiment> > > m_trainingExperiments;
|
64
|
+
map<int, vector<shared_ptr<MLExperiment> > > m_cvExperiments;
|
65
|
+
|
66
|
+
map<int, shared_ptr<MLExperiment> > m_experimentsById;
|
67
|
+
vector<string> m_featureNames;
|
68
|
+
map<string, int> m_featureIndices;
|
69
|
+
set<int> m_categoricalFeatureIndices;
|
70
|
+
bool m_initialPredictionsDefined;
|
71
|
+
vector<int> m_foldNumbers;
|
72
|
+
double m_missingValue;
|
73
|
+
bool m_missingValueDefined;
|
74
|
+
// map<int, vector<shared_ptr<MLExperiment> > > m_experimentsSortedByFeature;
|
75
|
+
};
|
76
|
+
|
77
|
+
#endif // MLData_h__
|