ml4r 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data/ext/ml4r/{LinearRegression.h → LinearRegression/LinearRegression.h} +25 -19
  2. data/ext/ml4r/LinearRegression/OLSLinearRegression.h +29 -0
  3. data/ext/ml4r/MachineLearning/DecisionTree/CategoryInfo.h +32 -0
  4. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.h +30 -0
  5. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.h +86 -0
  6. data/ext/ml4r/MachineLearning/DecisionTree/FeatureInteraction.h +31 -0
  7. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.h +45 -0
  8. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.h +17 -0
  9. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.h +16 -0
  10. data/ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.h +81 -0
  11. data/ext/ml4r/MachineLearning/GBM/BernoulliCalculator.h +29 -0
  12. data/ext/ml4r/MachineLearning/GBM/GBM.h +50 -0
  13. data/ext/ml4r/MachineLearning/GBM/GBMCalculator.h +31 -0
  14. data/ext/ml4r/MachineLearning/GBM/GBMData.h +0 -0
  15. data/ext/ml4r/MachineLearning/GBM/GBMEstimator.h +79 -0
  16. data/ext/ml4r/MachineLearning/GBM/GBMOutput.h +53 -0
  17. data/ext/ml4r/MachineLearning/GBM/GBMParameters.h +50 -0
  18. data/ext/ml4r/MachineLearning/GBM/GBMRunner.h +35 -0
  19. data/ext/ml4r/MachineLearning/GBM/GaussianCalculator.h +29 -0
  20. data/ext/ml4r/MachineLearning/GBM/ZenithGBM.h +27 -0
  21. data/ext/ml4r/MachineLearning/MLData/MLData.h +77 -0
  22. data/ext/ml4r/MachineLearning/MLData/MLDataFields.h +25 -0
  23. data/ext/ml4r/MachineLearning/MLData/MLDataReader.h +37 -0
  24. data/ext/ml4r/MachineLearning/MLData/ZenithMLData.h +13 -0
  25. data/ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.h +20 -0
  26. data/ext/ml4r/MachineLearning/MLEstimator.h +30 -0
  27. data/ext/ml4r/MachineLearning/MLEstimatorFactory.h +25 -0
  28. data/ext/ml4r/MachineLearning/MLExperiment.h +41 -0
  29. data/ext/ml4r/MachineLearning/MLOutput.h +45 -0
  30. data/ext/ml4r/MachineLearning/MLParameters.h +16 -0
  31. data/ext/ml4r/MachineLearning/MLRunner.h +47 -0
  32. data/ext/ml4r/MachineLearning/MLUtils.h +75 -0
  33. data/ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.h +47 -0
  34. data/ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.h +33 -0
  35. data/ext/ml4r/MachineLearning/RandomForest/RandomForestParameters.h +32 -0
  36. data/ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.h +34 -0
  37. data/ext/ml4r/extconf.rb +16 -3
  38. data/ext/ml4r/{MathUtils.h → utils/MathUtils.h} +0 -0
  39. data/ext/ml4r/{MatrixInversion.h → utils/MatrixInversion.h} +0 -0
  40. data/ext/ml4r/utils/StochasticUtils.h +33 -0
  41. data/ext/ml4r/utils/Utils.h +147 -0
  42. data/ext/ml4r/utils/VlcMessage.h +44 -0
  43. data/lib/ml4r/linear_regression.rb +7 -0
  44. metadata +45 -13
  45. data/ext/ml4r/LinearRegression/ZenithRegression.h +0 -17
  46. data/ext/ml4r/OLSLinearRegression.h +0 -23
  47. data/ext/ml4r/Utils.h +0 -53
  48. data/ext/ml4r/example.h +0 -18
  49. data/ext/ml4r/swig/example.h +0 -13
  50. data/ext/ml4r/swig/example_wrap.c +0 -2093
  51. data/ext/ml4r/utils/RubyUtils.h +0 -174
@@ -0,0 +1,32 @@
1
+ #ifndef RandomForestParameters_h__
2
+ #define RandomForestParameters_h__
3
+
4
+ #include <vector>
5
+ #include <string>
6
+ #include <limits>
7
+ using std::string;
8
+ using std::vector;
9
+
10
+ class RandomForestParameters // Settings holder for a random forest run: every field is public and receives its default from the constructor below.
11
+ {
12
+ public:
13
+ RandomForestParameters() // Default constructor: initialises every scalar field; featuresToRun is left default-constructed (empty).
14
+ : minObservations(1), tryMVariables(100), numIterations(200), bagFraction(0.3), verbose(false),
15
+ withReplacement(false),scale(std::numeric_limits<double>::infinity())
16
+ {};
17
+ ~RandomForestParameters() {};
18
+ // Field meanings below are inferred from the names only -- TODO confirm against the estimator that reads them.
19
+ int minObservations; // default 1; presumably the minimum number of observations allowed in a node
20
+ int tryMVariables; // default 100; presumably how many candidate features are tried per split
21
+ vector<string> featuresToRun; // empty by default; presumably the names of the features to train on
22
+ int numIterations; // default 200; presumably the number of trees to build
23
+ double bagFraction; // default 0.3; presumably the fraction of observations sampled per tree
24
+ bool verbose; // default false
25
+ bool withReplacement; // default false; presumably whether bagging samples with replacement
26
+ double scale; // default +infinity (std::numeric_limits<double>::infinity()); purpose not visible here
27
+ protected:
28
+
29
+ private:
30
+ };
31
+
32
+ #endif // RandomForestParameters_h__
@@ -0,0 +1,34 @@
1
+ #ifndef RandomForest_h__
2
+ #define RandomForest_h__
3
+
4
+ #include "MachineLearning/MLRunner.h"
5
+
6
+ #include <boost/shared_ptr.hpp>
7
+ #include <vector>
8
+ using std::vector;
9
+ using boost::shared_ptr;
10
+
11
+ class MLData;
12
+ class MLExperiment;
13
+ class MLEstimator;
14
+ class RandomForestParameters;
15
+
16
+ class RandomForestRunner : public MLRunner // MLRunner subclass for random forests; this header holds declarations only, no member is defined here.
17
+ {
18
+ public:
19
+ RandomForestRunner();
20
+ ~RandomForestRunner();
21
+
22
+ void estimateMore(int numTrees); // presumably grows the fitted forest by numTrees additional trees -- TODO confirm in the implementation
23
+
24
+ shared_ptr<RandomForestParameters> parameters; // publicly accessible settings object (only forward-declared in this header)
25
+
26
+ shared_ptr<MLEstimator> createEstimator(MLData* data, vector<shared_ptr<MLExperiment> > trainingExperiments); // takes a raw MLData pointer and the experiment list by value; NOTE(review): looks like an MLRunner hook being overridden -- confirm
27
+
28
+ protected:
29
+ void config(); // NOTE(review): presumably called by MLRunner before estimation -- confirm
30
+
31
+ private:
32
+ };
33
+
34
+ #endif // RandomForest_h__
data/ext/ml4r/extconf.rb CHANGED
@@ -1,7 +1,20 @@
1
1
  require 'mkmf'
2
2
 
3
- $libs += " -lstdc++ --std=c++0x"
4
- # $CPPFLAGS += " --std=c++0x"
3
+ $libs += " -lstdc++ " # Link against the C++ standard library.
4
+
5
+ # These warning flags flood the console with hard-to-read messages and obscure any real problems, so strip them.
6
+ CONFIG['warnflags'].gsub!('-Wdeclaration-after-statement', '') # NOTE(review): raises NoMethodError if CONFIG['warnflags'] is nil -- confirm it is set on every supported Ruby build.
7
+ CONFIG['warnflags'].gsub!('-Wimplicit-function-declaration', '')
8
+ CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '')
5
9
 
6
10
  dir_config('boost')
7
- create_makefile('ml4r/ml4r')
11
+
12
+ $srcs = Dir.glob("**/*.c*") # Recursively collect every file whose extension starts with .c
13
+ $objs = $srcs.map { |e| e.gsub(/cpp$/, "o") } # Turn a trailing cpp into o; NOTE(review): other glob matches (.c, .cc, ...) pass through unchanged -- confirm only .cpp sources exist.
14
+
15
+ create_makefile('ml4r/ml4r') # Write the Makefile for the ml4r/ml4r extension.
16
+
17
+ dat = IO.readlines("Makefile") # Read the generated Makefile back so it can be patched in place.
18
+ File.open("Makefile", 'w') { |f|
19
+ dat.each { |line| f.puts(line.gsub(/\$\</, "$< -o $@")) } # Append an explicit -o $@ after every $<; presumably so objects for sources in subdirectories land at the expected path -- TODO confirm.
20
+ }
File without changes
@@ -0,0 +1,33 @@
1
+ #ifndef __STOCHASTIC_UTILS_H__
2
+ #define __STOCHASTIC_UTILS_H__
3
+
4
+ #include <cstdlib>
5
+ #include <vector>
6
+ #include <string>
7
+ #include <map>
8
+ #include <stdexcept>
9
+ using std::runtime_error;
10
+ using std::map;
11
+ using std::string;
12
+ using std::vector;
13
+
14
+ #define RAND_MAX_FLOAT ((float)RAND_MAX)
15
+
16
+ namespace StochasticUtils // Helpers for random category selection; everything except getQot() is declared here and defined elsewhere.
17
+ {
18
+ vector<double> convertPdfToCumulativeSum(std::vector<double> pdf); // takes pdf by value; presumably returns its running totals -- TODO confirm
19
+ vector<float> convertHistogramToPdf(vector<float> histogram); // takes histogram by value; presumably normalises it to sum to 1 -- TODO confirm
20
+ int chooseCategoryFromCdf(float * cumulativeProbabilities, int N); // raw-array overload; N is presumably the element count -- TODO confirm
21
+ int chooseCategoryFromCdf(vector<float>& cumulativeProbabilities);
22
+ int chooseCategoryFromCdf(float qot, vector<float>& cumulativeProbabilities); // qot is presumably a caller-supplied uniform draw such as getQot() returns -- TODO confirm
23
+ int chooseCategoryFromPdf(vector<float>& probabilities, string categoryType = "object"); // categoryType defaults to "object"; its use is not visible in this header
24
+ int chooseCategoryFromPdf(double qot, vector<float>& probabilities, string categoryType);
25
+ template<class T> T chooseCategoryFromPdf(map<T,float>& probabilities); // NOTE(review): no template definition is visible in this header -- confirm callers can instantiate it
26
+ template<class T> T chooseCategoryFromPdf(double qot, map<T,float>& probabilities);
27
+ inline double getQot() // Draws one pseudo-random number from rand().
28
+ {
29
+ return (double)rand() / ((double)(RAND_MAX)+(double)(1)); // dividing by RAND_MAX + 1 keeps the result in [0, 1)
30
+ }
31
+ };
32
+
33
+ #endif // __STOCHASTIC_UTILS_H__
@@ -0,0 +1,147 @@
1
+ #ifndef __Utils_h__
2
+ #define __Utils_h__
3
+
4
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <map>
#include <stdexcept>
#include <vector>
#include <boost/foreach.hpp>
using std::map;
using std::vector;
10
+
11
namespace Utils
{
    // ---- Forward declarations ------------------------------------------------
    // vectorShuffle() calls both of these from inside a template. With standard
    // two-phase name lookup the names must already be declared where the template
    // is defined: argument-dependent lookup will not search namespace Utils for
    // int / std::vector arguments. Declaring them first keeps the header portable.

    /// Declared here; the definition is not part of this header.
    std::vector<int> vectorOfRandomInt(int length);

    template<class T, class U>
    std::vector<T> vectorSortUsingOtherVector(std::vector<T>& vec, std::vector<U>& otherVector);

    /// Returns true when `element` compares equal to some item of the sequence `m`
    /// (linear search over [m.begin(), m.end())).
    template<class Container, class T>
    bool hasElement(const Container& m, T element)
    {
        return std::find(m.begin(), m.end(), element) != m.end();
    }

    /// Map overload: returns true when `element` is a key of `m`.
    template<class T, class U>
    bool hasElement(const std::map<T,U>& m, T element)
    {
        return m.find(element) != m.end();
    }

    /// Returns the zero-based position of the first item equal to `element`,
    /// or -1 when no item matches. The vector is searched once.
    template<typename T>
    int vectorIndex(std::vector<T>& c, T element)
    {
        const typename std::vector<T>::iterator pos = std::find(c.begin(), c.end(), element);
        if (pos == c.end())
            return -1;
        return static_cast<int>(pos - c.begin());
    }

    /// Returns the sum of all items, accumulated left to right starting from the
    /// first item. An empty vector yields a value-initialised T (0 for arithmetic
    /// types), so the function also works for non-numeric T such as std::string.
    template<typename T>
    T vectorSum(const std::vector<T>& vec)
    {
        if (vec.empty())
            return T();

        T sum = vec.front();
        for (typename std::vector<T>::const_iterator it = vec.begin() + 1; it != vec.end(); ++it)
            sum += *it;

        return sum;
    }

    /// Returns start, start + stepSize, start + 2*stepSize, ... with
    /// (end - start) / stepSize + 1 entries (the count is truncated to int).
    /// A non-positive count yields an empty vector.
    /// @throws std::runtime_error when stepSize is zero.
    template<class T> std::vector<T>
    vectorRange(T start, T end, T stepSize = 1)
    {
        // A zero step would divide by zero below.
        if (stepSize == 0)
            throw std::runtime_error("[Utils::vectorRange] - stepSize must be non-zero.");

        const int numSteps = static_cast<int>((end - start) / stepSize + 1);
        std::vector<T> returnValue;
        if (numSteps <= 0)
            return returnValue;   // avoids resizing to a negative (i.e. huge unsigned) length

        returnValue.resize(numSteps);
        for (int i = 0; i < numSteps; ++i)
            returnValue.at(i) = static_cast<T>(start + i * stepSize);

        return returnValue;
    }

    /// Returns a vector of `totalDesiredLength` items built by cycling through `vec`
    /// (item i of the result is vec[i % vec.size()]).
    /// @throws std::runtime_error when totalDesiredLength is negative, or when vec is
    ///         empty while a non-zero length is requested.
    template<class T> std::vector<T>
    vectorRepeat(std::vector<T>& vec, int totalDesiredLength)
    {
        if (totalDesiredLength < 0)
            throw std::runtime_error("[Utils::vectorRepeat] - totalDesiredLength must not be negative.");
        // Cycling through nothing would compute i % 0.
        if (vec.empty() && totalDesiredLength > 0)
            throw std::runtime_error("[Utils::vectorRepeat] - cannot repeat an empty vector.");

        std::vector<T> returnValue;
        returnValue.reserve(totalDesiredLength);
        for (int i = 0; i < totalDesiredLength; ++i)
            returnValue.push_back(vec[i % vec.size()]);

        return returnValue;
    }

    /// Returns a reordered copy of `vec`: one int key per item is obtained from
    /// vectorOfRandomInt() and the items are arranged by ascending key.
    template<class T> std::vector<T>
    vectorShuffle(std::vector<T>& vec)
    {
        std::vector<int> sortVector = vectorOfRandomInt(static_cast<int>(vec.size()));
        return vectorSortUsingOtherVector(vec, sortVector);
    }

    /// Returns a copy of `vec` arranged by ascending value of the matching entry in
    /// `otherVector`. Items whose keys compare equal keep their original relative order.
    /// @throws std::runtime_error when the two vectors differ in size.
    template<class T, class U>
    std::vector<T> vectorSortUsingOtherVector(std::vector<T>& vec, std::vector<U>& otherVector)
    {
        if (otherVector.size() != vec.size())
            throw std::runtime_error("[] - vec and otherVector must be of equal size.");

        std::vector<U> otherVectorSorted = otherVector;
        std::sort(otherVectorSorted.begin(), otherVectorSorted.end());

        // For every distinct key, remember the first slot it occupies in sorted order.
        std::map<U, int> newPosition;
        for (std::size_t i = 0; i < otherVectorSorted.size(); ++i)
        {
            if (i == 0 || otherVectorSorted[i] != otherVectorSorted[i - 1])
                newPosition[otherVectorSorted[i]] = static_cast<int>(i);
        }

        std::vector<T> returnValue;
        returnValue.resize(vec.size());

        // Drop each item into the next free slot of its key; the post-increment
        // hands the following slot to the next item that shares the key.
        for (std::size_t i = 0; i < vec.size(); ++i)
        {
            const int newIndex = newPosition[otherVector[i]]++;
            returnValue[newIndex] = vec[i];
        }
        return returnValue;
    }

    /// Returns a vector holding the absolute value of every item of `vec`.
    template<class T>
    std::vector<T> vectorAbs(const std::vector<T>& vec)
    {
        // std::abs has floating-point overloads; an unqualified abs() may resolve
        // to the C int version and silently truncate doubles.
        using std::abs;

        std::vector<T> absVec;
        absVec.reserve(vec.size());

        for (typename std::vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
            absVec.push_back(static_cast<T>(abs(*it)));

        return absVec;
    }

    /// Element-wise vec[i] += vec2[i]. Returns `vec`.
    /// NOTE: this operator lives in namespace Utils, so callers need a using
    /// declaration/directive (or an explicit Utils::operator+= call) to reach it.
    /// @throws std::runtime_error when the sizes differ.
    template<class T> std::vector<T>& operator+=(std::vector<T>& vec, const std::vector<T>& vec2)
    {
        if (vec.size() != vec2.size())
            throw std::runtime_error("Can't add vectors of different sizes");
        for (std::size_t i = 0; i < vec.size(); ++i)
            vec[i] += vec2[i];
        return vec;
    }

    /// Adds `offset` to every item of `vec`. Returns `vec`.
    /// NOTE: the offset is a float whatever T is, so a double argument is narrowed
    /// to float before it is added.
    template <class T>
    std::vector<T>& operator+=(std::vector<T>& vec, const float offset)
    {
        for (std::size_t i = 0; i < vec.size(); ++i)
            vec[i] += offset;
        return vec;
    }
} // namespace Utils
146
+
147
+ #endif
@@ -0,0 +1,44 @@
1
+ #ifndef __VLC_MESSAGE_H__
2
+ #define __VLC_MESSAGE_H__
3
+
4
+ #include <iostream>
5
+ #include <boost/lexical_cast.hpp>
6
+ using boost::lexical_cast;
7
+ using std::cout;
8
+ using std::endl;
9
+
10
+ class VlcMessage // Minimal console logger: writes to std::cout and tracks a nesting depth in indentLevel.
11
+ {
12
+ public:
13
+
14
+ VlcMessage() : indentLevel(0) {} // starts at depth 0
15
+ ~VlcMessage() {}
16
+ // Prints message on its own line, preceded by one indent string per indentLevel; when level > 1 the text is wrapped in "*** ... ***".
17
+ void Write(std::string message, int level = 0)
18
+ {
19
+ for (int i=0; i<indentLevel; ++i)
20
+ cout << " ";
21
+ if (level > 1) cout << "*** ";
22
+ cout << message;
23
+ if (level > 1) cout << " ***";
24
+ cout << endl; // endl also flushes the stream
25
+ }
26
+ // Prints message (without indentation) and then increases the depth by one.
27
+ void Begin(std::string message)
28
+ {
29
+ cout << message << endl;
30
+ ++indentLevel;
31
+ }
32
+ // Prints an empty line and decreases the depth by one.
33
+ void End()
34
+ {
35
+ cout << endl;
36
+ --indentLevel; // NOTE(review): nothing stops this going negative if End() is called more often than Begin()
37
+ }
38
+
39
+ int indentLevel; // current nesting depth; public, so callers can also set it directly
40
+ };
41
+
42
+ extern VlcMessage vlcMessage;
43
+
44
+ #endif // __VLC_MESSAGE_H__
@@ -0,0 +1,7 @@
1
module Ml4r
  # Ruby-side definition of Ml4r::OLSLinearRegression.
  class OLSLinearRegression
    # Constructor; takes no arguments and performs no Ruby-level setup.
    #
    # The previous spelling `def i nitialize` parsed as a public method named
    # `i` taking one parameter, so no constructor was actually defined here.
    # NOTE(review): if the native extension also defines #initialize for this
    # class, confirm the load order so this empty body does not shadow it.
    def initialize
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ml4r
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-06-24 00:00:00.000000000Z
13
+ date: 2012-06-24 00:00:00.000000000 Z
14
14
  dependencies: []
15
15
  description: A Ruby-based library of Machine Learning (ML) algorithms
16
16
  email:
@@ -21,18 +21,50 @@ extensions:
21
21
  - ext/ml4r/extconf.rb
22
22
  extra_rdoc_files: []
23
23
  files:
24
+ - lib/ml4r/linear_regression.rb
24
25
  - lib/ml4r.rb
25
26
  - lib/test_cpp_extension.rb
26
- - ext/ml4r/swig/example_wrap.c
27
- - ext/ml4r/example.h
28
- - ext/ml4r/LinearRegression/ZenithRegression.h
29
- - ext/ml4r/LinearRegression.h
30
- - ext/ml4r/MathUtils.h
31
- - ext/ml4r/MatrixInversion.h
32
- - ext/ml4r/OLSLinearRegression.h
33
- - ext/ml4r/swig/example.h
34
- - ext/ml4r/utils/RubyUtils.h
35
- - ext/ml4r/Utils.h
27
+ - ext/ml4r/LinearRegression/LinearRegression.h
28
+ - ext/ml4r/LinearRegression/OLSLinearRegression.h
29
+ - ext/ml4r/MachineLearning/DecisionTree/CategoryInfo.h
30
+ - ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.h
31
+ - ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.h
32
+ - ext/ml4r/MachineLearning/DecisionTree/FeatureInteraction.h
33
+ - ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.h
34
+ - ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.h
35
+ - ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.h
36
+ - ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.h
37
+ - ext/ml4r/MachineLearning/GBM/BernoulliCalculator.h
38
+ - ext/ml4r/MachineLearning/GBM/GaussianCalculator.h
39
+ - ext/ml4r/MachineLearning/GBM/GBM.h
40
+ - ext/ml4r/MachineLearning/GBM/GBMCalculator.h
41
+ - ext/ml4r/MachineLearning/GBM/GBMData.h
42
+ - ext/ml4r/MachineLearning/GBM/GBMEstimator.h
43
+ - ext/ml4r/MachineLearning/GBM/GBMOutput.h
44
+ - ext/ml4r/MachineLearning/GBM/GBMParameters.h
45
+ - ext/ml4r/MachineLearning/GBM/GBMRunner.h
46
+ - ext/ml4r/MachineLearning/GBM/ZenithGBM.h
47
+ - ext/ml4r/MachineLearning/MLData/MLData.h
48
+ - ext/ml4r/MachineLearning/MLData/MLDataFields.h
49
+ - ext/ml4r/MachineLearning/MLData/MLDataReader.h
50
+ - ext/ml4r/MachineLearning/MLData/ZenithMLData.h
51
+ - ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.h
52
+ - ext/ml4r/MachineLearning/MLEstimator.h
53
+ - ext/ml4r/MachineLearning/MLEstimatorFactory.h
54
+ - ext/ml4r/MachineLearning/MLExperiment.h
55
+ - ext/ml4r/MachineLearning/MLOutput.h
56
+ - ext/ml4r/MachineLearning/MLParameters.h
57
+ - ext/ml4r/MachineLearning/MLRunner.h
58
+ - ext/ml4r/MachineLearning/MLUtils.h
59
+ - ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.h
60
+ - ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.h
61
+ - ext/ml4r/MachineLearning/RandomForest/RandomForestParameters.h
62
+ - ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.h
63
+ - ext/ml4r/utils/MathUtils.h
64
+ - ext/ml4r/utils/MatrixInversion.h
65
+ - ext/ml4r/utils/StochasticUtils.h
66
+ - ext/ml4r/utils/Utils.h
67
+ - ext/ml4r/utils/VlcMessage.h
36
68
  - ext/ml4r/extconf.rb
37
69
  homepage: https://github.com/vlc/ml4r
38
70
  licenses: []
@@ -54,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
54
86
  version: '0'
55
87
  requirements: []
56
88
  rubyforge_project:
57
- rubygems_version: 1.8.10
89
+ rubygems_version: 1.8.24
58
90
  signing_key:
59
91
  specification_version: 3
60
92
  summary: A Ruby-based library of Machine Learning (ML) algorithms
@@ -1,17 +0,0 @@
1
- #ifndef ZenithRegression_h__
2
- #define ZenithRegression_h__
3
-
4
- // ruby interface methods
5
- void zenith_regression_Free(void* v);
6
- static VALUE zenith_regression_New(int argc, VALUE* argv, VALUE klass);
7
- static VALUE zenith_regression_Initialize(VALUE self);
8
-
9
- static VALUE zenith_regression_observations(VALUE self, VALUE obs);
10
- static VALUE zenith_regression_weights(VALUE self, VALUE wgts);
11
- static VALUE zenith_regression_setFixedConstant(VALUE self, VALUE obs);
12
- static VALUE zenith_regression_execute(VALUE self);
13
- static VALUE zenith_regression_getFittedYs(VALUE self);
14
- static VALUE zenith_regression_getPredictedYs(VALUE self);
15
- static VALUE zenith_regression_getRegressionStatistics(VALUE self);
16
-
17
- #endif // ZenithRegression_h__
@@ -1,23 +0,0 @@
1
- #ifndef OLSLinearRegression_h__
2
- #define OLSLinearRegression_h__
3
-
4
- #include "LinearRegression.h"
5
-
6
- class OLSLinearRegression : public LinearRegression
7
- {
8
- public:
9
- OLSLinearRegression();
10
- ~OLSLinearRegression();
11
-
12
- void Execute();
13
-
14
- void EstimateBs();
15
-
16
- protected:
17
-
18
- private:
19
- };
20
-
21
-
22
-
23
- #endif // OLSLinearRegression_h__