ml4r 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data/ext/ml4r/{LinearRegression.h → LinearRegression/LinearRegression.h} +25 -19
  2. data/ext/ml4r/LinearRegression/OLSLinearRegression.h +29 -0
  3. data/ext/ml4r/MachineLearning/DecisionTree/CategoryInfo.h +32 -0
  4. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.h +30 -0
  5. data/ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.h +86 -0
  6. data/ext/ml4r/MachineLearning/DecisionTree/FeatureInteraction.h +31 -0
  7. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.h +45 -0
  8. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.h +17 -0
  9. data/ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.h +16 -0
  10. data/ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.h +81 -0
  11. data/ext/ml4r/MachineLearning/GBM/BernoulliCalculator.h +29 -0
  12. data/ext/ml4r/MachineLearning/GBM/GBM.h +50 -0
  13. data/ext/ml4r/MachineLearning/GBM/GBMCalculator.h +31 -0
  14. data/ext/ml4r/MachineLearning/GBM/GBMData.h +0 -0
  15. data/ext/ml4r/MachineLearning/GBM/GBMEstimator.h +79 -0
  16. data/ext/ml4r/MachineLearning/GBM/GBMOutput.h +53 -0
  17. data/ext/ml4r/MachineLearning/GBM/GBMParameters.h +50 -0
  18. data/ext/ml4r/MachineLearning/GBM/GBMRunner.h +35 -0
  19. data/ext/ml4r/MachineLearning/GBM/GaussianCalculator.h +29 -0
  20. data/ext/ml4r/MachineLearning/GBM/ZenithGBM.h +27 -0
  21. data/ext/ml4r/MachineLearning/MLData/MLData.h +77 -0
  22. data/ext/ml4r/MachineLearning/MLData/MLDataFields.h +25 -0
  23. data/ext/ml4r/MachineLearning/MLData/MLDataReader.h +37 -0
  24. data/ext/ml4r/MachineLearning/MLData/ZenithMLData.h +13 -0
  25. data/ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.h +20 -0
  26. data/ext/ml4r/MachineLearning/MLEstimator.h +30 -0
  27. data/ext/ml4r/MachineLearning/MLEstimatorFactory.h +25 -0
  28. data/ext/ml4r/MachineLearning/MLExperiment.h +41 -0
  29. data/ext/ml4r/MachineLearning/MLOutput.h +45 -0
  30. data/ext/ml4r/MachineLearning/MLParameters.h +16 -0
  31. data/ext/ml4r/MachineLearning/MLRunner.h +47 -0
  32. data/ext/ml4r/MachineLearning/MLUtils.h +75 -0
  33. data/ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.h +47 -0
  34. data/ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.h +33 -0
  35. data/ext/ml4r/MachineLearning/RandomForest/RandomForestParameters.h +32 -0
  36. data/ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.h +34 -0
  37. data/ext/ml4r/extconf.rb +16 -3
  38. data/ext/ml4r/{MathUtils.h → utils/MathUtils.h} +0 -0
  39. data/ext/ml4r/{MatrixInversion.h → utils/MatrixInversion.h} +0 -0
  40. data/ext/ml4r/utils/StochasticUtils.h +33 -0
  41. data/ext/ml4r/utils/Utils.h +147 -0
  42. data/ext/ml4r/utils/VlcMessage.h +44 -0
  43. data/lib/ml4r/linear_regression.rb +7 -0
  44. metadata +45 -13
  45. data/ext/ml4r/LinearRegression/ZenithRegression.h +0 -17
  46. data/ext/ml4r/OLSLinearRegression.h +0 -23
  47. data/ext/ml4r/Utils.h +0 -53
  48. data/ext/ml4r/example.h +0 -18
  49. data/ext/ml4r/swig/example.h +0 -13
  50. data/ext/ml4r/swig/example_wrap.c +0 -2093
  51. data/ext/ml4r/utils/RubyUtils.h +0 -174
@@ -0,0 +1,32 @@
1
+ #ifndef RandomForestParameters_h__
2
+ #define RandomForestParameters_h__
3
+
4
+ #include <vector>
5
+ #include <string>
6
+ #include <limits>
7
+ using std::string;
8
+ using std::vector;
9
+
10
/// Plain bag of settings for a random forest run. Every field is public and
/// is given a default by the constructor; featuresToRun starts out empty.
class RandomForestParameters
{
public:
    /// Installs the defaults: minObservations = 1, tryMVariables = 100,
    /// numIterations = 200, bagFraction = 0.3, verbose = false,
    /// withReplacement = false and scale = +infinity.
    RandomForestParameters()
        : minObservations(1),
          tryMVariables(100),
          numIterations(200),
          bagFraction(0.3),
          verbose(false),
          withReplacement(false),
          scale(std::numeric_limits<double>::infinity())
    {
    }

    ~RandomForestParameters()
    {
    }

    // NOTE(review): the meaning of each field is inferred from its name only;
    // confirm against the estimator that consumes these settings.
    int                      minObservations;  // default 1
    int                      tryMVariables;    // default 100
    std::vector<std::string> featuresToRun;    // default empty
    int                      numIterations;    // default 200
    double                   bagFraction;      // default 0.3
    bool                     verbose;          // default false
    bool                     withReplacement;  // default false
    double                   scale;            // default +infinity
};
31
+
32
+ #endif // RandomForestParameters_h__
@@ -0,0 +1,34 @@
1
+ #ifndef RandomForest_h__
2
+ #define RandomForest_h__
3
+
4
+ #include "MachineLearning/MLRunner.h"
5
+
6
+ #include <boost/shared_ptr.hpp>
7
+ #include <vector>
8
+ using std::vector;
9
+ using boost::shared_ptr;
10
+
11
+ class MLData;
12
+ class MLExperiment;
13
+ class MLEstimator;
14
+ class RandomForestParameters;
15
+
16
+ class RandomForestRunner : public MLRunner
17
+ {
18
+ public:
19
+ RandomForestRunner();
20
+ ~RandomForestRunner();
21
+
22
+ void estimateMore(int numTrees);
23
+
24
+ shared_ptr<RandomForestParameters> parameters;
25
+
26
+ shared_ptr<MLEstimator> createEstimator(MLData* data, vector<shared_ptr<MLExperiment> > trainingExperiments);
27
+
28
+ protected:
29
+ void config();
30
+
31
+ private:
32
+ };
33
+
34
+ #endif // RandomForest_h__
data/ext/ml4r/extconf.rb CHANGED
@@ -1,7 +1,20 @@
1
1
  require 'mkmf'
2
2
 
3
- $libs += " -lstdc++ --std=c++0x"
4
- # $CPPFLAGS += " --std=c++0x"
3
$libs += " -lstdc++ "

# The following spam out the console with hard to read error messages, obscuring any real problems.
# CONFIG['warnflags'] is not guaranteed to be set by every Ruby build, so the
# flags are only stripped when the entry exists; calling gsub! on nil would
# abort the extension build.
if (warnflags = CONFIG['warnflags'])
  %w[
    -Wdeclaration-after-statement
    -Wimplicit-function-declaration
    -Wshorten-64-to-32
  ].each { |flag| warnflags.gsub!(flag, '') }
end
5
9
 
6
10
  dir_config('boost')
7
- create_makefile('ml4r/ml4r')
11
+
12
# Collect every source file below the extension directory (the glob matches
# .c, .cc, .cpp, .cxx, ...) and derive the object file name for each one by
# swapping its final extension for ".o". Replacing only a trailing "cpp" would
# leave the other source files listed unchanged among the objects.
$srcs = Dir.glob("**/*.c*")
$objs = $srcs.map { |src| src.sub(/\.[^.\/]*\z/, ".o") }
14
+
15
+ create_makefile('ml4r/ml4r')
16
+
17
# Post-process the generated Makefile: every "$<" gets an explicit "-o $@"
# appended.
# NOTE(review): presumably needed so objects for sources in subdirectories are
# written to the path listed in $objs - confirm.
makefile_lines = File.readlines("Makefile")
File.open("Makefile", "w") do |makefile|
  makefile_lines.each do |line|
    makefile.puts(line.gsub(/\$\</, "$< -o $@"))
  end
end
File without changes
@@ -0,0 +1,33 @@
1
+ #ifndef __STOCHASTIC_UTILS_H__
2
+ #define __STOCHASTIC_UTILS_H__
3
+
4
+ #include <cstdlib>
5
+ #include <vector>
6
+ #include <string>
7
+ #include <map>
8
+ #include <stdexcept>
9
+ using std::runtime_error;
10
+ using std::map;
11
+ using std::string;
12
+ using std::vector;
13
+
14
+ #define RAND_MAX_FLOAT ((float)RAND_MAX)
15
+
16
/// Helpers for turning histograms / pdfs into cumulative form and for picking
/// a category from them. Apart from getQot() only declarations are given here.
namespace StochasticUtils
{
    std::vector<double> convertPdfToCumulativeSum(std::vector<double> pdf);
    std::vector<float>  convertHistogramToPdf(std::vector<float> histogram);

    int chooseCategoryFromCdf(float* cumulativeProbabilities, int N);
    int chooseCategoryFromCdf(std::vector<float>& cumulativeProbabilities);
    int chooseCategoryFromCdf(float qot, std::vector<float>& cumulativeProbabilities);

    int chooseCategoryFromPdf(std::vector<float>& probabilities, std::string categoryType = "object");
    int chooseCategoryFromPdf(double qot, std::vector<float>& probabilities, std::string categoryType);

    template<class T>
    T chooseCategoryFromPdf(std::map<T, float>& probabilities);

    template<class T>
    T chooseCategoryFromPdf(double qot, std::map<T, float>& probabilities);

    /// Draws one value from rand() and scales it by RAND_MAX + 1, so the
    /// result lies in [0, 1).
    inline double getQot()
    {
        const double draw  = static_cast<double>(rand());
        const double range = static_cast<double>(RAND_MAX) + static_cast<double>(1);
        return draw / range;
    }
}
32
+
33
+ #endif // __STOCHASTIC_UTILS_H__
@@ -0,0 +1,147 @@
1
+ #ifndef __Utils_h__
2
+ #define __Utils_h__
3
+
4
#include <algorithm>
#include <map>
#include <stdexcept>
#include <vector>
#include <boost/foreach.hpp>
8
+ using std::map;
9
+ using std::vector;
10
+
11
/// Generic helpers for searching, generating, reordering and combining
/// std::vector (and std::map) values.
namespace Utils
{
    /// Returns true when some item of `m` compares equal to `element`
    /// (linear scan over [m.begin(), m.end())).
    template<class Container, class T>
    bool hasElement(const Container& m, T element)
    {
        // Qualified so the call does not depend on argument-dependent lookup.
        return std::find(m.begin(), m.end(), element) != m.end();
    }

    /// Map overload: returns true when `element` is a key of `m`.
    template<class T, class U>
    bool hasElement(const std::map<T,U>& m, T element)
    {
        return m.find(element) != m.end();
    }

    /// Returns the index of the first item of `c` equal to `element`,
    /// or -1 when there is none.
    template<typename T>
    int vectorIndex(const std::vector<T>& c, T element)
    {
        // Search once and reuse the iterator for both the test and the index.
        const typename std::vector<T>::const_iterator position =
            std::find(c.begin(), c.end(), element);
        if (position == c.end())
            return -1;
        return static_cast<int>(position - c.begin());
    }

    /// Adds up all items of `vec` with operator+=.
    /// An empty vector yields a value-initialised T.
    template<typename T>
    T vectorSum(const std::vector<T>& vec)
    {
        if (vec.empty())
            return T();

        T sum = vec.front();
        for (typename std::vector<T>::size_type i = 1; i < vec.size(); ++i)
            sum += vec[i];

        return sum;
    }

    /// Builds start, start + stepSize, start + 2 * stepSize, ... for
    /// (end - start) / stepSize + 1 items (converted to int).
    /// Returns an empty vector when that count is not positive.
    /// @throws std::runtime_error when stepSize is zero.
    template<class T> std::vector<T>
    vectorRange(T start, T end, T stepSize = 1)
    {
        if (stepSize == T())
            throw std::runtime_error("vectorRange: stepSize must not be zero");

        const int numSteps = static_cast<int>((end - start) / stepSize + 1);
        std::vector<T> returnValue;
        if (numSteps <= 0)
            return returnValue;   // `end` lies before `start` for this step direction

        returnValue.resize(numSteps);
        for (int i = 0; i < numSteps; ++i)
            returnValue[i] = start + i * stepSize;

        return returnValue;
    }

    /// Returns `totalDesiredLength` items obtained by cycling through `vec`.
    /// @throws std::length_error  when totalDesiredLength is negative.
    /// @throws std::runtime_error when items are requested from an empty vec.
    template<class T> std::vector<T>
    vectorRepeat(const std::vector<T>& vec, int totalDesiredLength)
    {
        if (totalDesiredLength < 0)
            throw std::length_error("vectorRepeat: totalDesiredLength must not be negative");

        std::vector<T> returnValue;
        if (totalDesiredLength == 0)
            return returnValue;

        // Guard the modulo below: an empty source would mean `i % 0`.
        if (vec.empty())
            throw std::runtime_error("vectorRepeat: cannot repeat an empty vector");

        const int vectorSize = static_cast<int>(vec.size());
        returnValue.reserve(totalDesiredLength);
        for (int i = 0; i < totalDesiredLength; ++i)
            returnValue.push_back(vec[i % vectorSize]);

        return returnValue;
    }

    /// Defined outside this header; vectorShuffle() asks it for one value per
    /// item of the vector being shuffled.
    std::vector<int> vectorOfRandomInt(int length);

    /// Returns the items of `vec` reordered so that their matching entries in
    /// `otherVector` are in ascending order; items whose entries compare equal
    /// keep their relative order.
    /// @throws std::runtime_error when the two vectors differ in size.
    template<class T, class U>
    std::vector<T> vectorSortUsingOtherVector(const std::vector<T>& vec, const std::vector<U>& otherVector)
    {
        if (otherVector.size() != vec.size())
            throw std::runtime_error("[] - vec and otherVector must be of equal size.");

        std::vector<U> otherVectorSorted = otherVector;
        std::sort(otherVectorSorted.begin(), otherVectorSorted.end());

        // For every distinct sort value, record the first slot it occupies in
        // the sorted order.
        std::map<U, int> newPosition;
        for (typename std::vector<U>::size_type i = 0; i < otherVectorSorted.size(); ++i)
        {
            if (i == 0 || otherVectorSorted[i] != otherVectorSorted[i - 1])
                newPosition[otherVectorSorted[i]] = static_cast<int>(i);
        }

        std::vector<T> returnValue;
        returnValue.resize(vec.size());

        for (typename std::vector<T>::size_type i = 0; i < vec.size(); ++i)
        {
            // Take the next free slot for this sort value, then advance it so
            // a later item with an equal value lands right behind this one.
            const int newIndex = newPosition[otherVector[i]]++;
            returnValue[newIndex] = vec[i];
        }
        return returnValue;
    }

    /// Returns the items of `vec` reordered by the values that
    /// vectorOfRandomInt() supplies for them.
    /// Both helpers are declared above this template on purpose:
    /// argument-dependent lookup cannot find Utils functions for std::vector
    /// arguments, so they must be visible where the template is defined.
    template<class T> std::vector<T>
    vectorShuffle(const std::vector<T>& vec)
    {
        const std::vector<int> sortVector = vectorOfRandomInt(static_cast<int>(vec.size()));
        return vectorSortUsingOtherVector(vec, sortVector);
    }

    /// Returns the element-wise absolute values of `vec`.
    template<class T>
    std::vector<T> vectorAbs(const std::vector<T>& vec)
    {
        std::vector<T> absVec;
        absVec.reserve(vec.size());

        for (typename std::vector<T>::size_type i = 0; i < vec.size(); ++i)
        {
            // Computed in T itself: an unqualified abs() may resolve to C's
            // int abs(int) and truncate floating-point values.
            const T& value = vec[i];
            absVec.push_back(value < T() ? static_cast<T>(-value) : value);
        }
        return absVec;
    }

    /// Element-wise `vec[i] += vec2[i]`.
    /// Declared in namespace Utils, so callers need a using-declaration or
    /// using-directive for the operator to be found.
    /// @throws std::runtime_error when the sizes differ.
    template<class T> std::vector<T>& operator+=(std::vector<T>& vec, const std::vector<T>& vec2)
    {
        if (vec.size() != vec2.size())
            throw std::runtime_error("Can't add vectors of different sizes");
        for (typename std::vector<T>::size_type i = 0; i < vec.size(); ++i)
            vec[i] += vec2[i];
        return vec;
    }

    /// Adds `offset` to every item of `vec`. The offset is always a float,
    /// whatever the element type T is.
    template <class T>
    std::vector<T>& operator+=(std::vector<T>& vec, const float offset)
    {
        for (typename std::vector<T>::size_type i = 0; i < vec.size(); ++i)
            vec[i] += offset;
        return vec;
    }
} // namespace Utils
146
+
147
+ #endif
@@ -0,0 +1,44 @@
1
+ #ifndef __VLC_MESSAGE_H__
2
+ #define __VLC_MESSAGE_H__
3
+
4
+ #include <iostream>
5
+ #include <boost/lexical_cast.hpp>
6
+ using boost::lexical_cast;
7
+ using std::cout;
8
+ using std::endl;
9
+
10
/// Minimal console logger that writes to std::cout and keeps track of a
/// nesting depth: Begin() opens a nested section, Write() prints one line
/// indented by the current depth, End() closes the section.
class VlcMessage
{
public:

    VlcMessage() : indentLevel(0) {}
    ~VlcMessage() {}

    /// Prints `message` on its own line, preceded by one space per indent
    /// level. When `level` is greater than 1 the text is wrapped in
    /// "*** ... ***".
    void Write(const std::string& message, int level = 0)
    {
        for (int i = 0; i < indentLevel; ++i)
            std::cout << " ";
        if (level > 1) std::cout << "*** ";
        std::cout << message;
        if (level > 1) std::cout << " ***";
        std::cout << std::endl;
    }

    /// Prints `message` (not indented) and increases the indent level by one.
    void Begin(const std::string& message)
    {
        std::cout << message << std::endl;
        ++indentLevel;
    }

    /// Prints an empty line and decreases the indent level by one.
    /// The level never drops below zero, so a stray End() cannot cancel the
    /// indentation of a later Begin().
    void End()
    {
        std::cout << std::endl;
        if (indentLevel > 0)
            --indentLevel;
    }

    /// Number of spaces Write() puts in front of each message.
    int indentLevel;
};
41
+
42
+ extern VlcMessage vlcMessage;
43
+
44
+ #endif // __VLC_MESSAGE_H__
@@ -0,0 +1,7 @@
1
module Ml4r
  # Ruby-side definition of the OLSLinearRegression class.
  class OLSLinearRegression
    # Constructor hook; it currently performs no setup.
    # (Spelled "def i nitialize" this would instead define a method named
    # `i` taking one argument.)
    def initialize

    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ml4r
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-06-24 00:00:00.000000000Z
13
+ date: 2012-06-24 00:00:00.000000000 Z
14
14
  dependencies: []
15
15
  description: A ruby based library of Machine Learning (ML) algorithms
16
16
  email:
@@ -21,18 +21,50 @@ extensions:
21
21
  - ext/ml4r/extconf.rb
22
22
  extra_rdoc_files: []
23
23
  files:
24
+ - lib/ml4r/linear_regression.rb
24
25
  - lib/ml4r.rb
25
26
  - lib/test_cpp_extension.rb
26
- - ext/ml4r/swig/example_wrap.c
27
- - ext/ml4r/example.h
28
- - ext/ml4r/LinearRegression/ZenithRegression.h
29
- - ext/ml4r/LinearRegression.h
30
- - ext/ml4r/MathUtils.h
31
- - ext/ml4r/MatrixInversion.h
32
- - ext/ml4r/OLSLinearRegression.h
33
- - ext/ml4r/swig/example.h
34
- - ext/ml4r/utils/RubyUtils.h
35
- - ext/ml4r/Utils.h
27
+ - ext/ml4r/LinearRegression/LinearRegression.h
28
+ - ext/ml4r/LinearRegression/OLSLinearRegression.h
29
+ - ext/ml4r/MachineLearning/DecisionTree/CategoryInfo.h
30
+ - ext/ml4r/MachineLearning/DecisionTree/DecisionTreeExperiment.h
31
+ - ext/ml4r/MachineLearning/DecisionTree/DecisionTreeNode.h
32
+ - ext/ml4r/MachineLearning/DecisionTree/FeatureInteraction.h
33
+ - ext/ml4r/MachineLearning/DecisionTree/NodeSplitter.h
34
+ - ext/ml4r/MachineLearning/DecisionTree/NodeSplitterCategorical.h
35
+ - ext/ml4r/MachineLearning/DecisionTree/NodeSplitterContinuous.h
36
+ - ext/ml4r/MachineLearning/DecisionTree/SplitDefinition.h
37
+ - ext/ml4r/MachineLearning/GBM/BernoulliCalculator.h
38
+ - ext/ml4r/MachineLearning/GBM/GaussianCalculator.h
39
+ - ext/ml4r/MachineLearning/GBM/GBM.h
40
+ - ext/ml4r/MachineLearning/GBM/GBMCalculator.h
41
+ - ext/ml4r/MachineLearning/GBM/GBMData.h
42
+ - ext/ml4r/MachineLearning/GBM/GBMEstimator.h
43
+ - ext/ml4r/MachineLearning/GBM/GBMOutput.h
44
+ - ext/ml4r/MachineLearning/GBM/GBMParameters.h
45
+ - ext/ml4r/MachineLearning/GBM/GBMRunner.h
46
+ - ext/ml4r/MachineLearning/GBM/ZenithGBM.h
47
+ - ext/ml4r/MachineLearning/MLData/MLData.h
48
+ - ext/ml4r/MachineLearning/MLData/MLDataFields.h
49
+ - ext/ml4r/MachineLearning/MLData/MLDataReader.h
50
+ - ext/ml4r/MachineLearning/MLData/ZenithMLData.h
51
+ - ext/ml4r/MachineLearning/MLData/ZenithMLDataReader.h
52
+ - ext/ml4r/MachineLearning/MLEstimator.h
53
+ - ext/ml4r/MachineLearning/MLEstimatorFactory.h
54
+ - ext/ml4r/MachineLearning/MLExperiment.h
55
+ - ext/ml4r/MachineLearning/MLOutput.h
56
+ - ext/ml4r/MachineLearning/MLParameters.h
57
+ - ext/ml4r/MachineLearning/MLRunner.h
58
+ - ext/ml4r/MachineLearning/MLUtils.h
59
+ - ext/ml4r/MachineLearning/RandomForest/RandomForestEstimator.h
60
+ - ext/ml4r/MachineLearning/RandomForest/RandomForestOutput.h
61
+ - ext/ml4r/MachineLearning/RandomForest/RandomForestParameters.h
62
+ - ext/ml4r/MachineLearning/RandomForest/RandomForestRunner.h
63
+ - ext/ml4r/utils/MathUtils.h
64
+ - ext/ml4r/utils/MatrixInversion.h
65
+ - ext/ml4r/utils/StochasticUtils.h
66
+ - ext/ml4r/utils/Utils.h
67
+ - ext/ml4r/utils/VlcMessage.h
36
68
  - ext/ml4r/extconf.rb
37
69
  homepage: https://github.com/vlc/ml4r
38
70
  licenses: []
@@ -54,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
54
86
  version: '0'
55
87
  requirements: []
56
88
  rubyforge_project:
57
- rubygems_version: 1.8.10
89
+ rubygems_version: 1.8.24
58
90
  signing_key:
59
91
  specification_version: 3
60
92
  summary: A ruby based library of Machine Learning (ML) algorithms
@@ -1,17 +0,0 @@
1
- #ifndef ZenithRegression_h__
2
- #define ZenithRegression_h__
3
-
4
- // ruby interface methods
5
- void zenith_regression_Free(void* v);
6
- static VALUE zenith_regression_New(int argc, VALUE* argv, VALUE klass);
7
- static VALUE zenith_regression_Initialize(VALUE self);
8
-
9
- static VALUE zenith_regression_observations(VALUE self, VALUE obs);
10
- static VALUE zenith_regression_weights(VALUE self, VALUE wgts);
11
- static VALUE zenith_regression_setFixedConstant(VALUE self, VALUE obs);
12
- static VALUE zenith_regression_execute(VALUE self);
13
- static VALUE zenith_regression_getFittedYs(VALUE self);
14
- static VALUE zenith_regression_getPredictedYs(VALUE self);
15
- static VALUE zenith_regression_getRegressionStatistics(VALUE self);
16
-
17
- #endif // ZenithRegression_h__
@@ -1,23 +0,0 @@
1
- #ifndef OLSLinearRegression_h__
2
- #define OLSLinearRegression_h__
3
-
4
- #include "LinearRegression.h"
5
-
6
- class OLSLinearRegression : public LinearRegression
7
- {
8
- public:
9
- OLSLinearRegression();
10
- ~OLSLinearRegression();
11
-
12
- void Execute();
13
-
14
- void EstimateBs();
15
-
16
- protected:
17
-
18
- private:
19
- };
20
-
21
-
22
-
23
- #endif // OLSLinearRegression_h__