harlequin 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +9 -3
- data/harlequin.gemspec +1 -0
- data/lib/harlequin.rb +59 -23
- data/lib/harlequin/version.rb +1 -1
- data/spec/harlequin_spec.rb +8 -0
- data/spec/spec_helper.rb +1 -0
- metadata +18 -7
data/README.markdown
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# About #
|
1
2
|
Harlequin is a gem that allows easy access to the linear and quadratic discriminant analysis functions of R. To use harlequin, initialize a DiscriminantAnalysis object with an array of variable names for analysis, and a classification variable name as a second argument, like so:
|
2
3
|
|
3
4
|
```ruby
|
@@ -20,7 +21,7 @@ Initialize linear or quadratic analysis with ```#init_lda_analysis``` or ```#ini
|
|
20
21
|
|
21
22
|
```ruby
|
22
23
|
analysis.init_lda_analysis
|
23
|
-
analysis.predict(:weight => 180, :height => 68)
|
24
|
+
analysis.predict(:weight => 180, :height => 68) # => {:class=>"male", :confidence=>0.9999999999666846}
|
24
25
|
```
|
25
26
|
|
26
27
|
Multiple predictions can be computed at once in the same way as adding multiple training rows.
|
@@ -28,5 +29,10 @@ Multiple predictions can be computed at once in the same way as adding multiple
|
|
28
29
|
In order to assess the effectiveness of adding a variable, the DiscriminantAnalysis class includes access to the two-sample t-test for difference in means between classes. This currently works for binary classification only.
|
29
30
|
|
30
31
|
```ruby
|
31
|
-
analysis.t_test(:weight)
|
32
|
-
```
|
32
|
+
analysis.t_test(:weight) # => {:t_statistic=>12.0748, :degrees_of_freedom=>1.471, :p_value=>0.01898}
|
33
|
+
```
|
34
|
+
|
35
|
+
# Requirements #
|
36
|
+
A Ruby script using Harlequin requires an R instance, so make sure you have a working copy of R installed on your system. The OS X binaries for R can be found [here](http://cran.r-project.org/bin/macosx/). See the documentation for RinRuby for more details.
|
37
|
+
|
38
|
+
You will also need the additional R packages MASS and alr3. These can be installed from the R command line by first choosing a mirror with ```chooseCRANmirror()``` and then installing with ```install.packages(c("MASS", "alr3"))```.
|
data/harlequin.gemspec
CHANGED
data/lib/harlequin.rb
CHANGED
@@ -4,6 +4,7 @@ module Harlequin
|
|
4
4
|
R.echo false
|
5
5
|
R.eval "library(MASS)"
|
6
6
|
R.eval "library(alr3)"
|
7
|
+
R.eval "library(RWeka)"
|
7
8
|
|
8
9
|
class DiscriminantAnalysis
|
9
10
|
attr_reader :training_data, :variables, :classification_variable, :accuracy, :class_hash
|
@@ -14,6 +15,7 @@ module Harlequin
|
|
14
15
|
@classification_variable = classification_variable
|
15
16
|
@training_data = []
|
16
17
|
@class_hash = {}
|
18
|
+
@current_analysis_type = nil
|
17
19
|
end
|
18
20
|
|
19
21
|
def clear_training_data
|
@@ -31,33 +33,54 @@ module Harlequin
|
|
31
33
|
end
|
32
34
|
end
|
33
35
|
|
34
|
-
# Returns the
|
36
|
+
# Returns the classification for an array of sample points.
|
35
37
|
def predict(*samples)
|
36
38
|
(variables - [classification_variable]).each do |var|
|
37
39
|
R.assign(var.to_s + "_sample", samples.map { |s| s[var] })
|
38
40
|
end
|
39
|
-
|
41
|
+
|
40
42
|
sample_var_declarations = (variables - [classification_variable]).map { |var| "#{var.to_s} = #{var.to_s}_sample" }.join(',')
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
43
|
+
|
44
|
+
if @current_analysis_type == :k_nearest_neighbor
|
45
|
+
R.eval <<-EOF
|
46
|
+
sample_points <- data.frame(#{sample_var_declarations})
|
47
|
+
|
48
|
+
predictions <- predict(fit, sample_points)
|
49
|
+
classes <- round(predictions)
|
50
|
+
d <- data.frame(classes, confidence=predictions)
|
51
|
+
EOF
|
52
|
+
|
53
|
+
prediction_matrix = R.pull "classes"
|
54
|
+
predictions = prediction_matrix.map do |row|
|
55
|
+
classification = row.to_i
|
56
|
+
{
|
57
|
+
:class => @class_hash.invert[classification]
|
58
|
+
}
|
59
|
+
end
|
60
|
+
|
61
|
+
predictions.count == 1 ? predictions.first : predictions
|
62
|
+
else
|
63
|
+
R.eval <<-EOF
|
64
|
+
sample_points <- data.frame(#{sample_var_declarations})
|
65
|
+
|
66
|
+
predictions <- predict(fit, sample_points)
|
67
|
+
classes <- as.numeric(predictions$class)
|
68
|
+
|
69
|
+
d <- data.frame(classes, confidence=predictions$posterior)
|
70
|
+
EOF
|
71
|
+
|
72
|
+
prediction_matrix = R.pull "as.matrix(d)"
|
73
|
+
predictions = prediction_matrix.to_a.map do |row|
|
74
|
+
classification = row.first.to_i
|
75
|
+
confidence = row[classification]
|
76
|
+
{
|
77
|
+
:class => @class_hash.invert[classification],
|
78
|
+
:confidence => confidence
|
79
|
+
}
|
80
|
+
end
|
81
|
+
|
82
|
+
predictions.count == 1 ? predictions.first : predictions
|
58
83
|
end
|
59
|
-
|
60
|
-
predictions.count == 1 ? predictions.first : predictions
|
61
84
|
end
|
62
85
|
|
63
86
|
# Performs a test of difference of means between classes
|
@@ -66,7 +89,7 @@ module Harlequin
|
|
66
89
|
R.eval "t_test <- t.test(#{variable.to_s} ~ #{classification_variable.to_s})"
|
67
90
|
|
68
91
|
t_statistic = R.pull "t_test$statistic"
|
69
|
-
degrees_of_freedom = R.pull "t_test$
|
92
|
+
degrees_of_freedom = R.pull "t_test$parameter"
|
70
93
|
p_value = R.pull "t_test$p.value"
|
71
94
|
|
72
95
|
{
|
@@ -110,9 +133,22 @@ module Harlequin
|
|
110
133
|
analysis_data <- data.frame(#{@var_declarations})
|
111
134
|
fit <- #{analysis_type}(#{classification_variable.to_s} ~ #{@non_class_variables}, data=analysis_data)
|
112
135
|
EOF
|
136
|
+
|
137
|
+
@current_analysis_type = analysis_type.to_sym
|
113
138
|
compute_accuracy
|
114
139
|
end
|
115
140
|
end
|
141
|
+
|
142
|
+
def init_knn_analysis
|
143
|
+
init_analysis
|
144
|
+
R.eval <<-EOF
|
145
|
+
analysis_data <- data.frame(#{@var_declarations})
|
146
|
+
fit <- IBk(#{classification_variable.to_s} ~ ., data=analysis_data)
|
147
|
+
EOF
|
148
|
+
|
149
|
+
@current_analysis_type = :k_nearest_neighbor
|
150
|
+
compute_accuracy
|
151
|
+
end
|
116
152
|
|
117
153
|
private
|
118
154
|
|
@@ -146,4 +182,4 @@ module Harlequin
|
|
146
182
|
end
|
147
183
|
end
|
148
184
|
|
149
|
-
include Harlequin
|
185
|
+
include Harlequin
|
data/lib/harlequin/version.rb
CHANGED
data/spec/harlequin_spec.rb
CHANGED
@@ -41,6 +41,14 @@ describe Harlequin::DiscriminantAnalysis do
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
+
it 'predicts for k-nearest neighbor classifiers' do
|
45
|
+
@discriminant_analysis.init_knn_analysis
|
46
|
+
samples = [@male_sample, @female_sample]
|
47
|
+
predictions = @discriminant_analysis.predict(*samples)
|
48
|
+
|
49
|
+
predictions.map { |row| row[:class] }.should eq [1,2]
|
50
|
+
end
|
51
|
+
|
44
52
|
it 'clears training data from a DiscriminantAnalysis instance' do
|
45
53
|
@discriminant_analysis.clear_training_data
|
46
54
|
@discriminant_analysis.training_data.should be_empty
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harlequin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-03-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70229741113520 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,21 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70229741113520
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: pry
|
27
|
+
requirement: &70229741113100 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70229741113100
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: rinruby
|
27
|
-
requirement: &
|
38
|
+
requirement: &70229741112680 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ! '>='
|
@@ -32,7 +43,7 @@ dependencies:
|
|
32
43
|
version: '0'
|
33
44
|
type: :runtime
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *70229741112680
|
36
47
|
description: harlequin is a Ruby wrapper for linear and quadratic discriminant analysis
|
37
48
|
in R for statistical classification. Also allows means testing to determine significance
|
38
49
|
of discriminant variables.
|
@@ -72,7 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
72
83
|
version: '0'
|
73
84
|
requirements: []
|
74
85
|
rubyforge_project: harlequin
|
75
|
-
rubygems_version: 1.8.
|
86
|
+
rubygems_version: 1.8.15
|
76
87
|
signing_key:
|
77
88
|
specification_version: 3
|
78
89
|
summary: Wrapper for discriminant analysis methods in R
|