bio-band 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +20 -0
- data/Gemfile.lock +79 -0
- data/Jarfile +9 -0
- data/Jarfile.lock +10 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +54 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/bin/bio-band +83 -0
- data/bio-band.gemspec +129 -0
- data/ext/mkrf_conf.rb +74 -0
- data/features/create_dataset.feature +12 -0
- data/features/step_definitions/create_dataset.rb +40 -0
- data/features/step_definitions/weka_classifiers.rb +42 -0
- data/features/step_definitions/weka_clustering.rb +30 -0
- data/features/step_definitions/weka_filters.rb +29 -0
- data/features/step_definitions/weka_parsers.rb +45 -0
- data/features/support/env.rb +3 -0
- data/features/weka_classifiers.feature +16 -0
- data/features/weka_clustering.feature +14 -0
- data/features/weka_filters.feature +12 -0
- data/features/weka_parsers.feature +18 -0
- data/features/weka_pipeline.feature +13 -0
- data/lib/bio-band.rb +10 -0
- data/lib/bio-band/apache.rb +1 -0
- data/lib/bio-band/apache/stat/inference.rb +145 -0
- data/lib/bio-band/core.rb +6 -0
- data/lib/bio-band/core/parser/parser.rb +23 -0
- data/lib/bio-band/core/type/apache_matrices.rb +35 -0
- data/lib/bio-band/core/type/attribute.rb +53 -0
- data/lib/bio-band/core/type/instance.rb +10 -0
- data/lib/bio-band/core/type/instances.rb +332 -0
- data/lib/bio-band/core/type/utils.rb +31 -0
- data/lib/bio-band/weka.rb +11 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes.rb +75 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +42 -0
- data/lib/bio-band/weka/classifiers/evaluation.rb +12 -0
- data/lib/bio-band/weka/classifiers/functions/functions.rb +23 -0
- data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +39 -0
- data/lib/bio-band/weka/classifiers/lazy/lazy.rb +23 -0
- data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +39 -0
- data/lib/bio-band/weka/classifiers/trees/trees.rb +48 -0
- data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +42 -0
- data/lib/bio-band/weka/clusterers/clusterers.rb +32 -0
- data/lib/bio-band/weka/clusterers/clusterers_utils.rb +49 -0
- data/lib/bio-band/weka/db/DatabaseUtils_mysql +280 -0
- data/lib/bio-band/weka/db/DatabaseUtils_postgresql +594 -0
- data/lib/bio-band/weka/db/db.rb +74 -0
- data/lib/bio-band/weka/filters/supervised/attribute/attribute.rb +25 -0
- data/lib/bio-band/weka/filters/supervised/instance/instance.rb +17 -0
- data/lib/bio-band/weka/filters/supervised/supervised_utils.rb +32 -0
- data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +70 -0
- data/lib/bio-band/weka/filters/unsupervised/instance/instance.rb +48 -0
- data/lib/bio-band/weka/filters/unsupervised/unsupervised_utils.rb +33 -0
- data/resources/weather.csv +15 -0
- data/resources/weather.numeric.arff +23 -0
- data/spec/bio-band_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- metadata +302 -0
data/ext/mkrf_conf.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
path = File.expand_path(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
# Reports whether an executable called +name+ can be found.
#
# The lookup is done in Ruby instead of shelling out to `which`, so +name+
# is never interpreted as shell syntax and nothing is printed.
#
# * *Args*    :
#   - +name+ -> program name (searched in ENV['PATH']) or a path to a file
# * *Returns* :
#   - +true+ when a matching executable regular file exists, +false+ otherwise
def command?(name)
  program = name.to_s
  return false if program.empty?

  executable = lambda { |file| File.file?(file) && File.executable?(file) }

  # A name that already contains a path separator is checked as given.
  return executable.call(program) if program.include?(File::SEPARATOR)

  ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).any? do |dir|
    # An empty PATH entry designates the current directory.
    executable.call(File.join(dir.empty? ? '.' : dir, program))
  end
end
|
7
|
+
|
8
|
+
# Predicates describing the host operating system, all derived from
# RbConfig::CONFIG['host_os']. Every predicate returns true or false.
module OS
  # True when host_os matches one of the Windows-family identifiers below.
  def self.windows?
    !(/cygwin|mswin|mingw|bccwin|wince|emx/ =~ RbConfig::CONFIG['host_os']).nil?
  end

  # True when host_os contains "darwin".
  def self.mac?
    !(/darwin/ =~ RbConfig::CONFIG['host_os']).nil?
  end

  # Any host that is not detected as Windows is treated as Unix.
  def self.unix?
    !windows?
  end

  # A Unix host that is not a Mac; no Linux-specific check is made.
  def self.linux?
    unix? && !mac?
  end
end
|
25
|
+
|
26
|
+
# Writes the Rakefile located next to this script. Its default task either
# installs Maven or reports what was detected, depending on the host OS.
File.open(File.join(path, "Rakefile"), "w") do |rakefile|
  # Task text shared by every branch in which Maven is already available.
  maven_detected = <<-RAKE
task :ok_inst do
  puts "Maven has been detected on your system"
end
task :default => [:ok_inst]
  RAKE

  if OS.windows?
    # Nothing is written to the Rakefile in this case.
    puts "Sorry, still no support is provided for your OS!"
  elsif OS.mac?
    if command?("mvn")
      # Checked first: when Maven is already present, Homebrew is not needed.
      rakefile.write maven_detected
    elsif command?("brew")
      rakefile.write <<-RAKE
task :brew_install do
  sh "brew install maven"
end
task :default => [:brew_install]
      RAKE
    else
      # Neither Maven nor Homebrew: the task only tells the user what to do.
      rakefile.write <<-RAKE
task :ok_inst do
  puts "Sorry, Maven could not be installed. Try installing 'brew' first"
end
task :default => [:ok_inst]
      RAKE
    end
  elsif OS.linux?
    if command?("mvn")
      rakefile.write maven_detected
    else
      rakefile.write <<-RAKE
task :apt_install do
  sh "sudo apt-get install maven2"
end
task :default => [:apt_install]
      RAKE
    end
  end
end
|
72
|
+
|
73
|
+
|
74
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Feature: Creation of an in-memory dataset
|
2
|
+
In order to perform calculations on a dataset
|
3
|
+
I want to easily build it and store it in memory
|
4
|
+
|
5
|
+
Scenario: creation of a dataset
|
6
|
+
Given a nominal attribute, named "assertion", with values "yes,no"
|
7
|
+
Given one numeric attribute, named "temperature"
|
8
|
+
Given another numeric attribute, names "days"
|
9
|
+
Given two data rows: "yes,100,30","no,100,0"
|
10
|
+
Then I want to build en empty dataset for my use
|
11
|
+
And I want to populate the dataset by row
|
12
|
+
And I want to print my dataset as a bidimensional Ruby Array
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Step definitions for the "Creation of an in-memory dataset" feature.

# Stores the nominal attribute's name and its comma-separated values in
# globals; they are read later inside the Dataset class body.
Given(/^a nominal attribute, named "(.*?)", with values "(.*?)"$/) do |attribute_name, value_list|
  $first_att = attribute_name.to_sym
  $values = value_list.split(',')
end

# Stores the name of the second attribute.
Given(/^one numeric attribute, named "(.*?)"$/) do |attribute_name|
  $second_att = attribute_name.to_sym
end

# Stores the name of the third attribute.
Given(/^another numeric attribute, names "(.*?)"$/) do |attribute_name|
  $third_att = attribute_name.to_sym
end

# Splits both rows on commas and converts the cells at index 1 and 2 to Float.
Given(/^two data rows: "(.*?)","(.*?)"$/) do |first_csv, second_csv|
  @first_row, @second_row = [first_csv, second_csv].map do |csv|
    cells = csv.split(',')
    [1, 2].each { |index| cells[index] = cells[index].to_f }
    cells
  end
end

# Declares the Dataset class from the stored attribute names, instantiates it
# and calls its summary.
Then(/^I want to build en empty dataset for my use$/) do
  class Dataset < Core::Type::Instances::Base
    nominal $first_att, $values
    numeric $second_att
    # NOTE(review): the feature calls this attribute numeric, yet it is
    # declared with `string` here - confirm which one is intended.
    string $third_att
  end
  @my_instance = Dataset.new
  @my_instance.summary
end

# Feeds the two stored rows to the dataset and calls its summary again.
Then(/^I want to populate the dataset by row$/) do
  rows = [@first_row, @second_row]
  @my_instance.populate_by_row(rows)
  @my_instance.summary
end

# Prints the inspected result of to_a2d.
Then(/^I want to print my dataset as a bidimensional Ruby Array$/) do
  table = @my_instance.to_a2d
  puts table.inspect
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Step definitions for the "Using a Weka classifier" feature.

# The captured classifier name is not used: a NaiveBayes instance is always
# created.
Given(/^the Weka "(.*?)" classifier$/) do |_classifier_name|
  @classifier = Weka::Classifier::Bayes::NaiveBayes.new
end

# Sends the quoted word to the classifier as a method name.
Then(/^I want to print a "(.*?)"$/) do |message|
  @classifier.send(message.to_sym)
end

Then(/^I want to print an options list$/) do
  @classifier.list_options
end

# Declares My_classifier on top of the NaiveBayes Base class; the captured
# name is not used.
Given(/^the unsupervised Weka classifier "(.*?)"$/) do |_classifier_name|
  class My_classifier < Weka::Classifier::Bayes::NaiveBayes::Base
  end
end

Then(/^I want to set option "(.*?)" for it$/) do |option_string|
  My_classifier.set_options(option_string)
end

# Parses the named ARFF file from the resources directory and hands the
# result to My_classifier.
Then(/^I want to set the dataset parsed from "(.*?)"$/) do |file_name|
  @arff = File.join('resources', file_name)
  @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
  My_classifier.set_data(@dataset_ARFF)
end

Then(/^I want to print a summary for the dataset$/) do
  @dataset_ARFF.summary
end

# The captured index arrives as text and is converted with to_i.
Then(/^I want to set the class index for attribute with index "(.*?)"$/) do |index_text|
  My_classifier.set_class_index(index_text.to_i)
end

Then(/^I want to instantiate the classifier for my use$/) do
  My_classifier.new
end
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Step definitions for the "Weka dataset clustering" feature.

# Remembers the path of the ARFF file inside the resources directory.
Given(/^the ARFF dataset stored in the file "(.*?)"$/) do |file_name|
  @arff = File.join('resources', file_name)
end

# Builds the object whose options are listed later. This used to create a
# NaiveBayes classifier, so the options printed were not SimpleKMeans'.
# NOTE(review): assumes Weka::Clusterer::SimpleKMeans can be instantiated
# directly and responds to list_options, like NaiveBayes does in the
# classifier steps - confirm against the clusterer wrapper.
Given(/^the SimpleKMeans algorithm implementation from Weka$/) do
  @clusterer = Weka::Clusterer::SimpleKMeans.new
end

Then(/^I want to parse the data from the file$/) do
  @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
end

Then(/^I want to list the options available for SimpleKMeans$/) do
  puts @clusterer.list_options
end

# Declares the Clustering class and passes the captured K as the "-N" option.
Then(/^I want to set K = "(.*?)" as K\-means option$/) do |k|
  class Clustering < Weka::Clusterer::SimpleKMeans::Base
  end
  Clustering.set_options "-N #{k}"
end

# Hands the parsed dataset to Clustering and keeps the new instance.
Then(/^I want to perform clustering on the parsed dataset$/) do
  Clustering.set_data(@dataset_ARFF)
  @clustered = Clustering.new
end

Then(/^I want to report result statistics$/) do
  puts @clustered
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Step definitions for the "Filter a dataset (Weka Instances class)" feature.

# Remembers the path of the ARFF file inside the resources directory.
Given(/^the example ARFF file "(.*?)"$/) do |file_name|
  @arff = File.join('resources', file_name)
end

Given(/^the Weka Attribute Add filter$/) do
  @filter = Weka::Filter::Unsupervised::Attribute::Add.new
end

# Parses the remembered file and calls summary on the result.
Then(/^I want to parse the file in order to create an Instances class object$/) do
  @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
  @dataset_ARFF.summary
end

Then(/^I want to print the available filter options and usage$/) do
  @filter.filter_options
end

# Sets the option string first, then attaches the parsed dataset.
Then(/^I want to set the option String "(.*?)"$/) do |option_string|
  @filter.set_filter_options(option_string)
  @filter.set_data(@dataset_ARFF)
end

# Keeps whatever the filter's `use` returns for the following step.
Then(/^I want to add an attribute \(a column\) to the dataset using the Weka filter Add$/) do
  @new_inst = @filter.use
end

# Sends the quoted word to the filtered dataset as a method name.
Then(/^I want to print a "(.*?)" for the modified dataset$/) do |message|
  @new_inst.send(message.to_sym)
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# Step definitions for the "Weka basic parsing capabilities" feature.

# Parses the named CSV file from the resources directory.
Given(/^the CSV file "(.*?)"$/) do |file_name|
  @csv = File.join('resources', file_name)
  @dataset_CSV = Core::Parser::parse_CSV(@csv)
end

Then(/^I want to print to stdout the summary for the CSV parsed Instances object$/) do
  @dataset_CSV.summary
end

# Prints the resolved path, then parses the named ARFF file.
Given(/^the ARFF file "(.*?)"$/) do |file_name|
  @arff = File.join('resources', file_name)
  puts @arff
  @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
end

Then(/^I want to print to stdout the summary for the ARFF parsed Instances object$/) do
  @dataset_ARFF.summary
end

# Remembers the connection string given in the feature.
Given(/^the database "(.*?)"$/) do |connection_url|
  @db_connection = connection_url
end

Given(/^a table named "(.*?)"$/) do |table_name|
  @target_table = table_name
end

# Queries every row of the remembered table as user 'root' with an empty
# password.
Then(/^I want to extract data from that table$/) do
  query = "select * from #{@target_table}"
  @dataset = Weka::Db.query_mysql(@db_connection, 'root', '', query)
end

Then(/^I want to print to stdout the summary for the parsed Instances object$/) do
  @dataset.summary
end

# Compares to_a2d with the fourteen rows listed below.
Then(/^I want to convert the data into a bidimensional Ruby Array$/) do
  expected_rows = [
    ["sunny", 85.0, 85.0, "FALSE", "no"],
    ["sunny", 80.0, 90.0, "TRUE", "no"],
    ["overcast", 83.0, 86.0, "FALSE", "yes"],
    ["rainy", 70.0, 96.0, "FALSE", "yes"],
    ["rainy", 68.0, 80.0, "FALSE", "yes"],
    ["rainy", 65.0, 70.0, "TRUE", "no"],
    ["overcast", 64.0, 65.0, "TRUE", "yes"],
    ["sunny", 72.0, 95.0, "FALSE", "no"],
    ["sunny", 69.0, 70.0, "FALSE", "yes"],
    ["rainy", 75.0, 80.0, "FALSE", "yes"],
    ["sunny", 75.0, 70.0, "TRUE", "yes"],
    ["overcast", 72.0, 90.0, "TRUE", "yes"],
    ["overcast", 81.0, 75.0, "FALSE", "yes"],
    ["rainy", 71.0, 91.0, "TRUE", "no"]
  ]
  @dataset.to_a2d.should == expected_rows
end
|
43
|
+
|
44
|
+
|
45
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Feature: Using a Weka classifier
|
2
|
+
In order to classify an instance data
|
3
|
+
I want to use a Weka classifier
|
4
|
+
|
5
|
+
Scenario: Understand options and usage
|
6
|
+
Given the Weka "NaiveBayes" classifier
|
7
|
+
Then I want to print a "description"
|
8
|
+
And I want to print an options list
|
9
|
+
|
10
|
+
Scenario: Use a classifier on a data instance
|
11
|
+
Given the unsupervised Weka classifier "NaiveBayes"
|
12
|
+
Then I want to set option "-K" for it
|
13
|
+
And I want to set the dataset parsed from "weather.numeric.arff"
|
14
|
+
And I want to print a summary for the dataset
|
15
|
+
And I want to set the class index for attribute with index "0"
|
16
|
+
And I want to instantiate the classifier for my use
|
@@ -0,0 +1,14 @@
|
|
1
|
+
Feature: Weka dataset clustering
|
2
|
+
In order to group similar data vectors in my dataset
|
3
|
+
I want to use Weka clustering algorithms
|
4
|
+
|
5
|
+
Scenario: use of Kmeans algorithm
|
6
|
+
Given the ARFF dataset stored in the file "weather.numeric.arff"
|
7
|
+
Given the SimpleKMeans algorithm implementation from Weka
|
8
|
+
Then I want to parse the data from the file
|
9
|
+
And I want to list the options available for SimpleKMeans
|
10
|
+
And I want to set K = "4" as K-means option
|
11
|
+
And I want to perform clustering on the parsed dataset
|
12
|
+
And I want to report result statistics
|
13
|
+
|
14
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Feature: Filter a dataset (Weka Instances class)
|
2
|
+
In order to manipulate a dataset
|
3
|
+
I want to use the Weka Filter class on it
|
4
|
+
|
5
|
+
Scenario: Use of the Unsupervised Attribute filter 'Add'
|
6
|
+
Given the example ARFF file "weather.numeric.arff"
|
7
|
+
Given the Weka Attribute Add filter
|
8
|
+
Then I want to parse the file in order to create an Instances class object
|
9
|
+
And I want to print the available filter options and usage
|
10
|
+
And I want to set the option String "-T NUM -N dummy"
|
11
|
+
And I want to add an attribute (a column) to the dataset using the Weka filter Add
|
12
|
+
And I want to print a "summary" for the modified dataset
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Feature: Weka basic parsing capabilities
|
2
|
+
In order to perform calculations on a dataset
|
3
|
+
I want to import data from .ARFF, .CSV files, and external databases
|
4
|
+
|
5
|
+
Scenario: parsing a CSV file
|
6
|
+
Given the CSV file "weather.csv"
|
7
|
+
Then I want to print to stdout the summary for the CSV parsed Instances object
|
8
|
+
|
9
|
+
Scenario: parsing an ARFF file
|
10
|
+
Given the ARFF file "weather.numeric.arff"
|
11
|
+
Then I want to print to stdout the summary for the ARFF parsed Instances object
|
12
|
+
|
13
|
+
Scenario: parsing data from a mySQL table
|
14
|
+
Given the database "jdbc:mysql://localhost:3306/Gene_classes"
|
15
|
+
And a table named "test_weka"
|
16
|
+
Then I want to extract data from that table
|
17
|
+
And I want to print to stdout the summary for the parsed Instances object
|
18
|
+
And I want to convert the data into a bidimensional Ruby Array
|
@@ -0,0 +1,13 @@
|
|
1
|
+
Feature: Classification pipeline
|
2
|
+
In order to perform text-mining on a dataset
|
3
|
+
I want to parse the data, filter it and then classify it using a Bayesian classifier
|
4
|
+
|
5
|
+
Scenario: Use of Naive Bayes on a filtered data set
|
6
|
+
Given a file containing the training set data "ReutersGrain-train.arff"
|
7
|
+
Given a file containing the test set data "ReutersGrain-test.arff"
|
8
|
+
Then I want to parse them
|
9
|
+
And I want to filter them using the unsupervised filter "StringToWordVector"
|
10
|
+
And I want to build a "NaiveBayes" classifier using training set data
|
11
|
+
And I want to evaluate the performance of the classifier on the test set
|
12
|
+
And I want to print to stdout a "summary" for the evaluation
|
13
|
+
|
data/lib/bio-band.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# jbundler is required with the parent directory of this file's directory as
# the working directory (presumably so it finds the gem's Jarfile - confirm).
# The block form of Dir.chdir restores the caller's working directory
# afterwards, even when the require raises.
Dir.chdir(File.join(File.dirname(__FILE__), "..")) { require 'jbundler' }

# Classpath file kept in the .jbundler directory beside lib/.
require File.join(File.dirname(__FILE__), '..', '.jbundler', 'classpath.rb')
require "java"
require "bio-band/core"
require "bio-band/weka"
require "bio-band/apache"
|
10
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'bio-band/apache/stat/inference.rb'
|
@@ -0,0 +1,145 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Apache
|
4
|
+
module Stat
|
5
|
+
module Inference
|
6
|
+
|
7
|
+
java_import 'org.apache.commons.math3.stat.inference.ChiSquareTest'
|
8
|
+
java_import 'org.apache.commons.math3.stat.inference.MannWhitneyUTest'
|
9
|
+
java_import 'org.apache.commons.math3.stat.inference.OneWayAnova'
|
10
|
+
java_import 'org.apache.commons.math3.stat.inference.TTest'
|
11
|
+
java_import 'org.apache.commons.math3.stat.inference.WilcoxonSignedRankTest'
|
12
|
+
java_import 'org.apache.commons.math3.stat.StatUtils'
|
13
|
+
java_import 'java.util.ArrayList'
|
14
|
+
|
15
|
+
# Runs the Wilcoxon signed-rank test on two samples.
# * *Args*    :
#   - +array_1+ -> must be a RubyArray.
#   - +array_2+ -> must be a RubyArray.
# * *Returns* :
#   - the value of wilcoxonSignedRank followed by the value of
#     wilcoxonSignedRankTest (called with its boolean argument set to true)
def self.wilcoxon_test(array_1, array_2)
  tester = WilcoxonSignedRankTest.new
  # Both samples go through Core::Utils::double_to_a before reaching Java.
  sample_x = Core::Utils::double_to_a(array_1)
  sample_y = Core::Utils::double_to_a(array_2)
  statistic = tester.wilcoxonSignedRank(sample_x, sample_y)
  significance = tester.wilcoxonSignedRankTest(sample_x, sample_y, true.to_java(:boolean))
  [statistic, significance]
end
|
27
|
+
|
28
|
+
# Helper class behind the 'chi_square' method of this package. Each method
# returns the statistic followed by the p-value.
class Chi_square
  # Handles a single bidimensional RubyArray, converted to an array of
  # long arrays.
  def self.chi_square_2d(array_2d)
    tester = ChiSquareTest.new
    counts = array_2d.to_java(Java::long[])
    statistic = tester.chi_square(counts)
    significance = tester.chi_square_test(array_2d.to_java(Java::long[]))
    [statistic, significance]
  end

  # Handles two RubyArrays: +expected+ is converted to doubles and
  # +observed+ to longs.
  def self.chi_square_two_arrays(expected, observed)
    tester = ChiSquareTest.new
    statistic = tester.chi_square(expected.to_java(:double), observed.to_java(:long))
    significance = tester.chi_square_test(expected.to_java(:double), observed.to_java(:long))
    [statistic, significance]
  end
end
|
44
|
+
|
45
|
+
# Chi-square statistic and p-value; the form depends on the argument count.
#
# 1) Two arguments: computes the Chi-Square statistic comparing observed and
#    expected frequency counts.
# * *Args*    :
#   - +expected+ -> must be a RubyArray (converted to doubles).
#   - +observed+ -> must be a RubyArray (converted to longs).
# 2) One argument: computes the Chi-Square statistic associated with a
#    chi-square test of independence based on the input counts array,
#    viewed as a two-way table.
# * *Args*    :
#   - +Array+ -> must be a bidimensional RubyArray.
# * *Returns* :
#   - the statistic followed by the p-value
# * *Raises*  :
#   - +ArgumentError+ -> when the single argument is not bidimensional, or
#     when the number of arguments is neither one nor two
def self.chi_square(*args)
  case args.length
  when 2
    Chi_square.chi_square_two_arrays(*args)
  when 1
    raise ArgumentError, "RubyArray must be bidimensional" unless args[0].is_2d?
    Chi_square.chi_square_2d(*args)
  else
    raise ArgumentError, 'Function *args should be two RubyArrays or a bidimensional RubyArray'
  end
end
|
63
|
+
|
64
|
+
# Compares two observed data sets with chiSquareDataSetsComparison.
# * *Args*    :
#   - +observed1+ -> RubyArray, converted to longs
#   - +observed2+ -> RubyArray, converted to longs
# * *Returns* :
#   - the statistic followed by the p-value from
#     chiSquareTestDataSetsComparison
def self.chi_square_dataset_compare(observed1, observed2)
  tester = ChiSquareTest.new
  statistic = tester.chiSquareDataSetsComparison(observed1.to_java(:long), observed2.to_java(:long))
  significance = tester.chiSquareTestDataSetsComparison(observed1.to_java(:long), observed2.to_java(:long))
  [statistic, significance]
end
|
70
|
+
|
71
|
+
# Runs the Mann-Whitney U test on two samples.
# * *Args*    :
#   - +array1+ -> must be a RubyArray.
#   - +array2+ -> must be a RubyArray.
# * *Returns* :
#   - the value of mannWhitneyU followed by the p-value of mannWhitneyUTest
def self.mann_whitney_u(array1, array2)
  obj = MannWhitneyUTest.new
  first = array1.to_java :double
  second = array2.to_java :double
  value = obj.mannWhitneyU(first, second)
  p_value = obj.mannWhitneyUTest(first, second)
  return value, p_value
end

# Instance-level form, kept for code that mixes this module in; it forwards
# to the module-level method above.
def mann_whitney_u(array1, array2)
  Inference.mann_whitney_u(array1, array2)
end
|
79
|
+
|
80
|
+
# Helper class behind the 't_test' method of this package. Every method
# converts both samples to double arrays and returns the statistic followed
# by the p-value.
class T_test
  # Uses homoscedasticT and homoscedasticTTest.
  def self.homoscedastic(array_1, array_2)
    engine = TTest.new
    sample_x = array_1.to_java(:double)
    sample_y = array_2.to_java(:double)
    [engine.homoscedasticT(sample_x, sample_y), engine.homoscedasticTTest(sample_x, sample_y)]
  end

  # Uses pairedT and pairedTTest.
  def self.paired(array_1, array_2)
    engine = TTest.new
    sample_x = array_1.to_java(:double)
    sample_y = array_2.to_java(:double)
    [engine.pairedT(sample_x, sample_y), engine.pairedTTest(sample_x, sample_y)]
  end

  # Uses t and tTest.
  def self.t(array_1, array_2)
    engine = TTest.new
    sample_x = array_1.to_java(:double)
    sample_y = array_2.to_java(:double)
    [engine.t(sample_x, sample_y), engine.tTest(sample_x, sample_y)]
  end
end
|
110
|
+
|
111
|
+
# Student's t-test entry point; picks the T_test variant from the flags.
# * *Args*    :
#   - +sample_1+ -> an array of numeric values representing a sample
#   - +sample_2+ -> an array of numeric values representing a sample
#   - +homoscedastic+ -> set to true for equal variance assumption
#   - +paired+ -> set to true if you want to perform a 'paired' t test
# * *Returns* :
#   - whatever the selected T_test method returns
#
# A flag only counts when it is exactly +true+; when both are +true+ the
# homoscedastic variant is used.
def self.t_test(sample_1, sample_2, homoscedastic=false, paired=false)
  return T_test.homoscedastic(sample_1, sample_2) if homoscedastic == true
  return T_test.paired(sample_1, sample_2) if paired == true

  T_test.t(sample_1, sample_2)
end
|
126
|
+
|
127
|
+
# One-way ANOVA (analysis of variance) statistics.
# Tests for differences between two or more categories of univariate data
# (for example, the body mass index of accountants, lawyers, doctors and
# computer programmers). When two categories are given, this is equivalent
# to the TTest.
# * *Args*    :
#   - +bidimensional_array+ -> a 2d RubyArray
# * *Returns* :
#   - the value of anovaFValue followed by the value of anovaPValue
def self.one_way_anova(bidimensional_array)
  # Each inner array becomes a double[] inside a java.util.ArrayList.
  groups = bidimensional_array.each_with_object(ArrayList.new) do |category, list|
    list.add(category.to_java(:double))
  end
  anova = OneWayAnova.new
  [anova.anovaFValue(groups), anova.anovaPValue(groups)]
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|