ruby-band 0.1.11
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +3 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +119 -0
- data/Jarfile +9 -0
- data/Jarfile.lock +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +321 -0
- data/README.rdoc +70 -0
- data/Rakefile +66 -0
- data/VERSION +1 -0
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +90 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bin/ruby-band +83 -0
- data/ext/mkrf_conf.rb +74 -0
- data/features/create_dataset.feature +12 -0
- data/features/step_definitions/create_dataset.rb +39 -0
- data/features/step_definitions/weka_classifiers.rb +43 -0
- data/features/step_definitions/weka_clustering.rb +34 -0
- data/features/step_definitions/weka_filters.rb +32 -0
- data/features/step_definitions/weka_parsers.rb +46 -0
- data/features/step_definitions/weka_pipeline.rb +41 -0
- data/features/support/env.rb +3 -0
- data/features/weka_classifiers.feature +16 -0
- data/features/weka_clustering.feature +15 -0
- data/features/weka_filters.feature +12 -0
- data/features/weka_parsers.feature +18 -0
- data/features/weka_pipeline.feature +14 -0
- data/lib/ruby-band.rb +12 -0
- data/lib/ruby-band/apache.rb +2 -0
- data/lib/ruby-band/apache/stat/correlation.rb +42 -0
- data/lib/ruby-band/apache/stat/inference.rb +151 -0
- data/lib/ruby-band/apache/stat/regression.rb +22 -0
- data/lib/ruby-band/core.rb +6 -0
- data/lib/ruby-band/core/parser/parser.rb +27 -0
- data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
- data/lib/ruby-band/core/type/attribute.rb +53 -0
- data/lib/ruby-band/core/type/instance.rb +10 -0
- data/lib/ruby-band/core/type/instances.rb +361 -0
- data/lib/ruby-band/core/type/utils.rb +31 -0
- data/lib/ruby-band/weka.rb +14 -0
- data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
- data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
- data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
- data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
- data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
- data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
- data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
- data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
- data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
- data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
- data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
- data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
- data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
- data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
- data/lib/ruby-band/weka/db/db.rb +74 -0
- data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
- data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
- data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
- data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
- data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
- data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
- data/resources/ReutersGrain-test.arff +611 -0
- data/resources/ReutersGrain-train.arff +1561 -0
- data/resources/weather.csv +15 -0
- data/resources/weather.numeric.arff +23 -0
- data/ruby-band.gemspec +178 -0
- data/spec/ruby-band_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_ruby-band.rb +9 -0
- metadata +426 -0
data/ext/mkrf_conf.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
path = File.expand_path(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
# Reports whether an executable called +name+ can be located with `which`.
#
# The lookup is run without a shell and with its output discarded, so +name+
# is always passed to `which` as one literal argument.
#
# @param name [String] executable name to look up (e.g. "mvn")
# @return [Boolean] strictly +true+ or +false+; callers in this script compare
#   the result with `== true` / `== false`, so +nil+ is never returned
def command?(name)
  # Kernel#system yields nil when `which` itself cannot be started; report that as false.
  system('which', name.to_s, out: File::NULL, err: File::NULL) ? true : false
end
|
7
|
+
|
8
|
+
# Host operating-system predicates, all derived from RbConfig::CONFIG['host_os'].
module OS
  # True when host_os contains one of: cygwin, mswin, mingw, bccwin, wince, emx.
  def self.windows?
    !(RbConfig::CONFIG['host_os'] =~ /cygwin|mswin|mingw|bccwin|wince|emx/).nil?
  end

  # True when host_os contains "darwin".
  def self.mac?
    !(RbConfig::CONFIG['host_os'] =~ /darwin/).nil?
  end

  # True for every host that windows? does not match.
  def self.unix?
    !windows?
  end

  # True for every unix? host that is not mac?.
  def self.linux?
    unix? && !mac?
  end
end
|
25
|
+
|
26
|
+
# Writes a Rakefile next to this script. Its default task either installs
# Maven (Homebrew on macOS, apt-get elsewhere on non-Windows hosts) or just
# reports the outcome of the Maven lookup.

# Rakefile used whenever `mvn` is already available.
maven_detected = <<-RAKE
task :ok_inst do
  puts "Maven has been detected on your system"
end
task :default => [:ok_inst]
RAKE

File.open(File.join(path, "Rakefile"), "w") do |rakefile|
  if OS.windows?
    # Nothing is written in this branch, so the Rakefile is left empty.
    puts "Sorry, still no support is provided for your OS!"

  elsif OS.mac?
    # Look for Maven first: a machine that has `mvn` but no `brew` must not be
    # told that Maven could not be installed.
    if command?("mvn")
      rakefile.write maven_detected
    elsif command?("brew")
      rakefile.write <<-RAKE
task :brew_install do
  sh "brew install maven"
end
task :default => [:brew_install]
      RAKE
    else
      rakefile.write <<-RAKE
task :ok_inst do
  puts "Sorry, Maven could not be installed. Try installing 'brew' first"
end
task :default => [:ok_inst]
      RAKE
    end

  elsif OS.linux?
    if command?("mvn")
      rakefile.write maven_detected
    else
      rakefile.write <<-RAKE
task :apt_install do
  sh "sudo apt-get install maven2"
end
task :default => [:apt_install]
      RAKE
    end
  end
end
|
72
|
+
|
73
|
+
|
74
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Feature: Creation of an in-memory dataset
|
2
|
+
In order to perform calculations on a dataset
|
3
|
+
I want to easily build it and store it in memory
|
4
|
+
|
5
|
+
Scenario: creation of a dataset
|
6
|
+
Given a nominal attribute, named "assertion", with values "yes,no"
|
7
|
+
Given one numeric attribute, named "temperature"
|
8
|
+
Given another numeric attribute, names "days"
|
9
|
+
Given two data rows: "yes,100,30","no,100,0"
|
10
|
+
Then I want to build en empty dataset for my use
|
11
|
+
And I want to populate the dataset by row
|
12
|
+
And I want to print my dataset as a bidimensional Ruby Array
|
@@ -0,0 +1,39 @@
|
|
1
|
+
Given(/^a nominal attribute, named "(.*?)", with values "(.*?)"$/) do |arg1, arg2|
|
2
|
+
$first_att = arg1.to_sym
|
3
|
+
$values = arg2.split(',')
|
4
|
+
end
|
5
|
+
|
6
|
+
Given(/^one numeric attribute, named "(.*?)"$/) do |arg1|
|
7
|
+
$second_att = arg1.to_sym
|
8
|
+
end
|
9
|
+
|
10
|
+
Given(/^another numeric attribute, names "(.*?)"$/) do |arg1|
|
11
|
+
$third_att = arg1.to_sym
|
12
|
+
end
|
13
|
+
|
14
|
+
# Stores the two quoted rows in $first_row / $second_row. Each row is split on
# commas and the cells at index 1 and 2 are converted with to_f; the cell at
# index 0 stays a String.
Given(/^two data rows: "(.*?)","(.*?)"$/) do |arg1, arg2|
  $first_row, $second_row = [arg1, arg2].map do |raw_row|
    cells = raw_row.split(',')
    [1, 2].each { |position| cells[position] = cells[position].to_f }
    cells
  end
end
|
22
|
+
|
23
|
+
# Builds an empty dataset in $my_instance from the attribute names and nominal
# values captured by the preceding steps, then calls its summary.
Then(/^I want to build en empty dataset for my use$/) do
  $my_instance = Core::Type::Instances::Base.new do
    nominal $first_att, $values
    numeric $second_att
    # The scenario introduces this attribute as numeric and the row step
    # converts its values with to_f, so it is declared numeric, not string.
    numeric $third_att
  end
  $my_instance.summary
end
|
31
|
+
|
32
|
+
Then(/^I want to populate the dataset by row$/) do
|
33
|
+
$my_instance.populate_by_row([$first_row,$second_row])
|
34
|
+
$my_instance.summary
|
35
|
+
end
|
36
|
+
|
37
|
+
Then(/^I want to print my dataset as a bidimensional Ruby Array$/) do
|
38
|
+
puts $my_instance.to_a2d.inspect
|
39
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
Given(/^the Weka "(.*?)" classifier$/) do |arg1|
|
2
|
+
@classifier = Weka::Classifier::Bayes::NaiveBayes.new
|
3
|
+
end
|
4
|
+
|
5
|
+
Then(/^I want to print a "(.*?)"$/) do |arg1|
|
6
|
+
@classifier.send arg1.to_sym
|
7
|
+
end
|
8
|
+
|
9
|
+
Then(/^I want to print an options list$/) do
|
10
|
+
@classifier.list_options
|
11
|
+
end
|
12
|
+
|
13
|
+
Given(/^the unsupervised Weka classifier "(.*?)"$/) do |arg1|
|
14
|
+
class My_classifier < Weka::Classifier::Bayes::NaiveBayes::Base
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
Then(/^I want to set option "(.*?)" for it$/) do |arg1|
|
19
|
+
My_classifier.set_options(arg1)
|
20
|
+
end
|
21
|
+
|
22
|
+
Then(/^I want to set the dataset parsed from "(.*?)"$/) do |arg1|
|
23
|
+
@arff = File.join('resources',arg1)
|
24
|
+
@dataset_ARFF = Core::Parser::parse_ARFF(@arff)
|
25
|
+
My_classifier.set_data(@dataset_ARFF)
|
26
|
+
end
|
27
|
+
|
28
|
+
Then(/^I want to print a summary for the dataset$/) do
|
29
|
+
@dataset_ARFF.summary
|
30
|
+
end
|
31
|
+
|
32
|
+
Then(/^I want to set the class index for attribute with index "(.*?)"$/) do |arg1|
|
33
|
+
My_classifier.set_class_index(arg1.to_i)
|
34
|
+
end
|
35
|
+
|
36
|
+
Then(/^I want to instantiate the classifier for my use$/) do
|
37
|
+
classifier = My_classifier.new
|
38
|
+
classifier.cross_validate(2)
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
Given(/^the ARFF dataset stored in the file "(.*?)"$/) do |arg1|
|
2
|
+
@arff = File.join('resources',arg1)
|
3
|
+
end
|
4
|
+
|
5
|
+
Given(/^the SimpleKMeans algorithm implementation from Weka$/) do
|
6
|
+
@clusterer = Weka::Clusterer::SimpleKMeans.new
|
7
|
+
end
|
8
|
+
|
9
|
+
Then(/^I want to parse the data from the file$/) do
|
10
|
+
@dataset_ARFF = Core::Parser::parse_ARFF(@arff)
|
11
|
+
end
|
12
|
+
|
13
|
+
Then(/^I want to list the options available for SimpleKMeans$/) do
|
14
|
+
puts @clusterer.list_options
|
15
|
+
end
|
16
|
+
|
17
|
+
Then(/^I want to set K = "(.*?)" as K\-means option$/) do |arg1|
|
18
|
+
class Clustering < Weka::Clusterer::SimpleKMeans::Base
|
19
|
+
end
|
20
|
+
Clustering.set_options "-N #{arg1}"
|
21
|
+
end
|
22
|
+
|
23
|
+
Then(/^I want to perform clustering on the parsed dataset$/) do
|
24
|
+
Clustering.set_data(@dataset_ARFF)
|
25
|
+
@clustered = Clustering.new
|
26
|
+
end
|
27
|
+
|
28
|
+
Then(/^I want to report result statistics$/) do
|
29
|
+
puts @clustered
|
30
|
+
end
|
31
|
+
|
32
|
+
Then(/^I want to use Weka clustering cross\-validation$/) do
|
33
|
+
puts @clustered.evaluate
|
34
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
Given(/^the example ARFF file "(.*?)"$/) do |arff_file|
|
2
|
+
@arff = File.join('resources',arff_file)
|
3
|
+
end
|
4
|
+
|
5
|
+
Given(/^the Weka Attribute Add filter$/) do
|
6
|
+
@filter = Weka::Filter::Unsupervised::Attribute::Add.new
|
7
|
+
end
|
8
|
+
|
9
|
+
Then(/^I want to parse the file in order to create an Instances class object$/) do
|
10
|
+
$dataset_ARFF = Core::Parser::parse_ARFF(@arff)
|
11
|
+
puts $dataset_ARFF.summary
|
12
|
+
end
|
13
|
+
|
14
|
+
Then(/^I want to print the available filter options and usage$/) do
|
15
|
+
@filter.options_list
|
16
|
+
end
|
17
|
+
|
18
|
+
Then(/^I want to set the option String "(.*?)"$/) do |arg1|
|
19
|
+
$arg1 = arg1
|
20
|
+
@filter.set do
|
21
|
+
filter_options $arg1
|
22
|
+
data $dataset_ARFF
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
Then(/^I want to add an attribute \(a column\) to the dataset using the Weka filter Add$/) do
|
27
|
+
@new_inst = @filter.use
|
28
|
+
end
|
29
|
+
|
30
|
+
Then(/^I want to print a "(.*?)" for the modified dataset$/) do |arg1|
|
31
|
+
@new_inst.send arg1.to_sym
|
32
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
Given /^the CSV file "(.*?)"$/ do |csv_file|
|
2
|
+
@csv = File.join('resources',csv_file)
|
3
|
+
@dataset_CSV = Core::Parser::parse_CSV(@csv)
|
4
|
+
end
|
5
|
+
|
6
|
+
Then /^I want to print to stdout the summary for the CSV parsed Instances object$/ do
|
7
|
+
@dataset_CSV.summary
|
8
|
+
end
|
9
|
+
|
10
|
+
Given /^the ARFF file "(.*?)"$/ do |arff_file|
|
11
|
+
@arff = File.join('resources',arff_file)
|
12
|
+
puts @arff
|
13
|
+
@dataset_ARFF = Core::Parser::parse_ARFF(@arff)
|
14
|
+
end
|
15
|
+
|
16
|
+
Then /^I want to print to stdout the summary for the ARFF parsed Instances object$/ do
|
17
|
+
@dataset_ARFF.summary
|
18
|
+
end
|
19
|
+
|
20
|
+
Given(/^the database "(.*?)"$/) do |arg1|
|
21
|
+
@db_connection = arg1
|
22
|
+
end
|
23
|
+
|
24
|
+
Given(/^a table named "(.*?)"$/) do |arg1|
|
25
|
+
@target_table = arg1
|
26
|
+
end
|
27
|
+
|
28
|
+
Then(/^I want to extract data from that table$/) do
|
29
|
+
# INSERT HERE MySQL DB CREDENTIALS in order to test this parsing functionality
|
30
|
+
# @dataset = Weka::Db.query_mysql(@db_connection,'root','',"select * from #{@target_table}")
|
31
|
+
end
|
32
|
+
|
33
|
+
Then(/^I want to print to stdout the summary for the parsed Instances object$/) do
|
34
|
+
# @dataset.summary
|
35
|
+
end
|
36
|
+
|
37
|
+
Then(/^I want to convert the data into a bidimensional Ruby Array$/) do
|
38
|
+
# @dataset.to_a2d.should == [["sunny", 85.0, 85.0, "FALSE", "no"], ["sunny", 80.0, 90.0, "TRUE", "no"], ["overcast", 83.0, 86.0, "FALSE", "yes"],
|
39
|
+
# ["rainy", 70.0, 96.0, "FALSE", "yes"], ["rainy", 68.0, 80.0, "FALSE", "yes"], ["rainy", 65.0, 70.0, "TRUE", "no"],
|
40
|
+
# ["overcast", 64.0, 65.0, "TRUE", "yes"], ["sunny", 72.0, 95.0, "FALSE", "no"], ["sunny", 69.0, 70.0, "FALSE", "yes"],
|
41
|
+
# ["rainy", 75.0, 80.0, "FALSE", "yes"], ["sunny", 75.0, 70.0, "TRUE", "yes"], ["overcast", 72.0, 90.0, "TRUE", "yes"],
|
42
|
+
# ["overcast", 81.0, 75.0, "FALSE", "yes"], ["rainy", 71.0, 91.0, "TRUE", "no"]]
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
|
@@ -0,0 +1,41 @@
|
|
1
|
+
Given(/^a file containing the training set data "(.*?)"$/) do |arg1|
|
2
|
+
@training_file = File.join('resources',arg1)
|
3
|
+
end
|
4
|
+
|
5
|
+
Given(/^a file containing the test set data "(.*?)"$/) do |arg1|
|
6
|
+
@test_file = File.join('resources',arg1)
|
7
|
+
end
|
8
|
+
|
9
|
+
Then(/^I want to parse them$/) do
|
10
|
+
$training_set = Core::Parser::parse_ARFF(@training_file)
|
11
|
+
$test_set = Core::Parser::parse_ARFF(@test_file)
|
12
|
+
end
|
13
|
+
|
14
|
+
Then(/^I want to filter them using the unsupervised filter "(.*?)"$/) do |arg1|
|
15
|
+
filter = Weka::Filter::Unsupervised::Attribute::StringToWordVector.new
|
16
|
+
filter.set do
|
17
|
+
data $training_set
|
18
|
+
end
|
19
|
+
$filtered_training_set = filter.use
|
20
|
+
|
21
|
+
$filtered_test_set = filter.use $test_set
|
22
|
+
$filtered_test_set.set_class_index 0
|
23
|
+
end
|
24
|
+
|
25
|
+
Then(/^I want to build a "(.*?)" classifier using training set data$/) do |arg1|
|
26
|
+
$bayes = Weka::Classifier::Bayes::ComplementNaiveBayes::Base.new do
|
27
|
+
set_data $filtered_training_set
|
28
|
+
set_class_index 0
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
Then(/^I want to evaluate the performance of the classifier on the test set$/) do
|
34
|
+
@evaluator = Weka::Classifier::Evaluation.new $filtered_training_set
|
35
|
+
@evaluator.evaluate_model($bayes,$filtered_test_set)
|
36
|
+
end
|
37
|
+
|
38
|
+
Then(/^I want want to print to stdout a summary of the evaluation$/) do
|
39
|
+
puts @evaluator.summary
|
40
|
+
end
|
41
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Feature: Using a Weka classifier
|
2
|
+
In order to classify a data instance
|
3
|
+
I want to use a Weka classifier
|
4
|
+
|
5
|
+
Scenario: Understand options and usage
|
6
|
+
Given the Weka "NaiveBayes" classifier
|
7
|
+
Then I want to print a "description"
|
8
|
+
And I want to print an options list
|
9
|
+
|
10
|
+
Scenario: Use a classifier on a data instance
|
11
|
+
Given the unsupervised Weka classifier "NaiveBayes"
|
12
|
+
Then I want to set option "-K" for it
|
13
|
+
And I want to set the dataset parsed from "weather.numeric.arff"
|
14
|
+
And I want to print a summary for the dataset
|
15
|
+
And I want to set the class index for attribute with index "0"
|
16
|
+
And I want to instantiate the classifier for my use
|
@@ -0,0 +1,15 @@
|
|
1
|
+
Feature: Weka dataset clustering
|
2
|
+
In order to group similar data vectors in my dataset
|
3
|
+
I want to use Weka clustering algorithms
|
4
|
+
|
5
|
+
Scenario: use of Kmeans algorithm
|
6
|
+
Given the ARFF dataset stored in the file "weather.numeric.arff"
|
7
|
+
Given the SimpleKMeans algorithm implementation from Weka
|
8
|
+
Then I want to parse the data from the file
|
9
|
+
And I want to list the options available for SimpleKMeans
|
10
|
+
And I want to set K = "4" as K-means option
|
11
|
+
And I want to perform clustering on the parsed dataset
|
12
|
+
And I want to report result statistics
|
13
|
+
And I want to use Weka clustering cross-validation
|
14
|
+
|
15
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Feature: Filter a dataset (Weka Instances class)
|
2
|
+
In order to manipulate a dataset
|
3
|
+
I want to use the Weka Filter class on it
|
4
|
+
|
5
|
+
Scenario: Use of the Unsupervised Attribute filter 'Add'
|
6
|
+
Given the example ARFF file "weather.numeric.arff"
|
7
|
+
Given the Weka Attribute Add filter
|
8
|
+
Then I want to parse the file in order to create an Instances class object
|
9
|
+
And I want to print the available filter options and usage
|
10
|
+
And I want to set the option String "-T NUM -N dummy"
|
11
|
+
And I want to add an attribute (a column) to the dataset using the Weka filter Add
|
12
|
+
And I want to print a "summary" for the modified dataset
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Feature: Weka basic parsing capabilities
|
2
|
+
In order to perform calculations on a dataset
|
3
|
+
I want to import data from .ARFF, .CSV files, and external databases
|
4
|
+
|
5
|
+
Scenario: parsing a CSV file
|
6
|
+
Given the CSV file "weather.csv"
|
7
|
+
Then I want to print to stdout the summary for the CSV parsed Instances object
|
8
|
+
|
9
|
+
Scenario: parsing an ARFF file
|
10
|
+
Given the ARFF file "weather.numeric.arff"
|
11
|
+
Then I want to print to stdout the summary for the ARFF parsed Instances object
|
12
|
+
|
13
|
+
Scenario: parsing data from a MySQL table
|
14
|
+
Given the database "jdbc:mysql://localhost:3306/Gene_classes"
|
15
|
+
And a table named "test_weka"
|
16
|
+
Then I want to extract data from that table
|
17
|
+
And I want to print to stdout the summary for the parsed Instances object
|
18
|
+
And I want to convert the data into a bidimensional Ruby Array
|
@@ -0,0 +1,14 @@
|
|
1
|
+
Feature: Classification pipeline
|
2
|
+
In order to perform text-mining on a dataset
|
3
|
+
I want to parse the data, filter it and then classify it using a Bayesian classifier
|
4
|
+
|
5
|
+
Scenario: Use of Naive Bayes on a filtered data set
|
6
|
+
Given a file containing the training set data "ReutersGrain-train.arff"
|
7
|
+
Given a file containing the test set data "ReutersGrain-test.arff"
|
8
|
+
Then I want to parse them
|
9
|
+
And I want to filter them using the unsupervised filter "StringToWordVector"
|
10
|
+
And I want to build a "NaiveBayes" classifier using training set data
|
11
|
+
And I want to evaluate the performance of the classifier on the test set
|
12
|
+
And I want want to print to stdout a summary of the evaluation
|
13
|
+
|
14
|
+
|
data/lib/ruby-band.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Load jbundler from the gem's root directory (the parent of this file's
# directory), then the classpath file under .jbundler, then the library itself.
start = File.absolute_path '.'
begin
  Dir.chdir(File.join(File.dirname(__FILE__), ".."))
  require 'jbundler'
ensure
  # Put the caller's working directory back even when jbundler fails to load.
  Dir.chdir start
end
require File.join(File.dirname(__FILE__), '..', '.jbundler', 'classpath.rb')
require "java"
require "ruby-band/core"
require "ruby-band/weka"
require "ruby-band/apache"

# Drop the top-level Random constant before importing java.util.Random under
# the same name. The constant is removed from Object directly rather than from
# Random.ancestors[1], which is not Object on Rubies that place Random::Base
# between Random and Object.
Object.send(:remove_const, :Random) if Object.const_defined?(:Random)
java_import 'java.util.Random'
|
12
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Apache
  module Stat
    # Module-level wrappers around the Commons Math correlation classes
    # imported below.
    module Correlation

      java_import "org.apache.commons.math3.stat.correlation.Covariance"
      java_import "org.apache.commons.math3.stat.correlation.PearsonsCorrelation"
      java_import "org.apache.commons.math3.stat.correlation.SpearmansCorrelation"

      # Covariance between two Numeric arrays
      # * *Args*    :
      #   - +array_1+ -> Ruby Array, converted with to_java(:double).
      #   - +array_2+ -> Ruby Array, converted with to_java(:double).
      # * *Returns* : whatever Covariance#covariance returns for the two series.
      def self.covariance(array_1,array_2)
        calculator = Covariance.new
        first_series = array_1.to_java(:double)
        second_series = array_2.to_java(:double)
        calculator.covariance(first_series, second_series)
      end

      # Pearson correlation between two Numeric arrays
      # * *Args*    :
      #   - +array_1+ -> Ruby Array, converted with to_java(:double).
      #   - +array_2+ -> Ruby Array, converted with to_java(:double).
      # * *Returns* : whatever PearsonsCorrelation#correlation returns for the two series.
      def self.pearson_correlation(array_1,array_2)
        calculator = PearsonsCorrelation.new
        first_series = array_1.to_java(:double)
        second_series = array_2.to_java(:double)
        calculator.correlation(first_series, second_series)
      end

      # Spearman correlation between two Numeric arrays
      # * *Args*    :
      #   - +array_1+ -> Ruby Array, converted with to_java(:double).
      #   - +array_2+ -> Ruby Array, converted with to_java(:double).
      # * *Returns* : whatever SpearmansCorrelation#correlation returns for the two series.
      def self.spearman_correlation(array_1,array_2)
        calculator = SpearmansCorrelation.new
        first_series = array_1.to_java(:double)
        second_series = array_2.to_java(:double)
        calculator.correlation(first_series, second_series)
      end
    end
  end
end
|