ruby-band 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +3 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +119 -0
- data/Jarfile +9 -0
- data/Jarfile.lock +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +321 -0
- data/README.rdoc +70 -0
- data/Rakefile +66 -0
- data/VERSION +1 -0
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +90 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bin/ruby-band +83 -0
- data/ext/mkrf_conf.rb +74 -0
- data/features/create_dataset.feature +12 -0
- data/features/step_definitions/create_dataset.rb +39 -0
- data/features/step_definitions/weka_classifiers.rb +43 -0
- data/features/step_definitions/weka_clustering.rb +34 -0
- data/features/step_definitions/weka_filters.rb +32 -0
- data/features/step_definitions/weka_parsers.rb +46 -0
- data/features/step_definitions/weka_pipeline.rb +41 -0
- data/features/support/env.rb +3 -0
- data/features/weka_classifiers.feature +16 -0
- data/features/weka_clustering.feature +15 -0
- data/features/weka_filters.feature +12 -0
- data/features/weka_parsers.feature +18 -0
- data/features/weka_pipeline.feature +14 -0
- data/lib/ruby-band.rb +12 -0
- data/lib/ruby-band/apache.rb +2 -0
- data/lib/ruby-band/apache/stat/correlation.rb +42 -0
- data/lib/ruby-band/apache/stat/inference.rb +151 -0
- data/lib/ruby-band/apache/stat/regression.rb +22 -0
- data/lib/ruby-band/core.rb +6 -0
- data/lib/ruby-band/core/parser/parser.rb +27 -0
- data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
- data/lib/ruby-band/core/type/attribute.rb +53 -0
- data/lib/ruby-band/core/type/instance.rb +10 -0
- data/lib/ruby-band/core/type/instances.rb +361 -0
- data/lib/ruby-band/core/type/utils.rb +31 -0
- data/lib/ruby-band/weka.rb +14 -0
- data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
- data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
- data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
- data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
- data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
- data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
- data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
- data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
- data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
- data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
- data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
- data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
- data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
- data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
- data/lib/ruby-band/weka/db/db.rb +74 -0
- data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
- data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
- data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
- data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
- data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
- data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
- data/resources/ReutersGrain-test.arff +611 -0
- data/resources/ReutersGrain-train.arff +1561 -0
- data/resources/weather.csv +15 -0
- data/resources/weather.numeric.arff +23 -0
- data/ruby-band.gemspec +178 -0
- data/spec/ruby-band_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_ruby-band.rb +9 -0
- metadata +426 -0
data/ext/mkrf_conf.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# Absolute path of the directory that holds this script.
path = File.expand_path('..', __FILE__)
|
2
|
+
|
3
|
+
# Reports whether an executable called +name+ can be located with `which`.
#
# The lookup shells out to `which` and inspects its exit status. stderr is
# discarded so a failed lookup stays quiet, and a failure to launch the
# lookup at all counts as "not found" instead of aborting the script.
#
# * *Args* :
#   - +name+ -> command name to look up; it is interpolated into a shell
#     command line, so only trusted literals should be passed.
# * *Returns* :
#   - +true+ or +false+ (callers compare the result with == true / == false).
def command?(name)
  `which #{name} 2>/dev/null`
  $?.success?
rescue Errno::ENOENT
  false
end
|
7
|
+
|
8
|
+
# Host operating-system detection based on RbConfig::CONFIG['host_os'].
# Every predicate returns a strict +true+ or +false+.
module OS
  # True when host_os matches one of the Windows-family patterns.
  def self.windows?
    !!(/cygwin|mswin|mingw|bccwin|wince|emx/ =~ RbConfig::CONFIG['host_os'])
  end

  # True when host_os contains "darwin".
  def self.mac?
    !!(/darwin/ =~ RbConfig::CONFIG['host_os'])
  end

  # True for every host that is not detected as Windows.
  def self.unix?
    !OS.windows?
  end

  # True for every non-Windows host that is not detected as a Mac, so any
  # other Unix-like system is reported as Linux too.
  def self.linux?
    OS.unix? && !OS.mac?
  end
end
|
25
|
+
|
26
|
+
# Write a Rakefile next to this script whose default task either installs
# Maven or reports on its availability.
# NOTE(review): presumably this Rakefile is run by the gem's extension build
# step -- confirm against the gemspec.
File.open(File.join(path, "Rakefile"), "w") do |rakefile|
  if OS.windows?
    # Nothing is written here, so the generated Rakefile stays empty.
    puts "Sorry, still no support is provided for your OS!"
  elsif command?("mvn")
    # Maven is checked first so that a Mac which already has Maven but lacks
    # Homebrew is not told that Maven could not be installed.
    rakefile.write <<-RAKE
      task :ok_inst do
        puts "Maven has been detected on your system"
      end
      task :default => [:ok_inst]
    RAKE
  elsif OS.mac?
    if command?("brew")
      # Homebrew is available: let it install Maven.
      rakefile.write <<-RAKE
        task :brew_install do
          sh "brew install maven"
        end
        task :default => [:brew_install]
      RAKE
    else
      # Neither Maven nor Homebrew: only a hint can be given.
      rakefile.write <<-RAKE
        task :ok_inst do
          puts "Sorry, Maven could not be installed. Try installing 'brew' first"
        end
        task :default => [:ok_inst]
      RAKE
    end
  else
    # Every remaining host satisfies OS.linux? (non-Windows and non-Mac).
    rakefile.write <<-RAKE
      task :apt_install do
        sh "sudo apt-get install maven2"
      end
      task :default => [:apt_install]
    RAKE
  end
end
|
72
|
+
|
73
|
+
|
74
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Feature: Creation of an in-memory dataset
|
2
|
+
In order to perform calculations on a dataset
|
3
|
+
I want to easily build it and store it in memory
|
4
|
+
|
5
|
+
Scenario: creation of a dataset
|
6
|
+
Given a nominal attribute, named "assertion", with values "yes,no"
|
7
|
+
Given one numeric attribute, named "temperature"
|
8
|
+
Given another numeric attribute, names "days"
|
9
|
+
Given two data rows: "yes,100,30","no,100,0"
|
10
|
+
Then I want to build en empty dataset for my use
|
11
|
+
And I want to populate the dataset by row
|
12
|
+
And I want to print my dataset as a bidimensional Ruby Array
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Step definitions for building and filling an in-memory dataset.
# State is handed from step to step through global variables.

# Stores the nominal attribute's name (as a Symbol) and its list of values.
Given(/^a nominal attribute, named "(.*?)", with values "(.*?)"$/) do |name, value_list|
  $first_att = name.to_sym
  $values = value_list.split(',')
end

# Stores the name of the second attribute as a Symbol.
Given(/^one numeric attribute, named "(.*?)"$/) do |name|
  $second_att = name.to_sym
end

# Stores the name of the third attribute as a Symbol.
Given(/^another numeric attribute, names "(.*?)"$/) do |name|
  $third_att = name.to_sym
end

# Splits both comma-separated rows and converts the cells at index 1 and 2
# of each row to Float.
Given(/^two data rows: "(.*?)","(.*?)"$/) do |first_csv, second_csv|
  $first_row, $second_row = [first_csv, second_csv].map do |csv|
    cells = csv.split(',')
    [1, 2].each { |idx| cells[idx] = cells[idx].to_f }
    cells
  end
end

# Declares the three attributes on a new Instances object and calls summary.
Then(/^I want to build en empty dataset for my use$/) do
  $my_instance = Core::Type::Instances::Base.new do
    nominal($first_att, $values)
    numeric($second_att)
    # NOTE(review): the matching step text calls this a numeric attribute and
    # the rows carry a Float in this position -- confirm that `string` is intended.
    string($third_att)
  end
  $my_instance.summary
end

# Adds both prepared rows to the dataset and calls summary again.
Then(/^I want to populate the dataset by row$/) do
  rows = [$first_row, $second_row]
  $my_instance.populate_by_row(rows)
  $my_instance.summary
end

# Prints the inspect form of the dataset's to_a2d result.
Then(/^I want to print my dataset as a bidimensional Ruby Array$/) do
  puts $my_instance.to_a2d.inspect
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Step definitions exercising a Weka classifier.

# Instantiates NaiveBayes; the captured classifier name is not used.
Given(/^the Weka "(.*?)" classifier$/) do |_classifier_name|
  @classifier = Weka::Classifier::Bayes::NaiveBayes.new
end

# Calls the method named by the captured text on the classifier.
Then(/^I want to print a "(.*?)"$/) do |method_name|
  @classifier.send(method_name.to_sym)
end

# Calls list_options on the classifier.
Then(/^I want to print an options list$/) do
  @classifier.list_options
end

# Defines the My_classifier subclass used by the steps below; the captured
# classifier name is not used.
Given(/^the unsupervised Weka classifier "(.*?)"$/) do |_classifier_name|
  class My_classifier < Weka::Classifier::Bayes::NaiveBayes::Base; end
end

# Passes the captured option string to My_classifier.
Then(/^I want to set option "(.*?)" for it$/) do |option_string|
  My_classifier.set_options(option_string)
end

# Parses the named ARFF file from the resources directory and hands the
# resulting dataset to My_classifier.
Then(/^I want to set the dataset parsed from "(.*?)"$/) do |file_name|
  @arff = File.join('resources', file_name)
  @dataset_ARFF = Core::Parser.parse_ARFF(@arff)
  My_classifier.set_data(@dataset_ARFF)
end

# Calls summary on the parsed dataset.
Then(/^I want to print a summary for the dataset$/) do
  @dataset_ARFF.summary
end

# Sets the class index from the captured text converted to an Integer.
Then(/^I want to set the class index for attribute with index "(.*?)"$/) do |index_text|
  My_classifier.set_class_index(index_text.to_i)
end

# Instantiates My_classifier and calls cross_validate(2) on it.
Then(/^I want to instantiate the classifier for my use$/) do
  My_classifier.new.cross_validate(2)
end
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Step definitions exercising Weka's SimpleKMeans clusterer.

# Remembers the path of the ARFF file inside the resources directory.
Given(/^the ARFF dataset stored in the file "(.*?)"$/) do |file_name|
  @arff = File.join('resources', file_name)
end

# Instantiates the SimpleKMeans clusterer.
Given(/^the SimpleKMeans algorithm implementation from Weka$/) do
  @clusterer = Weka::Clusterer::SimpleKMeans.new
end

# Parses the remembered ARFF file into a dataset.
Then(/^I want to parse the data from the file$/) do
  @dataset_ARFF = Core::Parser.parse_ARFF(@arff)
end

# Prints whatever list_options returns for the clusterer.
Then(/^I want to list the options available for SimpleKMeans$/) do
  puts(@clusterer.list_options)
end

# Defines the Clustering subclass and passes "-N <captured value>" as its options.
Then(/^I want to set K = "(.*?)" as K\-means option$/) do |cluster_count|
  class Clustering < Weka::Clusterer::SimpleKMeans::Base; end
  Clustering.set_options("-N #{cluster_count}")
end

# Hands the parsed dataset to Clustering and instantiates it.
Then(/^I want to perform clustering on the parsed dataset$/) do
  Clustering.set_data(@dataset_ARFF)
  @clustered = Clustering.new
end

# Prints the Clustering instance.
Then(/^I want to report result statistics$/) do
  puts(@clustered)
end

# Prints the result of evaluate on the Clustering instance.
Then(/^I want to use Weka clustering cross\-validation$/) do
  puts(@clustered.evaluate)
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Step definitions exercising the unsupervised attribute filter Add.

# Remembers the path of the ARFF file inside the resources directory.
Given(/^the example ARFF file "(.*?)"$/) do |file_name|
  @arff = File.join('resources', file_name)
end

# Instantiates the Add filter.
Given(/^the Weka Attribute Add filter$/) do
  @filter = Weka::Filter::Unsupervised::Attribute::Add.new
end

# Parses the ARFF file into a global dataset and prints its summary.
Then(/^I want to parse the file in order to create an Instances class object$/) do
  $dataset_ARFF = Core::Parser.parse_ARFF(@arff)
  puts($dataset_ARFF.summary)
end

# Calls options_list on the filter.
Then(/^I want to print the available filter options and usage$/) do
  @filter.options_list
end

# Configures the filter with the captured option string and the parsed
# dataset; both are read from globals inside the set block.
Then(/^I want to set the option String "(.*?)"$/) do |option_string|
  $arg1 = option_string
  @filter.set do
    filter_options($arg1)
    data($dataset_ARFF)
  end
end

# Runs the filter and keeps its result.
Then(/^I want to add an attribute \(a column\) to the dataset using the Weka filter Add$/) do
  @new_inst = @filter.use
end

# Calls the method named by the captured text on the filter's result.
Then(/^I want to print a "(.*?)" for the modified dataset$/) do |method_name|
  @new_inst.send(method_name.to_sym)
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Step definitions exercising the CSV, ARFF and database parsers.

# Parses the named CSV file from the resources directory.
Given(/^the CSV file "(.*?)"$/) do |file_name|
  @csv = File.join('resources', file_name)
  @dataset_CSV = Core::Parser.parse_CSV(@csv)
end

# Calls summary on the CSV-parsed dataset.
Then(/^I want to print to stdout the summary for the CSV parsed Instances object$/) do
  @dataset_CSV.summary
end

# Prints the ARFF file's path and parses the file.
Given(/^the ARFF file "(.*?)"$/) do |file_name|
  @arff = File.join('resources', file_name)
  puts(@arff)
  @dataset_ARFF = Core::Parser.parse_ARFF(@arff)
end

# Calls summary on the ARFF-parsed dataset.
Then(/^I want to print to stdout the summary for the ARFF parsed Instances object$/) do
  @dataset_ARFF.summary
end

# Remembers the captured database connection string.
Given(/^the database "(.*?)"$/) do |connection_string|
  @db_connection = connection_string
end

# Remembers the captured table name.
Given(/^a table named "(.*?)"$/) do |table_name|
  @target_table = table_name
end

# Intentionally empty: the query below stays disabled until credentials are supplied.
Then(/^I want to extract data from that table$/) do
  # INSERT HERE MySQL DB CREDENTIALS in order to test this parsing functionality
  # @dataset = Weka::Db.query_mysql(@db_connection,'root','',"select * from #{@target_table}")
end

# Intentionally empty: its only statement is commented out.
Then(/^I want to print to stdout the summary for the parsed Instances object$/) do
  # @dataset.summary
end

# Intentionally empty: the expectation below is commented out.
Then(/^I want to convert the data into a bidimensional Ruby Array$/) do
  # @dataset.to_a2d.should == [["sunny", 85.0, 85.0, "FALSE", "no"], ["sunny", 80.0, 90.0, "TRUE", "no"], ["overcast", 83.0, 86.0, "FALSE", "yes"],
  # ["rainy", 70.0, 96.0, "FALSE", "yes"], ["rainy", 68.0, 80.0, "FALSE", "yes"], ["rainy", 65.0, 70.0, "TRUE", "no"],
  # ["overcast", 64.0, 65.0, "TRUE", "yes"], ["sunny", 72.0, 95.0, "FALSE", "no"], ["sunny", 69.0, 70.0, "FALSE", "yes"],
  # ["rainy", 75.0, 80.0, "FALSE", "yes"], ["sunny", 75.0, 70.0, "TRUE", "yes"], ["overcast", 72.0, 90.0, "TRUE", "yes"],
  # ["overcast", 81.0, 75.0, "FALSE", "yes"], ["rainy", 71.0, 91.0, "TRUE", "no"]]
end
|
44
|
+
|
45
|
+
|
46
|
+
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Step definitions for the parse -> filter -> classify -> evaluate pipeline.

# Remembers the path of the training file inside the resources directory.
Given(/^a file containing the training set data "(.*?)"$/) do |file_name|
  @training_file = File.join('resources', file_name)
end

# Remembers the path of the test file inside the resources directory.
Given(/^a file containing the test set data "(.*?)"$/) do |file_name|
  @test_file = File.join('resources', file_name)
end

# Parses both ARFF files into global datasets.
Then(/^I want to parse them$/) do
  $training_set = Core::Parser.parse_ARFF(@training_file)
  $test_set = Core::Parser.parse_ARFF(@test_file)
end

# Filters both datasets with one StringToWordVector instance that is
# configured with the training set; the captured filter name is not used.
Then(/^I want to filter them using the unsupervised filter "(.*?)"$/) do |_filter_name|
  word_vector = Weka::Filter::Unsupervised::Attribute::StringToWordVector.new
  word_vector.set { data $training_set }
  $filtered_training_set = word_vector.use

  $filtered_test_set = word_vector.use($test_set)
  $filtered_test_set.set_class_index(0)
end

# Builds a ComplementNaiveBayes classifier on the filtered training set with
# class index 0; the captured classifier name is not used.
Then(/^I want to build a "(.*?)" classifier using training set data$/) do |_classifier_name|
  $bayes = Weka::Classifier::Bayes::ComplementNaiveBayes::Base.new do
    set_data($filtered_training_set)
    set_class_index(0)
  end
end

# Creates an Evaluation from the filtered training set and evaluates the
# classifier against the filtered test set.
Then(/^I want to evaluate the performance of the classifier on the test set$/) do
  @evaluator = Weka::Classifier::Evaluation.new($filtered_training_set)
  @evaluator.evaluate_model($bayes, $filtered_test_set)
end

# Prints the evaluator's summary.
Then(/^I want want to print to stdout a summary of the evaluation$/) do
  puts(@evaluator.summary)
end
|
41
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Feature: Using a Weka classifier
|
2
|
+
In order to classify a data instance
|
3
|
+
I want to use a Weka classifier
|
4
|
+
|
5
|
+
Scenario: Understand options and usage
|
6
|
+
Given the Weka "NaiveBayes" classifier
|
7
|
+
Then I want to print a "description"
|
8
|
+
And I want to print an options list
|
9
|
+
|
10
|
+
Scenario: Use a classifier on a data instance
|
11
|
+
Given the unsupervised Weka classifier "NaiveBayes"
|
12
|
+
Then I want to set option "-K" for it
|
13
|
+
And I want to set the dataset parsed from "weather.numeric.arff"
|
14
|
+
And I want to print a summary for the dataset
|
15
|
+
And I want to set the class index for attribute with index "0"
|
16
|
+
And I want to instantiate the classifier for my use
|
@@ -0,0 +1,15 @@
|
|
1
|
+
Feature: Weka dataset clustering
|
2
|
+
In order to group similar data vectors in my dataset
|
3
|
+
I want to use Weka clustering algorithms
|
4
|
+
|
5
|
+
Scenario: use of Kmeans algorithm
|
6
|
+
Given the ARFF dataset stored in the file "weather.numeric.arff"
|
7
|
+
Given the SimpleKMeans algorithm implementation from Weka
|
8
|
+
Then I want to parse the data from the file
|
9
|
+
And I want to list the options available for SimpleKMeans
|
10
|
+
And I want to set K = "4" as K-means option
|
11
|
+
And I want to perform clustering on the parsed dataset
|
12
|
+
And I want to report result statistics
|
13
|
+
And I want to use Weka clustering cross-validation
|
14
|
+
|
15
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Feature: Filter a dataset (Weka Instances class)
|
2
|
+
In order to manipulate a dataset
|
3
|
+
I want to use the Weka Filter class on it
|
4
|
+
|
5
|
+
Scenario: Use of the Unsupervised Attribute filter 'Add'
|
6
|
+
Given the example ARFF file "weather.numeric.arff"
|
7
|
+
Given the Weka Attribute Add filter
|
8
|
+
Then I want to parse the file in order to create an Instances class object
|
9
|
+
And I want to print the available filter options and usage
|
10
|
+
And I want to set the option String "-T NUM -N dummy"
|
11
|
+
And I want to add an attribute (a column) to the dataset using the Weka filter Add
|
12
|
+
And I want to print a "summary" for the modified dataset
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Feature: Weka basic parsing capabilities
|
2
|
+
In order to perform calculations on a dataset
|
3
|
+
I want to import data from .ARFF, .CSV files, and external databases
|
4
|
+
|
5
|
+
Scenario: parsing a CSV file
|
6
|
+
Given the CSV file "weather.csv"
|
7
|
+
Then I want to print to stdout the summary for the CSV parsed Instances object
|
8
|
+
|
9
|
+
Scenario: parsing an ARFF file
|
10
|
+
Given the ARFF file "weather.numeric.arff"
|
11
|
+
Then I want to print to stdout the summary for the ARFF parsed Instances object
|
12
|
+
|
13
|
+
Scenario: parsing data from a mySQL table
|
14
|
+
Given the database "jdbc:mysql://localhost:3306/Gene_classes"
|
15
|
+
And a table named "test_weka"
|
16
|
+
Then I want to extract data from that table
|
17
|
+
And I want to print to stdout the summary for the parsed Instances object
|
18
|
+
And I want to convert the data into a bidimensional Ruby Array
|
@@ -0,0 +1,14 @@
|
|
1
|
+
Feature: Classification pipeline
|
2
|
+
In order to perform text-mining on a dataset
|
3
|
+
I want to parse the data, filter it and then classify it using a Bayesian classifier
|
4
|
+
|
5
|
+
Scenario: Use of Naive Bayes on a filtered data set
|
6
|
+
Given a file containing the training set data "ReutersGrain-train.arff"
|
7
|
+
Given a file containing the test set data "ReutersGrain-test.arff"
|
8
|
+
Then I want to parse them
|
9
|
+
And I want to filter them using the unsupervised filter "StringToWordVector"
|
10
|
+
And I want to build a "NaiveBayes" classifier using training set data
|
11
|
+
And I want to evaluate the performance of the classifier on the test set
|
12
|
+
And I want want to print to stdout a summary of the evaluation
|
13
|
+
|
14
|
+
|
data/lib/ruby-band.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Loader for the gem: pulls in jbundler and the generated classpath, then the
# gem's own components, and finally rebinds Random to java.util.Random.

# jbundler is required with the directory above lib/ as working directory
# (presumably so it can locate the Jarfile there -- confirm). The ensure
# clause puts the caller's working directory back even if the require fails.
start = File.absolute_path '.'
begin
  Dir.chdir(File.join(File.dirname(__FILE__), ".."))
  require 'jbundler'
ensure
  Dir.chdir start
end
require File.join(File.dirname(__FILE__), '..', '.jbundler', 'classpath.rb')
require "java"
require "ruby-band/core"
require "ruby-band/weka"
require "ruby-band/apache"
# Remove Ruby's own Random so the name can be rebound to the Java class.
# The constant is removed from Object directly: Random.ancestors[1] is only
# Object when nothing else sits between Random and Object in its ancestry.
Object.send(:remove_const, :Random) if defined?(Random)
java_import 'java.util.Random'
|
12
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Apache
  module Stat
    # Thin wrappers around the Apache Commons Math correlation classes.
    # Each method converts both Ruby arrays to Java double arrays first.
    module Correlation

      java_import "org.apache.commons.math3.stat.correlation.Covariance"
      java_import "org.apache.commons.math3.stat.correlation.PearsonsCorrelation"
      java_import "org.apache.commons.math3.stat.correlation.SpearmansCorrelation"

      # Covariance of two numeric arrays
      # * *Args*    :
      #   - +array_1+ -> a Ruby Array of numbers.
      #   - +array_2+ -> a Ruby Array of numbers.
      # * *Returns* :
      #   - the value returned by Covariance#covariance
      def self.covariance(array_1, array_2)
        first_series = array_1.to_java(:double)
        second_series = array_2.to_java(:double)
        Covariance.new.covariance(first_series, second_series)
      end

      # Pearson correlation of two numeric arrays
      # * *Args*    :
      #   - +array_1+ -> a Ruby Array of numbers.
      #   - +array_2+ -> a Ruby Array of numbers.
      # * *Returns* :
      #   - the value returned by PearsonsCorrelation#correlation
      def self.pearson_correlation(array_1, array_2)
        first_series = array_1.to_java(:double)
        second_series = array_2.to_java(:double)
        PearsonsCorrelation.new.correlation(first_series, second_series)
      end

      # Spearman correlation of two numeric arrays
      # * *Args*    :
      #   - +array_1+ -> a Ruby Array of numbers.
      #   - +array_2+ -> a Ruby Array of numbers.
      # * *Returns* :
      #   - the value returned by SpearmansCorrelation#correlation
      def self.spearman_correlation(array_1, array_2)
        first_series = array_1.to_java(:double)
        second_series = array_2.to_java(:double)
        SpearmansCorrelation.new.correlation(first_series, second_series)
      end
    end
  end
end
|