RubyGems - bio-band - Versions diffs - 0.1.3 → 0.1.4 - Mend

bio-band 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

data/Gemfile +1 -0
data/Gemfile.lock +5 -0
data/Jarfile +1 -1
data/Jarfile.lock +1 -1
data/README.rdoc +2 -0
data/Rakefile +2 -1
data/VERSION +1 -1
data/band_server/client.rb +35 -0
data/band_server/client_alt.rb +35 -0
data/band_server/first_dataset.csv +15 -0
data/band_server/second_dataset.csv +15 -0
data/band_server/simple_server.rb +95 -0
data/band_server/third_dataset.csv +15 -0
data/band_server/uploads/first_dataset.csv +15 -0
data/band_server/uploads/second_dataset.csv +15 -0
data/band_server/uploads/third_dataset.csv +15 -0
data/bio-band.gemspec +19 -3
data/features/step_definitions/weka_classifiers.rb +3 -2
data/features/weka_classifiers.feature +13 -13
data/lib/bio-band.rb +2 -0
data/lib/bio-band/apache/stat/inference.rb +25 -19
data/lib/bio-band/apache/stat/regression.rb +2 -2
data/lib/bio-band/core/parser/parser.rb +6 -6
data/lib/bio-band/core/type/instances.rb +15 -5
data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
data/test/helper.rb +18 -0
data/test/test_apacheCorrelation.rb +22 -0
data/test/test_apacheInference.rb +46 -0
data/test/test_bio-band.rb +9 -0
metadata +33 -2

data/Gemfile CHANGED Viewed

@@ -7,6 +7,7 @@ source "http://rubygems.org"
 # Include everything needed to run rake, tests, features, etc.
 group :development do
   gem "shoulda", ">= 0"
+  gem "test-unit", ">= 0"
   gem "rdoc", "~> 3.12"
   gem "bundler", "~> 1.3.5"
   gem "jeweler", "~> 1.8.4"

data/Gemfile.lock CHANGED Viewed

@@ -14,6 +14,8 @@ GEM
       multi_json (~> 1.3)
     diff-lcs (1.2.4)
     fastercsv (1.5.5)
+    gherkin (2.12.0)
+      multi_json (~> 1.3)
     gherkin (2.12.0-java)
       multi_json (~> 1.3)
     git (1.2.5)
@@ -26,6 +28,7 @@ GEM
       git (>= 1.2.5)
       rake
       rdoc
+    json (1.8.0)
     json (1.8.0-java)
     maven-tools (0.32.4)
     multi_json (1.7.4)
@@ -59,6 +62,7 @@ GEM
       multi_json (~> 1.0)
       simplecov-html (~> 0.7.1)
     simplecov-html (0.7.1)
+    test-unit (2.5.5)
     thor (0.18.1)
     transaction-simple (1.4.0.2)
@@ -77,3 +81,4 @@ DEPENDENCIES
   ruport
   shoulda
   simplecov
+  test-unit

data/Jarfile CHANGED Viewed

@@ -1,4 +1,4 @@
-jar 'nz.ac.waikato.cms.weka:weka-stable','3.6.9'
+jar 'nz.ac.waikato.cms.weka:weka-stable','3.6.10'
 jar 'org.apache.commons:commons-math3','3.0'
 jar 'junit:junit','3.8.1'
 jar 'mysql:mysql-connector-java','5.1.6'

data/Jarfile.lock CHANGED Viewed

@@ -1,4 +1,4 @@
-nz.ac.waikato.cms.weka:weka-stable:jar:3.6.9
+nz.ac.waikato.cms.weka:weka-stable:jar:3.6.10
 net.sf.squirrel-sql.thirdparty-non-maven:java-cup:jar:0.11a
 org.apache.commons:commons-math3:jar:3.0
 junit:junit:jar:3.8.1

data/README.rdoc CHANGED Viewed

@@ -19,6 +19,8 @@ Otherwise use:
 == Usage
+See the Wiki section for in depth information on how to use this gem
 == Developers
 To use the library

data/Rakefile CHANGED Viewed

@@ -29,7 +29,8 @@ Jeweler::RubygemsDotOrgTasks.new
 require 'rake/testtask'
 Rake::TestTask.new(:test) do |test|
   test.libs << 'lib' << 'test'
-  test.pattern = 'test/**/test_*.rb'
+#  test.pattern = 'test/**/test_*.rb'
+  test.test_files = FileList['test/test_*.rb']
   test.verbose = true
 end

data/VERSION CHANGED Viewed

@@ -1 +1 @@
-0.1.3
+0.1.4

data/band_server/client.rb ADDED Viewed

@@ -0,0 +1,35 @@
+require 'rest_client'
+require 'uri'
+address = 'http://localhost:4567/'
+response_old = RestClient.post "#{address}create_dataset", { 'first' => { 'dataset' => File.new('first_dataset.csv'),'dataset_name' => 'dataset_1'},
+                                                             'second' => {'dataset' => File.new('second_dataset.csv'),'dataset_name' => 'dataset_2'} }
+puts 'List of the parsed datasets:'
+response_list =  RestClient.get "#{address}datasets", :cookies => response_old.cookies
+puts response_list
+# use filter on the training dataset
+RestClient.get "#{address}filter?filter_name=Filter::Unsupervised::Attribute::Discretize&&filter_options=-K_3&&dataset_in=dataset_1&&dataset_out=filtered_training_set",:cookies => response_old.cookies
+puts 'List of the datasets + the filtered training set'
+response_list =  RestClient.get "#{address}datasets", :cookies => response_old.cookies
+puts response_list
+puts 'Print a single dataset:'
+puts  RestClient.get "#{address}datasets/dataset_1", :cookies => response_old.cookies
+puts 'Train a classifier on the dataset_1:'
+puts RestClient.get "#{address}train_classifier?classifier_name=Classifier::Bayes::NaiveBayes&&classifier_options=-K&&dataset_in=dataset_1&&model_name=naive_classifier&&class_index=0",:cookies => response_old.cookies
+puts 'List all the trained classifiers:'
+puts  RestClient.get "#{address}classifiers", :cookies => response_old.cookies
+puts 'Show classifier statistics:'
+puts  RestClient.get "#{address}classifiers/naive_classifier", :cookies => response_old.cookies
+puts 'Crossvalidate the classifier:'
+puts  RestClient.get "#{address}crossvalidate_classifier?model_name=naive_classifier&&fold=5", :cookies => response_old.cookies

data/band_server/client_alt.rb ADDED Viewed

@@ -0,0 +1,35 @@
+require 'rest_client'
+require 'uri'
+address = 'http://localhost:4567/'
+response_old = RestClient.post "#{address}create_dataset", { 'first' => { 'dataset' => File.new('first_dataset.csv'),'dataset_name' => 'dataset_1'},
+                                                             #'second' => {'dataset' => File.new('second_dataset.csv'),'dataset_name' => 'dataset_2'},
+															 'third' => {'dataset' => File.new('third_dataset.csv'),'dataset_name' => 'dataset_3'} }
+puts 'List of the parsed datasets:'
+response_list =  RestClient.get "#{address}datasets", :cookies => response_old.cookies
+puts response_list
+# use filter on the training dataset
+RestClient.get "#{address}filter?filter_name=Filter::Unsupervised::Attribute::Discretize&&filter_options=-K_3&&dataset_in=dataset_1&&dataset_out=filtered_training_set",:cookies => response_old.cookies
+puts 'List of the datasets + the filtered training set'
+response_list =  RestClient.get "#{address}datasets", :cookies => response_old.cookies
+puts response_list
+puts 'Print a single dataset:'
+puts  RestClient.get "#{address}datasets/dataset_1", :cookies => response_old.cookies
+puts 'Train a classifier on the dataset_1:'
+puts RestClient.get "#{address}train_classifier?classifier_name=Classifier::Bayes::NaiveBayes&&classifier_options=-K&&dataset_in=dataset_1&&model_name=naive_classifier&&class_index=0",:cookies => response_old.cookies
+puts 'List all the trained classifiers:'
+puts  RestClient.get "#{address}classifiers", :cookies => response_old.cookies
+puts 'Show classifier statistics:'
+puts  RestClient.get "#{address}classifiers/naive_classifier", :cookies => response_old.cookies
+puts 'Crossvalidate the classifier:'
+puts  RestClient.get "#{address}crossvalidate_classifier?model_name=naive_classifier&&fold=5", :cookies => response_old.cookies

data/band_server/first_dataset.csv ADDED Viewed

@@ -0,0 +1,15 @@
+outlook,temperature,humidity,windy,play
+sunny,85,85,FALSE,no
+sunny,80,90,TRUE,no
+overcast,83,86,FALSE,yes
+rainy,70,96,FALSE,yes
+rainy,68,80,FALSE,yes
+rainy,65,70,TRUE,no
+overcast,64,65,TRUE,yes
+sunny,72,95,FALSE,no
+sunny,69,70,FALSE,yes
+rainy,75,80,FALSE,yes
+sunny,75,70,TRUE,yes
+overcast,72,90,TRUE,yes
+overcast,81,75,FALSE,yes
+rainy,71,91,TRUE,no

data/band_server/second_dataset.csv ADDED Viewed

@@ -0,0 +1,15 @@
+outlook,temperature,humidity,windy,play
+sunny,85,85,FALSE,no
+sunny,80,90,TRUE,no
+overcast,83,86,FALSE,yes
+rainy,70,96,FALSE,yes
+rainy,68,80,FALSE,yes
+rainy,65,70,TRUE,no
+overcast,64,65,TRUE,yes
+sunny,72,95,FALSE,no
+sunny,69,70,FALSE,yes
+rainy,75,80,FALSE,yes
+sunny,75,70,TRUE,yes
+overcast,72,90,TRUE,yes
+overcast,81,75,FALSE,yes
+rainy,71,91,TRUE,no

data/band_server/simple_server.rb ADDED Viewed

@@ -0,0 +1,95 @@
+#### This simple example server should run on JRuby, while the client can be run
+### either on Ruby or JRuby
+require 'sinatra/base'
+require 'bio-band'
+require 'JSON'
+class Trial < Sinatra::Base
+  use Rack::Session::Pool
+  # list created datasets or classifiers
+  # get '/list/:data_class' do
+  #   session[params[:data_class]].keys.join(",\t")
+  # end
+  get '/datasets' do
+    session[:datasets].keys.join(",\t")
+  end
+  get '/classifiers' do
+    session[:classifiers].keys.join(",\t")
+  end
+  # return a dataset using Json format
+  get '/datasets/:dataset_in' do
+    # force session to start by writing in the Hash ## ODDDDD
+    session[:init] = true
+    session[:datasets][params[:dataset_in]].to_json_format
+  end
+  get '/classifiers/:classifier_in' do
+    session[:init] = true
+    session[:classifiers][params[:classifier_in]]
+  end
+  get '/summary/:dataset_in' do
+    session[:init] = true
+    dataset_in = session[:datasets][params[:dataset_in]]
+    return dataset_in.summary[0].to_s,dataset_in.summary[1].to_s
+  end
+  post "/create_dataset" do
+    params.each_key do |key|
+      File.open('uploads/' + params[key]['dataset'][:filename], "w") do |f|
+        f.write(params[key]['dataset'][:tempfile].read)
+      end
+      session[:datasets] ||= Hash.new
+      session[:classifiers] ||= Hash.new
+      session[:datasets][params[key]['dataset_name']] = Core::Parser::parse_CSV('uploads/' + params[key]['dataset'][:filename])
+    end
+    return "The dataset was successfully created!"
+  end
+  get '/filter' do
+    session[:init] = true
+    $dataset_in = session[:datasets][params[:dataset_in]]
+    filter = eval("Weka::#{params[:filter_name]}.new")
+    $params = params
+    filter.set do
+      filter_options $params[:filter_options].split("_").join(" ")
+      data $dataset_in
+    end
+    filtered_data = filter.use
+    session[:datasets][params[:dataset_out]] = filtered_data
+  end
+  get '/train_classifier' do
+    session[:init] = true
+    @dataset_in = session[:datasets][params[:dataset_in]]
+    classifier = eval("Weka::#{params[:classifier_name]}.new")
+    classifier.set_options params[:classifier_options].split("_").join(" ") if params[:classifier_options]
+    @dataset_in.setClassIndex(params[:class_index].to_i)
+    classifier.set_data @dataset_in
+    classifier.build_classifier(@dataset_in)
+    session[:classifiers][params[:model_name]] = classifier
+    classifier.to_s
+  end
+  get '/crossvalidate_classifier' do
+    session[:init] = true
+    classifier = session[:classifiers][params[:model_name]]
+    fold = params[:fold].to_i
+    eval = Weka::Classifier::Evaluation.new classifier.instance_eval("@dataset")
+    eval.crossValidateModel(classifier.class.new, classifier.instance_eval("@dataset"), fold.to_java(:int), Random.new(1))
+    eval.summary
+  end
+  post '/clear' do
+    session.clear
+    puts "Session is now cleared"
+  end
+end
+Trial.run!

data/band_server/third_dataset.csv ADDED Viewed

@@ -0,0 +1,15 @@
+outlook,temperature,humidity,windy,play
+sunny,85,85,FALSE,no
+sunny,80,90,TRUE,no
+overcast,83,86,FALSE,yes
+rainy,70,96,FALSE,yes
+rainy,68,80,FALSE,yes
+rainy,65,70,TRUE,no
+overcast,64,65,TRUE,yes
+sunny,72,95,FALSE,no
+sunny,69,70,FALSE,yes
+rainy,75,80,FALSE,yes
+sunny,75,70,TRUE,yes
+overcast,72,90,TRUE,yes
+overcast,81,75,FALSE,yes
+rainy,71,91,TRUE,no

data/band_server/uploads/first_dataset.csv ADDED Viewed

@@ -0,0 +1,15 @@
+outlook,temperature,humidity,windy,play
+sunny,85,85,FALSE,no
+sunny,80,90,TRUE,no
+overcast,83,86,FALSE,yes
+rainy,70,96,FALSE,yes
+rainy,68,80,FALSE,yes
+rainy,65,70,TRUE,no
+overcast,64,65,TRUE,yes
+sunny,72,95,FALSE,no
+sunny,69,70,FALSE,yes
+rainy,75,80,FALSE,yes
+sunny,75,70,TRUE,yes
+overcast,72,90,TRUE,yes
+overcast,81,75,FALSE,yes
+rainy,71,91,TRUE,no

data/band_server/uploads/second_dataset.csv ADDED Viewed

@@ -0,0 +1,15 @@
+outlook,temperature,humidity,windy,play
+sunny,85,85,FALSE,no
+sunny,80,90,TRUE,no
+overcast,83,86,FALSE,yes
+rainy,70,96,FALSE,yes
+rainy,68,80,FALSE,yes
+rainy,65,70,TRUE,no
+overcast,64,65,TRUE,yes
+sunny,72,95,FALSE,no
+sunny,69,70,FALSE,yes
+rainy,75,80,FALSE,yes
+sunny,75,70,TRUE,yes
+overcast,72,90,TRUE,yes
+overcast,81,75,FALSE,yes
+rainy,71,91,TRUE,no

data/band_server/uploads/third_dataset.csv ADDED Viewed

@@ -0,0 +1,15 @@
+outlook,temperature,humidity,windy,play
+sunny,85,85,FALSE,no
+sunny,80,90,TRUE,no
+overcast,83,86,FALSE,yes
+rainy,70,96,FALSE,yes
+rainy,68,80,FALSE,yes
+rainy,65,70,TRUE,no
+overcast,64,65,TRUE,yes
+sunny,72,95,FALSE,no
+sunny,69,70,FALSE,yes
+rainy,75,80,FALSE,yes
+sunny,75,70,TRUE,yes
+overcast,72,90,TRUE,yes
+overcast,81,75,FALSE,yes
+rainy,71,91,TRUE,no

data/bio-band.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = "bio-band"
-  s.version = "0.1.3"
+  s.version = "0.1.4"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["arrigonialberto86"]
-  s.date = "2013-09-02"
+  s.date = "2013-09-11"
   s.description = "Data mining and machine learning algorithms for JRuby "
   s.email = "arrigonialberto86@gmail.com"
   s.executables = ["bio-band"]
@@ -27,6 +27,15 @@ Gem::Specification.new do |s|
     "README.rdoc",
     "Rakefile",
     "VERSION",
+    "band_server/client.rb",
+    "band_server/client_alt.rb",
+    "band_server/first_dataset.csv",
+    "band_server/second_dataset.csv",
+    "band_server/simple_server.rb",
+    "band_server/third_dataset.csv",
+    "band_server/uploads/first_dataset.csv",
+    "band_server/uploads/second_dataset.csv",
+    "band_server/uploads/third_dataset.csv",
     "bin/bio-band",
     "bio-band.gemspec",
     "ext/mkrf_conf.rb",
@@ -85,7 +94,11 @@ Gem::Specification.new do |s|
     "resources/weather.csv",
     "resources/weather.numeric.arff",
     "spec/bio-band_spec.rb",
-    "spec/spec_helper.rb"
+    "spec/spec_helper.rb",
+    "test/helper.rb",
+    "test/test_apacheCorrelation.rb",
+    "test/test_apacheInference.rb",
+    "test/test_bio-band.rb"
   ]
   s.homepage = "http://github.com/arrigonialberto86/bioruby-band"
   s.licenses = ["MIT"]
@@ -98,6 +111,7 @@ Gem::Specification.new do |s|
     if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
       s.add_development_dependency(%q<shoulda>, [">= 0"])
+      s.add_development_dependency(%q<test-unit>, [">= 0"])
       s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
       s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
       s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
@@ -110,6 +124,7 @@ Gem::Specification.new do |s|
       s.add_development_dependency(%q<bio>, [">= 1.4.2"])
     else
       s.add_dependency(%q<shoulda>, [">= 0"])
+      s.add_dependency(%q<test-unit>, [">= 0"])
       s.add_dependency(%q<rdoc>, ["~> 3.12"])
       s.add_dependency(%q<bundler>, ["~> 1.3.5"])
       s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
@@ -123,6 +138,7 @@ Gem::Specification.new do |s|
     end
   else
     s.add_dependency(%q<shoulda>, [">= 0"])
+    s.add_dependency(%q<test-unit>, [">= 0"])
     s.add_dependency(%q<rdoc>, ["~> 3.12"])
     s.add_dependency(%q<bundler>, ["~> 1.3.5"])
     s.add_dependency(%q<jeweler>, ["~> 1.8.4"])

data/features/step_definitions/weka_classifiers.rb CHANGED Viewed

@@ -30,11 +30,12 @@ Then(/^I want to print a summary for the dataset$/) do
 end
 Then(/^I want to set the class index for attribute with index "(.*?)"$/) do |arg1|
-    My_classifier.set_class_index(arg1.to_i)
+  My_classifier.set_class_index(arg1.to_i)
 end
 Then(/^I want to instantiate the classifier for my use$/) do
-  My_classifier.new
+  classifier = My_classifier.new
+  classifier.cross_validate(2)
 end

data/features/weka_classifiers.feature CHANGED Viewed

@@ -1,16 +1,16 @@
 Feature: Using a Weka classifier
-	In order to classify an instance data
-	I want to use a Weka classifier
+  In order to classify an instance data
+  I want to use a Weka classifier
-	Scenario: Understand options and usage
-		Given the Weka "NaiveBayes" classifier
-		Then I want to print a "description"
-		And I want to print an options list
+  Scenario: Understand options and usage
+    Given the Weka "NaiveBayes" classifier
+    Then I want to print a "description"
+    And I want to print an options list
-	Scenario: Use a classifier on a data instance
-		Given the unsupervised Weka classifier "NaiveBayes"
-		Then I want to set option "-K" for it
-		And I want to set the dataset parsed from "weather.numeric.arff"
-		And I want to print a summary for the dataset
-		And I want to set the class index for attribute with index "0"
-		And I want to instantiate the classifier for my use
+  Scenario: Use a classifier on a data instance
+    Given the unsupervised Weka classifier "NaiveBayes"
+    Then I want to set option "-K" for it
+    And I want to set the dataset parsed from "weather.numeric.arff"
+    And I want to print a summary for the dataset
+    And I want to set the class index for attribute with index "0"
+    And I want to instantiate the classifier for my use