bio-band 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +5 -0
  3. data/Jarfile +1 -1
  4. data/Jarfile.lock +1 -1
  5. data/README.rdoc +2 -0
  6. data/Rakefile +2 -1
  7. data/VERSION +1 -1
  8. data/band_server/client.rb +35 -0
  9. data/band_server/client_alt.rb +35 -0
  10. data/band_server/first_dataset.csv +15 -0
  11. data/band_server/second_dataset.csv +15 -0
  12. data/band_server/simple_server.rb +95 -0
  13. data/band_server/third_dataset.csv +15 -0
  14. data/band_server/uploads/first_dataset.csv +15 -0
  15. data/band_server/uploads/second_dataset.csv +15 -0
  16. data/band_server/uploads/third_dataset.csv +15 -0
  17. data/bio-band.gemspec +19 -3
  18. data/features/step_definitions/weka_classifiers.rb +3 -2
  19. data/features/weka_classifiers.feature +13 -13
  20. data/lib/bio-band.rb +2 -0
  21. data/lib/bio-band/apache/stat/inference.rb +25 -19
  22. data/lib/bio-band/apache/stat/regression.rb +2 -2
  23. data/lib/bio-band/core/parser/parser.rb +6 -6
  24. data/lib/bio-band/core/type/instances.rb +15 -5
  25. data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
  26. data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
  27. data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
  28. data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
  29. data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
  30. data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
  31. data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
  32. data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
  33. data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
  34. data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
  35. data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
  36. data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
  37. data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
  38. data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
  39. data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
  40. data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
  41. data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
  42. data/test/helper.rb +18 -0
  43. data/test/test_apacheCorrelation.rb +22 -0
  44. data/test/test_apacheInference.rb +46 -0
  45. data/test/test_bio-band.rb +9 -0
  46. metadata +33 -2
data/Gemfile CHANGED
@@ -7,6 +7,7 @@ source "http://rubygems.org"
7
7
  # Include everything needed to run rake, tests, features, etc.
8
8
  group :development do
9
9
  gem "shoulda", ">= 0"
10
+ gem "test-unit", ">= 0"
10
11
  gem "rdoc", "~> 3.12"
11
12
  gem "bundler", "~> 1.3.5"
12
13
  gem "jeweler", "~> 1.8.4"
data/Gemfile.lock CHANGED
@@ -14,6 +14,8 @@ GEM
14
14
  multi_json (~> 1.3)
15
15
  diff-lcs (1.2.4)
16
16
  fastercsv (1.5.5)
17
+ gherkin (2.12.0)
18
+ multi_json (~> 1.3)
17
19
  gherkin (2.12.0-java)
18
20
  multi_json (~> 1.3)
19
21
  git (1.2.5)
@@ -26,6 +28,7 @@ GEM
26
28
  git (>= 1.2.5)
27
29
  rake
28
30
  rdoc
31
+ json (1.8.0)
29
32
  json (1.8.0-java)
30
33
  maven-tools (0.32.4)
31
34
  multi_json (1.7.4)
@@ -59,6 +62,7 @@ GEM
59
62
  multi_json (~> 1.0)
60
63
  simplecov-html (~> 0.7.1)
61
64
  simplecov-html (0.7.1)
65
+ test-unit (2.5.5)
62
66
  thor (0.18.1)
63
67
  transaction-simple (1.4.0.2)
64
68
 
@@ -77,3 +81,4 @@ DEPENDENCIES
77
81
  ruport
78
82
  shoulda
79
83
  simplecov
84
+ test-unit
data/Jarfile CHANGED
@@ -1,4 +1,4 @@
1
- jar 'nz.ac.waikato.cms.weka:weka-stable','3.6.9'
1
+ jar 'nz.ac.waikato.cms.weka:weka-stable','3.6.10'
2
2
  jar 'org.apache.commons:commons-math3','3.0'
3
3
  jar 'junit:junit','3.8.1'
4
4
  jar 'mysql:mysql-connector-java','5.1.6'
data/Jarfile.lock CHANGED
@@ -1,4 +1,4 @@
1
- nz.ac.waikato.cms.weka:weka-stable:jar:3.6.9
1
+ nz.ac.waikato.cms.weka:weka-stable:jar:3.6.10
2
2
  net.sf.squirrel-sql.thirdparty-non-maven:java-cup:jar:0.11a
3
3
  org.apache.commons:commons-math3:jar:3.0
4
4
  junit:junit:jar:3.8.1
data/README.rdoc CHANGED
@@ -19,6 +19,8 @@ Otherwise use:
19
19
 
20
20
  == Usage
21
21
 
22
+ See the Wiki section for in-depth information on how to use this gem.
23
+
22
24
  == Developers
23
25
 
24
26
  To use the library
data/Rakefile CHANGED
@@ -29,7 +29,8 @@ Jeweler::RubygemsDotOrgTasks.new
29
29
  require 'rake/testtask'
30
30
  Rake::TestTask.new(:test) do |test|
31
31
  test.libs << 'lib' << 'test'
32
- test.pattern = 'test/**/test_*.rb'
32
+ # test.pattern = 'test/**/test_*.rb'
33
+ test.test_files = FileList['test/test_*.rb']
33
34
  test.verbose = true
34
35
  end
35
36
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
@@ -0,0 +1,35 @@
1
+ require 'rest_client'
2
+ require 'uri'
3
+
4
+ address = 'http://localhost:4567/'
5
+
6
+ response_old = RestClient.post "#{address}create_dataset", { 'first' => { 'dataset' => File.new('first_dataset.csv'),'dataset_name' => 'dataset_1'},
7
+ 'second' => {'dataset' => File.new('second_dataset.csv'),'dataset_name' => 'dataset_2'} }
8
+
9
+
10
+ puts 'List of the parsed datasets:'
11
+ response_list = RestClient.get "#{address}datasets", :cookies => response_old.cookies
12
+ puts response_list
13
+
14
+ # use filter on the training dataset
15
+ RestClient.get "#{address}filter?filter_name=Filter::Unsupervised::Attribute::Discretize&&filter_options=-K_3&&dataset_in=dataset_1&&dataset_out=filtered_training_set",:cookies => response_old.cookies
16
+
17
+ puts 'List of the datasets + the filtered training set'
18
+ response_list = RestClient.get "#{address}datasets", :cookies => response_old.cookies
19
+ puts response_list
20
+
21
+ puts 'Print a single dataset:'
22
+ puts RestClient.get "#{address}datasets/dataset_1", :cookies => response_old.cookies
23
+
24
+
25
+ puts 'Train a classifier on the dataset_1:'
26
+ puts RestClient.get "#{address}train_classifier?classifier_name=Classifier::Bayes::NaiveBayes&&classifier_options=-K&&dataset_in=dataset_1&&model_name=naive_classifier&&class_index=0",:cookies => response_old.cookies
27
+
28
+ puts 'List all the trained classifiers:'
29
+ puts RestClient.get "#{address}classifiers", :cookies => response_old.cookies
30
+
31
+ puts 'Show classifier statistics:'
32
+ puts RestClient.get "#{address}classifiers/naive_classifier", :cookies => response_old.cookies
33
+
34
+ puts 'Crossvalidate the classifier:'
35
+ puts RestClient.get "#{address}crossvalidate_classifier?model_name=naive_classifier&&fold=5", :cookies => response_old.cookies
@@ -0,0 +1,35 @@
1
+ require 'rest_client'
2
+ require 'uri'
3
+
4
+ address = 'http://localhost:4567/'
5
+
6
+ response_old = RestClient.post "#{address}create_dataset", { 'first' => { 'dataset' => File.new('first_dataset.csv'),'dataset_name' => 'dataset_1'},
7
+ #'second' => {'dataset' => File.new('second_dataset.csv'),'dataset_name' => 'dataset_2'},
8
+ 'third' => {'dataset' => File.new('third_dataset.csv'),'dataset_name' => 'dataset_3'} }
9
+
10
+ puts 'List of the parsed datasets:'
11
+ response_list = RestClient.get "#{address}datasets", :cookies => response_old.cookies
12
+ puts response_list
13
+
14
+ # use filter on the training dataset
15
+ RestClient.get "#{address}filter?filter_name=Filter::Unsupervised::Attribute::Discretize&&filter_options=-K_3&&dataset_in=dataset_1&&dataset_out=filtered_training_set",:cookies => response_old.cookies
16
+
17
+ puts 'List of the datasets + the filtered training set'
18
+ response_list = RestClient.get "#{address}datasets", :cookies => response_old.cookies
19
+ puts response_list
20
+
21
+ puts 'Print a single dataset:'
22
+ puts RestClient.get "#{address}datasets/dataset_1", :cookies => response_old.cookies
23
+
24
+
25
+ puts 'Train a classifier on the dataset_1:'
26
+ puts RestClient.get "#{address}train_classifier?classifier_name=Classifier::Bayes::NaiveBayes&&classifier_options=-K&&dataset_in=dataset_1&&model_name=naive_classifier&&class_index=0",:cookies => response_old.cookies
27
+
28
+ puts 'List all the trained classifiers:'
29
+ puts RestClient.get "#{address}classifiers", :cookies => response_old.cookies
30
+
31
+ puts 'Show classifier statistics:'
32
+ puts RestClient.get "#{address}classifiers/naive_classifier", :cookies => response_old.cookies
33
+
34
+ puts 'Crossvalidate the classifier:'
35
+ puts RestClient.get "#{address}crossvalidate_classifier?model_name=naive_classifier&&fold=5", :cookies => response_old.cookies
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
@@ -0,0 +1,95 @@
1
+ #### This simple example server should run on JRuby, while the client can be run
2
+ ### either on Ruby or JRuby
3
+
4
+
5
+ require 'sinatra/base'
6
+ require 'bio-band'
7
+ require 'JSON'
8
+
9
+ class Trial < Sinatra::Base
10
+ use Rack::Session::Pool
11
+
12
+ # list created datasets or classifiers
13
+ # get '/list/:data_class' do
14
+ # session[params[:data_class]].keys.join(",\t")
15
+ # end
16
+
17
+ get '/datasets' do
18
+ session[:datasets].keys.join(",\t")
19
+ end
20
+ get '/classifiers' do
21
+ session[:classifiers].keys.join(",\t")
22
+ end
23
+
24
+ # return a dataset using Json format
25
+ get '/datasets/:dataset_in' do
26
+ # force the session to start by writing to the Hash (odd workaround)
27
+ session[:init] = true
28
+ session[:datasets][params[:dataset_in]].to_json_format
29
+ end
30
+
31
+ get '/classifiers/:classifier_in' do
32
+ session[:init] = true
33
+ session[:classifiers][params[:classifier_in]]
34
+ end
35
+
36
+ get '/summary/:dataset_in' do
37
+ session[:init] = true
38
+ dataset_in = session[:datasets][params[:dataset_in]]
39
+ return dataset_in.summary[0].to_s,dataset_in.summary[1].to_s
40
+ end
41
+
42
+ post "/create_dataset" do
43
+ params.each_key do |key|
44
+ File.open('uploads/' + params[key]['dataset'][:filename], "w") do |f|
45
+ f.write(params[key]['dataset'][:tempfile].read)
46
+ end
47
+ session[:datasets] ||= Hash.new
48
+ session[:classifiers] ||= Hash.new
49
+ session[:datasets][params[key]['dataset_name']] = Core::Parser::parse_CSV('uploads/' + params[key]['dataset'][:filename])
50
+ end
51
+ return "The dataset was successfully created!"
52
+ end
53
+
54
+ get '/filter' do
55
+ session[:init] = true
56
+ $dataset_in = session[:datasets][params[:dataset_in]]
57
+ filter = eval("Weka::#{params[:filter_name]}.new")
58
+ $params = params
59
+ filter.set do
60
+ filter_options $params[:filter_options].split("_").join(" ")
61
+ data $dataset_in
62
+ end
63
+ filtered_data = filter.use
64
+ session[:datasets][params[:dataset_out]] = filtered_data
65
+ end
66
+
67
+ get '/train_classifier' do
68
+ session[:init] = true
69
+ @dataset_in = session[:datasets][params[:dataset_in]]
70
+ classifier = eval("Weka::#{params[:classifier_name]}.new")
71
+ classifier.set_options params[:classifier_options].split("_").join(" ") if params[:classifier_options]
72
+ @dataset_in.setClassIndex(params[:class_index].to_i)
73
+ classifier.set_data @dataset_in
74
+ classifier.build_classifier(@dataset_in)
75
+ session[:classifiers][params[:model_name]] = classifier
76
+ classifier.to_s
77
+ end
78
+
79
+ get '/crossvalidate_classifier' do
80
+ session[:init] = true
81
+ classifier = session[:classifiers][params[:model_name]]
82
+ fold = params[:fold].to_i
83
+ eval = Weka::Classifier::Evaluation.new classifier.instance_eval("@dataset")
84
+ eval.crossValidateModel(classifier.class.new, classifier.instance_eval("@dataset"), fold.to_java(:int), Random.new(1))
85
+ eval.summary
86
+ end
87
+
88
+ post '/clear' do
89
+ session.clear
90
+ puts "Session is now cleared"
91
+ end
92
+
93
+ end
94
+
95
+ Trial.run!
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
data/bio-band.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-band"
8
- s.version = "0.1.3"
8
+ s.version = "0.1.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["arrigonialberto86"]
12
- s.date = "2013-09-02"
12
+ s.date = "2013-09-11"
13
13
  s.description = "Data mining and machine learning algorithms for JRuby "
14
14
  s.email = "arrigonialberto86@gmail.com"
15
15
  s.executables = ["bio-band"]
@@ -27,6 +27,15 @@ Gem::Specification.new do |s|
27
27
  "README.rdoc",
28
28
  "Rakefile",
29
29
  "VERSION",
30
+ "band_server/client.rb",
31
+ "band_server/client_alt.rb",
32
+ "band_server/first_dataset.csv",
33
+ "band_server/second_dataset.csv",
34
+ "band_server/simple_server.rb",
35
+ "band_server/third_dataset.csv",
36
+ "band_server/uploads/first_dataset.csv",
37
+ "band_server/uploads/second_dataset.csv",
38
+ "band_server/uploads/third_dataset.csv",
30
39
  "bin/bio-band",
31
40
  "bio-band.gemspec",
32
41
  "ext/mkrf_conf.rb",
@@ -85,7 +94,11 @@ Gem::Specification.new do |s|
85
94
  "resources/weather.csv",
86
95
  "resources/weather.numeric.arff",
87
96
  "spec/bio-band_spec.rb",
88
- "spec/spec_helper.rb"
97
+ "spec/spec_helper.rb",
98
+ "test/helper.rb",
99
+ "test/test_apacheCorrelation.rb",
100
+ "test/test_apacheInference.rb",
101
+ "test/test_bio-band.rb"
89
102
  ]
90
103
  s.homepage = "http://github.com/arrigonialberto86/bioruby-band"
91
104
  s.licenses = ["MIT"]
@@ -98,6 +111,7 @@ Gem::Specification.new do |s|
98
111
 
99
112
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
100
113
  s.add_development_dependency(%q<shoulda>, [">= 0"])
114
+ s.add_development_dependency(%q<test-unit>, [">= 0"])
101
115
  s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
102
116
  s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
103
117
  s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
@@ -110,6 +124,7 @@ Gem::Specification.new do |s|
110
124
  s.add_development_dependency(%q<bio>, [">= 1.4.2"])
111
125
  else
112
126
  s.add_dependency(%q<shoulda>, [">= 0"])
127
+ s.add_dependency(%q<test-unit>, [">= 0"])
113
128
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
114
129
  s.add_dependency(%q<bundler>, ["~> 1.3.5"])
115
130
  s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
@@ -123,6 +138,7 @@ Gem::Specification.new do |s|
123
138
  end
124
139
  else
125
140
  s.add_dependency(%q<shoulda>, [">= 0"])
141
+ s.add_dependency(%q<test-unit>, [">= 0"])
126
142
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
127
143
  s.add_dependency(%q<bundler>, ["~> 1.3.5"])
128
144
  s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
@@ -30,11 +30,12 @@ Then(/^I want to print a summary for the dataset$/) do
30
30
  end
31
31
 
32
32
  Then(/^I want to set the class index for attribute with index "(.*?)"$/) do |arg1|
33
- My_classifier.set_class_index(arg1.to_i)
33
+ My_classifier.set_class_index(arg1.to_i)
34
34
  end
35
35
 
36
36
  Then(/^I want to instantiate the classifier for my use$/) do
37
- My_classifier.new
37
+ classifier = My_classifier.new
38
+ classifier.cross_validate(2)
38
39
  end
39
40
 
40
41
 
@@ -1,16 +1,16 @@
1
1
  Feature: Using a Weka classifier
2
- In order to classify an instance data
3
- I want to use a Weka classifier
2
+ In order to classify an instance data
3
+ I want to use a Weka classifier
4
4
 
5
- Scenario: Understand options and usage
6
- Given the Weka "NaiveBayes" classifier
7
- Then I want to print a "description"
8
- And I want to print an options list
5
+ Scenario: Understand options and usage
6
+ Given the Weka "NaiveBayes" classifier
7
+ Then I want to print a "description"
8
+ And I want to print an options list
9
9
 
10
- Scenario: Use a classifier on a data instance
11
- Given the unsupervised Weka classifier "NaiveBayes"
12
- Then I want to set option "-K" for it
13
- And I want to set the dataset parsed from "weather.numeric.arff"
14
- And I want to print a summary for the dataset
15
- And I want to set the class index for attribute with index "0"
16
- And I want to instantiate the classifier for my use
10
+ Scenario: Use a classifier on a data instance
11
+ Given the unsupervised Weka classifier "NaiveBayes"
12
+ Then I want to set option "-K" for it
13
+ And I want to set the dataset parsed from "weather.numeric.arff"
14
+ And I want to print a summary for the dataset
15
+ And I want to set the class index for attribute with index "0"
16
+ And I want to instantiate the classifier for my use