bio-band 0.1.3 → 0.1.4

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (46)
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +5 -0
  3. data/Jarfile +1 -1
  4. data/Jarfile.lock +1 -1
  5. data/README.rdoc +2 -0
  6. data/Rakefile +2 -1
  7. data/VERSION +1 -1
  8. data/band_server/client.rb +35 -0
  9. data/band_server/client_alt.rb +35 -0
  10. data/band_server/first_dataset.csv +15 -0
  11. data/band_server/second_dataset.csv +15 -0
  12. data/band_server/simple_server.rb +95 -0
  13. data/band_server/third_dataset.csv +15 -0
  14. data/band_server/uploads/first_dataset.csv +15 -0
  15. data/band_server/uploads/second_dataset.csv +15 -0
  16. data/band_server/uploads/third_dataset.csv +15 -0
  17. data/bio-band.gemspec +19 -3
  18. data/features/step_definitions/weka_classifiers.rb +3 -2
  19. data/features/weka_classifiers.feature +13 -13
  20. data/lib/bio-band.rb +2 -0
  21. data/lib/bio-band/apache/stat/inference.rb +25 -19
  22. data/lib/bio-band/apache/stat/regression.rb +2 -2
  23. data/lib/bio-band/core/parser/parser.rb +6 -6
  24. data/lib/bio-band/core/type/instances.rb +15 -5
  25. data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
  26. data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
  27. data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
  28. data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
  29. data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
  30. data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
  31. data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
  32. data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
  33. data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
  34. data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
  35. data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
  36. data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
  37. data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
  38. data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
  39. data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
  40. data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
  41. data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
  42. data/test/helper.rb +18 -0
  43. data/test/test_apacheCorrelation.rb +22 -0
  44. data/test/test_apacheInference.rb +46 -0
  45. data/test/test_bio-band.rb +9 -0
  46. metadata +33 -2
data/Gemfile CHANGED
@@ -7,6 +7,7 @@ source "http://rubygems.org"
7
7
  # Include everything needed to run rake, tests, features, etc.
8
8
  group :development do
9
9
  gem "shoulda", ">= 0"
10
+ gem "test-unit", ">= 0"
10
11
  gem "rdoc", "~> 3.12"
11
12
  gem "bundler", "~> 1.3.5"
12
13
  gem "jeweler", "~> 1.8.4"
data/Gemfile.lock CHANGED
@@ -14,6 +14,8 @@ GEM
14
14
  multi_json (~> 1.3)
15
15
  diff-lcs (1.2.4)
16
16
  fastercsv (1.5.5)
17
+ gherkin (2.12.0)
18
+ multi_json (~> 1.3)
17
19
  gherkin (2.12.0-java)
18
20
  multi_json (~> 1.3)
19
21
  git (1.2.5)
@@ -26,6 +28,7 @@ GEM
26
28
  git (>= 1.2.5)
27
29
  rake
28
30
  rdoc
31
+ json (1.8.0)
29
32
  json (1.8.0-java)
30
33
  maven-tools (0.32.4)
31
34
  multi_json (1.7.4)
@@ -59,6 +62,7 @@ GEM
59
62
  multi_json (~> 1.0)
60
63
  simplecov-html (~> 0.7.1)
61
64
  simplecov-html (0.7.1)
65
+ test-unit (2.5.5)
62
66
  thor (0.18.1)
63
67
  transaction-simple (1.4.0.2)
64
68
 
@@ -77,3 +81,4 @@ DEPENDENCIES
77
81
  ruport
78
82
  shoulda
79
83
  simplecov
84
+ test-unit
data/Jarfile CHANGED
@@ -1,4 +1,4 @@
1
- jar 'nz.ac.waikato.cms.weka:weka-stable','3.6.9'
1
+ jar 'nz.ac.waikato.cms.weka:weka-stable','3.6.10'
2
2
  jar 'org.apache.commons:commons-math3','3.0'
3
3
  jar 'junit:junit','3.8.1'
4
4
  jar 'mysql:mysql-connector-java','5.1.6'
data/Jarfile.lock CHANGED
@@ -1,4 +1,4 @@
1
- nz.ac.waikato.cms.weka:weka-stable:jar:3.6.9
1
+ nz.ac.waikato.cms.weka:weka-stable:jar:3.6.10
2
2
  net.sf.squirrel-sql.thirdparty-non-maven:java-cup:jar:0.11a
3
3
  org.apache.commons:commons-math3:jar:3.0
4
4
  junit:junit:jar:3.8.1
data/README.rdoc CHANGED
@@ -19,6 +19,8 @@ Otherwise use:
19
19
 
20
20
  == Usage
21
21
 
22
+ See the Wiki section for in depth information on how to use this gem
23
+
22
24
  == Developers
23
25
 
24
26
  To use the library
data/Rakefile CHANGED
@@ -29,7 +29,8 @@ Jeweler::RubygemsDotOrgTasks.new
29
29
  require 'rake/testtask'
30
30
  Rake::TestTask.new(:test) do |test|
31
31
  test.libs << 'lib' << 'test'
32
- test.pattern = 'test/**/test_*.rb'
32
+ # test.pattern = 'test/**/test_*.rb'
33
+ test.test_files = FileList['test/test_*.rb']
33
34
  test.verbose = true
34
35
  end
35
36
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
data/band_server/client.rb ADDED
@@ -0,0 +1,35 @@
1
+ require 'rest_client'
2
+ require 'uri'
3
+
4
+ address = 'http://localhost:4567/'
5
+
6
+ response_old = RestClient.post "#{address}create_dataset", { 'first' => { 'dataset' => File.new('first_dataset.csv'),'dataset_name' => 'dataset_1'},
7
+ 'second' => {'dataset' => File.new('second_dataset.csv'),'dataset_name' => 'dataset_2'} }
8
+
9
+
10
+ puts 'List of the parsed datasets:'
11
+ response_list = RestClient.get "#{address}datasets", :cookies => response_old.cookies
12
+ puts response_list
13
+
14
+ # use filter on the training dataset
15
+ RestClient.get "#{address}filter?filter_name=Filter::Unsupervised::Attribute::Discretize&&filter_options=-K_3&&dataset_in=dataset_1&&dataset_out=filtered_training_set",:cookies => response_old.cookies
16
+
17
+ puts 'List of the datasets + the filtered training set'
18
+ response_list = RestClient.get "#{address}datasets", :cookies => response_old.cookies
19
+ puts response_list
20
+
21
+ puts 'Print a single dataset:'
22
+ puts RestClient.get "#{address}datasets/dataset_1", :cookies => response_old.cookies
23
+
24
+
25
+ puts 'Train a classifier on the dataset_1:'
26
+ puts RestClient.get "#{address}train_classifier?classifier_name=Classifier::Bayes::NaiveBayes&&classifier_options=-K&&dataset_in=dataset_1&&model_name=naive_classifier&&class_index=0",:cookies => response_old.cookies
27
+
28
+ puts 'List all the trained classifiers:'
29
+ puts RestClient.get "#{address}classifiers", :cookies => response_old.cookies
30
+
31
+ puts 'Show classifier statistics:'
32
+ puts RestClient.get "#{address}classifiers/naive_classifier", :cookies => response_old.cookies
33
+
34
+ puts 'Crossvalidate the classifier:'
35
+ puts RestClient.get "#{address}crossvalidate_classifier?model_name=naive_classifier&&fold=5", :cookies => response_old.cookies
data/band_server/client_alt.rb ADDED
@@ -0,0 +1,35 @@
1
+ require 'rest_client'
2
+ require 'uri'
3
+
4
+ address = 'http://localhost:4567/'
5
+
6
+ response_old = RestClient.post "#{address}create_dataset", { 'first' => { 'dataset' => File.new('first_dataset.csv'),'dataset_name' => 'dataset_1'},
7
+ #'second' => {'dataset' => File.new('second_dataset.csv'),'dataset_name' => 'dataset_2'},
8
+ 'third' => {'dataset' => File.new('third_dataset.csv'),'dataset_name' => 'dataset_3'} }
9
+
10
+ puts 'List of the parsed datasets:'
11
+ response_list = RestClient.get "#{address}datasets", :cookies => response_old.cookies
12
+ puts response_list
13
+
14
+ # use filter on the training dataset
15
+ RestClient.get "#{address}filter?filter_name=Filter::Unsupervised::Attribute::Discretize&&filter_options=-K_3&&dataset_in=dataset_1&&dataset_out=filtered_training_set",:cookies => response_old.cookies
16
+
17
+ puts 'List of the datasets + the filtered training set'
18
+ response_list = RestClient.get "#{address}datasets", :cookies => response_old.cookies
19
+ puts response_list
20
+
21
+ puts 'Print a single dataset:'
22
+ puts RestClient.get "#{address}datasets/dataset_1", :cookies => response_old.cookies
23
+
24
+
25
+ puts 'Train a classifier on the dataset_1:'
26
+ puts RestClient.get "#{address}train_classifier?classifier_name=Classifier::Bayes::NaiveBayes&&classifier_options=-K&&dataset_in=dataset_1&&model_name=naive_classifier&&class_index=0",:cookies => response_old.cookies
27
+
28
+ puts 'List all the trained classifiers:'
29
+ puts RestClient.get "#{address}classifiers", :cookies => response_old.cookies
30
+
31
+ puts 'Show classifier statistics:'
32
+ puts RestClient.get "#{address}classifiers/naive_classifier", :cookies => response_old.cookies
33
+
34
+ puts 'Crossvalidate the classifier:'
35
+ puts RestClient.get "#{address}crossvalidate_classifier?model_name=naive_classifier&&fold=5", :cookies => response_old.cookies
data/band_server/first_dataset.csv ADDED
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
data/band_server/second_dataset.csv ADDED
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
data/band_server/simple_server.rb ADDED
@@ -0,0 +1,95 @@
1
+ #### This simple example server should run on JRuby, while the client can be run
2
+ ### either on Ruby or JRuby
3
+
4
+
5
+ require 'sinatra/base'
6
+ require 'bio-band'
7
+ require 'JSON'
8
+
9
+ class Trial < Sinatra::Base
10
+ use Rack::Session::Pool
11
+
12
+ # list created datasets or classifiers
13
+ # get '/list/:data_class' do
14
+ # session[params[:data_class]].keys.join(",\t")
15
+ # end
16
+
17
+ get '/datasets' do
18
+ session[:datasets].keys.join(",\t")
19
+ end
20
+ get '/classifiers' do
21
+ session[:classifiers].keys.join(",\t")
22
+ end
23
+
24
+ # return a dataset using Json format
25
+ get '/datasets/:dataset_in' do
26
+ # force session to start by writing in the Hash ## ODDDDD
27
+ session[:init] = true
28
+ session[:datasets][params[:dataset_in]].to_json_format
29
+ end
30
+
31
+ get '/classifiers/:classifier_in' do
32
+ session[:init] = true
33
+ session[:classifiers][params[:classifier_in]]
34
+ end
35
+
36
+ get '/summary/:dataset_in' do
37
+ session[:init] = true
38
+ dataset_in = session[:datasets][params[:dataset_in]]
39
+ return dataset_in.summary[0].to_s,dataset_in.summary[1].to_s
40
+ end
41
+
42
+ post "/create_dataset" do
43
+ params.each_key do |key|
44
+ File.open('uploads/' + params[key]['dataset'][:filename], "w") do |f|
45
+ f.write(params[key]['dataset'][:tempfile].read)
46
+ end
47
+ session[:datasets] ||= Hash.new
48
+ session[:classifiers] ||= Hash.new
49
+ session[:datasets][params[key]['dataset_name']] = Core::Parser::parse_CSV('uploads/' + params[key]['dataset'][:filename])
50
+ end
51
+ return "The dataset was successfully created!"
52
+ end
53
+
54
+ get '/filter' do
55
+ session[:init] = true
56
+ $dataset_in = session[:datasets][params[:dataset_in]]
57
+ filter = eval("Weka::#{params[:filter_name]}.new")
58
+ $params = params
59
+ filter.set do
60
+ filter_options $params[:filter_options].split("_").join(" ")
61
+ data $dataset_in
62
+ end
63
+ filtered_data = filter.use
64
+ session[:datasets][params[:dataset_out]] = filtered_data
65
+ end
66
+
67
+ get '/train_classifier' do
68
+ session[:init] = true
69
+ @dataset_in = session[:datasets][params[:dataset_in]]
70
+ classifier = eval("Weka::#{params[:classifier_name]}.new")
71
+ classifier.set_options params[:classifier_options].split("_").join(" ") if params[:classifier_options]
72
+ @dataset_in.setClassIndex(params[:class_index].to_i)
73
+ classifier.set_data @dataset_in
74
+ classifier.build_classifier(@dataset_in)
75
+ session[:classifiers][params[:model_name]] = classifier
76
+ classifier.to_s
77
+ end
78
+
79
+ get '/crossvalidate_classifier' do
80
+ session[:init] = true
81
+ classifier = session[:classifiers][params[:model_name]]
82
+ fold = params[:fold].to_i
83
+ eval = Weka::Classifier::Evaluation.new classifier.instance_eval("@dataset")
84
+ eval.crossValidateModel(classifier.class.new, classifier.instance_eval("@dataset"), fold.to_java(:int), Random.new(1))
85
+ eval.summary
86
+ end
87
+
88
+ post '/clear' do
89
+ session.clear
90
+ puts "Session is now cleared"
91
+ end
92
+
93
+ end
94
+
95
+ Trial.run!
data/band_server/third_dataset.csv ADDED
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
data/band_server/uploads/first_dataset.csv ADDED
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
data/band_server/uploads/second_dataset.csv ADDED
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
data/band_server/uploads/third_dataset.csv ADDED
@@ -0,0 +1,15 @@
1
+ outlook,temperature,humidity,windy,play
2
+ sunny,85,85,FALSE,no
3
+ sunny,80,90,TRUE,no
4
+ overcast,83,86,FALSE,yes
5
+ rainy,70,96,FALSE,yes
6
+ rainy,68,80,FALSE,yes
7
+ rainy,65,70,TRUE,no
8
+ overcast,64,65,TRUE,yes
9
+ sunny,72,95,FALSE,no
10
+ sunny,69,70,FALSE,yes
11
+ rainy,75,80,FALSE,yes
12
+ sunny,75,70,TRUE,yes
13
+ overcast,72,90,TRUE,yes
14
+ overcast,81,75,FALSE,yes
15
+ rainy,71,91,TRUE,no
data/bio-band.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-band"
8
- s.version = "0.1.3"
8
+ s.version = "0.1.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["arrigonialberto86"]
12
- s.date = "2013-09-02"
12
+ s.date = "2013-09-11"
13
13
  s.description = "Data mining and machine learning algorithms for JRuby "
14
14
  s.email = "arrigonialberto86@gmail.com"
15
15
  s.executables = ["bio-band"]
@@ -27,6 +27,15 @@ Gem::Specification.new do |s|
27
27
  "README.rdoc",
28
28
  "Rakefile",
29
29
  "VERSION",
30
+ "band_server/client.rb",
31
+ "band_server/client_alt.rb",
32
+ "band_server/first_dataset.csv",
33
+ "band_server/second_dataset.csv",
34
+ "band_server/simple_server.rb",
35
+ "band_server/third_dataset.csv",
36
+ "band_server/uploads/first_dataset.csv",
37
+ "band_server/uploads/second_dataset.csv",
38
+ "band_server/uploads/third_dataset.csv",
30
39
  "bin/bio-band",
31
40
  "bio-band.gemspec",
32
41
  "ext/mkrf_conf.rb",
@@ -85,7 +94,11 @@ Gem::Specification.new do |s|
85
94
  "resources/weather.csv",
86
95
  "resources/weather.numeric.arff",
87
96
  "spec/bio-band_spec.rb",
88
- "spec/spec_helper.rb"
97
+ "spec/spec_helper.rb",
98
+ "test/helper.rb",
99
+ "test/test_apacheCorrelation.rb",
100
+ "test/test_apacheInference.rb",
101
+ "test/test_bio-band.rb"
89
102
  ]
90
103
  s.homepage = "http://github.com/arrigonialberto86/bioruby-band"
91
104
  s.licenses = ["MIT"]
@@ -98,6 +111,7 @@ Gem::Specification.new do |s|
98
111
 
99
112
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
100
113
  s.add_development_dependency(%q<shoulda>, [">= 0"])
114
+ s.add_development_dependency(%q<test-unit>, [">= 0"])
101
115
  s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
102
116
  s.add_development_dependency(%q<bundler>, ["~> 1.3.5"])
103
117
  s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
@@ -110,6 +124,7 @@ Gem::Specification.new do |s|
110
124
  s.add_development_dependency(%q<bio>, [">= 1.4.2"])
111
125
  else
112
126
  s.add_dependency(%q<shoulda>, [">= 0"])
127
+ s.add_dependency(%q<test-unit>, [">= 0"])
113
128
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
114
129
  s.add_dependency(%q<bundler>, ["~> 1.3.5"])
115
130
  s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
@@ -123,6 +138,7 @@ Gem::Specification.new do |s|
123
138
  end
124
139
  else
125
140
  s.add_dependency(%q<shoulda>, [">= 0"])
141
+ s.add_dependency(%q<test-unit>, [">= 0"])
126
142
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
127
143
  s.add_dependency(%q<bundler>, ["~> 1.3.5"])
128
144
  s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
data/features/step_definitions/weka_classifiers.rb CHANGED
@@ -30,11 +30,12 @@ Then(/^I want to print a summary for the dataset$/) do
30
30
  end
31
31
 
32
32
  Then(/^I want to set the class index for attribute with index "(.*?)"$/) do |arg1|
33
- My_classifier.set_class_index(arg1.to_i)
33
+ My_classifier.set_class_index(arg1.to_i)
34
34
  end
35
35
 
36
36
  Then(/^I want to instantiate the classifier for my use$/) do
37
- My_classifier.new
37
+ classifier = My_classifier.new
38
+ classifier.cross_validate(2)
38
39
  end
39
40
 
40
41
 
data/features/weka_classifiers.feature CHANGED
@@ -1,16 +1,16 @@
1
1
  Feature: Using a Weka classifier
2
- In order to classify an instance data
3
- I want to use a Weka classifier
2
+ In order to classify an instance data
3
+ I want to use a Weka classifier
4
4
 
5
- Scenario: Understand options and usage
6
- Given the Weka "NaiveBayes" classifier
7
- Then I want to print a "description"
8
- And I want to print an options list
5
+ Scenario: Understand options and usage
6
+ Given the Weka "NaiveBayes" classifier
7
+ Then I want to print a "description"
8
+ And I want to print an options list
9
9
 
10
- Scenario: Use a classifier on a data instance
11
- Given the unsupervised Weka classifier "NaiveBayes"
12
- Then I want to set option "-K" for it
13
- And I want to set the dataset parsed from "weather.numeric.arff"
14
- And I want to print a summary for the dataset
15
- And I want to set the class index for attribute with index "0"
16
- And I want to instantiate the classifier for my use
10
+ Scenario: Use a classifier on a data instance
11
+ Given the unsupervised Weka classifier "NaiveBayes"
12
+ Then I want to set option "-K" for it
13
+ And I want to set the dataset parsed from "weather.numeric.arff"
14
+ And I want to print a summary for the dataset
15
+ And I want to set the class index for attribute with index "0"
16
+ And I want to instantiate the classifier for my use