weka 0.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +15 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +4 -0
  7. data/Jarfile +1 -0
  8. data/Jarfile.lock +17 -0
  9. data/MIT-LICENSE.txt +19 -0
  10. data/README.md +687 -0
  11. data/Rakefile +21 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +7 -0
  14. data/lib/weka.rb +32 -0
  15. data/lib/weka/attribute_selection.rb +1 -0
  16. data/lib/weka/attribute_selection/attribute_selection.rb +11 -0
  17. data/lib/weka/attribute_selection/evaluator.rb +29 -0
  18. data/lib/weka/attribute_selection/search.rb +14 -0
  19. data/lib/weka/class_builder.rb +88 -0
  20. data/lib/weka/classifiers.rb +1 -0
  21. data/lib/weka/classifiers/bayes.rb +16 -0
  22. data/lib/weka/classifiers/evaluation.rb +37 -0
  23. data/lib/weka/classifiers/functions.rb +21 -0
  24. data/lib/weka/classifiers/lazy.rb +13 -0
  25. data/lib/weka/classifiers/meta.rb +29 -0
  26. data/lib/weka/classifiers/rules.rb +16 -0
  27. data/lib/weka/classifiers/trees.rb +18 -0
  28. data/lib/weka/classifiers/utils.rb +138 -0
  29. data/lib/weka/clusterers.rb +16 -0
  30. data/lib/weka/clusterers/cluster_evaluation.rb +14 -0
  31. data/lib/weka/clusterers/utils.rb +103 -0
  32. data/lib/weka/concerns.rb +18 -0
  33. data/lib/weka/concerns/buildable.rb +19 -0
  34. data/lib/weka/concerns/describable.rb +30 -0
  35. data/lib/weka/concerns/optionizable.rb +49 -0
  36. data/lib/weka/concerns/persistent.rb +16 -0
  37. data/lib/weka/core.rb +6 -0
  38. data/lib/weka/core/attribute.rb +24 -0
  39. data/lib/weka/core/converters.rb +17 -0
  40. data/lib/weka/core/dense_instance.rb +68 -0
  41. data/lib/weka/core/instances.rb +199 -0
  42. data/lib/weka/core/loader.rb +32 -0
  43. data/lib/weka/core/saver.rb +34 -0
  44. data/lib/weka/exceptions.rb +6 -0
  45. data/lib/weka/filters.rb +1 -0
  46. data/lib/weka/filters/filter.rb +9 -0
  47. data/lib/weka/filters/supervised/attribute.rb +26 -0
  48. data/lib/weka/filters/supervised/instance.rb +16 -0
  49. data/lib/weka/filters/unsupervised/attribute.rb +67 -0
  50. data/lib/weka/filters/unsupervised/instance.rb +25 -0
  51. data/lib/weka/filters/utils.rb +17 -0
  52. data/lib/weka/jars.rb +19 -0
  53. data/lib/weka/version.rb +3 -0
  54. data/weka.gemspec +32 -0
  55. metadata +183 -0
@@ -0,0 +1,21 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Expose the RSpec suite as `rake spec`.
RSpec::Core::RakeTask.new(:spec)

# The default task and `rake install` both list :prepare as a prerequisite.
task default: :prepare
task install: :prepare

desc 'Install weka jars & dependencies'
task :prepare do
  require 'lock_jar'

  # The jars go into a `jars` directory next to this Rakefile.
  rakefile_dir = File.expand_path(File.dirname(__FILE__))

  LockJar.install('Jarfile.lock', local_repo: File.join(rakefile_dir, 'jars'))
end

desc "Start an irb session with the gem loaded"
task(:irb) { sh 'irb -I ./lib -r weka' }
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

require 'bundler/setup'
require 'weka'

# Interactive console with the gem preloaded. Fixtures or other
# initialization code that makes experimenting easier can be added here.

# To use Pry instead of IRB, add pry to the Gemfile and replace the two
# lines at the bottom with:
#   require 'pry'
#   Pry.start

require 'irb'
IRB.start
@@ -0,0 +1,7 @@
1
#!/bin/bash
# Development bootstrap: installs the gems listed in the Gemfile.

# Abort on the first failing command, on use of an unset variable, and when
# any command of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'

bundle install

# Add any other automated setup steps below.
@@ -0,0 +1,32 @@
1
+ require 'java'
2
+ require 'weka/jars'
3
+ require 'weka/version'
4
+ require 'weka/exceptions'
5
+
6
# Top-level namespace of the gem. Besides mixing in Jars it offers
# Weka.require_all, which the per-package entry files use to load every file
# of their package.
module Weka
  include Jars

  class << self
    # Requires every Ruby file below lib/weka/<type>/ (recursively).
    #
    # The package's utils.rb, when present, is required first; the remaining
    # files follow in sorted path order. Sorting matters because the order in
    # which Dir[] returns matches is not guaranteed and may differ between
    # platforms, which would make the load order non-deterministic.
    #
    # @param type [Symbol, String] name of the package directory,
    #   e.g. :classifiers
    # @return [Array<String>] the absolute file paths in load order
    def require_all(type)
      files = Dir[File.expand_path("../weka/#{type}/**/*.rb", __FILE__)].sort
      utils = File.expand_path("../weka/#{type}/utils.rb", __FILE__)

      move_to_head(utils, files).each { |file| require file }
    end

    private

    # Moves +file+ to the front of +files+ (in place) if the list contains it;
    # otherwise the list is returned untouched.
    #
    # @param file [String] path to move
    # @param files [Array<String>] list of paths, modified in place
    # @return [Array<String>] the same +files+ array
    def move_to_head(file, files)
      files.unshift(file) if files.delete(file)
      files
    end
  end
end
27
+
28
+ require 'weka/core'
29
+ require 'weka/classifiers'
30
+ require 'weka/filters'
31
+ require 'weka/clusterers'
32
+ require 'weka/attribute_selection'
@@ -0,0 +1 @@
1
+ Weka.require_all :attribute_selection
@@ -0,0 +1,11 @@
1
module Weka
  module AttributeSelection
    java_import 'weka.attributeSelection.AttributeSelection'

    # Reopens the imported Java class to add shorter, Ruby-style method names.
    class AttributeSelection
      alias summary to_results_string
      alias selected_attributes_count number_attributes_selected
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module AttributeSelection
    # Attribute evaluators imported from the weka.attributeSelection Java
    # package. Evaluators whose Java name ends in "Eval" are additionally
    # available under a name without that suffix.
    module Evaluator
      include ClassBuilder

      build_classes(
        :CfsSubsetEval,
        :CorrelationAttributeEval,
        :GainRatioAttributeEval,
        :InfoGainAttributeEval,
        :OneRAttributeEval,
        :PrincipalComponents,
        :ReliefFAttributeEval,
        :SymmetricalUncertAttributeEval,
        :WrapperSubsetEval,
        weka_module: 'weka.attributeSelection'
      )

      # Suffix-free subclasses of the imported evaluators.
      class CfsSubset < CfsSubsetEval
      end

      class CorrelationAttribute < CorrelationAttributeEval
      end

      class GainRatioAttribute < GainRatioAttributeEval
      end

      class InfoGainAttribute < InfoGainAttributeEval
      end

      class OneRAttribute < OneRAttributeEval
      end

      class ReliefFAttribute < ReliefFAttributeEval
      end

      class SymmetricalUncertAttribute < SymmetricalUncertAttributeEval
      end

      class WrapperSubset < WrapperSubsetEval
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module AttributeSelection
    # Search strategies imported from the weka.attributeSelection Java package.
    module Search
      include ClassBuilder

      build_classes(
        :GreedyStepwise,
        :Ranker,
        :BestFirst,
        weka_module: 'weka.attributeSelection'
      )
    end
  end
end
@@ -0,0 +1,88 @@
1
+ require 'active_support/concern'
2
+ require 'active_support/core_ext/string'
3
+ require 'active_support/core_ext/module'
4
+ require 'weka/concerns'
5
+
6
module Weka
  # Mixin that imports Weka's Java classes and reopens them as Ruby classes
  # inside the including module.
  #
  # Including it adds the macros .build_class and .build_classes to the
  # including module. Unless an explicit Java package is passed, the package
  # is derived from the including module's name, e.g. a class built inside
  # Weka::Classifiers::Bayes is imported from weka.classifiers.bayes.
  module ClassBuilder
    extend ActiveSupport::Concern

    module ClassMethods
      # Imports one Java class and reopens it inside the calling module.
      #
      # @param class_name [Symbol, String] simple name of the Java class
      # @param weka_module [String, nil] Java package to import from; derived
      #   from the calling module's name when nil
      # @param include_concerns [Boolean] whether the class includes Concerns
      def build_class(class_name, weka_module: nil, include_concerns: true)
        java_import java_class_path(class_name, weka_module)
        define_class(class_name, include_concerns: include_concerns)
      end

      # Calls #build_class for each given class name with the same options.
      #
      # @param class_names [Array<Symbol, String>] simple Java class names
      # @param weka_module [String, nil] see #build_class
      # @param include_concerns [Boolean] see #build_class
      def build_classes(*class_names, weka_module: nil, include_concerns: true)
        class_names.each do |name|
          build_class(name, weka_module: weka_module, include_concerns: include_concerns)
        end
      end

      private

      # Fully qualified Java class name, either from the explicit package or
      # from the calling module's name.
      def java_class_path(class_name, weka_module)
        if weka_module
          "#{weka_module}.#{class_name}"
        else
          [*java_super_modules, java_including_module, class_name].compact.join('.')
        end
      end

      # Names of the enclosing modules with their first character downcased,
      # i.e. in the form used for Java package segments.
      def java_super_modules
        super_modules.split('::').map do |name|
          downcase_first_char(name)
        end
      end

      # For a module nested one level deep ("A::B") its own name, otherwise
      # the name of its parent namespace.
      def super_modules
        toplevel_module? ? name : name.deconstantize
      end

      # Package segment for the calling module itself; nil for a module that
      # counts as toplevel (see #toplevel_module?).
      def java_including_module
        downcase_first_char(including_module)
      end

      def including_module
        name.demodulize unless toplevel_module?
      end

      # True when the module name contains exactly one "::",
      # e.g. "Weka::Clusterers".
      def toplevel_module?
        name.scan('::').count == 1
      end

      # Reopens the (already imported) class inside the calling module and
      # mixes in Concerns and the package's Utils module as applicable.
      def define_class(class_name, include_concerns: true)
        module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
          class #{class_name}
            #{'include Concerns' if include_concerns}
            #{include_utils}
          end
        CLASS_DEFINITION
      end

      # Source line that includes the package's Utils module, or nil when the
      # package does not define one.
      def include_utils
        return unless utils_defined?
        "include #{utils}"
      end

      def utils_defined?
        # inherit = false: only a Utils constant defined directly in the
        # package module counts. With the default (true) the lookup would
        # also find an unrelated top-level ::Utils, and the generated
        # `include ::<package>::Utils` would then reference a constant that
        # does not exist.
        utils_super_modules.constantize.const_defined?(:Utils, false)
      end

      def utils
        "::#{utils_super_modules}::Utils"
      end

      # First two segments of #super_modules, e.g. "Weka::Classifiers".
      def utils_super_modules
        super_modules.split('::')[0..1].join('::')
      end

      # Returns the string with its first character downcased, or nil for a
      # blank (nil/empty) input.
      def downcase_first_char(string)
        return if string.blank?
        string[0].downcase + string[1..-1]
      end
    end

  end
end
@@ -0,0 +1 @@
1
+ Weka.require_all :classifiers
@@ -0,0 +1,16 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module Classifiers
    # Bayesian classifiers. No weka_module is passed, so ClassBuilder derives
    # the Java package from this module's name.
    module Bayes
      include ClassBuilder

      build_classes(
        :BayesNet,
        :NaiveBayes,
        :NaiveBayesMultinomial,
        :NaiveBayesMultinomialText,
        :NaiveBayesMultinomialUpdateable,
        :NaiveBayesUpdateable
      )
    end
  end
end
@@ -0,0 +1,37 @@
1
module Weka
  module Classifiers
    java_import 'weka.classifiers.Evaluation'

    # Reopens the imported Java class to add Ruby-style method aliases.
    class Evaluation
      # JRuby generates 'f_measure' for 'fMeasure' but 'weighted_fmeasure' for
      # 'weightedFMeasure'. Alias both so that either spelling is available
      # for both methods.
      alias weighted_f_measure weighted_fmeasure
      alias fmeasure f_measure

      # Textual reports.
      alias summary to_summary_string
      alias class_details to_class_details_string

      # Instance counts.
      alias instance_count num_instances
      alias correct_count correct
      alias incorrect_count incorrect
      alias unclassified_count unclassified

      # Percentages.
      alias correct_percentage pct_correct
      alias incorrect_percentage pct_incorrect
      alias unclassified_percentage pct_unclassified

      # Confusion-matrix counts and cost.
      alias true_negative_count num_true_negatives
      alias false_negative_count num_false_negatives
      alias true_positive_count num_true_positives
      alias false_positive_count num_false_positives
      alias average_cost avg_cost

      alias cumulative_margin_distribution to_cumulative_margin_distribution_string
    end

    # NOTE(review): JRuby proxy setting; presumably keeps Ruby-side state
    # attached to the wrapped Java objects -- confirm against the JRuby docs.
    Java::WekaClassifiers::Evaluation.__persistent__ = true
  end
end
@@ -0,0 +1,21 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module Classifiers
    # Function-based classifiers. No weka_module is passed, so ClassBuilder
    # derives the Java package from this module's name.
    module Functions
      include ClassBuilder

      build_classes(
        :GaussianProcesses,
        :LinearRegression,
        :Logistic,
        :MultilayerPerceptron,
        :SGD,
        :SGDText,
        :SimpleLinearRegression,
        :SimpleLogistic,
        :SMO,
        :SMOreg,
        :VotedPerceptron
      )
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module Classifiers
    # Lazy classifiers. No weka_module is passed, so ClassBuilder derives the
    # Java package from this module's name.
    module Lazy
      include ClassBuilder

      build_classes(
        :IBk,
        :KStar,
        :LWL
      )
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module Classifiers
    # Meta classifiers. No weka_module is passed, so ClassBuilder derives the
    # Java package from this module's name.
    module Meta
      include ClassBuilder

      build_classes(
        :AdaBoostM1,
        :AdditiveRegression,
        :AttributeSelectedClassifier,
        :Bagging,
        :ClassificationViaRegression,
        :CostSensitiveClassifier,
        :CVParameterSelection,
        :FilteredClassifier,
        :IterativeClassifierOptimizer,
        :LogitBoost,
        :MultiClassClassifier,
        :MultiClassClassifierUpdateable,
        :MultiScheme,
        :RandomCommittee,
        :RandomizableFilteredClassifier,
        :RandomSubSpace,
        :RegressionByDiscretization,
        :Stacking,
        :Vote
      )
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module Classifiers
    # Rule-based classifiers. No weka_module is passed, so ClassBuilder
    # derives the Java package from this module's name.
    module Rules
      include ClassBuilder

      build_classes(
        :DecisionTable,
        :JRip,
        :M5Rules,
        :OneR,
        :PART,
        :ZeroR
      )
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module Classifiers
    # Tree-based classifiers. No weka_module is passed, so ClassBuilder
    # derives the Java package from this module's name.
    module Trees
      include ClassBuilder

      build_classes(
        :DecisionStump,
        :HoeffdingTree,
        :J48,
        :LMT,
        :M5P,
        :RandomForest,
        :RandomTree,
        :REPTree
      )
    end
  end
end
@@ -0,0 +1,138 @@
1
+ require 'active_support/concern'
2
+ require 'active_support/core_ext/hash'
3
+ require 'weka/classifiers/evaluation'
4
+ require 'weka/core/instances'
5
+
6
module Weka
  module Classifiers
    # Convenience API mixed into the classifier classes.
    #
    # Each group of methods is only defined when the including class provides
    # the underlying method (build_classifier, classify_instance,
    # update_classifier, distribution_for_instance).
    module Utils
      extend ActiveSupport::Concern

      included do
        java_import 'java.util.Random'

        if instance_methods.include?(:build_classifier)
          attr_reader :training_instances

          # Builds the classifier from the given instances and remembers them
          # as training instances.
          #
          # @param instances [Weka::Core::Instances] training data
          # @raise [UnassignedClassError] if no class attribute is assigned
          # @return [self]
          def train_with_instances(instances)
            ensure_class_attribute_assigned!(instances)

            @training_instances = instances
            build_classifier(instances)

            self
          end

          # Cross-validates the classifier on its training instances, using a
          # java.util.Random seeded with 1.
          #
          # @param folds [#to_i] number of folds
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @return [Evaluation]
          def cross_validate(folds: 3)
            ensure_trained_with_instances!

            evaluation = Evaluation.new(training_instances)
            random     = Java::JavaUtil::Random.new(1)

            evaluation.cross_validate_model(self, training_instances, folds.to_i, random)
            evaluation
          end

          # Evaluates the classifier against separate test instances.
          #
          # @param test_instances [Weka::Core::Instances] test data
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @raise [UnassignedClassError] if the test instances have no class
          #   attribute assigned
          # @return [Evaluation]
          def evaluate(test_instances)
            ensure_trained_with_instances!
            ensure_class_attribute_assigned!(test_instances)

            evaluation = Evaluation.new(training_instances)
            evaluation.evaluate_model(self, test_instances)
            evaluation
          end
        end

        if instance_methods.include?(:classify_instance)
          # Classifies a single instance (or a list of attribute values).
          #
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @return the class attribute's value at the index returned by
          #   classify_instance
          def classify(instance_or_values)
            ensure_trained_with_instances!

            instance = classifiable_instance_from(instance_or_values)
            index    = classify_instance(instance)

            class_value_of_index(index)
          end
        end

        if instance_methods.include?(:update_classifier)
          # Adds the instance to the training instances and updates the
          # classifier with it.
          #
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @return [self]
          def add_training_instance(instance)
            # Same guard as the other public methods: without it an untrained
            # classifier fails with a NoMethodError on nil.
            ensure_trained_with_instances!

            training_instances.add(instance)
            update_classifier(instance)

            self
          end

          # Converts the given values into a DenseInstance (via the training
          # instances' #internal_values_of) and adds it as training instance.
          #
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @return [self]
          def add_training_data(data)
            ensure_trained_with_instances!

            values   = training_instances.internal_values_of(data)
            instance = Weka::Core::DenseInstance.new(values)
            add_training_instance(instance)
          end
        end

        if instance_methods.include?(:distribution_for_instance)
          # Returns the class distribution for a single instance (or a list
          # of attribute values).
          #
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @return [ActiveSupport::HashWithIndifferentAccess] class value =>
          #   distribution entry
          def distribution_for(instance_or_values)
            ensure_trained_with_instances!

            instance      = classifiable_instance_from(instance_or_values)
            distributions = distribution_for_instance(instance)

            class_distributions_from(distributions).with_indifferent_access
          end
        end

        private

        def ensure_class_attribute_assigned!(instances)
          return if instances.class_attribute_defined?

          error   = 'Class attribute is not assigned for Instances.'
          hint    = 'You can assign a class attribute with #class_attribute=.'
          message = "#{error} #{hint}"

          raise UnassignedClassError, message
        end

        def ensure_trained_with_instances!
          return unless training_instances.nil?

          error   = 'Classifier is not trained with Instances.'
          hint    = 'You can set the training instances with #train_with_instances.'
          message = "#{error} #{hint}"

          raise UnassignedTrainingInstancesError, message
        end

        # Wraps the given instance or values in a fresh Instances object that
        # has the training instances' attributes and class index, and returns
        # the resulting instance with its class value set to missing.
        def classifiable_instance_from(instance_or_values)
          attributes = training_instances.attributes
          instances  = Weka::Core::Instances.new(attributes: attributes)

          class_attribute = training_instances.class_attribute
          class_index     = training_instances.class_index
          instances.insert_attribute_at(class_attribute, class_index)

          instances.class_index = training_instances.class_index
          instances.add_instance(instance_or_values)

          instance = instances.first
          instance.set_class_missing
          instance
        end

        def class_value_of_index(index)
          training_instances.class_attribute.value(index)
        end

        # Maps each class value (as Symbol) to the distribution entry at the
        # same index.
        def class_distributions_from(distributions)
          class_values = training_instances.class_attribute.values

          distributions.each_with_index.each_with_object({}) do |(distribution, index), result|
            result[class_values[index].to_sym] = distribution
          end
        end
      end

    end
  end
end