weka 0.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +15 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +4 -0
  7. data/Jarfile +1 -0
  8. data/Jarfile.lock +17 -0
  9. data/MIT-LICENSE.txt +19 -0
  10. data/README.md +687 -0
  11. data/Rakefile +21 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +7 -0
  14. data/lib/weka.rb +32 -0
  15. data/lib/weka/attribute_selection.rb +1 -0
  16. data/lib/weka/attribute_selection/attribute_selection.rb +11 -0
  17. data/lib/weka/attribute_selection/evaluator.rb +29 -0
  18. data/lib/weka/attribute_selection/search.rb +14 -0
  19. data/lib/weka/class_builder.rb +88 -0
  20. data/lib/weka/classifiers.rb +1 -0
  21. data/lib/weka/classifiers/bayes.rb +16 -0
  22. data/lib/weka/classifiers/evaluation.rb +37 -0
  23. data/lib/weka/classifiers/functions.rb +21 -0
  24. data/lib/weka/classifiers/lazy.rb +13 -0
  25. data/lib/weka/classifiers/meta.rb +29 -0
  26. data/lib/weka/classifiers/rules.rb +16 -0
  27. data/lib/weka/classifiers/trees.rb +18 -0
  28. data/lib/weka/classifiers/utils.rb +138 -0
  29. data/lib/weka/clusterers.rb +16 -0
  30. data/lib/weka/clusterers/cluster_evaluation.rb +14 -0
  31. data/lib/weka/clusterers/utils.rb +103 -0
  32. data/lib/weka/concerns.rb +18 -0
  33. data/lib/weka/concerns/buildable.rb +19 -0
  34. data/lib/weka/concerns/describable.rb +30 -0
  35. data/lib/weka/concerns/optionizable.rb +49 -0
  36. data/lib/weka/concerns/persistent.rb +16 -0
  37. data/lib/weka/core.rb +6 -0
  38. data/lib/weka/core/attribute.rb +24 -0
  39. data/lib/weka/core/converters.rb +17 -0
  40. data/lib/weka/core/dense_instance.rb +68 -0
  41. data/lib/weka/core/instances.rb +199 -0
  42. data/lib/weka/core/loader.rb +32 -0
  43. data/lib/weka/core/saver.rb +34 -0
  44. data/lib/weka/exceptions.rb +6 -0
  45. data/lib/weka/filters.rb +1 -0
  46. data/lib/weka/filters/filter.rb +9 -0
  47. data/lib/weka/filters/supervised/attribute.rb +26 -0
  48. data/lib/weka/filters/supervised/instance.rb +16 -0
  49. data/lib/weka/filters/unsupervised/attribute.rb +67 -0
  50. data/lib/weka/filters/unsupervised/instance.rb +25 -0
  51. data/lib/weka/filters/utils.rb +17 -0
  52. data/lib/weka/jars.rb +19 -0
  53. data/lib/weka/version.rb +3 -0
  54. data/weka.gemspec +32 -0
  55. metadata +183 -0
@@ -0,0 +1,21 @@
# Rake tasks for developing and installing the weka gem.
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Defines the :spec task.
RSpec::Core::RakeTask.new(:spec)

# :prepare is a prerequisite of both the default task and :install.
task default: :prepare
task install: :prepare

desc 'Install weka jars & dependencies'
task :prepare do
  # lock_jar is only needed by this task, so it is loaded on demand.
  require 'lock_jar'

  project_root = File.expand_path('.', File.dirname(__FILE__))

  # Hand Jarfile.lock to LockJar with <project root>/jars as the local repo.
  LockJar.install('Jarfile.lock', local_repo: File.join(project_root, 'jars'))
end

desc "Start an irb session with the gem loaded"
task :irb do
  sh 'irb -I ./lib -r weka'
end
@@ -0,0 +1,14 @@
#!/usr/bin/env ruby

# Opens an interactive IRB session with the weka gem already required.
require 'bundler/setup'
require 'weka'

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier. A different console can be used as well.

# To use Pry instead (remember to add pry to the Gemfile), uncomment:
# require 'pry'
# Pry.start

require 'irb'
IRB.start
@@ -0,0 +1,7 @@
#!/bin/bash
# Development setup: installs the gems listed in the Gemfile.

# Abort on command failure, on use of unset variables, and on a failure
# anywhere inside a pipeline.
set -o errexit
set -o nounset
set -o pipefail

# Split words on newlines and tabs only.
IFS=$'\n\t'

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,32 @@
require 'java'
require 'weka/jars'
require 'weka/version'
require 'weka/exceptions'

# Top-level namespace of the gem.
#
# Besides mixing in Jars, it offers Weka.require_all, which the per-namespace
# entry files (e.g. weka/classifiers.rb) use to load every file of their
# sub-directory.
module Weka
  include Jars

  class << self
    # Requires every Ruby file found (recursively) below lib/weka/<type>/.
    #
    # The namespace's utils.rb, when present, is required first; all other
    # files follow in sorted path order. The glob result is sorted explicitly
    # because Dir[] does not guarantee an order on every Ruby version and
    # platform, which would make the load order differ between machines.
    #
    # @param type [Symbol, String] sub-directory of lib/weka to load,
    #   e.g. :classifiers
    # @return [Array<String>] the absolute paths in the order they were required
    def require_all(type)
      files = Dir[File.expand_path("../weka/#{type}/**/*.rb", __FILE__)].sort
      utils = File.expand_path("../weka/#{type}/utils.rb", __FILE__)

      move_to_head(utils, files).each { |file| require file }
    end

    private

    # Moves +file+ to the front of +files+ (in place) if it is contained in
    # the list; otherwise the list is left untouched.
    #
    # @return [Array<String>] the given +files+ array
    def move_to_head(file, files)
      return files unless files.delete(file)

      files.unshift(file)
    end
  end
end

require 'weka/core'
require 'weka/classifiers'
require 'weka/filters'
require 'weka/clusterers'
require 'weka/attribute_selection'
@@ -0,0 +1 @@
# Load every file below lib/weka/attribute_selection/.
Weka.require_all(:attribute_selection)
@@ -0,0 +1,11 @@
module Weka
  module AttributeSelection
    java_import 'weka.attributeSelection.AttributeSelection'

    # Reopens the class imported above to add shorter, Ruby-style names for
    # two of its methods.
    class AttributeSelection
      alias summary to_results_string
      alias selected_attributes_count number_attributes_selected
    end
  end
end
@@ -0,0 +1,29 @@
require 'weka/class_builder'

module Weka
  module AttributeSelection
    # Wrapper classes for the attribute evaluators of Weka's
    # weka.attributeSelection package.
    module Evaluator
      include ClassBuilder

      evaluator_names = %i[
        CfsSubsetEval
        CorrelationAttributeEval
        GainRatioAttributeEval
        InfoGainAttributeEval
        OneRAttributeEval
        PrincipalComponents
        ReliefFAttributeEval
        SymmetricalUncertAttributeEval
        WrapperSubsetEval
      ]

      # The Java package is passed explicitly instead of being derived from
      # this module's name.
      build_classes(*evaluator_names, weka_module: 'weka.attributeSelection')

      # Subclasses named like the evaluators above, minus the "Eval" suffix.
      class CfsSubset < CfsSubsetEval
      end

      class CorrelationAttribute < CorrelationAttributeEval
      end

      class GainRatioAttribute < GainRatioAttributeEval
      end

      class InfoGainAttribute < InfoGainAttributeEval
      end

      class OneRAttribute < OneRAttributeEval
      end

      class ReliefFAttribute < ReliefFAttributeEval
      end

      class SymmetricalUncertAttribute < SymmetricalUncertAttributeEval
      end

      class WrapperSubset < WrapperSubsetEval
      end
    end
  end
end
@@ -0,0 +1,14 @@
require 'weka/class_builder'

module Weka
  module AttributeSelection
    # Wrapper classes for search methods of Weka's weka.attributeSelection
    # package.
    module Search
      include ClassBuilder

      search_names = %i[GreedyStepwise Ranker BestFirst]

      # The Java package is passed explicitly instead of being derived from
      # this module's name.
      build_classes(*search_names, weka_module: 'weka.attributeSelection')
    end
  end
end
@@ -0,0 +1,88 @@
require 'active_support/concern'
require 'active_support/core_ext/string'
require 'active_support/core_ext/module'
require 'weka/concerns'

module Weka
  # Mixin that generates Ruby wrapper classes for Weka's Java classes.
  #
  # Including it into a module adds the macros build_class and build_classes
  # to that module. Each generated class is java_import-ed into the module and
  # then reopened to include Concerns (optional) and, if the surrounding
  # namespace defines one, its Utils module.
  #
  # Example: inside Weka::Classifiers::Bayes, `build_class :NaiveBayes`
  # imports "weka.classifiers.bayes.NaiveBayes".
  module ClassBuilder
    extend ActiveSupport::Concern

    module ClassMethods

      # Imports one Java class and reopens it as a Ruby class in this module.
      #
      # @param class_name [Symbol, String] simple name of the Java class
      # @param weka_module [String, nil] Java package to import from; when nil
      #   the package is derived from this module's name
      # @param include_concerns [Boolean] whether the class includes Concerns
      def build_class(class_name, weka_module: nil, include_concerns: true)
        java_import java_class_path(class_name, weka_module)
        define_class(class_name, include_concerns: include_concerns)
      end

      # Calls build_class for every given name with the same options.
      #
      # @param class_names [Array<Symbol, String>] simple Java class names
      def build_classes(*class_names, weka_module: nil, include_concerns: true)
        class_names.each do |name|
          build_class(name, weka_module: weka_module, include_concerns: include_concerns)
        end
      end

      private

      # Fully qualified Java class name for +class_name+: either
      # "<weka_module>.<class_name>" or a path derived from this module's name.
      def java_class_path(class_name, weka_module)
        if weka_module
          "#{weka_module}.#{class_name}"
        else
          [*java_super_modules, java_including_module, class_name].compact.join('.')
        end
      end

      # Segments of super_modules with their first character downcased,
      # e.g. "Weka::Classifiers" => ["weka", "classifiers"].
      def java_super_modules
        super_modules.split('::').map do |name|
          downcase_first_char(name)
        end
      end

      # This module's own name if it sits directly below the root namespace,
      # otherwise the name of the namespace enclosing it.
      def super_modules
        toplevel_module? ? self.name : self.name.deconstantize
      end

      # Last segment of this module's name with a downcased first character,
      # or nil for a module directly below the root namespace.
      def java_including_module
        downcase_first_char(including_module)
      end

      def including_module
        self.name.demodulize unless toplevel_module?
      end

      # True if the module name contains exactly one "::", e.g. "Weka::Foo".
      def toplevel_module?
        self.name.scan('::').count == 1
      end

      # Reopens the previously imported class inside this module and adds the
      # includes. __FILE__/__LINE__ are passed so that backtraces point here.
      def define_class(class_name, include_concerns: true)
        module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
          class #{class_name}
            #{'include Concerns' if include_concerns}
            #{include_utils}
          end
        CLASS_DEFINITION
      end

      # Source line including the namespace's Utils module, or nil if the
      # namespace does not define one.
      def include_utils
        return unless utils_defined?
        "include #{utils}"
      end

      # Whether the namespace itself defines a Utils constant.
      #
      # inherit is false on purpose: with the default (true), const_defined?
      # on a module also finds constants of Object, so an unrelated top-level
      # ::Utils would be reported as present and the generated
      # `include ::<namespace>::Utils` would then fail with a NameError.
      def utils_defined?
        utils_super_modules.constantize.const_defined?(:Utils, false)
      end

      def utils
        "::#{utils_super_modules}::Utils"
      end

      # First two segments of super_modules, e.g. "Weka::Filters".
      def utils_super_modules
        super_modules.split('::')[0..1].join('::')
      end

      # Returns +string+ with its first character downcased, or nil if it is
      # blank (nil or empty).
      def downcase_first_char(string)
        return if string.blank?
        string[0].downcase + string[1..-1]
      end
    end

  end
end
@@ -0,0 +1 @@
# Load every file below lib/weka/classifiers/.
Weka.require_all(:classifiers)
@@ -0,0 +1,16 @@
require 'weka/class_builder'

module Weka
  module Classifiers
    # Wrapper classes generated through ClassBuilder.build_classes. No
    # weka_module is given, so the Java package is derived from this module's
    # name.
    module Bayes
      include ClassBuilder

      classifier_names = %i[
        BayesNet
        NaiveBayes
        NaiveBayesMultinomial
        NaiveBayesMultinomialText
        NaiveBayesMultinomialUpdateable
        NaiveBayesUpdateable
      ]

      build_classes(*classifier_names)
    end
  end
end
@@ -0,0 +1,37 @@
module Weka
  module Classifiers
    java_import 'weka.classifiers.Evaluation'

    # Reopens the class imported above and adds more readable aliases for a
    # number of its methods.
    class Evaluation
      # JRuby derives 'f_measure' from 'fMeasure' but 'weighted_fmeasure' from
      # 'weightedFMeasure'. Offer both spellings (f_measure and fmeasure) for
      # each of them so the naming is consistent.
      alias weighted_f_measure weighted_fmeasure
      alias fmeasure f_measure

      # String reports
      alias summary to_summary_string
      alias class_details to_class_details_string
      alias cumulative_margin_distribution to_cumulative_margin_distribution_string

      # Counts
      alias instance_count num_instances
      alias correct_count correct
      alias incorrect_count incorrect
      alias unclassified_count unclassified

      # Percentages
      alias correct_percentage pct_correct
      alias incorrect_percentage pct_incorrect
      alias unclassified_percentage pct_unclassified

      alias true_negative_count num_true_negatives
      alias false_negative_count num_false_negatives
      alias true_positive_count num_true_positives
      alias false_positive_count num_false_positives
      alias average_cost avg_cost
    end

    # NOTE(review): presumably makes JRuby keep one Ruby proxy per Java
    # Evaluation object -- confirm against the JRuby documentation.
    Java::WekaClassifiers::Evaluation.__persistent__ = true
  end
end
@@ -0,0 +1,21 @@
require 'weka/class_builder'

module Weka
  module Classifiers
    # Wrapper classes generated through ClassBuilder.build_classes. No
    # weka_module is given, so the Java package is derived from this module's
    # name.
    module Functions
      include ClassBuilder

      classifier_names = %i[
        GaussianProcesses
        LinearRegression
        Logistic
        MultilayerPerceptron
        SGD
        SGDText
        SimpleLinearRegression
        SimpleLogistic
        SMO
        SMOreg
        VotedPerceptron
      ]

      build_classes(*classifier_names)
    end
  end
end
@@ -0,0 +1,13 @@
require 'weka/class_builder'

module Weka
  module Classifiers
    # Wrapper classes generated through ClassBuilder.build_classes. No
    # weka_module is given, so the Java package is derived from this module's
    # name.
    module Lazy
      include ClassBuilder

      classifier_names = %i[IBk KStar LWL]

      build_classes(*classifier_names)
    end
  end
end
@@ -0,0 +1,29 @@
require 'weka/class_builder'

module Weka
  module Classifiers
    # Wrapper classes generated through ClassBuilder.build_classes. No
    # weka_module is given, so the Java package is derived from this module's
    # name.
    module Meta
      include ClassBuilder

      classifier_names = %i[
        AdaBoostM1
        AdditiveRegression
        AttributeSelectedClassifier
        Bagging
        ClassificationViaRegression
        CostSensitiveClassifier
        CVParameterSelection
        FilteredClassifier
        IterativeClassifierOptimizer
        LogitBoost
        MultiClassClassifier
        MultiClassClassifierUpdateable
        MultiScheme
        RandomCommittee
        RandomizableFilteredClassifier
        RandomSubSpace
        RegressionByDiscretization
        Stacking
        Vote
      ]

      build_classes(*classifier_names)
    end
  end
end
@@ -0,0 +1,16 @@
require 'weka/class_builder'

module Weka
  module Classifiers
    # Wrapper classes generated through ClassBuilder.build_classes. No
    # weka_module is given, so the Java package is derived from this module's
    # name.
    module Rules
      include ClassBuilder

      classifier_names = %i[
        DecisionTable
        JRip
        M5Rules
        OneR
        PART
        ZeroR
      ]

      build_classes(*classifier_names)
    end
  end
end
@@ -0,0 +1,18 @@
require 'weka/class_builder'

module Weka
  module Classifiers
    # Wrapper classes generated through ClassBuilder.build_classes. No
    # weka_module is given, so the Java package is derived from this module's
    # name.
    module Trees
      include ClassBuilder

      classifier_names = %i[
        DecisionStump
        HoeffdingTree
        J48
        LMT
        M5P
        RandomForest
        RandomTree
        REPTree
      ]

      build_classes(*classifier_names)
    end
  end
end
@@ -0,0 +1,138 @@
require 'active_support/concern'
require 'active_support/core_ext/hash'
require 'weka/classifiers/evaluation'
require 'weka/core/instances'

module Weka
  module Classifiers
    # Convenience methods for classifier classes.
    #
    # The public methods are defined selectively when the module is included:
    # each group is only added if the including class has the underlying
    # method it relies on (build_classifier, classify_instance,
    # update_classifier, distribution_for_instance).
    module Utils
      extend ActiveSupport::Concern

      included do
        java_import 'java.util.Random'

        if instance_methods.include?(:build_classifier)
          attr_reader :training_instances

          # Remembers +instances+ as training set and builds the classifier
          # from them.
          #
          # @raise [UnassignedClassError] if +instances+ has no class attribute
          # @return [self]
          def train_with_instances(instances)
            ensure_class_attribute_assigned!(instances)

            @training_instances = instances
            build_classifier(instances)

            self
          end

          # Cross-validates the classifier on its training instances, using a
          # java.util.Random seeded with 1.
          #
          # @param folds [#to_i] number of folds
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @return [Evaluation]
          def cross_validate(folds: 3)
            ensure_trained_with_instances!

            evaluation = Evaluation.new(training_instances)
            random     = Java::JavaUtil::Random.new(1)

            evaluation.cross_validate_model(self, training_instances, folds.to_i, random)
            evaluation
          end

          # Evaluates the classifier against +test_instances+.
          #
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @raise [UnassignedClassError] if +test_instances+ has no class
          #   attribute
          # @return [Evaluation]
          def evaluate(test_instances)
            ensure_trained_with_instances!
            ensure_class_attribute_assigned!(test_instances)

            evaluation = Evaluation.new(training_instances)
            evaluation.evaluate_model(self, test_instances)
            evaluation
          end
        end

        if instance_methods.include?(:classify_instance)
          # Classifies an instance (or a list of attribute values) and returns
          # the value of the training set's class attribute at the index
          # returned by classify_instance.
          #
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          def classify(instance_or_values)
            ensure_trained_with_instances!

            instance = classifiable_instance_from(instance_or_values)
            index    = classify_instance(instance)

            class_value_of_index(index)
          end
        end

        if instance_methods.include?(:update_classifier)
          # Appends +instance+ to the training instances and updates the
          # classifier with it.
          #
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @return [self]
          def add_training_instance(instance)
            # Without this guard an untrained classifier would fail with a
            # NoMethodError on nil instead of the descriptive error used by
            # the other methods.
            ensure_trained_with_instances!

            training_instances.add(instance)
            update_classifier(instance)

            self
          end

          # Converts +data+ into a DenseInstance using the training instances'
          # internal values and adds it via add_training_instance.
          #
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @return [self]
          def add_training_data(data)
            ensure_trained_with_instances!

            values   = self.training_instances.internal_values_of(data)
            instance = Weka::Core::DenseInstance.new(values)
            add_training_instance(instance)
          end
        end

        if instance_methods.include?(:distribution_for_instance)
          # Returns the result of distribution_for_instance as a hash keyed by
          # class value; keys can be given as String or Symbol.
          #
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          # @return [ActiveSupport::HashWithIndifferentAccess]
          def distribution_for(instance_or_values)
            ensure_trained_with_instances!

            instance      = classifiable_instance_from(instance_or_values)
            distributions = distribution_for_instance(instance)

            class_distributions_from(distributions).with_indifferent_access
          end
        end

        private

        # Raises unless +instances+ reports a defined class attribute.
        def ensure_class_attribute_assigned!(instances)
          return if instances.class_attribute_defined?

          error   = 'Class attribute is not assigned for Instances.'
          hint    = 'You can assign a class attribute with #class_attribute=.'
          message = "#{error} #{hint}"

          raise UnassignedClassError, message
        end

        # Raises unless training instances have been set.
        def ensure_trained_with_instances!
          return unless training_instances.nil?

          error   = 'Classifier is not trained with Instances.'
          hint    = 'You can set the training instances with #train_with_instances.'
          message = "#{error} #{hint}"

          raise UnassignedTrainingInstancesError, message
        end

        # Builds a one-element Instances set that mirrors the training set
        # (attributes, class attribute, class index), adds the given instance
        # or values to it and returns that instance with its class value
        # marked as missing.
        def classifiable_instance_from(instance_or_values)
          attributes = training_instances.attributes
          instances  = Weka::Core::Instances.new(attributes: attributes)

          # NOTE(review): presumably #attributes leaves out the class
          # attribute, which is why it is inserted separately -- confirm
          # against Weka::Core::Instances.
          class_attribute = training_instances.class_attribute
          class_index     = training_instances.class_index
          instances.insert_attribute_at(class_attribute, class_index)

          instances.class_index = training_instances.class_index
          instances.add_instance(instance_or_values)

          instance = instances.first
          instance.set_class_missing
          instance
        end

        def class_value_of_index(index)
          training_instances.class_attribute.value(index)
        end

        # Maps each entry of +distributions+ to the class value at the same
        # position, using the class values (as Symbols) as hash keys.
        def class_distributions_from(distributions)
          class_values = training_instances.class_attribute.values

          distributions.each_with_index.each_with_object({}) do |(distribution, index), result|
            result[class_values[index].to_sym] = distribution
          end
        end
      end

    end
  end
end