weka 0.1.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +15 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Jarfile +1 -0
- data/Jarfile.lock +17 -0
- data/MIT-LICENSE.txt +19 -0
- data/README.md +687 -0
- data/Rakefile +21 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/weka.rb +32 -0
- data/lib/weka/attribute_selection.rb +1 -0
- data/lib/weka/attribute_selection/attribute_selection.rb +11 -0
- data/lib/weka/attribute_selection/evaluator.rb +29 -0
- data/lib/weka/attribute_selection/search.rb +14 -0
- data/lib/weka/class_builder.rb +88 -0
- data/lib/weka/classifiers.rb +1 -0
- data/lib/weka/classifiers/bayes.rb +16 -0
- data/lib/weka/classifiers/evaluation.rb +37 -0
- data/lib/weka/classifiers/functions.rb +21 -0
- data/lib/weka/classifiers/lazy.rb +13 -0
- data/lib/weka/classifiers/meta.rb +29 -0
- data/lib/weka/classifiers/rules.rb +16 -0
- data/lib/weka/classifiers/trees.rb +18 -0
- data/lib/weka/classifiers/utils.rb +138 -0
- data/lib/weka/clusterers.rb +16 -0
- data/lib/weka/clusterers/cluster_evaluation.rb +14 -0
- data/lib/weka/clusterers/utils.rb +103 -0
- data/lib/weka/concerns.rb +18 -0
- data/lib/weka/concerns/buildable.rb +19 -0
- data/lib/weka/concerns/describable.rb +30 -0
- data/lib/weka/concerns/optionizable.rb +49 -0
- data/lib/weka/concerns/persistent.rb +16 -0
- data/lib/weka/core.rb +6 -0
- data/lib/weka/core/attribute.rb +24 -0
- data/lib/weka/core/converters.rb +17 -0
- data/lib/weka/core/dense_instance.rb +68 -0
- data/lib/weka/core/instances.rb +199 -0
- data/lib/weka/core/loader.rb +32 -0
- data/lib/weka/core/saver.rb +34 -0
- data/lib/weka/exceptions.rb +6 -0
- data/lib/weka/filters.rb +1 -0
- data/lib/weka/filters/filter.rb +9 -0
- data/lib/weka/filters/supervised/attribute.rb +26 -0
- data/lib/weka/filters/supervised/instance.rb +16 -0
- data/lib/weka/filters/unsupervised/attribute.rb +67 -0
- data/lib/weka/filters/unsupervised/instance.rb +25 -0
- data/lib/weka/filters/utils.rb +17 -0
- data/lib/weka/jars.rb +19 -0
- data/lib/weka/version.rb +3 -0
- data/weka.gemspec +32 -0
- metadata +183 -0
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
3
|
+
|
4
|
+
# Registers the `spec` rake task.
RSpec::Core::RakeTask.new(:spec)

# `prepare` is a prerequisite of both the default task and `install`.
task default: :prepare
task install: :prepare
|
8
|
+
|
9
|
+
desc 'Install weka jars & dependencies'
task :prepare do
  # lock_jar is loaded lazily, only when this task actually runs.
  require 'lock_jar'

  # The jars are installed into a `jars` directory next to this Rakefile.
  jars_dir = File.expand_path('jars', File.dirname(__FILE__))

  LockJar.install('Jarfile.lock', local_repo: jars_dir)
end
|
17
|
+
|
18
|
+
desc "Start an irb session with the gem loaded"
# Shells out to irb with ./lib on the load path and `weka` pre-required.
task(:irb) { sh 'irb -I ./lib -r weka' }
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'bundler/setup'
require 'weka'

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier. A different console may be used as well.

# To use pry instead of irb, add pry to the Gemfile and uncomment:
# require "pry"
# Pry.start

require 'irb'
IRB.start
|
data/bin/setup
ADDED
data/lib/weka.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'weka/jars'
|
3
|
+
require 'weka/version'
|
4
|
+
require 'weka/exceptions'
|
5
|
+
|
6
|
+
module Weka
  include Jars

  class << self
    # Requires every Ruby file found (recursively) in lib/weka/<type>/.
    #
    # If a `utils.rb` exists directly in that directory it is required first,
    # ahead of all other files.
    #
    # type - the sub-directory name below lib/weka (e.g. :classifiers).
    def require_all(type)
      base_dir   = File.expand_path("../weka/#{type}", __FILE__)
      utils_file = File.join(base_dir, 'utils.rb')
      ruby_files = Dir[File.join(base_dir, '**', '*.rb')]

      move_to_head(utils_file, ruby_files).each { |path| require path }
    end

    private

    # Moves `file` to the front of `files` (in place) when it is present and
    # returns the list; the list is returned untouched otherwise.
    def move_to_head(file, files)
      moved = files.delete(file)
      files.unshift(moved) if moved
      files
    end
  end
end
|
27
|
+
|
28
|
+
require 'weka/core'
|
29
|
+
require 'weka/classifiers'
|
30
|
+
require 'weka/filters'
|
31
|
+
require 'weka/clusterers'
|
32
|
+
require 'weka/attribute_selection'
|
@@ -0,0 +1 @@
|
|
1
|
+
# Load every file below weka/attribute_selection/.
Weka.require_all(:attribute_selection)
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module AttributeSelection
    # Ruby wrappers for the attribute evaluators of Weka's
    # `weka.attributeSelection` Java package.
    module Evaluator
      include ClassBuilder

      build_classes(
        *%i[
          CfsSubsetEval
          CorrelationAttributeEval
          GainRatioAttributeEval
          InfoGainAttributeEval
          OneRAttributeEval
          PrincipalComponents
          ReliefFAttributeEval
          SymmetricalUncertAttributeEval
          WrapperSubsetEval
        ],
        weka_module: 'weka.attributeSelection'
      )

      # Subclasses offering the same evaluators under names without the
      # trailing "Eval".
      class CfsSubset < CfsSubsetEval
      end

      class CorrelationAttribute < CorrelationAttributeEval
      end

      class GainRatioAttribute < GainRatioAttributeEval
      end

      class InfoGainAttribute < InfoGainAttributeEval
      end

      class OneRAttribute < OneRAttributeEval
      end

      class ReliefFAttribute < ReliefFAttributeEval
      end

      class SymmetricalUncertAttribute < SymmetricalUncertAttributeEval
      end

      class WrapperSubset < WrapperSubsetEval
      end
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'active_support/core_ext/string'
|
3
|
+
require 'active_support/core_ext/module'
|
4
|
+
require 'weka/concerns'
|
5
|
+
|
6
|
+
module Weka
  # Mixin providing the class-level macros +build_class+ and +build_classes+,
  # which import a Weka Java class into the including module and reopen it to
  # mix in the gem's Ruby-side modules.
  module ClassBuilder
    extend ActiveSupport::Concern

    module ClassMethods

      # Imports a single Java class and decorates it.
      #
      # class_name       - name of the Java class (e.g. :NaiveBayes).
      # weka_module      - explicit Java package (e.g. 'weka.attributeSelection').
      #                    When nil, the package is derived from the name of the
      #                    including Ruby module.
      # include_concerns - whether `Concerns` is included into the class.
      def build_class(class_name, weka_module: nil, include_concerns: true)
        java_import java_class_path(class_name, weka_module)
        define_class(class_name, include_concerns: include_concerns)
      end

      # Same as #build_class, applied to every given class name with the same
      # options.
      def build_classes(*class_names, weka_module: nil, include_concerns: true)
        class_names.each do |name|
          build_class(name, weka_module: weka_module, include_concerns: include_concerns)
        end
      end

      private

      # Fully qualified Java class name for +class_name+.
      #
      # Without an explicit +weka_module+ the including module's name is
      # translated segment by segment, e.g. Weka::Classifiers::Bayes becomes
      # "weka.classifiers.bayes.<class_name>".
      def java_class_path(class_name, weka_module)
        if weka_module
          "#{weka_module}.#{class_name}"
        else
          [*java_super_modules, java_including_module, class_name].compact.join('.')
        end
      end

      # Segments of #super_modules with their first character downcased.
      def java_super_modules
        super_modules.split('::').map do |name|
          downcase_first_char(name)
        end
      end

      # The including module's full name if it is a "toplevel" module (see
      # #toplevel_module?), otherwise its name without the last segment.
      def super_modules
        toplevel_module? ? self.name : self.name.deconstantize
      end

      # Last segment of the including module's name with a downcased first
      # character; nil for toplevel modules.
      def java_including_module
        downcase_first_char(including_module)
      end

      def including_module
        self.name.demodulize unless toplevel_module?
      end

      # True when the module name contains exactly one '::'
      # (e.g. "Weka::Clusterers").
      def toplevel_module?
        self.name.scan('::').count == 1
      end

      # Reopens the imported class inside the including module and mixes in
      # `Concerns` (optionally) and the matching Utils module (if one exists).
      def define_class(class_name, include_concerns: true)
        module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
          class #{class_name}
            #{'include Concerns' if include_concerns}
            #{include_utils}
          end
        CLASS_DEFINITION
      end

      # The `include ...::Utils` source line, or nil when no Utils module is
      # defined (nil interpolates to an empty line in #define_class).
      def include_utils
        return unless utils_defined?
        "include #{utils}"
      end

      # Whether a Utils constant is defined directly in the first two name
      # segments (e.g. Weka::Classifiers).
      #
      # `inherit` is false on purpose: with the default, Module#const_defined?
      # also searches Object, so an unrelated top-level ::Utils constant would
      # be reported here and the generated `include` of the non-existent
      # namespaced Utils would then fail.
      def utils_defined?
        utils_super_modules.constantize.const_defined?(:Utils, false)
      end

      def utils
        "::#{utils_super_modules}::Utils"
      end

      # First two segments of #super_modules, e.g. "Weka::Classifiers".
      def utils_super_modules
        super_modules.split('::')[0..1].join('::')
      end

      # Returns +string+ with its first character downcased, or nil when the
      # string is blank.
      def downcase_first_char(string)
        return if string.blank?
        string[0].downcase + string[1..-1]
      end
    end

  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Load every file below weka/classifiers/.
Weka.require_all(:classifiers)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Classifiers
    # Ruby wrappers for Weka's Bayes classifiers. No weka_module is passed,
    # so ClassBuilder derives the Java package from this module's name.
    module Bayes
      include ClassBuilder

      build_classes(
        *%i[
          BayesNet
          NaiveBayes
          NaiveBayesMultinomial
          NaiveBayesMultinomialText
          NaiveBayesMultinomialUpdateable
          NaiveBayesUpdateable
        ]
      )
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Weka
  module Classifiers
    java_import 'weka.classifiers.Evaluation'

    # Reopens the imported Java Evaluation class to add Ruby-friendly aliases.
    class Evaluation

      # JRuby's generated snake_case names are inconsistent: 'fMeasure'
      # becomes 'f_measure' while 'weightedFMeasure' becomes
      # 'weighted_fmeasure'. Both spellings are offered for both methods.
      alias weighted_f_measure weighted_fmeasure
      alias fmeasure           f_measure

      # Textual reports
      alias summary       to_summary_string
      alias class_details to_class_details_string

      # Instance counts
      alias instance_count     num_instances
      alias correct_count      correct
      alias incorrect_count    incorrect
      alias unclassified_count unclassified

      # Percentages
      alias correct_percentage      pct_correct
      alias incorrect_percentage    pct_incorrect
      alias unclassified_percentage pct_unclassified

      # Confusion counts and cost
      alias true_negative_count  num_true_negatives
      alias false_negative_count num_false_negatives
      alias true_positive_count  num_true_positives
      alias false_positive_count num_false_positives
      alias average_cost         avg_cost

      alias cumulative_margin_distribution to_cumulative_margin_distribution_string
    end

    Java::WekaClassifiers::Evaluation.__persistent__ = true

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Classifiers
    # Ruby wrappers for Weka's function-based classifiers. No weka_module is
    # passed, so ClassBuilder derives the Java package from this module's name.
    module Functions
      include ClassBuilder

      build_classes(
        *%i[
          GaussianProcesses
          LinearRegression
          Logistic
          MultilayerPerceptron
          SGD
          SGDText
          SimpleLinearRegression
          SimpleLogistic
          SMO
          SMOreg
          VotedPerceptron
        ]
      )
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Classifiers
    # Ruby wrappers for Weka's meta classifiers. No weka_module is passed,
    # so ClassBuilder derives the Java package from this module's name.
    module Meta
      include ClassBuilder

      build_classes(
        *%i[
          AdaBoostM1
          AdditiveRegression
          AttributeSelectedClassifier
          Bagging
          ClassificationViaRegression
          CostSensitiveClassifier
          CVParameterSelection
          FilteredClassifier
          IterativeClassifierOptimizer
          LogitBoost
          MultiClassClassifier
          MultiClassClassifierUpdateable
          MultiScheme
          RandomCommittee
          RandomizableFilteredClassifier
          RandomSubSpace
          RegressionByDiscretization
          Stacking
          Vote
        ]
      )
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Classifiers
    # Ruby wrappers for Weka's tree classifiers. No weka_module is passed,
    # so ClassBuilder derives the Java package from this module's name.
    module Trees
      include ClassBuilder

      build_classes(
        *%i[
          DecisionStump
          HoeffdingTree
          J48
          LMT
          M5P
          RandomForest
          RandomTree
          REPTree
        ]
      )
    end
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'active_support/core_ext/hash'
|
3
|
+
require 'weka/classifiers/evaluation'
|
4
|
+
require 'weka/core/instances'
|
5
|
+
|
6
|
+
module Weka
  module Classifiers
    # Convenience API mixed into classifier classes.
    #
    # Each group of methods is only defined when the including class responds
    # to the underlying Weka method (build_classifier, classify_instance,
    # update_classifier, distribution_for_instance).
    module Utils
      extend ActiveSupport::Concern

      included do
        java_import 'java.util.Random'

        if instance_methods.include?(:build_classifier)
          attr_reader :training_instances

          # Remembers +instances+ as training data and builds the classifier
          # from them. Returns self to allow chaining.
          #
          # Raises UnassignedClassError if +instances+ has no class attribute.
          def train_with_instances(instances)
            ensure_class_attribute_assigned!(instances)

            @training_instances = instances
            build_classifier(instances)

            self
          end

          # Cross-validates the classifier on its training instances using
          # +folds+ folds and a fixed random seed of 1.
          # Returns the Evaluation.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet.
          def cross_validate(folds: 3)
            ensure_trained_with_instances!

            evaluation = Evaluation.new(training_instances)
            random     = Java::JavaUtil::Random.new(1)

            evaluation.cross_validate_model(self, training_instances, folds.to_i, random)
            evaluation
          end

          # Evaluates the classifier against +test_instances+.
          # Returns the Evaluation.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet and
          # UnassignedClassError if +test_instances+ has no class attribute.
          def evaluate(test_instances)
            ensure_trained_with_instances!
            ensure_class_attribute_assigned!(test_instances)

            evaluation = Evaluation.new(training_instances)
            evaluation.evaluate_model(self, test_instances)
            evaluation
          end
        end

        if instance_methods.include?(:classify_instance)
          # Classifies an instance (or raw values) and returns the class
          # value that the class attribute holds at the predicted index.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet.
          def classify(instance_or_values)
            ensure_trained_with_instances!

            instance = classifiable_instance_from(instance_or_values)
            index    = classify_instance(instance)

            class_value_of_index(index)
          end
        end

        if instance_methods.include?(:update_classifier)
          # Appends +instance+ to the training instances and updates the
          # classifier with it. Returns self.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet
          # (instead of failing with a NoMethodError on nil).
          def add_training_instance(instance)
            ensure_trained_with_instances!

            training_instances.add(instance)
            update_classifier(instance)

            self
          end

          # Converts +data+ to internal values via the training instances,
          # wraps them in a DenseInstance and adds that as a training
          # instance. Returns self.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet.
          def add_training_data(data)
            ensure_trained_with_instances!

            values   = self.training_instances.internal_values_of(data)
            instance = Weka::Core::DenseInstance.new(values)
            add_training_instance(instance)
          end
        end

        if instance_methods.include?(:distribution_for_instance)
          # Returns a hash (with indifferent access) that maps each class
          # value to the distribution value computed for the given instance
          # (or raw values).
          #
          # Raises UnassignedTrainingInstancesError if not trained yet.
          def distribution_for(instance_or_values)
            ensure_trained_with_instances!

            instance      = classifiable_instance_from(instance_or_values)
            distributions = distribution_for_instance(instance)

            class_distributions_from(distributions).with_indifferent_access
          end
        end

        private

        def ensure_class_attribute_assigned!(instances)
          return if instances.class_attribute_defined?

          error   = 'Class attribute is not assigned for Instances.'
          hint    = 'You can assign a class attribute with #class_attribute=.'
          message = "#{error} #{hint}"

          raise UnassignedClassError, message
        end

        def ensure_trained_with_instances!
          return unless training_instances.nil?

          error   = 'Classifier is not trained with Instances.'
          hint    = 'You can set the training instances with #train_with_instances.'
          message = "#{error} #{hint}"

          raise UnassignedTrainingInstancesError, message
        end

        # Builds a temporary Instances container shaped like the training
        # data, adds the given instance/values to it and returns that
        # instance with its class value marked as missing.
        def classifiable_instance_from(instance_or_values)
          attributes = training_instances.attributes
          instances  = Weka::Core::Instances.new(attributes: attributes)

          # NOTE(review): presumably #attributes excludes the class attribute,
          # which is why it is inserted separately here - confirm against
          # Weka::Core::Instances.
          class_attribute = training_instances.class_attribute
          class_index     = training_instances.class_index
          instances.insert_attribute_at(class_attribute, class_index)

          instances.class_index = training_instances.class_index
          instances.add_instance(instance_or_values)

          instance = instances.first
          instance.set_class_missing
          instance
        end

        def class_value_of_index(index)
          training_instances.class_attribute.value(index)
        end

        # Pairs each distribution value with the class value at the same
        # index; class values become symbol keys.
        def class_distributions_from(distributions)
          class_values = training_instances.class_attribute.values

          distributions.each_with_index.each_with_object({}) do |(distribution, index), result|
            result[class_values[index].to_sym] = distribution
          end
        end
      end

    end
  end
end
|