weka 0.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +15 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Jarfile +1 -0
- data/Jarfile.lock +17 -0
- data/MIT-LICENSE.txt +19 -0
- data/README.md +687 -0
- data/Rakefile +21 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/weka.rb +32 -0
- data/lib/weka/attribute_selection.rb +1 -0
- data/lib/weka/attribute_selection/attribute_selection.rb +11 -0
- data/lib/weka/attribute_selection/evaluator.rb +29 -0
- data/lib/weka/attribute_selection/search.rb +14 -0
- data/lib/weka/class_builder.rb +88 -0
- data/lib/weka/classifiers.rb +1 -0
- data/lib/weka/classifiers/bayes.rb +16 -0
- data/lib/weka/classifiers/evaluation.rb +37 -0
- data/lib/weka/classifiers/functions.rb +21 -0
- data/lib/weka/classifiers/lazy.rb +13 -0
- data/lib/weka/classifiers/meta.rb +29 -0
- data/lib/weka/classifiers/rules.rb +16 -0
- data/lib/weka/classifiers/trees.rb +18 -0
- data/lib/weka/classifiers/utils.rb +138 -0
- data/lib/weka/clusterers.rb +16 -0
- data/lib/weka/clusterers/cluster_evaluation.rb +14 -0
- data/lib/weka/clusterers/utils.rb +103 -0
- data/lib/weka/concerns.rb +18 -0
- data/lib/weka/concerns/buildable.rb +19 -0
- data/lib/weka/concerns/describable.rb +30 -0
- data/lib/weka/concerns/optionizable.rb +49 -0
- data/lib/weka/concerns/persistent.rb +16 -0
- data/lib/weka/core.rb +6 -0
- data/lib/weka/core/attribute.rb +24 -0
- data/lib/weka/core/converters.rb +17 -0
- data/lib/weka/core/dense_instance.rb +68 -0
- data/lib/weka/core/instances.rb +199 -0
- data/lib/weka/core/loader.rb +32 -0
- data/lib/weka/core/saver.rb +34 -0
- data/lib/weka/exceptions.rb +6 -0
- data/lib/weka/filters.rb +1 -0
- data/lib/weka/filters/filter.rb +9 -0
- data/lib/weka/filters/supervised/attribute.rb +26 -0
- data/lib/weka/filters/supervised/instance.rb +16 -0
- data/lib/weka/filters/unsupervised/attribute.rb +67 -0
- data/lib/weka/filters/unsupervised/instance.rb +25 -0
- data/lib/weka/filters/utils.rb +17 -0
- data/lib/weka/jars.rb +19 -0
- data/lib/weka/version.rb +3 -0
- data/weka.gemspec +32 -0
- metadata +183 -0
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
3
|
+
|
4
|
+
# Register the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# `prepare` is a prerequisite of both the default task and `install`.
task default: :prepare
task install: :prepare

desc 'Install weka jars & dependencies'
task :prepare do
  # lock_jar is only needed by this task, so it is loaded lazily.
  require 'lock_jar'

  # Directory that contains this Rakefile; the jars are placed in ./jars below it.
  project_root = File.expand_path('..', __FILE__)
  jars_dir     = File.join(project_root, 'jars')

  LockJar.install('Jarfile.lock', local_repo: jars_dir)
end

desc "Start an irb session with the gem loaded"
task :irb do
  sh 'irb -I ./lib -r weka'
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

require "bundler/setup"
require "weka"

# Interactive console with the gem preloaded.
#
# Put fixtures and/or initialization code above the console start below to
# make experimenting with the gem easier.
#
# A different console can be used instead of IRB, e.g. Pry (remember to add
# pry to the Gemfile first):
#
#   require "pry"
#   Pry.start

require "irb"
IRB.start
|
data/bin/setup
ADDED
data/lib/weka.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'weka/jars'
|
3
|
+
require 'weka/version'
|
4
|
+
require 'weka/exceptions'
|
5
|
+
|
6
|
+
# Top-level namespace of the gem.
module Weka
  include Jars

  class << self
    # Requires every Ruby file found (recursively) below lib/weka/<type>/.
    #
    # The directory's own utils.rb, when present, is required first so the
    # other files can rely on it being loaded. The remaining files are
    # required in sorted path order: Dir[] does not guarantee any ordering on
    # every Ruby version/platform, and sorting keeps the load order
    # deterministic.
    #
    # type - directory name below lib/weka (e.g. :classifiers).
    #
    # Returns the Array of file paths in the order they were required.
    def require_all(type)
      files = Dir[File.expand_path("../weka/#{type}/**/*.rb", __FILE__)].sort
      utils = File.expand_path("../weka/#{type}/utils.rb", __FILE__)

      move_to_head(utils, files).each { |file| require file }
    end

    private

    # Moves +file+ to the front of +files+ (in place) when it is contained in
    # the list; leaves the list untouched otherwise.
    #
    # Returns the (mutated) +files+ Array.
    def move_to_head(file, files)
      files.unshift(file) if files.delete(file)
      files
    end
  end
end
|
27
|
+
|
28
|
+
require 'weka/core'
|
29
|
+
require 'weka/classifiers'
|
30
|
+
require 'weka/filters'
|
31
|
+
require 'weka/clusterers'
|
32
|
+
require 'weka/attribute_selection'
|
@@ -0,0 +1 @@
|
|
1
|
+
Weka.require_all :attribute_selection
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module AttributeSelection
    # Wrapper classes for the evaluators of Weka's attribute selection
    # package. The Java package is passed explicitly via +weka_module+.
    module Evaluator
      include ClassBuilder

      build_classes(
        *%i[
          CfsSubsetEval
          CorrelationAttributeEval
          GainRatioAttributeEval
          InfoGainAttributeEval
          OneRAttributeEval
          PrincipalComponents
          ReliefFAttributeEval
          SymmetricalUncertAttributeEval
          WrapperSubsetEval
        ],
        weka_module: 'weka.attributeSelection'
      )

      # Shorter names (without the "Eval" suffix) for the evaluators above.
      class CfsSubset                  < CfsSubsetEval; end
      class CorrelationAttribute       < CorrelationAttributeEval; end
      class GainRatioAttribute         < GainRatioAttributeEval; end
      class InfoGainAttribute          < InfoGainAttributeEval; end
      class OneRAttribute              < OneRAttributeEval; end
      class ReliefFAttribute           < ReliefFAttributeEval; end
      class SymmetricalUncertAttribute < SymmetricalUncertAttributeEval; end
      class WrapperSubset              < WrapperSubsetEval; end
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'active_support/core_ext/string'
|
3
|
+
require 'active_support/core_ext/module'
|
4
|
+
require 'weka/concerns'
|
5
|
+
|
6
|
+
module Weka
  # Mixin providing the class-level macros +build_class+ and +build_classes+,
  # which import a Weka Java class and reopen it as a Ruby class inside the
  # including module.
  #
  # Unless +weka_module+ is given, the Java package is derived from the
  # including module's name by lower-casing the first character of every
  # segment, e.g. Weka::Classifiers::Bayes => "weka.classifiers.bayes".
  module ClassBuilder
    extend ActiveSupport::Concern

    module ClassMethods
      # Imports a single Java class and defines its Ruby counterpart.
      #
      # class_name       - name of the Java class (Symbol or String).
      # weka_module      - explicit Java package; derived from the including
      #                    module's name when nil.
      # include_concerns - whether the generated class includes Concerns.
      def build_class(class_name, weka_module: nil, include_concerns: true)
        java_import java_class_path(class_name, weka_module)
        define_class(class_name, include_concerns: include_concerns)
      end

      # Same as #build_class for several class names sharing the same options.
      def build_classes(*class_names, weka_module: nil, include_concerns: true)
        class_names.each do |class_name|
          build_class(class_name, weka_module: weka_module, include_concerns: include_concerns)
        end
      end

      private

      # Fully qualified Java class name for +class_name+.
      def java_class_path(class_name, weka_module)
        if weka_module
          "#{weka_module}.#{class_name}"
        else
          [*java_super_modules, java_including_module, class_name].compact.join('.')
        end
      end

      # Java package segments corresponding to the enclosing Ruby modules.
      def java_super_modules
        super_modules.split('::').map do |segment|
          downcase_first_char(segment)
        end
      end

      # Name of the enclosing namespace: the module's own name when it sits
      # directly below the root namespace, otherwise its parent's name.
      def super_modules
        toplevel_module? ? name : name.deconstantize
      end

      def java_including_module
        downcase_first_char(including_module)
      end

      # Last segment of the module name; nil for a toplevel module because its
      # name is already fully covered by #super_modules.
      def including_module
        name.demodulize unless toplevel_module?
      end

      # True when the module name has exactly two segments (e.g. Weka::Foo).
      def toplevel_module?
        name.scan('::').count == 1
      end

      # Reopens +class_name+ inside the including module and mixes in Concerns
      # (optional) and the namespace's Utils module (when one is defined).
      def define_class(class_name, include_concerns: true)
        module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
          class #{class_name}
            #{'include Concerns' if include_concerns}
            #{include_utils}
          end
        CLASS_DEFINITION
      end

      def include_utils
        return unless utils_defined?
        "include #{utils}"
      end

      # Only constants defined directly in the namespace count. With the
      # default inherit = true, const_defined? on a module also searches
      # Object, so an unrelated top-level ::Utils would be reported here and
      # the generated `include ::<Namespace>::Utils` would raise a NameError.
      def utils_defined?
        utils_super_modules.constantize.const_defined?(:Utils, false)
      end

      def utils
        "::#{utils_super_modules}::Utils"
      end

      # First two segments of the enclosing namespace, e.g. "Weka::Filters".
      def utils_super_modules
        super_modules.split('::')[0..1].join('::')
      end

      # Returns +string+ with its first character lower-cased, or nil when
      # +string+ is blank.
      def downcase_first_char(string)
        return if string.blank?
        string[0].downcase + string[1..-1]
      end
    end

  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
Weka.require_all :classifiers
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Classifiers
    # Wrapper classes for Weka's bayes classifiers. No +weka_module+ is
    # given, so ClassBuilder derives the Java package from this module's name.
    module Bayes
      include ClassBuilder

      build_classes(
        *%i[
          BayesNet
          NaiveBayes
          NaiveBayesMultinomial
          NaiveBayesMultinomialText
          NaiveBayesMultinomialUpdateable
          NaiveBayesUpdateable
        ]
      )
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Weka
  module Classifiers
    java_import 'weka.classifiers.Evaluation'

    # Reopens the imported Evaluation class to add more readable aliases for
    # its methods.
    class Evaluation

      # JRuby's automatic method name conversion turns 'fMeasure' into
      # 'f_measure' but 'weightedFMeasure' into 'weighted_fmeasure'.
      # Both spellings are offered for both methods to be consistent.
      alias weighted_f_measure weighted_fmeasure
      alias fmeasure           f_measure

      # Textual reports
      alias summary                        to_summary_string
      alias class_details                  to_class_details_string
      alias cumulative_margin_distribution to_cumulative_margin_distribution_string

      # Counts
      alias instance_count     num_instances
      alias correct_count      correct
      alias incorrect_count    incorrect
      alias unclassified_count unclassified

      alias true_negative_count  num_true_negatives
      alias false_negative_count num_false_negatives
      alias true_positive_count  num_true_positives
      alias false_positive_count num_false_positives

      # Percentages
      alias correct_percentage      pct_correct
      alias incorrect_percentage    pct_incorrect
      alias unclassified_percentage pct_unclassified

      alias average_cost avg_cost
    end

    # NOTE(review): presumably set so JRuby keeps one persistent Ruby proxy
    # per Java Evaluation object - confirm against the JRuby documentation.
    Java::WekaClassifiers::Evaluation.__persistent__ = true

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Classifiers
    # Wrapper classes for Weka's function-based classifiers. No +weka_module+
    # is given, so ClassBuilder derives the Java package from this module's
    # name.
    module Functions
      include ClassBuilder

      build_classes(
        *%i[
          GaussianProcesses
          LinearRegression
          Logistic
          MultilayerPerceptron
          SGD
          SGDText
          SimpleLinearRegression
          SimpleLogistic
          SMO
          SMOreg
          VotedPerceptron
        ]
      )
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Classifiers
    # Wrapper classes for Weka's meta classifiers. No +weka_module+ is given,
    # so ClassBuilder derives the Java package from this module's name.
    module Meta
      include ClassBuilder

      build_classes(
        *%i[
          AdaBoostM1
          AdditiveRegression
          AttributeSelectedClassifier
          Bagging
          ClassificationViaRegression
          CostSensitiveClassifier
          CVParameterSelection
          FilteredClassifier
          IterativeClassifierOptimizer
          LogitBoost
          MultiClassClassifier
          MultiClassClassifierUpdateable
          MultiScheme
          RandomCommittee
          RandomizableFilteredClassifier
          RandomSubSpace
          RegressionByDiscretization
          Stacking
          Vote
        ]
      )
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Classifiers
    # Wrapper classes for Weka's tree classifiers. No +weka_module+ is given,
    # so ClassBuilder derives the Java package from this module's name.
    module Trees
      include ClassBuilder

      build_classes(
        *%i[
          DecisionStump
          HoeffdingTree
          J48
          LMT
          M5P
          RandomForest
          RandomTree
          REPTree
        ]
      )
    end
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'active_support/core_ext/hash'
|
3
|
+
require 'weka/classifiers/evaluation'
|
4
|
+
require 'weka/core/instances'
|
5
|
+
|
6
|
+
module Weka
  module Classifiers
    # Convenience methods for classifier classes.
    #
    # Each group of public methods is only defined when the including class
    # has the underlying method it wraps (build_classifier,
    # classify_instance, update_classifier, distribution_for_instance).
    module Utils
      extend ActiveSupport::Concern

      included do
        java_import 'java.util.Random'

        if instance_methods.include?(:build_classifier)
          attr_reader :training_instances

          # Builds the classifier from +instances+ and remembers them as
          # training instances.
          #
          # Raises UnassignedClassError if +instances+ has no class attribute.
          # Returns self.
          def train_with_instances(instances)
            ensure_class_attribute_assigned!(instances)

            @training_instances = instances
            build_classifier(instances)

            self
          end

          # Cross-validates the classifier on its training instances, using
          # a java.util.Random seeded with 1.
          #
          # folds - number of folds (converted with #to_i).
          #
          # Raises UnassignedTrainingInstancesError if not trained yet.
          # Returns the Evaluation.
          def cross_validate(folds: 3)
            ensure_trained_with_instances!

            evaluation = Evaluation.new(training_instances)
            random     = Java::JavaUtil::Random.new(1)

            evaluation.cross_validate_model(self, training_instances, folds.to_i, random)
            evaluation
          end

          # Evaluates the classifier on +test_instances+.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet.
          # Raises UnassignedClassError if +test_instances+ has no class
          # attribute.
          # Returns the Evaluation.
          def evaluate(test_instances)
            ensure_trained_with_instances!
            ensure_class_attribute_assigned!(test_instances)

            evaluation = Evaluation.new(training_instances)
            evaluation.evaluate_model(self, test_instances)
            evaluation
          end
        end

        if instance_methods.include?(:classify_instance)
          # Classifies an instance (or a list of attribute values) and
          # returns the value of the training instances' class attribute at
          # the predicted index.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet.
          def classify(instance_or_values)
            ensure_trained_with_instances!

            instance = classifiable_instance_from(instance_or_values)
            index    = classify_instance(instance)

            class_value_of_index(index)
          end
        end

        if instance_methods.include?(:update_classifier)
          # Adds +instance+ to the training instances and updates the
          # classifier with it.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet, like
          # the other methods that depend on the training instances, instead
          # of failing with a NoMethodError on nil.
          # Returns self.
          def add_training_instance(instance)
            ensure_trained_with_instances!

            training_instances.add(instance)
            update_classifier(instance)

            self
          end

          # Converts +data+ into a DenseInstance using the training
          # instances' internal values and adds it via #add_training_instance.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet.
          # Returns self.
          def add_training_data(data)
            ensure_trained_with_instances!

            values   = training_instances.internal_values_of(data)
            instance = Weka::Core::DenseInstance.new(values)
            add_training_instance(instance)
          end
        end

        if instance_methods.include?(:distribution_for_instance)
          # Returns the distribution for an instance (or a list of attribute
          # values) as a Hash with indifferent access, keyed by the values
          # of the training instances' class attribute.
          #
          # Raises UnassignedTrainingInstancesError if not trained yet.
          def distribution_for(instance_or_values)
            ensure_trained_with_instances!

            instance      = classifiable_instance_from(instance_or_values)
            distributions = distribution_for_instance(instance)

            class_distributions_from(distributions).with_indifferent_access
          end
        end

        private

        def ensure_class_attribute_assigned!(instances)
          return if instances.class_attribute_defined?

          error   = 'Class attribute is not assigned for Instances.'
          hint    = 'You can assign a class attribute with #class_attribute=.'
          message = "#{error} #{hint}"

          raise UnassignedClassError, message
        end

        def ensure_trained_with_instances!
          return unless training_instances.nil?

          error   = 'Classifier is not trained with Instances.'
          hint    = 'You can set the training instances with #train_with_instances.'
          message = "#{error} #{hint}"

          raise UnassignedTrainingInstancesError, message
        end

        # Builds a one-row Instances object with the training instances'
        # attributes, class attribute and class index, adds
        # +instance_or_values+ to it and returns that row with its class
        # value marked as missing.
        def classifiable_instance_from(instance_or_values)
          attributes = training_instances.attributes
          instances  = Weka::Core::Instances.new(attributes: attributes)

          class_attribute = training_instances.class_attribute
          class_index     = training_instances.class_index
          instances.insert_attribute_at(class_attribute, class_index)

          instances.class_index = training_instances.class_index
          instances.add_instance(instance_or_values)

          instance = instances.first
          instance.set_class_missing
          instance
        end

        def class_value_of_index(index)
          training_instances.class_attribute.value(index)
        end

        # Maps each entry of +distributions+ to the class value at the same
        # index. Returns a Hash with Symbol keys.
        def class_distributions_from(distributions)
          class_values = training_instances.class_attribute.values

          distributions.each_with_index.each_with_object({}) do |(distribution, index), result|
            result[class_values[index].to_sym] = distribution
          end
        end
      end

    end
  end
end
|