weka 0.3.0-java → 0.4.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/.travis.yml +1 -1
- data/README.md +5 -2
- data/Rakefile +5 -5
- data/bin/console +4 -4
- data/lib/weka/attribute_selection/attribute_selection.rb +2 -3
- data/lib/weka/class_builder.rb +21 -12
- data/lib/weka/classifiers/evaluation.rb +18 -19
- data/lib/weka/classifiers/utils.rb +87 -90
- data/lib/weka/clusterers/cluster_evaluation.rb +2 -4
- data/lib/weka/clusterers/utils.rb +63 -66
- data/lib/weka/concerns.rb +5 -8
- data/lib/weka/concerns/buildable.rb +3 -5
- data/lib/weka/concerns/describable.rb +3 -4
- data/lib/weka/concerns/optionizable.rb +26 -27
- data/lib/weka/concerns/persistent.rb +3 -8
- data/lib/weka/concerns/serializable.rb +6 -8
- data/lib/weka/core/attribute.rb +62 -4
- data/lib/weka/core/converters.rb +2 -0
- data/lib/weka/core/dense_instance.rb +7 -7
- data/lib/weka/core/instances.rb +77 -17
- data/lib/weka/core/loader.rb +10 -1
- data/lib/weka/core/saver.rb +21 -1
- data/lib/weka/core/serialization_helper.rb +2 -3
- data/lib/weka/filters/filter.rb +0 -1
- data/lib/weka/filters/supervised/attribute.rb +2 -3
- data/lib/weka/filters/utils.rb +6 -9
- data/lib/weka/jars.rb +9 -12
- data/lib/weka/version.rb +1 -1
- data/weka.gemspec +2 -3
- metadata +17 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24673716e3c980b803c2c77172efbed98a8d7de6
|
4
|
+
data.tar.gz: 9c611dba1492b943ceaea2ca9c375bbbe95ada85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54236c07fa7110a5260a587cd2a1e57016705275ea6eba8b624ad7a5bc8ed58c7e878cf4b61913368f27c9286ccdde777323e34278917d0b05ed185b87fbdbb2
|
7
|
+
data.tar.gz: 245cc55a7abf2751ddea7a2f4dda479eab73b3aff2852fe915705944a2a0290a26b4bb1d8188e97009168c72c12775bf464d724ca84473a359685a9df7f13ae9
|
data/.rspec
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -33,7 +33,7 @@ The weka gem tries to carry over the namespaces defined in Weka and enhances som
|
|
33
33
|
|
34
34
|
The idea behind keeping the namespaces is that you can also use the [Weka documentation](http://weka.sourceforge.net/doc.dev/) for looking up functionality and classes.
|
35
35
|
|
36
|
-
Please refer to [the gem's Wiki](https://github.com/paulgoetze/weka-jruby/wiki) for
|
36
|
+
Please refer to [the gem’s Wiki](https://github.com/paulgoetze/weka-jruby/wiki) for
|
37
37
|
detailed information about how to use weka with JRuby and some exemplary code snippets.
|
38
38
|
|
39
39
|
## Development
|
@@ -49,7 +49,7 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/paulgo
|
|
49
49
|
|
50
50
|
For development we use the [git branching model](http://nvie.com/posts/a-successful-git-branching-model/) described by [nvie](https://github.com/nvie).
|
51
51
|
|
52
|
-
Here's how to contribute:
|
52
|
+
Here’s how to contribute:
|
53
53
|
|
54
54
|
1. Fork it ( https://github.com/paulgoetze/weka-jruby/fork )
|
55
55
|
2. Create your feature branch (`git checkout -b feature/my-new-feature develop`)
|
@@ -59,6 +59,9 @@ Here's how to contribute:
|
|
59
59
|
|
60
60
|
Please try to add RSpec tests along with your new features. This will ensure that your code does not break existing functionality and that your feature is working as expected.
|
61
61
|
|
62
|
+
We use [Rubocop](https://github.com/bbatsov/rubocop) for code style recommendations.
|
63
|
+
Please make sure your contributions comply with the default config of Rubocop.
|
64
|
+
|
62
65
|
## Acknowledgement
|
63
66
|
|
64
67
|
The original ideas for wrapping Weka in JRuby come from [@arrigonialberto86](https://github.com/arrigonialberto86) and his [ruby-band](https://github.com/arrigonialberto86/ruby-band) gem. Great thanks!
|
data/Rakefile
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
3
|
|
4
4
|
RSpec::Core::RakeTask.new(:spec)
|
5
5
|
|
6
|
-
task :
|
7
|
-
task :
|
6
|
+
task default: :prepare
|
7
|
+
task install: :prepare
|
8
8
|
|
9
9
|
desc 'Install weka jars & dependencies'
|
10
10
|
task :prepare do
|
@@ -15,7 +15,7 @@ task :prepare do
|
|
15
15
|
LockJar.install('Jarfile.lock', local_repo: jars_dir)
|
16
16
|
end
|
17
17
|
|
18
|
-
desc
|
18
|
+
desc 'Start an irb session with the gem loaded'
|
19
19
|
task :irb do
|
20
20
|
sh 'irb -I ./lib -r weka'
|
21
21
|
end
|
data/bin/console
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'weka'
|
5
5
|
|
6
6
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
7
|
# with your gem easier. You can also use a different console, if you like.
|
8
8
|
|
9
9
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
# require
|
10
|
+
# require 'pry'
|
11
11
|
# Pry.start
|
12
12
|
|
13
|
-
require
|
13
|
+
require 'irb'
|
14
14
|
IRB.start
|
@@ -3,9 +3,8 @@ module Weka
|
|
3
3
|
java_import 'weka.attributeSelection.AttributeSelection'
|
4
4
|
|
5
5
|
class AttributeSelection
|
6
|
-
|
7
|
-
alias
|
8
|
-
alias :selected_attributes_count :number_attributes_selected
|
6
|
+
alias summary to_results_string
|
7
|
+
alias selected_attributes_count number_attributes_selected
|
9
8
|
end
|
10
9
|
end
|
11
10
|
end
|
data/lib/weka/class_builder.rb
CHANGED
@@ -1,14 +1,12 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
require 'active_support/core_ext/string'
|
3
|
-
require 'active_support/core_ext/module'
|
4
1
|
require 'weka/concerns'
|
5
2
|
|
6
3
|
module Weka
|
7
4
|
module ClassBuilder
|
8
|
-
|
5
|
+
def self.included(base)
|
6
|
+
base.extend(ClassMethods)
|
7
|
+
end
|
9
8
|
|
10
9
|
module ClassMethods
|
11
|
-
|
12
10
|
def build_class(class_name, weka_module: nil, include_concerns: true)
|
13
11
|
java_import java_class_path(class_name, weka_module)
|
14
12
|
define_class(class_name, weka_module, include_concerns: include_concerns)
|
@@ -37,7 +35,11 @@ module Weka
|
|
37
35
|
end
|
38
36
|
|
39
37
|
def super_modules
|
40
|
-
toplevel_module? ?
|
38
|
+
toplevel_module? ? name : deconstantize(name)
|
39
|
+
end
|
40
|
+
|
41
|
+
def deconstantize(name)
|
42
|
+
name.split('::')[0...-1].join('::')
|
41
43
|
end
|
42
44
|
|
43
45
|
def java_including_module
|
@@ -45,11 +47,15 @@ module Weka
|
|
45
47
|
end
|
46
48
|
|
47
49
|
def including_module
|
48
|
-
|
50
|
+
demodulize(name) unless toplevel_module?
|
51
|
+
end
|
52
|
+
|
53
|
+
def demodulize(name)
|
54
|
+
name.split('::').last
|
49
55
|
end
|
50
56
|
|
51
57
|
def toplevel_module?
|
52
|
-
|
58
|
+
name.scan('::').count == 1
|
53
59
|
end
|
54
60
|
|
55
61
|
def define_class(class_name, weka_module, include_concerns: true)
|
@@ -66,7 +72,7 @@ module Weka
|
|
66
72
|
class_path = java_class_path(class_name, weka_module)
|
67
73
|
serializable = Weka::Core::SerializationHelper.serializable?(class_path)
|
68
74
|
|
69
|
-
|
75
|
+
'include Weka::Concerns::Serializable' if serializable
|
70
76
|
end
|
71
77
|
|
72
78
|
def include_utils
|
@@ -75,7 +81,11 @@ module Weka
|
|
75
81
|
end
|
76
82
|
|
77
83
|
def utils_defined?
|
78
|
-
utils_super_modules.
|
84
|
+
constantize(utils_super_modules).const_defined?(:Utils)
|
85
|
+
end
|
86
|
+
|
87
|
+
def constantize(module_names)
|
88
|
+
Object.module_eval("::#{module_names}")
|
79
89
|
end
|
80
90
|
|
81
91
|
def utils
|
@@ -87,10 +97,9 @@ module Weka
|
|
87
97
|
end
|
88
98
|
|
89
99
|
def downcase_first_char(string)
|
90
|
-
return if string.
|
100
|
+
return if string.nil? || string.empty?
|
91
101
|
string[0].downcase + string[1..-1]
|
92
102
|
end
|
93
103
|
end
|
94
|
-
|
95
104
|
end
|
96
105
|
end
|
@@ -3,35 +3,34 @@ module Weka
|
|
3
3
|
java_import 'weka.classifiers.Evaluation'
|
4
4
|
|
5
5
|
class Evaluation
|
6
|
-
|
7
6
|
# Use both nomenclatures f_measure and fmeasure for consistency
|
8
7
|
# due to jruby's auto method generation of 'fMeasure' to 'f_measure' and
|
9
8
|
# 'weightedFMeasure' to 'weighted_fmeasure'.
|
10
|
-
alias
|
11
|
-
alias
|
9
|
+
alias weighted_f_measure weighted_fmeasure
|
10
|
+
alias fmeasure f_measure
|
12
11
|
|
13
|
-
alias
|
14
|
-
alias
|
12
|
+
alias summary to_summary_string
|
13
|
+
alias class_details to_class_details_string
|
14
|
+
alias confusion_matrix to_matrix_string
|
15
15
|
|
16
|
-
alias
|
17
|
-
alias
|
18
|
-
alias
|
19
|
-
alias
|
16
|
+
alias instance_count num_instances
|
17
|
+
alias correct_count correct
|
18
|
+
alias incorrect_count incorrect
|
19
|
+
alias unclassified_count unclassified
|
20
20
|
|
21
|
-
alias
|
22
|
-
alias
|
23
|
-
alias
|
21
|
+
alias correct_percentage pct_correct
|
22
|
+
alias incorrect_percentage pct_incorrect
|
23
|
+
alias unclassified_percentage pct_unclassified
|
24
24
|
|
25
|
-
alias
|
26
|
-
alias
|
27
|
-
alias
|
28
|
-
alias
|
29
|
-
alias
|
25
|
+
alias true_negative_count num_true_negatives
|
26
|
+
alias false_negative_count num_false_negatives
|
27
|
+
alias true_positive_count num_true_positives
|
28
|
+
alias false_positive_count num_false_positives
|
29
|
+
alias average_cost avg_cost
|
30
30
|
|
31
|
-
alias
|
31
|
+
alias cumulative_margin_distribution to_cumulative_margin_distribution_string
|
32
32
|
end
|
33
33
|
|
34
34
|
Java::WekaClassifiers::Evaluation.__persistent__ = true
|
35
|
-
|
36
35
|
end
|
37
36
|
end
|
@@ -1,138 +1,135 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
require 'active_support/core_ext/hash'
|
3
1
|
require 'weka/classifiers/evaluation'
|
4
2
|
require 'weka/core/instances'
|
5
3
|
|
6
4
|
module Weka
|
7
5
|
module Classifiers
|
8
6
|
module Utils
|
9
|
-
|
7
|
+
def self.included(base)
|
8
|
+
base.class_eval do
|
9
|
+
java_import 'java.util.Random'
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
if instance_methods.include?(:build_classifier)
|
12
|
+
attr_reader :training_instances
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
def train_with_instances(instances)
|
15
|
+
ensure_class_attribute_assigned!(instances)
|
16
16
|
|
17
|
-
|
18
|
-
|
17
|
+
@training_instances = instances
|
18
|
+
build_classifier(instances)
|
19
19
|
|
20
|
-
|
21
|
-
|
20
|
+
self
|
21
|
+
end
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
def cross_validate(folds: 3)
|
27
|
-
ensure_trained_with_instances!
|
23
|
+
def cross_validate(folds: 3)
|
24
|
+
ensure_trained_with_instances!
|
28
25
|
|
29
|
-
|
30
|
-
|
26
|
+
evaluation = Evaluation.new(training_instances)
|
27
|
+
random = Java::JavaUtil::Random.new(1)
|
31
28
|
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
evaluation.cross_validate_model(self, training_instances, folds.to_i, random)
|
30
|
+
evaluation
|
31
|
+
end
|
35
32
|
|
36
|
-
|
37
|
-
|
38
|
-
|
33
|
+
def evaluate(test_instances)
|
34
|
+
ensure_trained_with_instances!
|
35
|
+
ensure_class_attribute_assigned!(test_instances)
|
39
36
|
|
40
|
-
|
41
|
-
|
42
|
-
|
37
|
+
evaluation = Evaluation.new(training_instances)
|
38
|
+
evaluation.evaluate_model(self, test_instances)
|
39
|
+
evaluation
|
40
|
+
end
|
43
41
|
end
|
44
|
-
end
|
45
42
|
|
46
|
-
|
47
|
-
|
48
|
-
|
43
|
+
if instance_methods.include?(:classify_instance)
|
44
|
+
def classify(instance_or_values)
|
45
|
+
ensure_trained_with_instances!
|
49
46
|
|
50
|
-
|
51
|
-
|
47
|
+
instance = classifiable_instance_from(instance_or_values)
|
48
|
+
index = classify_instance(instance)
|
52
49
|
|
53
|
-
|
50
|
+
class_value_of_index(index)
|
51
|
+
end
|
54
52
|
end
|
55
|
-
end
|
56
53
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
54
|
+
if instance_methods.include?(:update_classifier)
|
55
|
+
def add_training_instance(instance)
|
56
|
+
training_instances.add(instance)
|
57
|
+
update_classifier(instance)
|
61
58
|
|
62
|
-
|
63
|
-
|
59
|
+
self
|
60
|
+
end
|
64
61
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
62
|
+
def add_training_data(data)
|
63
|
+
values = training_instances.internal_values_of(data)
|
64
|
+
instance = Weka::Core::DenseInstance.new(values)
|
65
|
+
add_training_instance(instance)
|
66
|
+
end
|
69
67
|
end
|
70
|
-
end
|
71
68
|
|
72
|
-
|
73
|
-
|
74
|
-
|
69
|
+
if instance_methods.include?(:distribution_for_instance)
|
70
|
+
def distribution_for(instance_or_values)
|
71
|
+
ensure_trained_with_instances!
|
75
72
|
|
76
|
-
|
77
|
-
|
73
|
+
instance = classifiable_instance_from(instance_or_values)
|
74
|
+
distributions = distribution_for_instance(instance)
|
78
75
|
|
79
|
-
|
76
|
+
class_distributions_from(distributions)
|
77
|
+
end
|
80
78
|
end
|
81
|
-
end
|
82
79
|
|
83
|
-
|
80
|
+
private
|
84
81
|
|
85
|
-
|
86
|
-
|
82
|
+
def ensure_class_attribute_assigned!(instances)
|
83
|
+
return if instances.class_attribute_defined?
|
87
84
|
|
88
|
-
|
89
|
-
|
90
|
-
|
85
|
+
error = 'Class attribute is not assigned for Instances.'
|
86
|
+
hint = 'You can assign a class attribute with #class_attribute=.'
|
87
|
+
message = "#{error} #{hint}"
|
91
88
|
|
92
|
-
|
93
|
-
|
89
|
+
raise UnassignedClassError, message
|
90
|
+
end
|
94
91
|
|
95
|
-
|
96
|
-
|
92
|
+
def ensure_trained_with_instances!
|
93
|
+
return unless training_instances.nil?
|
97
94
|
|
98
|
-
|
99
|
-
|
100
|
-
|
95
|
+
error = 'Classifier is not trained with Instances.'
|
96
|
+
hint = 'You can set the training instances with #train_with_instances.'
|
97
|
+
message = "#{error} #{hint}"
|
101
98
|
|
102
|
-
|
103
|
-
|
99
|
+
raise UnassignedTrainingInstancesError, message
|
100
|
+
end
|
104
101
|
|
105
|
-
|
106
|
-
|
107
|
-
|
102
|
+
def classifiable_instance_from(instance_or_values)
|
103
|
+
attributes = training_instances.attributes
|
104
|
+
instances = Weka::Core::Instances.new(attributes: attributes)
|
108
105
|
|
109
|
-
|
110
|
-
|
111
|
-
|
106
|
+
class_attribute = training_instances.class_attribute
|
107
|
+
class_index = training_instances.class_index
|
108
|
+
instances.insert_attribute_at(class_attribute, class_index)
|
112
109
|
|
113
|
-
|
114
|
-
|
110
|
+
instances.class_index = training_instances.class_index
|
111
|
+
instances.add_instance(instance_or_values)
|
115
112
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
113
|
+
instance = instances.first
|
114
|
+
instance.set_class_missing
|
115
|
+
instance
|
116
|
+
end
|
120
117
|
|
121
|
-
|
122
|
-
|
123
|
-
|
118
|
+
def class_value_of_index(index)
|
119
|
+
training_instances.class_attribute.value(index)
|
120
|
+
end
|
124
121
|
|
125
|
-
|
126
|
-
|
122
|
+
def class_distributions_from(distributions)
|
123
|
+
class_values = training_instances.class_attribute.values
|
127
124
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
125
|
+
distributions.each_with_index.reduce({}) do |result, (distribution, index)|
|
126
|
+
class_value = class_values[index]
|
127
|
+
result[class_value] = distribution
|
128
|
+
result
|
129
|
+
end
|
132
130
|
end
|
133
131
|
end
|
134
132
|
end
|
135
|
-
|
136
133
|
end
|
137
134
|
end
|
138
135
|
end
|