weka 0.3.0-java → 0.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/.travis.yml +1 -1
- data/README.md +5 -2
- data/Rakefile +5 -5
- data/bin/console +4 -4
- data/lib/weka/attribute_selection/attribute_selection.rb +2 -3
- data/lib/weka/class_builder.rb +21 -12
- data/lib/weka/classifiers/evaluation.rb +18 -19
- data/lib/weka/classifiers/utils.rb +87 -90
- data/lib/weka/clusterers/cluster_evaluation.rb +2 -4
- data/lib/weka/clusterers/utils.rb +63 -66
- data/lib/weka/concerns.rb +5 -8
- data/lib/weka/concerns/buildable.rb +3 -5
- data/lib/weka/concerns/describable.rb +3 -4
- data/lib/weka/concerns/optionizable.rb +26 -27
- data/lib/weka/concerns/persistent.rb +3 -8
- data/lib/weka/concerns/serializable.rb +6 -8
- data/lib/weka/core/attribute.rb +62 -4
- data/lib/weka/core/converters.rb +2 -0
- data/lib/weka/core/dense_instance.rb +7 -7
- data/lib/weka/core/instances.rb +77 -17
- data/lib/weka/core/loader.rb +10 -1
- data/lib/weka/core/saver.rb +21 -1
- data/lib/weka/core/serialization_helper.rb +2 -3
- data/lib/weka/filters/filter.rb +0 -1
- data/lib/weka/filters/supervised/attribute.rb +2 -3
- data/lib/weka/filters/utils.rb +6 -9
- data/lib/weka/jars.rb +9 -12
- data/lib/weka/version.rb +1 -1
- data/weka.gemspec +2 -3
- metadata +17 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24673716e3c980b803c2c77172efbed98a8d7de6
|
4
|
+
data.tar.gz: 9c611dba1492b943ceaea2ca9c375bbbe95ada85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54236c07fa7110a5260a587cd2a1e57016705275ea6eba8b624ad7a5bc8ed58c7e878cf4b61913368f27c9286ccdde777323e34278917d0b05ed185b87fbdbb2
|
7
|
+
data.tar.gz: 245cc55a7abf2751ddea7a2f4dda479eab73b3aff2852fe915705944a2a0290a26b4bb1d8188e97009168c72c12775bf464d724ca84473a359685a9df7f13ae9
|
data/.rspec
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -33,7 +33,7 @@ The weka gem tries to carry over the namespaces defined in Weka and enhances som
|
|
33
33
|
|
34
34
|
The idea behind keeping the namespaces is that you can also use the [Weka documentation](http://weka.sourceforge.net/doc.dev/) for looking up functionality and classes.
|
35
35
|
|
36
|
-
Please refer to [the gem's Wiki](https://github.com/paulgoetze/weka-jruby/wiki) for
|
36
|
+
Please refer to [the gem’s Wiki](https://github.com/paulgoetze/weka-jruby/wiki) for
|
37
37
|
detailed information about how to use weka with JRuby and some example code snippets.
|
38
38
|
|
39
39
|
## Development
|
@@ -49,7 +49,7 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/paulgo
|
|
49
49
|
|
50
50
|
For development we use the [git branching model](http://nvie.com/posts/a-successful-git-branching-model/) described by [nvie](https://github.com/nvie).
|
51
51
|
|
52
|
-
Here's how to contribute:
|
52
|
+
Here’s how to contribute:
|
53
53
|
|
54
54
|
1. Fork it ( https://github.com/paulgoetze/weka-jruby/fork )
|
55
55
|
2. Create your feature branch (`git checkout -b feature/my-new-feature develop`)
|
@@ -59,6 +59,9 @@ Here's how to contribute:
|
|
59
59
|
|
60
60
|
Please try to add RSpec tests along with your new features. This will ensure that your code does not break existing functionality and that your feature is working as expected.
|
61
61
|
|
62
|
+
We use [Rubocop](https://github.com/bbatsov/rubocop) for code style recommendations.
|
63
|
+
Please make sure your contributions comply with the default config of Rubocop.
|
64
|
+
|
62
65
|
## Acknowledgement
|
63
66
|
|
64
67
|
The original ideas for wrapping Weka in JRuby come from [@arrigonialberto86](https://github.com/arrigonialberto86) and his [ruby-band](https://github.com/arrigonialberto86/ruby-band) gem. Great thanks!
|
data/Rakefile
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
3
|
|
4
4
|
RSpec::Core::RakeTask.new(:spec)
|
5
5
|
|
6
|
-
task :
|
7
|
-
task :
|
6
|
+
task default: :prepare
|
7
|
+
task install: :prepare
|
8
8
|
|
9
9
|
desc 'Install weka jars & dependencies'
|
10
10
|
task :prepare do
|
@@ -15,7 +15,7 @@ task :prepare do
|
|
15
15
|
LockJar.install('Jarfile.lock', local_repo: jars_dir)
|
16
16
|
end
|
17
17
|
|
18
|
-
desc
|
18
|
+
desc 'Start an irb session with the gem loaded'
|
19
19
|
task :irb do
|
20
20
|
sh 'irb -I ./lib -r weka'
|
21
21
|
end
|
data/bin/console
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'weka'
|
5
5
|
|
6
6
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
7
|
# with your gem easier. You can also use a different console, if you like.
|
8
8
|
|
9
9
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
# require
|
10
|
+
# require 'pry'
|
11
11
|
# Pry.start
|
12
12
|
|
13
|
-
require
|
13
|
+
require 'irb'
|
14
14
|
IRB.start
|
@@ -3,9 +3,8 @@ module Weka
|
|
3
3
|
java_import 'weka.attributeSelection.AttributeSelection'
|
4
4
|
|
5
5
|
class AttributeSelection
|
6
|
-
|
7
|
-
alias
|
8
|
-
alias :selected_attributes_count :number_attributes_selected
|
6
|
+
alias summary to_results_string
|
7
|
+
alias selected_attributes_count number_attributes_selected
|
9
8
|
end
|
10
9
|
end
|
11
10
|
end
|
data/lib/weka/class_builder.rb
CHANGED
@@ -1,14 +1,12 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
require 'active_support/core_ext/string'
|
3
|
-
require 'active_support/core_ext/module'
|
4
1
|
require 'weka/concerns'
|
5
2
|
|
6
3
|
module Weka
|
7
4
|
module ClassBuilder
|
8
|
-
|
5
|
+
def self.included(base)
|
6
|
+
base.extend(ClassMethods)
|
7
|
+
end
|
9
8
|
|
10
9
|
module ClassMethods
|
11
|
-
|
12
10
|
def build_class(class_name, weka_module: nil, include_concerns: true)
|
13
11
|
java_import java_class_path(class_name, weka_module)
|
14
12
|
define_class(class_name, weka_module, include_concerns: include_concerns)
|
@@ -37,7 +35,11 @@ module Weka
|
|
37
35
|
end
|
38
36
|
|
39
37
|
def super_modules
|
40
|
-
toplevel_module? ?
|
38
|
+
toplevel_module? ? name : deconstantize(name)
|
39
|
+
end
|
40
|
+
|
41
|
+
def deconstantize(name)
|
42
|
+
name.split('::')[0...-1].join('::')
|
41
43
|
end
|
42
44
|
|
43
45
|
def java_including_module
|
@@ -45,11 +47,15 @@ module Weka
|
|
45
47
|
end
|
46
48
|
|
47
49
|
def including_module
|
48
|
-
|
50
|
+
demodulize(name) unless toplevel_module?
|
51
|
+
end
|
52
|
+
|
53
|
+
def demodulize(name)
|
54
|
+
name.split('::').last
|
49
55
|
end
|
50
56
|
|
51
57
|
def toplevel_module?
|
52
|
-
|
58
|
+
name.scan('::').count == 1
|
53
59
|
end
|
54
60
|
|
55
61
|
def define_class(class_name, weka_module, include_concerns: true)
|
@@ -66,7 +72,7 @@ module Weka
|
|
66
72
|
class_path = java_class_path(class_name, weka_module)
|
67
73
|
serializable = Weka::Core::SerializationHelper.serializable?(class_path)
|
68
74
|
|
69
|
-
|
75
|
+
'include Weka::Concerns::Serializable' if serializable
|
70
76
|
end
|
71
77
|
|
72
78
|
def include_utils
|
@@ -75,7 +81,11 @@ module Weka
|
|
75
81
|
end
|
76
82
|
|
77
83
|
def utils_defined?
|
78
|
-
utils_super_modules.
|
84
|
+
constantize(utils_super_modules).const_defined?(:Utils)
|
85
|
+
end
|
86
|
+
|
87
|
+
def constantize(module_names)
|
88
|
+
Object.module_eval("::#{module_names}")
|
79
89
|
end
|
80
90
|
|
81
91
|
def utils
|
@@ -87,10 +97,9 @@ module Weka
|
|
87
97
|
end
|
88
98
|
|
89
99
|
def downcase_first_char(string)
|
90
|
-
return if string.
|
100
|
+
return if string.nil? || string.empty?
|
91
101
|
string[0].downcase + string[1..-1]
|
92
102
|
end
|
93
103
|
end
|
94
|
-
|
95
104
|
end
|
96
105
|
end
|
@@ -3,35 +3,34 @@ module Weka
|
|
3
3
|
java_import 'weka.classifiers.Evaluation'
|
4
4
|
|
5
5
|
class Evaluation
|
6
|
-
|
7
6
|
# Use both nomenclatures f_measure and fmeasure for consistency
|
8
7
|
# due to jruby's auto method generation of 'fMeasure' to 'f_measure' and
|
9
8
|
# 'weightedFMeasure' to 'weighted_fmeasure'.
|
10
|
-
alias
|
11
|
-
alias
|
9
|
+
alias weighted_f_measure weighted_fmeasure
|
10
|
+
alias fmeasure f_measure
|
12
11
|
|
13
|
-
alias
|
14
|
-
alias
|
12
|
+
alias summary to_summary_string
|
13
|
+
alias class_details to_class_details_string
|
14
|
+
alias confusion_matrix to_matrix_string
|
15
15
|
|
16
|
-
alias
|
17
|
-
alias
|
18
|
-
alias
|
19
|
-
alias
|
16
|
+
alias instance_count num_instances
|
17
|
+
alias correct_count correct
|
18
|
+
alias incorrect_count incorrect
|
19
|
+
alias unclassified_count unclassified
|
20
20
|
|
21
|
-
alias
|
22
|
-
alias
|
23
|
-
alias
|
21
|
+
alias correct_percentage pct_correct
|
22
|
+
alias incorrect_percentage pct_incorrect
|
23
|
+
alias unclassified_percentage pct_unclassified
|
24
24
|
|
25
|
-
alias
|
26
|
-
alias
|
27
|
-
alias
|
28
|
-
alias
|
29
|
-
alias
|
25
|
+
alias true_negative_count num_true_negatives
|
26
|
+
alias false_negative_count num_false_negatives
|
27
|
+
alias true_positive_count num_true_positives
|
28
|
+
alias false_positive_count num_false_positives
|
29
|
+
alias average_cost avg_cost
|
30
30
|
|
31
|
-
alias
|
31
|
+
alias cumulative_margin_distribution to_cumulative_margin_distribution_string
|
32
32
|
end
|
33
33
|
|
34
34
|
Java::WekaClassifiers::Evaluation.__persistent__ = true
|
35
|
-
|
36
35
|
end
|
37
36
|
end
|
@@ -1,138 +1,135 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
require 'active_support/core_ext/hash'
|
3
1
|
require 'weka/classifiers/evaluation'
|
4
2
|
require 'weka/core/instances'
|
5
3
|
|
6
4
|
module Weka
|
7
5
|
module Classifiers
|
8
6
|
module Utils
|
9
|
-
|
7
|
+
def self.included(base)
|
8
|
+
base.class_eval do
|
9
|
+
java_import 'java.util.Random'
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
if instance_methods.include?(:build_classifier)
|
12
|
+
attr_reader :training_instances
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
def train_with_instances(instances)
|
15
|
+
ensure_class_attribute_assigned!(instances)
|
16
16
|
|
17
|
-
|
18
|
-
|
17
|
+
@training_instances = instances
|
18
|
+
build_classifier(instances)
|
19
19
|
|
20
|
-
|
21
|
-
|
20
|
+
self
|
21
|
+
end
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
def cross_validate(folds: 3)
|
27
|
-
ensure_trained_with_instances!
|
23
|
+
def cross_validate(folds: 3)
|
24
|
+
ensure_trained_with_instances!
|
28
25
|
|
29
|
-
|
30
|
-
|
26
|
+
evaluation = Evaluation.new(training_instances)
|
27
|
+
random = Java::JavaUtil::Random.new(1)
|
31
28
|
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
evaluation.cross_validate_model(self, training_instances, folds.to_i, random)
|
30
|
+
evaluation
|
31
|
+
end
|
35
32
|
|
36
|
-
|
37
|
-
|
38
|
-
|
33
|
+
def evaluate(test_instances)
|
34
|
+
ensure_trained_with_instances!
|
35
|
+
ensure_class_attribute_assigned!(test_instances)
|
39
36
|
|
40
|
-
|
41
|
-
|
42
|
-
|
37
|
+
evaluation = Evaluation.new(training_instances)
|
38
|
+
evaluation.evaluate_model(self, test_instances)
|
39
|
+
evaluation
|
40
|
+
end
|
43
41
|
end
|
44
|
-
end
|
45
42
|
|
46
|
-
|
47
|
-
|
48
|
-
|
43
|
+
if instance_methods.include?(:classify_instance)
|
44
|
+
def classify(instance_or_values)
|
45
|
+
ensure_trained_with_instances!
|
49
46
|
|
50
|
-
|
51
|
-
|
47
|
+
instance = classifiable_instance_from(instance_or_values)
|
48
|
+
index = classify_instance(instance)
|
52
49
|
|
53
|
-
|
50
|
+
class_value_of_index(index)
|
51
|
+
end
|
54
52
|
end
|
55
|
-
end
|
56
53
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
54
|
+
if instance_methods.include?(:update_classifier)
|
55
|
+
def add_training_instance(instance)
|
56
|
+
training_instances.add(instance)
|
57
|
+
update_classifier(instance)
|
61
58
|
|
62
|
-
|
63
|
-
|
59
|
+
self
|
60
|
+
end
|
64
61
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
62
|
+
def add_training_data(data)
|
63
|
+
values = training_instances.internal_values_of(data)
|
64
|
+
instance = Weka::Core::DenseInstance.new(values)
|
65
|
+
add_training_instance(instance)
|
66
|
+
end
|
69
67
|
end
|
70
|
-
end
|
71
68
|
|
72
|
-
|
73
|
-
|
74
|
-
|
69
|
+
if instance_methods.include?(:distribution_for_instance)
|
70
|
+
def distribution_for(instance_or_values)
|
71
|
+
ensure_trained_with_instances!
|
75
72
|
|
76
|
-
|
77
|
-
|
73
|
+
instance = classifiable_instance_from(instance_or_values)
|
74
|
+
distributions = distribution_for_instance(instance)
|
78
75
|
|
79
|
-
|
76
|
+
class_distributions_from(distributions)
|
77
|
+
end
|
80
78
|
end
|
81
|
-
end
|
82
79
|
|
83
|
-
|
80
|
+
private
|
84
81
|
|
85
|
-
|
86
|
-
|
82
|
+
def ensure_class_attribute_assigned!(instances)
|
83
|
+
return if instances.class_attribute_defined?
|
87
84
|
|
88
|
-
|
89
|
-
|
90
|
-
|
85
|
+
error = 'Class attribute is not assigned for Instances.'
|
86
|
+
hint = 'You can assign a class attribute with #class_attribute=.'
|
87
|
+
message = "#{error} #{hint}"
|
91
88
|
|
92
|
-
|
93
|
-
|
89
|
+
raise UnassignedClassError, message
|
90
|
+
end
|
94
91
|
|
95
|
-
|
96
|
-
|
92
|
+
def ensure_trained_with_instances!
|
93
|
+
return unless training_instances.nil?
|
97
94
|
|
98
|
-
|
99
|
-
|
100
|
-
|
95
|
+
error = 'Classifier is not trained with Instances.'
|
96
|
+
hint = 'You can set the training instances with #train_with_instances.'
|
97
|
+
message = "#{error} #{hint}"
|
101
98
|
|
102
|
-
|
103
|
-
|
99
|
+
raise UnassignedTrainingInstancesError, message
|
100
|
+
end
|
104
101
|
|
105
|
-
|
106
|
-
|
107
|
-
|
102
|
+
def classifiable_instance_from(instance_or_values)
|
103
|
+
attributes = training_instances.attributes
|
104
|
+
instances = Weka::Core::Instances.new(attributes: attributes)
|
108
105
|
|
109
|
-
|
110
|
-
|
111
|
-
|
106
|
+
class_attribute = training_instances.class_attribute
|
107
|
+
class_index = training_instances.class_index
|
108
|
+
instances.insert_attribute_at(class_attribute, class_index)
|
112
109
|
|
113
|
-
|
114
|
-
|
110
|
+
instances.class_index = training_instances.class_index
|
111
|
+
instances.add_instance(instance_or_values)
|
115
112
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
113
|
+
instance = instances.first
|
114
|
+
instance.set_class_missing
|
115
|
+
instance
|
116
|
+
end
|
120
117
|
|
121
|
-
|
122
|
-
|
123
|
-
|
118
|
+
def class_value_of_index(index)
|
119
|
+
training_instances.class_attribute.value(index)
|
120
|
+
end
|
124
121
|
|
125
|
-
|
126
|
-
|
122
|
+
def class_distributions_from(distributions)
|
123
|
+
class_values = training_instances.class_attribute.values
|
127
124
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
125
|
+
distributions.each_with_index.reduce({}) do |result, (distribution, index)|
|
126
|
+
class_value = class_values[index]
|
127
|
+
result[class_value] = distribution
|
128
|
+
result
|
129
|
+
end
|
132
130
|
end
|
133
131
|
end
|
134
132
|
end
|
135
|
-
|
136
133
|
end
|
137
134
|
end
|
138
135
|
end
|