weka 0.3.0-java → 0.4.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/.travis.yml +1 -1
- data/README.md +5 -2
- data/Rakefile +5 -5
- data/bin/console +4 -4
- data/lib/weka/attribute_selection/attribute_selection.rb +2 -3
- data/lib/weka/class_builder.rb +21 -12
- data/lib/weka/classifiers/evaluation.rb +18 -19
- data/lib/weka/classifiers/utils.rb +87 -90
- data/lib/weka/clusterers/cluster_evaluation.rb +2 -4
- data/lib/weka/clusterers/utils.rb +63 -66
- data/lib/weka/concerns.rb +5 -8
- data/lib/weka/concerns/buildable.rb +3 -5
- data/lib/weka/concerns/describable.rb +3 -4
- data/lib/weka/concerns/optionizable.rb +26 -27
- data/lib/weka/concerns/persistent.rb +3 -8
- data/lib/weka/concerns/serializable.rb +6 -8
- data/lib/weka/core/attribute.rb +62 -4
- data/lib/weka/core/converters.rb +2 -0
- data/lib/weka/core/dense_instance.rb +7 -7
- data/lib/weka/core/instances.rb +77 -17
- data/lib/weka/core/loader.rb +10 -1
- data/lib/weka/core/saver.rb +21 -1
- data/lib/weka/core/serialization_helper.rb +2 -3
- data/lib/weka/filters/filter.rb +0 -1
- data/lib/weka/filters/supervised/attribute.rb +2 -3
- data/lib/weka/filters/utils.rb +6 -9
- data/lib/weka/jars.rb +9 -12
- data/lib/weka/version.rb +1 -1
- data/weka.gemspec +2 -3
- metadata +17 -31
@@ -3,12 +3,10 @@ module Weka
|
|
3
3
|
java_import 'weka.clusterers.ClusterEvaluation'
|
4
4
|
|
5
5
|
class ClusterEvaluation
|
6
|
-
|
7
|
-
alias
|
8
|
-
alias :clusters_count :num_clusters
|
6
|
+
alias summary cluster_results_to_string
|
7
|
+
alias clusters_count num_clusters
|
9
8
|
end
|
10
9
|
|
11
10
|
Java::WekaClusterers::ClusterEvaluation.__persistent__ = true
|
12
|
-
|
13
11
|
end
|
14
12
|
end
|
@@ -1,103 +1,100 @@
|
|
1
|
-
require 'active_support/concern'
|
2
1
|
require 'weka/clusterers/cluster_evaluation'
|
3
2
|
require 'weka/core/instances'
|
4
3
|
|
5
4
|
module Weka
|
6
5
|
module Clusterers
|
7
6
|
module Utils
|
8
|
-
|
7
|
+
def self.included(base)
|
8
|
+
base.class_eval do
|
9
|
+
java_import 'java.util.Random'
|
9
10
|
|
10
|
-
|
11
|
-
|
11
|
+
if instance_methods.include?(:build_clusterer)
|
12
|
+
attr_reader :training_instances
|
12
13
|
|
13
|
-
|
14
|
-
|
14
|
+
def train_with_instances(instances)
|
15
|
+
@training_instances = instances
|
16
|
+
build_clusterer(instances)
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
build_clusterer(instances)
|
18
|
+
self
|
19
|
+
end
|
19
20
|
|
20
|
-
|
21
|
-
|
21
|
+
if ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
|
22
|
+
def cross_validate(folds: 3)
|
23
|
+
ensure_trained_with_instances!
|
24
|
+
|
25
|
+
ClusterEvaluation.cross_validate_model(
|
26
|
+
self,
|
27
|
+
training_instances,
|
28
|
+
folds.to_i,
|
29
|
+
Java::JavaUtil::Random.new(1)
|
30
|
+
)
|
31
|
+
end
|
32
|
+
end
|
22
33
|
|
23
|
-
|
24
|
-
def cross_validate(folds: 3)
|
34
|
+
def evaluate(test_instances)
|
25
35
|
ensure_trained_with_instances!
|
26
36
|
|
27
|
-
ClusterEvaluation.
|
28
|
-
self
|
29
|
-
|
30
|
-
|
31
|
-
Java::JavaUtil::Random.new(1)
|
32
|
-
)
|
37
|
+
ClusterEvaluation.new.tap do |evaluation|
|
38
|
+
evaluation.clusterer = self
|
39
|
+
evaluation.evaluate_clusterer(test_instances)
|
40
|
+
end
|
33
41
|
end
|
34
42
|
end
|
35
43
|
|
36
|
-
|
37
|
-
|
44
|
+
if instance_methods.include?(:cluster_instance)
|
45
|
+
def cluster(instance_or_values)
|
46
|
+
ensure_trained_with_instances!
|
38
47
|
|
39
|
-
|
40
|
-
|
41
|
-
evaluation.evaluate_clusterer(test_instances)
|
48
|
+
instance = clusterable_instance_from(instance_or_values)
|
49
|
+
cluster_instance(instance)
|
42
50
|
end
|
43
51
|
end
|
44
|
-
end
|
45
52
|
|
46
|
-
|
47
|
-
|
48
|
-
|
53
|
+
if instance_methods.include?(:update_clusterer)
|
54
|
+
def add_training_instance(instance)
|
55
|
+
training_instances.add(instance)
|
56
|
+
update_clusterer(instance)
|
49
57
|
|
50
|
-
|
51
|
-
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
if instance_methods.include?(:update_clusterer)
|
56
|
-
def add_training_instance(instance)
|
57
|
-
training_instances.add(instance)
|
58
|
-
update_clusterer(instance)
|
59
|
-
|
60
|
-
self
|
61
|
-
end
|
58
|
+
self
|
59
|
+
end
|
62
60
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
61
|
+
def add_training_data(data)
|
62
|
+
values = training_instances.internal_values_of(data)
|
63
|
+
instance = Weka::Core::DenseInstance.new(values)
|
64
|
+
add_training_instance(instance)
|
65
|
+
end
|
67
66
|
end
|
68
|
-
end
|
69
67
|
|
70
|
-
|
71
|
-
|
72
|
-
|
68
|
+
if instance_methods.include?(:distribution_for_instance)
|
69
|
+
def distribution_for(instance_or_values)
|
70
|
+
ensure_trained_with_instances!
|
73
71
|
|
74
|
-
|
75
|
-
|
72
|
+
instance = clusterable_instance_from(instance_or_values)
|
73
|
+
distribution_for_instance(instance).to_a
|
74
|
+
end
|
76
75
|
end
|
77
|
-
end
|
78
76
|
|
79
|
-
|
77
|
+
private
|
80
78
|
|
81
|
-
|
82
|
-
|
79
|
+
def ensure_trained_with_instances!
|
80
|
+
return unless training_instances.nil?
|
83
81
|
|
84
|
-
|
85
|
-
|
86
|
-
|
82
|
+
error = 'Clusterer is not trained with Instances.'
|
83
|
+
hint = 'You can set the training instances with #train_with_instances.'
|
84
|
+
message = "#{error} #{hint}"
|
87
85
|
|
88
|
-
|
89
|
-
|
86
|
+
raise UnassignedTrainingInstancesError, message
|
87
|
+
end
|
90
88
|
|
91
|
-
|
92
|
-
|
93
|
-
|
89
|
+
def clusterable_instance_from(instance_or_values)
|
90
|
+
attributes = training_instances.attributes
|
91
|
+
instances = Weka::Core::Instances.new(attributes: attributes)
|
94
92
|
|
95
|
-
|
96
|
-
|
93
|
+
instances.add_instance(instance_or_values)
|
94
|
+
instances.first
|
95
|
+
end
|
97
96
|
end
|
98
97
|
end
|
99
|
-
|
100
98
|
end
|
101
99
|
end
|
102
100
|
end
|
103
|
-
|
data/lib/weka/concerns.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'active_support/concern'
|
2
1
|
require 'weka/concerns/buildable'
|
3
2
|
require 'weka/concerns/describable'
|
4
3
|
require 'weka/concerns/optionizable'
|
@@ -7,13 +6,11 @@ require 'weka/concerns/serializable'
|
|
7
6
|
|
8
7
|
module Weka
|
9
8
|
module Concerns
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
include
|
14
|
-
include
|
15
|
-
include Optionizable
|
16
|
-
include Persistent
|
9
|
+
def self.included(base)
|
10
|
+
base.include Buildable
|
11
|
+
base.include Describable
|
12
|
+
base.include Optionizable
|
13
|
+
base.include Persistent
|
17
14
|
end
|
18
15
|
end
|
19
16
|
end
|
@@ -1,19 +1,17 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Concerns
|
5
3
|
module Buildable
|
6
|
-
|
4
|
+
def self.included(base)
|
5
|
+
base.extend ClassMethods
|
6
|
+
end
|
7
7
|
|
8
8
|
module ClassMethods
|
9
|
-
|
10
9
|
def build(&block)
|
11
10
|
instance = new
|
12
11
|
instance.instance_eval(&block) if block_given?
|
13
12
|
instance
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
18
16
|
end
|
19
17
|
end
|
@@ -1,12 +1,11 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Concerns
|
5
3
|
module Describable
|
6
|
-
|
4
|
+
def self.included(base)
|
5
|
+
base.extend ClassMethods
|
6
|
+
end
|
7
7
|
|
8
8
|
module ClassMethods
|
9
|
-
|
10
9
|
def description
|
11
10
|
new.global_info
|
12
11
|
end
|
@@ -1,40 +1,40 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Concerns
|
5
3
|
module Optionizable
|
6
|
-
|
4
|
+
def self.included(base)
|
5
|
+
base.extend(ClassMethods)
|
7
6
|
|
8
|
-
|
9
|
-
|
7
|
+
base.class_eval do
|
8
|
+
java_import 'weka.core.Utils'
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
def use_options(*single_options, **hash_options)
|
11
|
+
joined_options = join_options(single_options, hash_options)
|
12
|
+
options = Java::WekaCore::Utils.split_options(joined_options)
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
set_options(options)
|
15
|
+
@options = joined_options
|
16
|
+
end
|
18
17
|
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
def options
|
19
|
+
@options || self.class.default_options
|
20
|
+
end
|
22
21
|
|
23
|
-
|
22
|
+
private
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
24
|
+
def join_options(*single_options, **hash_options)
|
25
|
+
[
|
26
|
+
join_single_options(*single_options),
|
27
|
+
join_hash_options(**hash_options)
|
28
|
+
].reject(&:empty?).join(' ')
|
29
|
+
end
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
31
|
+
def join_single_options(options)
|
32
|
+
options.map { |option| "-#{option.to_s.sub(/^-/, '')}" }.join(' ')
|
33
|
+
end
|
35
34
|
|
36
|
-
|
37
|
-
|
35
|
+
def join_hash_options(options)
|
36
|
+
options.map { |key, value| "-#{key} #{value}" }.join(' ')
|
37
|
+
end
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
@@ -43,7 +43,6 @@ module Weka
|
|
43
43
|
new.get_options.to_a.join(' ')
|
44
44
|
end
|
45
45
|
end
|
46
|
-
|
47
46
|
end
|
48
47
|
end
|
49
48
|
end
|
@@ -1,16 +1,11 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Concerns
|
5
3
|
module Persistent
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
if self.respond_to?(:__persistent__=)
|
10
|
-
self.__persistent__ = true
|
4
|
+
def self.included(base)
|
5
|
+
base.class_eval do
|
6
|
+
self.__persistent__ = true if respond_to?(:__persistent__=)
|
11
7
|
end
|
12
8
|
end
|
13
|
-
|
14
9
|
end
|
15
10
|
end
|
16
11
|
end
|
@@ -1,17 +1,15 @@
|
|
1
|
-
require 'active_support/concern'
|
2
1
|
require 'weka/core/serialization_helper'
|
3
2
|
|
4
3
|
module Weka
|
5
4
|
module Concerns
|
6
5
|
module Serializable
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
def self.included(base)
|
7
|
+
base.class_eval do
|
8
|
+
def serialize(filename)
|
9
|
+
Weka::Core::SerializationHelper.write(filename, self)
|
10
|
+
end
|
12
11
|
end
|
13
12
|
end
|
14
|
-
|
15
13
|
end
|
16
14
|
end
|
17
|
-
end
|
15
|
+
end
|
data/lib/weka/core/attribute.rb
CHANGED
@@ -1,13 +1,73 @@
|
|
1
|
+
require 'weka/concerns/persistent'
|
2
|
+
|
1
3
|
module Weka
|
2
4
|
module Core
|
3
|
-
java_import
|
5
|
+
java_import 'weka.core.Attribute'
|
4
6
|
|
5
7
|
class Attribute
|
8
|
+
include Weka::Concerns::Persistent
|
9
|
+
|
10
|
+
TYPES = %i(numeric nominal string date).freeze
|
11
|
+
|
12
|
+
class << self
|
13
|
+
def new_numeric(name)
|
14
|
+
new(name.to_s)
|
15
|
+
end
|
16
|
+
|
17
|
+
def new_nominal(name, values)
|
18
|
+
new(name.to_s, Array(values).map(&:to_s))
|
19
|
+
end
|
20
|
+
|
21
|
+
def new_date(name, format)
|
22
|
+
new(name.to_s, format.to_s)
|
23
|
+
end
|
24
|
+
|
25
|
+
##
|
26
|
+
# Creates a new Attribute instance of type string.
|
27
|
+
#
|
28
|
+
# The java class defines the same constructor:
|
29
|
+
# Attribute(java.lang.String, java.util.List<java.lang.String>)
|
30
|
+
# for nominal and string attributes and handles the type internally
|
31
|
+
# based on the second argument.
|
32
|
+
#
|
33
|
+
# In Java you would write the following code to create a string Attribute:
|
34
|
+
# Attribute attribute = new Attribute("name", (FastVector) null);
|
35
|
+
#
|
36
|
+
# When we use a similar approach in JRuby:
|
37
|
+
# attribute = Attribute.new('name', nil)
|
38
|
+
# then a Java::JavaLang::NullPointerException is thrown.
|
39
|
+
#
|
40
|
+
# Thus, we use reflection here and call the constructor explicitly, see
|
41
|
+
# https://github.com/jruby/jruby/wiki/CallingJavaFromJRuby#constructors
|
42
|
+
#
|
43
|
+
# The object returned from the Java constructor only has class
|
44
|
+
# Java::JavaObject so we need to cast it to the proper class
|
45
|
+
#
|
46
|
+
# See also:
|
47
|
+
# https://stackoverflow.com/questions/1792495/casting-objects-in-jruby
|
48
|
+
def new_string(name)
|
49
|
+
constructor = Attribute.java_class.declared_constructor(
|
50
|
+
java.lang.String,
|
51
|
+
java.util.List
|
52
|
+
)
|
53
|
+
|
54
|
+
constructor.new_instance(name.to_s, nil).to_java(Attribute)
|
55
|
+
end
|
56
|
+
end
|
6
57
|
|
7
58
|
def values
|
8
59
|
enumerate_values.to_a
|
9
60
|
end
|
10
61
|
|
62
|
+
##
|
63
|
+
# Returns the string representation of the attribute's type.
|
64
|
+
# Overwrites the weka.core.Attribute type Java method, which returns an
|
65
|
+
# integer representation of the type based on the defined type constants.
|
66
|
+
def type
|
67
|
+
self.class.type_to_string(self)
|
68
|
+
end
|
69
|
+
|
70
|
+
##
|
11
71
|
# The order of the if statements is important here, because a date is also
|
12
72
|
# a numeric.
|
13
73
|
def internal_value_of(value)
|
@@ -15,10 +75,8 @@ module Weka
|
|
15
75
|
return Float::NAN if [nil, '?'].include?(value)
|
16
76
|
return parse_date(value.to_s) if date?
|
17
77
|
return value.to_f if numeric?
|
18
|
-
return index_of_value(value.to_s) if nominal?
|
78
|
+
return index_of_value(value.to_s) if nominal? || string?
|
19
79
|
end
|
20
80
|
end
|
21
|
-
|
22
|
-
Weka::Core::Attribute.__persistent__ = true
|
23
81
|
end
|
24
82
|
end
|