weka 0.3.0-java → 0.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/.travis.yml +1 -1
- data/README.md +5 -2
- data/Rakefile +5 -5
- data/bin/console +4 -4
- data/lib/weka/attribute_selection/attribute_selection.rb +2 -3
- data/lib/weka/class_builder.rb +21 -12
- data/lib/weka/classifiers/evaluation.rb +18 -19
- data/lib/weka/classifiers/utils.rb +87 -90
- data/lib/weka/clusterers/cluster_evaluation.rb +2 -4
- data/lib/weka/clusterers/utils.rb +63 -66
- data/lib/weka/concerns.rb +5 -8
- data/lib/weka/concerns/buildable.rb +3 -5
- data/lib/weka/concerns/describable.rb +3 -4
- data/lib/weka/concerns/optionizable.rb +26 -27
- data/lib/weka/concerns/persistent.rb +3 -8
- data/lib/weka/concerns/serializable.rb +6 -8
- data/lib/weka/core/attribute.rb +62 -4
- data/lib/weka/core/converters.rb +2 -0
- data/lib/weka/core/dense_instance.rb +7 -7
- data/lib/weka/core/instances.rb +77 -17
- data/lib/weka/core/loader.rb +10 -1
- data/lib/weka/core/saver.rb +21 -1
- data/lib/weka/core/serialization_helper.rb +2 -3
- data/lib/weka/filters/filter.rb +0 -1
- data/lib/weka/filters/supervised/attribute.rb +2 -3
- data/lib/weka/filters/utils.rb +6 -9
- data/lib/weka/jars.rb +9 -12
- data/lib/weka/version.rb +1 -1
- data/weka.gemspec +2 -3
- metadata +17 -31
@@ -3,12 +3,10 @@ module Weka
|
|
3
3
|
java_import 'weka.clusterers.ClusterEvaluation'
|
4
4
|
|
5
5
|
class ClusterEvaluation
|
6
|
-
|
7
|
-
alias
|
8
|
-
alias :clusters_count :num_clusters
|
6
|
+
alias summary cluster_results_to_string
|
7
|
+
alias clusters_count num_clusters
|
9
8
|
end
|
10
9
|
|
11
10
|
Java::WekaClusterers::ClusterEvaluation.__persistent__ = true
|
12
|
-
|
13
11
|
end
|
14
12
|
end
|
@@ -1,103 +1,100 @@
|
|
1
|
-
require 'active_support/concern'
|
2
1
|
require 'weka/clusterers/cluster_evaluation'
|
3
2
|
require 'weka/core/instances'
|
4
3
|
|
5
4
|
module Weka
|
6
5
|
module Clusterers
|
7
6
|
module Utils
|
8
|
-
|
7
|
+
def self.included(base)
|
8
|
+
base.class_eval do
|
9
|
+
java_import 'java.util.Random'
|
9
10
|
|
10
|
-
|
11
|
-
|
11
|
+
if instance_methods.include?(:build_clusterer)
|
12
|
+
attr_reader :training_instances
|
12
13
|
|
13
|
-
|
14
|
-
|
14
|
+
def train_with_instances(instances)
|
15
|
+
@training_instances = instances
|
16
|
+
build_clusterer(instances)
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
build_clusterer(instances)
|
18
|
+
self
|
19
|
+
end
|
19
20
|
|
20
|
-
|
21
|
-
|
21
|
+
if ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
|
22
|
+
def cross_validate(folds: 3)
|
23
|
+
ensure_trained_with_instances!
|
24
|
+
|
25
|
+
ClusterEvaluation.cross_validate_model(
|
26
|
+
self,
|
27
|
+
training_instances,
|
28
|
+
folds.to_i,
|
29
|
+
Java::JavaUtil::Random.new(1)
|
30
|
+
)
|
31
|
+
end
|
32
|
+
end
|
22
33
|
|
23
|
-
|
24
|
-
def cross_validate(folds: 3)
|
34
|
+
def evaluate(test_instances)
|
25
35
|
ensure_trained_with_instances!
|
26
36
|
|
27
|
-
ClusterEvaluation.
|
28
|
-
self
|
29
|
-
|
30
|
-
|
31
|
-
Java::JavaUtil::Random.new(1)
|
32
|
-
)
|
37
|
+
ClusterEvaluation.new.tap do |evaluation|
|
38
|
+
evaluation.clusterer = self
|
39
|
+
evaluation.evaluate_clusterer(test_instances)
|
40
|
+
end
|
33
41
|
end
|
34
42
|
end
|
35
43
|
|
36
|
-
|
37
|
-
|
44
|
+
if instance_methods.include?(:cluster_instance)
|
45
|
+
def cluster(instance_or_values)
|
46
|
+
ensure_trained_with_instances!
|
38
47
|
|
39
|
-
|
40
|
-
|
41
|
-
evaluation.evaluate_clusterer(test_instances)
|
48
|
+
instance = clusterable_instance_from(instance_or_values)
|
49
|
+
cluster_instance(instance)
|
42
50
|
end
|
43
51
|
end
|
44
|
-
end
|
45
52
|
|
46
|
-
|
47
|
-
|
48
|
-
|
53
|
+
if instance_methods.include?(:update_clusterer)
|
54
|
+
def add_training_instance(instance)
|
55
|
+
training_instances.add(instance)
|
56
|
+
update_clusterer(instance)
|
49
57
|
|
50
|
-
|
51
|
-
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
if instance_methods.include?(:update_clusterer)
|
56
|
-
def add_training_instance(instance)
|
57
|
-
training_instances.add(instance)
|
58
|
-
update_clusterer(instance)
|
59
|
-
|
60
|
-
self
|
61
|
-
end
|
58
|
+
self
|
59
|
+
end
|
62
60
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
61
|
+
def add_training_data(data)
|
62
|
+
values = training_instances.internal_values_of(data)
|
63
|
+
instance = Weka::Core::DenseInstance.new(values)
|
64
|
+
add_training_instance(instance)
|
65
|
+
end
|
67
66
|
end
|
68
|
-
end
|
69
67
|
|
70
|
-
|
71
|
-
|
72
|
-
|
68
|
+
if instance_methods.include?(:distribution_for_instance)
|
69
|
+
def distribution_for(instance_or_values)
|
70
|
+
ensure_trained_with_instances!
|
73
71
|
|
74
|
-
|
75
|
-
|
72
|
+
instance = clusterable_instance_from(instance_or_values)
|
73
|
+
distribution_for_instance(instance).to_a
|
74
|
+
end
|
76
75
|
end
|
77
|
-
end
|
78
76
|
|
79
|
-
|
77
|
+
private
|
80
78
|
|
81
|
-
|
82
|
-
|
79
|
+
def ensure_trained_with_instances!
|
80
|
+
return unless training_instances.nil?
|
83
81
|
|
84
|
-
|
85
|
-
|
86
|
-
|
82
|
+
error = 'Clusterer is not trained with Instances.'
|
83
|
+
hint = 'You can set the training instances with #train_with_instances.'
|
84
|
+
message = "#{error} #{hint}"
|
87
85
|
|
88
|
-
|
89
|
-
|
86
|
+
raise UnassignedTrainingInstancesError, message
|
87
|
+
end
|
90
88
|
|
91
|
-
|
92
|
-
|
93
|
-
|
89
|
+
def clusterable_instance_from(instance_or_values)
|
90
|
+
attributes = training_instances.attributes
|
91
|
+
instances = Weka::Core::Instances.new(attributes: attributes)
|
94
92
|
|
95
|
-
|
96
|
-
|
93
|
+
instances.add_instance(instance_or_values)
|
94
|
+
instances.first
|
95
|
+
end
|
97
96
|
end
|
98
97
|
end
|
99
|
-
|
100
98
|
end
|
101
99
|
end
|
102
100
|
end
|
103
|
-
|
data/lib/weka/concerns.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'active_support/concern'
|
2
1
|
require 'weka/concerns/buildable'
|
3
2
|
require 'weka/concerns/describable'
|
4
3
|
require 'weka/concerns/optionizable'
|
@@ -7,13 +6,11 @@ require 'weka/concerns/serializable'
|
|
7
6
|
|
8
7
|
module Weka
|
9
8
|
module Concerns
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
include
|
14
|
-
include
|
15
|
-
include Optionizable
|
16
|
-
include Persistent
|
9
|
+
def self.included(base)
|
10
|
+
base.include Buildable
|
11
|
+
base.include Describable
|
12
|
+
base.include Optionizable
|
13
|
+
base.include Persistent
|
17
14
|
end
|
18
15
|
end
|
19
16
|
end
|
@@ -1,19 +1,17 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Concerns
|
5
3
|
module Buildable
|
6
|
-
|
4
|
+
def self.included(base)
|
5
|
+
base.extend ClassMethods
|
6
|
+
end
|
7
7
|
|
8
8
|
module ClassMethods
|
9
|
-
|
10
9
|
def build(&block)
|
11
10
|
instance = new
|
12
11
|
instance.instance_eval(&block) if block_given?
|
13
12
|
instance
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
18
16
|
end
|
19
17
|
end
|
@@ -1,12 +1,11 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Concerns
|
5
3
|
module Describable
|
6
|
-
|
4
|
+
def self.included(base)
|
5
|
+
base.extend ClassMethods
|
6
|
+
end
|
7
7
|
|
8
8
|
module ClassMethods
|
9
|
-
|
10
9
|
def description
|
11
10
|
new.global_info
|
12
11
|
end
|
@@ -1,40 +1,40 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Concerns
|
5
3
|
module Optionizable
|
6
|
-
|
4
|
+
def self.included(base)
|
5
|
+
base.extend(ClassMethods)
|
7
6
|
|
8
|
-
|
9
|
-
|
7
|
+
base.class_eval do
|
8
|
+
java_import 'weka.core.Utils'
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
def use_options(*single_options, **hash_options)
|
11
|
+
joined_options = join_options(single_options, hash_options)
|
12
|
+
options = Java::WekaCore::Utils.split_options(joined_options)
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
set_options(options)
|
15
|
+
@options = joined_options
|
16
|
+
end
|
18
17
|
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
def options
|
19
|
+
@options || self.class.default_options
|
20
|
+
end
|
22
21
|
|
23
|
-
|
22
|
+
private
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
24
|
+
def join_options(*single_options, **hash_options)
|
25
|
+
[
|
26
|
+
join_single_options(*single_options),
|
27
|
+
join_hash_options(**hash_options)
|
28
|
+
].reject(&:empty?).join(' ')
|
29
|
+
end
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
31
|
+
def join_single_options(options)
|
32
|
+
options.map { |option| "-#{option.to_s.sub(/^-/, '')}" }.join(' ')
|
33
|
+
end
|
35
34
|
|
36
|
-
|
37
|
-
|
35
|
+
def join_hash_options(options)
|
36
|
+
options.map { |key, value| "-#{key} #{value}" }.join(' ')
|
37
|
+
end
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
@@ -43,7 +43,6 @@ module Weka
|
|
43
43
|
new.get_options.to_a.join(' ')
|
44
44
|
end
|
45
45
|
end
|
46
|
-
|
47
46
|
end
|
48
47
|
end
|
49
48
|
end
|
@@ -1,16 +1,11 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Concerns
|
5
3
|
module Persistent
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
if self.respond_to?(:__persistent__=)
|
10
|
-
self.__persistent__ = true
|
4
|
+
def self.included(base)
|
5
|
+
base.class_eval do
|
6
|
+
self.__persistent__ = true if respond_to?(:__persistent__=)
|
11
7
|
end
|
12
8
|
end
|
13
|
-
|
14
9
|
end
|
15
10
|
end
|
16
11
|
end
|
@@ -1,17 +1,15 @@
|
|
1
|
-
require 'active_support/concern'
|
2
1
|
require 'weka/core/serialization_helper'
|
3
2
|
|
4
3
|
module Weka
|
5
4
|
module Concerns
|
6
5
|
module Serializable
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
def self.included(base)
|
7
|
+
base.class_eval do
|
8
|
+
def serialize(filename)
|
9
|
+
Weka::Core::SerializationHelper.write(filename, self)
|
10
|
+
end
|
12
11
|
end
|
13
12
|
end
|
14
|
-
|
15
13
|
end
|
16
14
|
end
|
17
|
-
end
|
15
|
+
end
|
data/lib/weka/core/attribute.rb
CHANGED
@@ -1,13 +1,73 @@
|
|
1
|
+
require 'weka/concerns/persistent'
|
2
|
+
|
1
3
|
module Weka
|
2
4
|
module Core
|
3
|
-
java_import
|
5
|
+
java_import 'weka.core.Attribute'
|
4
6
|
|
5
7
|
class Attribute
|
8
|
+
include Weka::Concerns::Persistent
|
9
|
+
|
10
|
+
TYPES = %i(numeric nominal string date).freeze
|
11
|
+
|
12
|
+
class << self
|
13
|
+
def new_numeric(name)
|
14
|
+
new(name.to_s)
|
15
|
+
end
|
16
|
+
|
17
|
+
def new_nominal(name, values)
|
18
|
+
new(name.to_s, Array(values).map(&:to_s))
|
19
|
+
end
|
20
|
+
|
21
|
+
def new_date(name, format)
|
22
|
+
new(name.to_s, format.to_s)
|
23
|
+
end
|
24
|
+
|
25
|
+
##
|
26
|
+
# Creates a new Attribute instance of type string.
|
27
|
+
#
|
28
|
+
# The java class defines the same constructor:
|
29
|
+
# Attribute(java.lang.String, java.util.List<java.lang.String>)
|
30
|
+
# for nominal and string attributes and handles the type internally
|
31
|
+
# based on the second argument.
|
32
|
+
#
|
33
|
+
# In Java you would write the following code to create a string Attribute:
|
34
|
+
# Attribute attribute = new Attribute("name", (FastVector) null);
|
35
|
+
#
|
36
|
+
# When we use a similar approach in JRuby:
|
37
|
+
# attribute = Attribute.new('name', nil)
|
38
|
+
# then a Java::JavaLang::NullPointerException is thrown.
|
39
|
+
#
|
40
|
+
# Thus, we use reflection here and call the constructor explicitly, see
|
41
|
+
# https://github.com/jruby/jruby/wiki/CallingJavaFromJRuby#constructors
|
42
|
+
#
|
43
|
+
# The object returned from Java constructor only has class
|
44
|
+
# Java::JavaObject so we need to cast it to the proper class
|
45
|
+
#
|
46
|
+
# See also:
|
47
|
+
# https://stackoverflow.com/questions/1792495/casting-objects-in-jruby
|
48
|
+
def new_string(name)
|
49
|
+
constructor = Attribute.java_class.declared_constructor(
|
50
|
+
java.lang.String,
|
51
|
+
java.util.List
|
52
|
+
)
|
53
|
+
|
54
|
+
constructor.new_instance(name.to_s, nil).to_java(Attribute)
|
55
|
+
end
|
56
|
+
end
|
6
57
|
|
7
58
|
def values
|
8
59
|
enumerate_values.to_a
|
9
60
|
end
|
10
61
|
|
62
|
+
##
|
63
|
+
# Returns the string representation of the attribute's type.
|
64
|
+
# Overwrites the weka.core.Attribute type Java method, which returns an
|
65
|
+
# integer representation of the type based on the defined type constants.
|
66
|
+
def type
|
67
|
+
self.class.type_to_string(self)
|
68
|
+
end
|
69
|
+
|
70
|
+
##
|
11
71
|
# The order of the if statements is important here, because a date is also
|
12
72
|
# a numeric.
|
13
73
|
def internal_value_of(value)
|
@@ -15,10 +75,8 @@ module Weka
|
|
15
75
|
return Float::NAN if [nil, '?'].include?(value)
|
16
76
|
return parse_date(value.to_s) if date?
|
17
77
|
return value.to_f if numeric?
|
18
|
-
return index_of_value(value.to_s) if nominal?
|
78
|
+
return index_of_value(value.to_s) if nominal? || string?
|
19
79
|
end
|
20
80
|
end
|
21
|
-
|
22
|
-
Weka::Core::Attribute.__persistent__ = true
|
23
81
|
end
|
24
82
|
end
|