weka 0.1.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +15 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Jarfile +1 -0
- data/Jarfile.lock +17 -0
- data/MIT-LICENSE.txt +19 -0
- data/README.md +687 -0
- data/Rakefile +21 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/weka.rb +32 -0
- data/lib/weka/attribute_selection.rb +1 -0
- data/lib/weka/attribute_selection/attribute_selection.rb +11 -0
- data/lib/weka/attribute_selection/evaluator.rb +29 -0
- data/lib/weka/attribute_selection/search.rb +14 -0
- data/lib/weka/class_builder.rb +88 -0
- data/lib/weka/classifiers.rb +1 -0
- data/lib/weka/classifiers/bayes.rb +16 -0
- data/lib/weka/classifiers/evaluation.rb +37 -0
- data/lib/weka/classifiers/functions.rb +21 -0
- data/lib/weka/classifiers/lazy.rb +13 -0
- data/lib/weka/classifiers/meta.rb +29 -0
- data/lib/weka/classifiers/rules.rb +16 -0
- data/lib/weka/classifiers/trees.rb +18 -0
- data/lib/weka/classifiers/utils.rb +138 -0
- data/lib/weka/clusterers.rb +16 -0
- data/lib/weka/clusterers/cluster_evaluation.rb +14 -0
- data/lib/weka/clusterers/utils.rb +103 -0
- data/lib/weka/concerns.rb +18 -0
- data/lib/weka/concerns/buildable.rb +19 -0
- data/lib/weka/concerns/describable.rb +30 -0
- data/lib/weka/concerns/optionizable.rb +49 -0
- data/lib/weka/concerns/persistent.rb +16 -0
- data/lib/weka/core.rb +6 -0
- data/lib/weka/core/attribute.rb +24 -0
- data/lib/weka/core/converters.rb +17 -0
- data/lib/weka/core/dense_instance.rb +68 -0
- data/lib/weka/core/instances.rb +199 -0
- data/lib/weka/core/loader.rb +32 -0
- data/lib/weka/core/saver.rb +34 -0
- data/lib/weka/exceptions.rb +6 -0
- data/lib/weka/filters.rb +1 -0
- data/lib/weka/filters/filter.rb +9 -0
- data/lib/weka/filters/supervised/attribute.rb +26 -0
- data/lib/weka/filters/supervised/instance.rb +16 -0
- data/lib/weka/filters/unsupervised/attribute.rb +67 -0
- data/lib/weka/filters/unsupervised/instance.rb +25 -0
- data/lib/weka/filters/utils.rb +17 -0
- data/lib/weka/jars.rb +19 -0
- data/lib/weka/version.rb +3 -0
- data/weka.gemspec +32 -0
- metadata +183 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
module Weka
  module Clusterers
    java_import 'weka.clusterers.ClusterEvaluation'

    # Reopens the imported Java class to add Ruby-style method names on top
    # of the names provided by the Java class.
    class ClusterEvaluation
      # #summary is another name for #cluster_results_to_string.
      alias_method :summary, :cluster_results_to_string

      # #clusters_count is another name for #num_clusters.
      alias_method :clusters_count, :num_clusters
    end

    # NOTE(review): __persistent__ is a JRuby Java-proxy setting; presumably it
    # is enabled so that Ruby-side state attached to ClusterEvaluation objects
    # is retained -- confirm against the JRuby documentation.
    Java::WekaClusterers::ClusterEvaluation.__persistent__ = true
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'weka/clusterers/cluster_evaluation'
|
3
|
+
require 'weka/core/instances'
|
4
|
+
|
5
|
+
module Weka
  module Clusterers
    # Mixin adding Ruby-friendly training, evaluation and clustering helpers
    # to a clusterer class. Each group of helpers is only defined when the
    # including class already has the underlying method it wraps
    # (build_clusterer, cluster_instance, update_clusterer,
    # distribution_for_instance).
    module Utils
      extend ActiveSupport::Concern

      included do
        java_import 'java.util.Random'

        if instance_methods.include?(:build_clusterer)
          attr_reader :training_instances

          # Stores +instances+ as the training data and builds the clusterer
          # from them.
          #
          # Returns self so calls can be chained.
          def train_with_instances(instances)
            @training_instances = instances
            build_clusterer(instances)

            self
          end

          # Cross-validation is only offered for density based clusterers.
          if ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
            # Cross-validates the clusterer on its training instances.
            #
            # folds - number of folds; converted with #to_i (default: 3).
            #
            # Uses a java.util.Random seeded with 1, so the same seed is used
            # on every call.
            # Raises UnassignedTrainingInstancesError if not trained.
            def cross_validate(folds: 3)
              ensure_trained_with_instances!

              ClusterEvaluation.cross_validate_model(
                self,
                training_instances,
                folds.to_i,
                Java::JavaUtil::Random.new(1)
              )
            end
          end

          # Evaluates the clusterer against +test_instances+.
          #
          # Returns the ClusterEvaluation that performed the evaluation.
          # Raises UnassignedTrainingInstancesError if not trained.
          def evaluate(test_instances)
            ensure_trained_with_instances!

            ClusterEvaluation.new.tap do |evaluation|
              evaluation.clusterer = self
              evaluation.evaluate_clusterer(test_instances)
            end
          end
        end

        if instance_methods.include?(:cluster_instance)
          # Clusters a single instance, given either as an instance object or
          # as a list of attribute values.
          #
          # Returns the result of #cluster_instance.
          # Raises UnassignedTrainingInstancesError if not trained.
          def cluster(instance_or_values)
            ensure_trained_with_instances!

            instance = clusterable_instance_from(instance_or_values)
            cluster_instance(instance)
          end
        end

        if instance_methods.include?(:update_clusterer)
          # Appends +instance+ to the training instances and updates the
          # clusterer with it.
          #
          # Returns self.
          # Raises UnassignedTrainingInstancesError if not trained; without
          # this guard the call would fail with a NoMethodError on nil.
          def add_training_instance(instance)
            ensure_trained_with_instances!

            training_instances.add(instance)
            update_clusterer(instance)

            self
          end

          # Converts +data+ (a list of attribute values) into a DenseInstance
          # using the training instances' internal value mapping and adds it
          # via #add_training_instance.
          #
          # Returns self.
          # Raises UnassignedTrainingInstancesError if not trained.
          def add_training_data(data)
            ensure_trained_with_instances!

            values = training_instances.internal_values_of(data)
            instance = Weka::Core::DenseInstance.new(values)
            add_training_instance(instance)
          end
        end

        if instance_methods.include?(:distribution_for_instance)
          # Returns the result of #distribution_for_instance for the given
          # instance (or list of attribute values), converted with #to_a.
          #
          # Raises UnassignedTrainingInstancesError if not trained.
          def distribution_for(instance_or_values)
            ensure_trained_with_instances!

            instance = clusterable_instance_from(instance_or_values)
            distribution_for_instance(instance).to_a
          end
        end

        private

        # Raises UnassignedTrainingInstancesError unless training instances
        # have been assigned.
        def ensure_trained_with_instances!
          return unless training_instances.nil?

          error = 'Clusterer is not trained with Instances.'
          hint = 'You can set the training instances with #train_with_instances.'
          message = "#{error} #{hint}"

          raise UnassignedTrainingInstancesError, message
        end

        # Builds a fresh Instances container with the training attributes,
        # adds the given instance or values to it and returns the first
        # instance of that container.
        def clusterable_instance_from(instance_or_values)
          attributes = training_instances.attributes
          instances = Weka::Core::Instances.new(attributes: attributes)

          instances.add_instance(instance_or_values)
          instances.first
        end
      end

    end
  end
end
|
103
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'weka/concerns/buildable'
|
3
|
+
require 'weka/concerns/describable'
|
4
|
+
require 'weka/concerns/optionizable'
|
5
|
+
require 'weka/concerns/persistent'
|
6
|
+
|
7
|
+
module Weka
  # Umbrella concern: including Weka::Concerns pulls in Buildable,
  # Describable, Optionizable and Persistent, in that order.
  module Concerns
    extend ActiveSupport::Concern

    included do
      # Included one at a time so the inclusion order matches the list order.
      [Buildable, Describable, Optionizable, Persistent].each do |concern|
        include concern
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
|
3
|
+
module Weka
  module Concerns
    # Adds a block-based constructor to the including class.
    module Buildable
      extend ActiveSupport::Concern

      module ClassMethods
        # Creates a new instance via .new and, when a block is given,
        # evaluates the block in the context of that instance.
        #
        # Returns the new instance.
        def build(&block)
          new.tap do |built|
            built.instance_eval(&block) if block
          end
        end
      end

    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
|
3
|
+
module Weka
  module Concerns
    # Adds class-level helpers that describe a class and its options.
    module Describable
      extend ActiveSupport::Concern

      module ClassMethods
        # Returns #global_info of a freshly created instance.
        def description
          new.global_info
        end

        # Returns one line per entry of #list_options (called on a freshly
        # created instance), joined with newlines. Each line has the form
        # "<synopsis>\t<description>".
        def options
          new.list_options
             .map { |option| description_for_option(option) }
             .join("\n")
        end

        private

        # Formats a single option as its synopsis, a tab and its stripped
        # description.
        def description_for_option(option)
          synopsis = option.synopsis
          details  = option.description.strip

          "#{synopsis}\t#{details}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
|
3
|
+
module Weka
  module Concerns
    # Lets the including class be configured with command-line style options.
    module Optionizable
      extend ActiveSupport::Concern

      included do
        java_import "weka.core.Utils"

        # Applies the given options to the receiver via #set_options.
        #
        # single_options - flags without a value, e.g. :M or '-M' (a leading
        #                  dash is optional and added when missing).
        # hash_options   - flags with a value, e.g. N: 3 becomes "-N 3".
        #
        # Returns the joined option String that was applied.
        def use_options(*single_options, **hash_options)
          joined_options = join_options(single_options, hash_options)
          options = Java::WekaCore::Utils.split_options(joined_options)

          set_options(options)
          @options = joined_options
        end

        # Returns the option String set through #use_options, or the class'
        # default options when none were set.
        def options
          @options || self.class.default_options
        end

        private

        # Joins flag-only options and key/value options into one String,
        # skipping whichever part is empty.
        #
        # Both arguments are plain positional values (an Array and a Hash).
        # Forwarding them as splat/double-splat breaks on Rubies with
        # separated keyword arguments: a positional Hash is no longer
        # converted to keywords, and `**{}` passes no argument at all.
        def join_options(single_options, hash_options)
          [
            join_single_options(single_options),
            join_hash_options(hash_options)
          ].reject(&:empty?).join(' ')
        end

        # Prefixes every option with exactly one dash. \A anchors the match to
        # the start of the option so only a leading dash is stripped.
        def join_single_options(options)
          options.map { |option| "-#{option.to_s.sub(/\A-/, '')}" }.join(' ')
        end

        # Renders each pair as "-key value".
        def join_hash_options(options)
          options.map { |key, value| "-#{key} #{value}" }.join(' ')
        end
      end

      module ClassMethods
        # Returns the options reported by #get_options of a freshly created
        # instance, joined with spaces.
        def default_options
          new.get_options.to_a.join(' ')
        end
      end

    end
  end
end
|
data/lib/weka/core.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Weka
  module Core
    java_import "weka.core.Attribute"

    # Reopens the imported Java class to add Ruby helpers.
    class Attribute

      # Returns the result of #enumerate_values as an Array.
      def values
        enumerate_values.to_a
      end

      # Converts +value+ into the representation used for this attribute:
      # the parsed date for date attributes, a Float for numeric attributes
      # and the value's index for nominal attributes. Returns nil for any
      # other attribute type.
      #
      # The order of the checks is important here, because a date is also
      # a numeric.
      def internal_value_of(value)
        return parse_date(value.to_s) if date?
        return value.to_f if numeric?

        index_of_value(value.to_s) if nominal?
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Core
    # Namespace for the loader and saver classes generated by ClassBuilder.
    module Converters
      include ClassBuilder

      # Classes are built without the shared concerns.
      build_classes(
        *%i[ArffLoader ArffSaver CSVLoader CSVSaver JSONLoader JSONSaver],
        include_concerns: false
      )
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Weka
  module Core
    java_import "weka.core.DenseInstance"

    # Reopens the imported Java class to add a Ruby-style constructor,
    # iteration helpers and conversion of the stored values to an Array.
    class DenseInstance
      java_import "java.util.Date"
      java_import "java.text.SimpleDateFormat"

      # data   - list of values; converted to a Java double array.
      # weight - weight of the instance (default: 1.0).
      def initialize(data, weight: 1.0)
        super(weight, data.to_java(:double))
      end

      # Returns the result of #enumerate_attributes as an Array.
      def attributes
        enumerate_attributes.to_a
      end

      # Yields every enumerated attribute; without a block the enumeration
      # itself is returned.
      def each_attribute
        return enumerate_attributes unless block_given?

        enumerate_attributes.each { |attribute| yield(attribute) }
      end

      # Yields every enumerated attribute together with its position. Without
      # a block the enumeration is still walked but nothing is yielded.
      def each_attribute_with_index
        enumerate_attributes.each_with_index do |attribute, index|
          yield(attribute, index) if block_given?
        end
      end

      # Returns the stored values, one entry per value of #to_double_array:
      # date attributes are formatted with the attribute's date format,
      # numeric attributes are returned unchanged and nominal attributes are
      # looked up through Attribute#value. Any other attribute type yields nil.
      def to_a
        to_double_array.each_with_index.map do |raw, position|
          column = attribute_at(position)

          # Date is checked first, mirroring the numeric? check that follows.
          next format_date(raw, column.date_format) if column.date?
          next raw if column.numeric?

          column.value(raw) if column.nominal?
        end
      end

      alias_method :values, :to_a
      alias_method :values_count, :num_values

      private

      # Returns the attribute belonging to the value at +index+.
      #
      # When the dataset has a class attribute, #attributes is indexed one
      # lower for every position after the class index, and the class
      # attribute itself is returned for the class index.
      def attribute_at(index)
        return attributes[index] unless dataset.class_attribute_defined?

        class_position = dataset.class_index
        return class_attribute if class_position == index

        attributes[index > class_position ? index - 1 : index]
      end

      # Formats +value+ as a date String using the SimpleDateFormat pattern
      # given in +format+.
      def format_date(value, format)
        SimpleDateFormat.new(format).format(Date.new(value))
      end
    end
  end
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
require 'weka/core/converters'
|
2
|
+
require 'weka/core/loader'
|
3
|
+
require 'weka/core/saver'
|
4
|
+
require 'weka/core/dense_instance'
|
5
|
+
|
6
|
+
module Weka
  module Core
    java_import "weka.core.Instances"
    java_import "weka.core.FastVector"

    # Reopens the imported Java class to add a Ruby-style constructor, a
    # small DSL for declaring attributes, iteration helpers and file
    # import/export shortcuts.
    class Instances

      DEFAULT_RELATION_NAME = 'Instances'

      # Loads instances from an ARFF file through Loader.
      def self.from_arff(file)
        Loader.load_arff(file)
      end

      # Loads instances from a CSV file through Loader.
      def self.from_csv(file)
        Loader.load_csv(file)
      end

      # Loads instances from a JSON file through Loader.
      def self.from_json(file)
        Loader.load_json(file)
      end

      # relation_name - name of the relation; converted with #to_s.
      # attributes    - attributes to start with (default: none).
      #
      # The block parameter is accepted but not used by this method.
      def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: [], &block)
        attribute_list = attributes.each_with_object(FastVector.new) do |attribute, list|
          list.add_element(attribute)
        end

        super(relation_name.to_s, attribute_list, 0)
      end

      # Returns the result of #enumerate_instances as an Array.
      def instances
        enumerate_instances.to_a
      end

      # Returns the result of #enumerate_attributes as an Array.
      def attributes
        enumerate_attributes.to_a
      end

      # Returns the names of the attributes returned by #attributes.
      def attribute_names
        attributes.map(&:name)
      end

      # Evaluates the block in the context of the receiver, so the attribute
      # DSL (numeric, nominal, string, date) can be used without a receiver.
      #
      # Returns self.
      def add_attributes(&block)
        instance_eval(&block) if block
        self
      end

      alias_method :with_attributes, :add_attributes
      alias_method :instances_count, :num_instances
      alias_method :attributes_count, :num_attributes

      # Yields every enumerated instance; without a block the enumeration
      # itself is returned.
      def each
        return enumerate_instances unless block_given?

        enumerate_instances.each { |instance| yield(instance) }
      end

      # Yields every enumerated instance together with its position. Without
      # a block the enumeration is still walked but nothing is yielded.
      def each_with_index
        enumerate_instances.each_with_index do |instance, index|
          yield(instance, index) if block_given?
        end
      end

      # Yields every enumerated attribute; without a block the enumeration
      # itself is returned.
      def each_attribute
        return enumerate_attributes unless block_given?

        enumerate_attributes.each { |attribute| yield(attribute) }
      end

      # Yields every enumerated attribute together with its position. Without
      # a block the enumeration is still walked but nothing is yielded.
      def each_attribute_with_index
        enumerate_attributes.each_with_index do |attribute, index|
          yield(attribute, index) if block_given?
        end
      end

      # Saves the receiver to +file+ in ARFF format through Saver.
      def to_arff(file)
        Saver.save_arff(file: file, instances: self)
      end

      # Saves the receiver to +file+ in CSV format through Saver.
      def to_csv(file)
        Saver.save_csv(file: file, instances: self)
      end

      # Saves the receiver to +file+ in JSON format through Saver.
      def to_json(file)
        Saver.save_json(file: file, instances: self)
      end

      # Appends an attribute created from the name alone and optionally makes
      # it the class attribute.
      def numeric(name, class_attribute: false)
        add_attribute(Attribute.new(name.to_s))
        self.class_attribute = name if class_attribute
      end

      # Appends an attribute created from the name and the given values
      # (each converted with #to_s) and optionally makes it the class
      # attribute.
      def nominal(name, values:, class_attribute: false)
        labels = Array(values).map(&:to_s)
        add_attribute(Attribute.new(name.to_s, labels))
        self.class_attribute = name if class_attribute
      end

      # Appends an attribute created from the name and an empty value list
      # and optionally makes it the class attribute.
      def string(name, class_attribute: false)
        add_attribute(Attribute.new(name.to_s, []))
        self.class_attribute = name if class_attribute
      end

      # Appends an attribute created from the name and a date format String
      # and optionally makes it the class attribute.
      def date(name, format: 'yyyy-MM-dd HH:mm', class_attribute: false)
        add_attribute(Attribute.new(name.to_s, format))
        self.class_attribute = name if class_attribute
      end

      # Makes the attribute called +name+ the class attribute; passing nil
      # resets the class attribute.
      #
      # Raises ArgumentError if no attribute with that name is defined.
      def class_attribute=(name)
        return reset_class_attribute if name.nil?

        ensure_attribute_defined!(name)
        setClass(attribute_with_name(name))
      end

      alias_method :add_numeric_attribute, :numeric
      alias_method :add_string_attribute, :string
      alias_method :add_nominal_attribute, :nominal
      alias_method :add_date_attribute, :date

      # Returns the class attribute, or nil when none is defined.
      def class_attribute
        classAttribute if class_attribute_defined?
      end

      # Sets the class index to -1, the value #class_attribute_defined?
      # treats as "no class attribute".
      def reset_class_attribute
        set_class_index(-1)
      end

      # Returns true when the class index is zero or greater.
      def class_attribute_defined?
        class_index >= 0
      end

      # Adds a single instance, given either as an instance object or as a
      # list of attribute values. The weight is applied in both cases.
      def add_instance(instance_or_values, weight: 1.0)
        add(instance_from(instance_or_values, weight: weight))
      end

      # Adds every entry of +data+ via #add_instance with the same weight.
      def add_instances(data, weight: 1.0)
        data.each { |values| add_instance(values, weight: weight) }
      end

      # Maps each value to the internal value of the attribute at the same
      # position.
      def internal_values_of(values)
        values.each_with_index.map do |value, index|
          attribute(index).internal_value_of(value)
        end
      end

      # Returns the result of running +filter+ on the receiver.
      def apply_filter(filter)
        filter.filter(self)
      end

      private

      # Inserts +attribute+ after the currently enumerated attributes.
      def add_attribute(attribute)
        insert_attribute_at(attribute, attributes.count)
      end

      # Raises ArgumentError unless an attribute called +name+ exists.
      def ensure_attribute_defined!(name)
        return if attribute_names.include?(name.to_s)

        error = %("#{name}" is not defined.)
        hint  = 'Only defined attributes can be used as class attribute!'

        raise ArgumentError, "#{error} #{hint}"
      end

      # Returns the first attribute whose name equals +name+ (compared as a
      # String), or nil.
      def attribute_with_name(name)
        attributes.find { |attribute| attribute.name == name.to_s }
      end

      # Returns an instance for the given argument: an existing instance gets
      # its weight overwritten and is returned as is; anything else is
      # treated as a list of values and wrapped in a new DenseInstance.
      def instance_from(instance_or_values, weight:)
        unless instance_or_values.is_a?(Java::WekaCore::Instance)
          data = internal_values_of(instance_or_values)
          return DenseInstance.new(data, weight: weight)
        end

        instance_or_values.weight = weight
        instance_or_values
      end
    end

    # NOTE(review): __persistent__ is a JRuby Java-proxy setting; presumably
    # it is enabled so that Ruby-side state attached to Instances objects is
    # retained -- confirm against the JRuby documentation.
    Java::WekaCore::Instances.__persistent__ = true
  end
end
|