weka 0.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +15 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Jarfile +1 -0
- data/Jarfile.lock +17 -0
- data/MIT-LICENSE.txt +19 -0
- data/README.md +687 -0
- data/Rakefile +21 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/weka.rb +32 -0
- data/lib/weka/attribute_selection.rb +1 -0
- data/lib/weka/attribute_selection/attribute_selection.rb +11 -0
- data/lib/weka/attribute_selection/evaluator.rb +29 -0
- data/lib/weka/attribute_selection/search.rb +14 -0
- data/lib/weka/class_builder.rb +88 -0
- data/lib/weka/classifiers.rb +1 -0
- data/lib/weka/classifiers/bayes.rb +16 -0
- data/lib/weka/classifiers/evaluation.rb +37 -0
- data/lib/weka/classifiers/functions.rb +21 -0
- data/lib/weka/classifiers/lazy.rb +13 -0
- data/lib/weka/classifiers/meta.rb +29 -0
- data/lib/weka/classifiers/rules.rb +16 -0
- data/lib/weka/classifiers/trees.rb +18 -0
- data/lib/weka/classifiers/utils.rb +138 -0
- data/lib/weka/clusterers.rb +16 -0
- data/lib/weka/clusterers/cluster_evaluation.rb +14 -0
- data/lib/weka/clusterers/utils.rb +103 -0
- data/lib/weka/concerns.rb +18 -0
- data/lib/weka/concerns/buildable.rb +19 -0
- data/lib/weka/concerns/describable.rb +30 -0
- data/lib/weka/concerns/optionizable.rb +49 -0
- data/lib/weka/concerns/persistent.rb +16 -0
- data/lib/weka/core.rb +6 -0
- data/lib/weka/core/attribute.rb +24 -0
- data/lib/weka/core/converters.rb +17 -0
- data/lib/weka/core/dense_instance.rb +68 -0
- data/lib/weka/core/instances.rb +199 -0
- data/lib/weka/core/loader.rb +32 -0
- data/lib/weka/core/saver.rb +34 -0
- data/lib/weka/exceptions.rb +6 -0
- data/lib/weka/filters.rb +1 -0
- data/lib/weka/filters/filter.rb +9 -0
- data/lib/weka/filters/supervised/attribute.rb +26 -0
- data/lib/weka/filters/supervised/instance.rb +16 -0
- data/lib/weka/filters/unsupervised/attribute.rb +67 -0
- data/lib/weka/filters/unsupervised/instance.rb +25 -0
- data/lib/weka/filters/utils.rb +17 -0
- data/lib/weka/jars.rb +19 -0
- data/lib/weka/version.rb +3 -0
- data/weka.gemspec +32 -0
- metadata +183 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
module Weka
  module Clusterers
    java_import 'weka.clusterers.ClusterEvaluation'

    # Reopens the imported Weka ClusterEvaluation class to expose
    # Ruby-style names for two of its methods.
    class ClusterEvaluation
      # summary        -> cluster_results_to_string
      # clusters_count -> num_clusters
      alias summary cluster_results_to_string
      alias clusters_count num_clusters
    end

    # JRuby proxy setting for the Java class.
    # NOTE(review): presumably keeps Ruby-side state attached to the Java
    # objects between calls - confirm against the JRuby documentation.
    Java::WekaClusterers::ClusterEvaluation.__persistent__ = true
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'weka/clusterers/cluster_evaluation'
|
3
|
+
require 'weka/core/instances'
|
4
|
+
|
5
|
+
module Weka
  module Clusterers
    # Mixin that adds Ruby-style training, evaluation and clustering helpers
    # to a clusterer class. Each group of helpers is only defined when the
    # including class already provides the underlying method it wraps
    # (build_clusterer, cluster_instance, update_clusterer,
    # distribution_for_instance).
    module Utils
      extend ActiveSupport::Concern

      included do
        java_import 'java.util.Random'

        if instance_methods.include?(:build_clusterer)
          # The instances last passed to #train_with_instances (nil before).
          attr_reader :training_instances

          # Remembers the given instances and builds the clusterer with them.
          #
          # @param instances the training data handed to build_clusterer
          # @return [self] to allow chaining
          def train_with_instances(instances)
            @training_instances = instances
            build_clusterer(instances)

            self
          end

          if ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
            # Cross-validates this clusterer on its training instances using
            # ClusterEvaluation.cross_validate_model with a fixed seed of 1.
            #
            # @param folds [#to_i] number of folds (default 3)
            # @return the value returned by cross_validate_model
            # @raise [UnassignedTrainingInstancesError] if not trained yet
            def cross_validate(folds: 3)
              ensure_trained_with_instances!

              ClusterEvaluation.cross_validate_model(
                self,
                training_instances,
                folds.to_i,
                Java::JavaUtil::Random.new(1)
              )
            end
          end

          # Evaluates this clusterer on the given test instances.
          #
          # @param test_instances the data passed to evaluate_clusterer
          # @return [ClusterEvaluation] the evaluation object after evaluating
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          def evaluate(test_instances)
            ensure_trained_with_instances!

            ClusterEvaluation.new.tap do |evaluation|
              evaluation.clusterer = self
              evaluation.evaluate_clusterer(test_instances)
            end
          end
        end

        if instance_methods.include?(:cluster_instance)
          # Clusters a single instance.
          #
          # @param instance_or_values an instance or an array of raw values
          # @return the result of cluster_instance for that instance
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          def cluster(instance_or_values)
            ensure_trained_with_instances!

            instance = clusterable_instance_from(instance_or_values)
            cluster_instance(instance)
          end
        end

        if instance_methods.include?(:update_clusterer)
          # Appends an instance to the training instances and updates the
          # clusterer with it.
          #
          # @param instance the instance to add
          # @return [self] to allow chaining
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          def add_training_instance(instance)
            # Without this guard an untrained clusterer failed with a
            # NoMethodError on nil instead of the descriptive error used by
            # the other helpers.
            ensure_trained_with_instances!

            training_instances.add(instance)
            update_clusterer(instance)

            self
          end

          # Converts raw values to internal values via the training
          # instances, wraps them in a DenseInstance and adds that instance.
          #
          # @param data [Array] raw attribute values
          # @return [self] to allow chaining
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          def add_training_data(data)
            ensure_trained_with_instances!

            values   = training_instances.internal_values_of(data)
            instance = Weka::Core::DenseInstance.new(values)
            add_training_instance(instance)
          end
        end

        if instance_methods.include?(:distribution_for_instance)
          # Returns the cluster membership distribution for an instance.
          #
          # @param instance_or_values an instance or an array of raw values
          # @return [Array] distribution_for_instance converted with to_a
          # @raise [UnassignedTrainingInstancesError] if not trained yet
          def distribution_for(instance_or_values)
            ensure_trained_with_instances!

            instance = clusterable_instance_from(instance_or_values)
            distribution_for_instance(instance).to_a
          end
        end

        private

        # Raises unless training instances have been assigned.
        def ensure_trained_with_instances!
          return unless training_instances.nil?

          error   = 'Clusterer is not trained with Instances.'
          hint    = 'You can set the training instances with #train_with_instances.'
          message = "#{error} #{hint}"

          raise UnassignedTrainingInstancesError, message
        end

        # Builds a temporary Instances object with the attributes of the
        # training instances, adds the given instance or values to it and
        # returns the resulting first instance.
        def clusterable_instance_from(instance_or_values)
          attributes = training_instances.attributes
          instances  = Weka::Core::Instances.new(attributes: attributes)

          instances.add_instance(instance_or_values)
          instances.first
        end
      end

    end
  end
end
|
103
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'weka/concerns/buildable'
|
3
|
+
require 'weka/concerns/describable'
|
4
|
+
require 'weka/concerns/optionizable'
|
5
|
+
require 'weka/concerns/persistent'
|
6
|
+
|
7
|
+
module Weka
  # Umbrella concern: including it pulls in every individual concern below.
  module Concerns
    extend ActiveSupport::Concern

    included do
      # Included one at a time, in this order, exactly like four separate
      # include statements would be.
      [Buildable, Describable, Optionizable, Persistent].each do |concern|
        include concern
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
|
3
|
+
module Weka
  module Concerns
    # Adds a class-level .build that creates and configures an instance.
    module Buildable
      extend ActiveSupport::Concern

      module ClassMethods
        # Creates a new instance and, when a block is given, evaluates the
        # block in the context of that instance.
        #
        # @return the new instance
        def build(&configuration)
          new.tap do |object|
            object.instance_eval(&configuration) if configuration
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
|
3
|
+
module Weka
  module Concerns
    # Adds class-level helpers that describe a class and its options by
    # asking a freshly created instance.
    module Describable
      extend ActiveSupport::Concern

      module ClassMethods
        # @return the global_info of a new instance
        def description
          new.global_info
        end

        # @return [String] one "synopsis<TAB>description" line per option
        #   listed by a new instance, joined with newlines
        def options
          lines = new.list_options.map { |option| description_for_option(option) }
          lines.join("\n")
        end

        private

        # Formats a single option as its synopsis and stripped description,
        # separated by a tab.
        def description_for_option(option)
          synopsis = option.synopsis
          details  = option.description.strip

          "#{synopsis}\t#{details}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
|
3
|
+
module Weka
  module Concerns
    # Adds Ruby-style option handling on top of set_options/get_options.
    module Optionizable
      extend ActiveSupport::Concern

      included do
        java_import "weka.core.Utils"

        # Builds an option string from flags and key/value pairs, applies it
        # with set_options and remembers it.
        #
        # @param single_options [Array] flags, with or without a leading "-"
        # @param hash_options [Hash] option names mapped to their values
        # @return [String] the joined option string that was applied
        def use_options(*single_options, **hash_options)
          joined_options = join_options(single_options, hash_options)
          options        = Java::WekaCore::Utils.split_options(joined_options)

          set_options(options)
          @options = joined_options
        end

        # @return [String] the options set via #use_options, or the class's
        #   default options when none were set
        def options
          @options || self.class.default_options
        end

        private

        # Joins flags and key/value options into one space-separated string.
        #
        # Takes the array and the hash as plain positional arguments. The
        # former (*single_options, **hash_options) signature depended on the
        # trailing hash being implicitly converted to keywords, which no
        # longer happens under Ruby 3 keyword-argument semantics: the hash
        # ended up in the splat and join_single_options raised ArgumentError.
        def join_options(single_options, hash_options)
          [
            join_single_options(single_options),
            join_hash_options(hash_options)
          ].reject(&:empty?).join(' ')
        end

        # Prefixes every flag with exactly one "-".
        def join_single_options(options)
          options.map { |option| "-#{option.to_s.sub(/^-/, '')}" }.join(' ')
        end

        # Renders every pair as "-key value".
        def join_hash_options(options)
          options.map { |key, value| "-#{key} #{value}" }.join(' ')
        end
      end

      module ClassMethods
        # @return [String] the options reported by a new instance, joined
        #   with spaces
        def default_options
          new.get_options.to_a.join(' ')
        end
      end

    end
  end
end
|
data/lib/weka/core.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Weka
  module Core
    java_import "weka.core.Attribute"

    # Ruby additions to the imported Weka Attribute class.
    class Attribute
      # @return [Array] the values from enumerate_values
      def values
        enumerate_values.to_a
      end

      # Converts a raw value into the attribute's internal representation.
      #
      # The date check has to come before the numeric check, because a date
      # attribute is also a numeric one.
      #
      # @param value the raw value
      # @return the parsed date for date attributes, a Float for numeric
      #   attributes, the value's index for nominal attributes, otherwise nil
      def internal_value_of(value)
        return parse_date(value.to_s) if date?
        return value.to_f if numeric?

        index_of_value(value.to_s) if nominal?
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
3
|
+
module Weka
  module Core
    # Namespace for the loader and saver classes built by ClassBuilder.
    module Converters
      include ClassBuilder

      # Same class names, same order, passed as a splatted symbol list.
      build_classes(
        *%i[ArffLoader ArffSaver CSVLoader CSVSaver JSONLoader JSONSaver],
        include_concerns: false
      )
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Weka
  module Core
    java_import "weka.core.DenseInstance"

    # Ruby additions to the imported Weka DenseInstance class.
    class DenseInstance
      java_import "java.util.Date"
      java_import "java.text.SimpleDateFormat"

      # @param data [Array] internal values, converted to a Java double array
      # @param weight [Float] weight passed to the Java constructor
      def initialize(data, weight: 1.0)
        super(weight, data.to_java(:double))
      end

      # @return [Array] the attributes from enumerate_attributes. Per the
      #   Weka documentation that enumeration skips the class attribute.
      def attributes
        enumerate_attributes.to_a
      end

      # Yields every enumerated attribute, or returns the enumeration when
      # no block is given.
      def each_attribute
        if block_given?
          enumerate_attributes.each { |attribute| yield(attribute) }
        else
          enumerate_attributes
        end
      end

      # Yields every enumerated attribute with its position.
      #
      # Without a block an enumerator is returned. Previously the method
      # silently walked the whole enumeration and yielded nothing, unlike
      # #each_attribute.
      def each_attribute_with_index(&block)
        return enumerate_attributes.each_with_index unless block_given?

        enumerate_attributes.each_with_index(&block)
      end

      # Converts the internal double values back to readable values:
      # formatted date strings for date attributes, the number itself for
      # numeric attributes and the label for nominal attributes.
      #
      # @return [Array] the converted values. When the instance has no
      #   dataset (and therefore no attribute information) the raw internal
      #   values are returned instead of failing on a nil dataset.
      def to_a
        raw_values = to_double_array.to_a
        return raw_values if dataset.nil?

        raw_values.each_with_index.map do |value, index|
          attribute = attribute_at(index)

          # Date must be checked first, a date attribute is also numeric.
          if attribute.date?
            format_date(value, attribute.date_format)
          elsif attribute.numeric?
            value
          elsif attribute.nominal?
            attribute.value(value)
          end
        end
      end

      alias :values :to_a
      alias :values_count :num_values

      private

      # Finds the attribute belonging to the value at the given position.
      #
      # #attributes does not contain the class attribute, so positions after
      # the class index are shifted down by one and the class index itself
      # maps to the class attribute.
      def attribute_at(index)
        return attributes[index] unless dataset.class_attribute_defined?

        if dataset.class_index == index
          class_attribute
        elsif index > dataset.class_index
          attributes[index - 1]
        else
          attributes[index]
        end
      end

      # Formats an internal date value with the given SimpleDateFormat
      # pattern.
      def format_date(value, format)
        formatter = SimpleDateFormat.new(format)
        formatter.format(Date.new(value))
      end
    end
  end
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
require 'weka/core/converters'
|
2
|
+
require 'weka/core/loader'
|
3
|
+
require 'weka/core/saver'
|
4
|
+
require 'weka/core/dense_instance'
|
5
|
+
|
6
|
+
module Weka
  module Core
    java_import "weka.core.Instances"
    java_import "weka.core.FastVector"

    # Ruby additions to the imported Weka Instances class: loading/saving
    # helpers, a small DSL for declaring attributes, and iteration helpers.
    class Instances

      DEFAULT_RELATION_NAME = 'Instances'

      class << self
        # Loads instances from an ARFF file via Loader.
        def from_arff(file)
          Loader.load_arff(file)
        end

        # Loads instances from a CSV file via Loader.
        def from_csv(file)
          Loader.load_csv(file)
        end

        # Loads instances from a JSON file via Loader.
        def from_json(file)
          Loader.load_json(file)
        end
      end

      # @param relation_name [#to_s] name of the relation
      # @param attributes [Array] attributes the dataset starts with
      # A block is accepted for backward compatibility but is not used here.
      def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: [], &block)
        attribute_list = FastVector.new
        attributes.each { |attribute| attribute_list.add_element(attribute) }

        super(relation_name.to_s, attribute_list, 0)
      end

      # @return [Array] all instances
      def instances
        enumerate_instances.to_a
      end

      # @return [Array] the attributes from enumerate_attributes. Per the
      #   Weka documentation that enumeration skips the class attribute.
      def attributes
        enumerate_attributes.to_a
      end

      # @return [Array<String>] names of the attributes from #attributes
      def attribute_names
        attributes.map(&:name)
      end

      # Evaluates the block in the context of this object, so that the
      # attribute DSL methods (numeric, nominal, string, date) can be called
      # without a receiver.
      #
      # @return [self]
      def add_attributes(&block)
        self.instance_eval(&block) if block
        self
      end

      alias :with_attributes :add_attributes
      alias :instances_count :num_instances
      alias :attributes_count :num_attributes

      # Yields every instance, or returns the enumeration without a block.
      def each
        if block_given?
          enumerate_instances.each { |instance| yield(instance) }
        else
          enumerate_instances
        end
      end

      # Yields every instance with its position.
      #
      # Without a block an enumerator is returned. Previously the method
      # silently walked all instances and yielded nothing, unlike #each.
      def each_with_index(&block)
        return enumerate_instances.each_with_index unless block_given?

        enumerate_instances.each_with_index(&block)
      end

      # Yields every enumerated attribute, or returns the enumeration
      # without a block.
      def each_attribute
        if block_given?
          enumerate_attributes.each { |attribute| yield(attribute) }
        else
          enumerate_attributes
        end
      end

      # Yields every enumerated attribute with its position.
      #
      # Without a block an enumerator is returned, for the same reason as
      # in #each_with_index.
      def each_attribute_with_index(&block)
        return enumerate_attributes.each_with_index unless block_given?

        enumerate_attributes.each_with_index(&block)
      end

      # Saves the instances as ARFF via Saver.
      def to_arff(file)
        Saver.save_arff(file: file, instances: self)
      end

      # Saves the instances as CSV via Saver.
      def to_csv(file)
        Saver.save_csv(file: file, instances: self)
      end

      # Saves the instances as JSON via Saver.
      def to_json(file)
        Saver.save_json(file: file, instances: self)
      end

      # Adds a numeric attribute.
      #
      # @param name [#to_s] attribute name
      # @param class_attribute [Boolean] also make it the class attribute
      def numeric(name, class_attribute: false)
        attribute = Attribute.new(name.to_s)
        add_attribute(attribute)
        self.class_attribute = name if class_attribute
      end

      # Adds a nominal attribute with the given values (converted to
      # strings).
      #
      # @param name [#to_s] attribute name
      # @param values [Array, Object] allowed values
      # @param class_attribute [Boolean] also make it the class attribute
      def nominal(name, values:, class_attribute: false)
        attribute = Attribute.new(name.to_s, Array(values).map(&:to_s))
        add_attribute(attribute)
        self.class_attribute = name if class_attribute
      end

      # Adds an attribute created from the name and an empty value list.
      #
      # @param name [#to_s] attribute name
      # @param class_attribute [Boolean] also make it the class attribute
      def string(name, class_attribute: false)
        attribute = Attribute.new(name.to_s, [])
        add_attribute(attribute)
        self.class_attribute = name if class_attribute
      end

      # Adds a date attribute.
      #
      # @param name [#to_s] attribute name
      # @param format [String] date pattern passed to the Attribute
      # @param class_attribute [Boolean] also make it the class attribute
      def date(name, format: 'yyyy-MM-dd HH:mm', class_attribute: false)
        attribute = Attribute.new(name.to_s, format)
        add_attribute(attribute)
        self.class_attribute = name if class_attribute
      end

      # Makes the attribute with the given name the class attribute, or
      # resets the class attribute when name is nil.
      #
      # @raise [ArgumentError] if no attribute with that name is defined
      def class_attribute=(name)
        if name.nil?
          reset_class_attribute
        else
          ensure_attribute_defined!(name)
          setClass(attribute_with_name(name))
        end
      end

      alias :add_numeric_attribute :numeric
      alias :add_string_attribute :string
      alias :add_nominal_attribute :nominal
      alias :add_date_attribute :date

      # @return the class attribute, or nil when none is defined
      def class_attribute
        classAttribute if class_attribute_defined?
      end

      # Unsets the class attribute by setting the class index to -1.
      def reset_class_attribute
        set_class_index(-1)
      end

      # @return [Boolean] whether the class index is non-negative
      def class_attribute_defined?
        class_index >= 0
      end

      # Adds an instance, or builds one from raw values and adds it.
      #
      # @param instance_or_values an instance or an array of raw values
      # @param weight [Float] weight assigned to the instance
      def add_instance(instance_or_values, weight: 1.0)
        instance = instance_from(instance_or_values, weight: weight)
        add(instance)
      end

      # Adds every entry of data with #add_instance, using the same weight.
      def add_instances(data, weight: 1.0)
        data.each { |values| add_instance(values, weight: weight) }
      end

      # Converts raw values to internal values, using the attribute at the
      # same position for each value.
      #
      # @param values [Array] raw values, one per attribute
      # @return [Array] internal values
      def internal_values_of(values)
        values.each_with_index.map do |value, index|
          attribute(index).internal_value_of(value)
        end
      end

      # @return the result of filter.filter for these instances
      def apply_filter(filter)
        filter.filter(self)
      end

      private

      # Appends the attribute after all existing attributes.
      #
      # The position is num_attributes rather than attributes.count: the
      # latter leaves out the class attribute, so an attribute added after
      # a class attribute was inserted in front of it and the attribute
      # order no longer matched the order of declaration.
      def add_attribute(attribute)
        insert_attribute_at(attribute, num_attributes)
      end

      # Raises unless an attribute with the given name exists. The lookup
      # includes the current class attribute, so re-assigning it is allowed.
      def ensure_attribute_defined!(name)
        return unless attribute_with_name(name).nil?

        error   = "\"#{name}\" is not defined."
        hint    = "Only defined attributes can be used as class attribute!"
        message = "#{error} #{hint}"

        raise ArgumentError, message
      end

      # Looks the attribute up by name among all attributes, including the
      # class attribute. Returns nil when there is no such attribute.
      def attribute_with_name(name)
        attribute(name.to_s)
      end

      # Returns the given instance with its weight set, or a new
      # DenseInstance built from the raw values.
      def instance_from(instance_or_values, weight:)
        if instance_or_values.kind_of?(Java::WekaCore::Instance)
          instance_or_values.weight = weight
          instance_or_values
        else
          data = internal_values_of(instance_or_values)
          DenseInstance.new(data, weight: weight)
        end
      end
    end

    # JRuby proxy setting for the Java class.
    # NOTE(review): presumably keeps Ruby-side state attached to the Java
    # objects between calls - confirm against the JRuby documentation.
    Java::WekaCore::Instances.__persistent__ = true
  end
end
|