weka 0.4.0-java → 0.5.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +30 -0
- data/README.md +2 -1
- data/lib/weka/class_builder.rb +23 -5
- data/lib/weka/classifiers/evaluation.rb +18 -2
- data/lib/weka/classifiers/utils.rb +116 -90
- data/lib/weka/clusterers/utils.rb +100 -74
- data/lib/weka/concerns/optionizable.rb +30 -29
- data/lib/weka/concerns/serializable.rb +2 -6
- data/lib/weka/core/attribute.rb +1 -1
- data/lib/weka/core/dense_instance.rb +7 -15
- data/lib/weka/core/instances.rb +120 -20
- data/lib/weka/filters/utils.rb +3 -7
- data/lib/weka/version.rb +1 -1
- data/weka.gemspec +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 07d0cbed2d245de34e10101d0597017194804735e3b5ea6bc972201282c60d5d
|
4
|
+
data.tar.gz: b304c03a4552b766f56e60e52b765068fdb0de0350d1e5c541882f085e90824b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 942621fa83a7670384adccb717610ac19dc5d961fd9de5ab579a331c40119ef8e0e73cb0b0962aaff9fe937628335ee453be2ef1ed0669edf5421dadc075acdc
|
7
|
+
data.tar.gz: ee68c277c9a40f6349fdea303897d44fd93ded9311d11ffba8c1cd3909764ea51d22638cc7240bb375570615a9acaa4d98ba591b8de3102c41c3dcabf550283b
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.4
|
3
|
+
Exclude:
|
4
|
+
- 'bin/**/*'
|
5
|
+
- '*.gemspec'
|
6
|
+
- 'Gemfile'
|
7
|
+
- 'Gemfile.lock'
|
8
|
+
|
9
|
+
Style/Copyright:
|
10
|
+
Enabled: false
|
11
|
+
|
12
|
+
Style/Documentation:
|
13
|
+
Enabled: false
|
14
|
+
|
15
|
+
Metrics/LineLength:
|
16
|
+
Max: 80
|
17
|
+
|
18
|
+
Layout/MultilineMethodCallIndentation:
|
19
|
+
EnforcedStyle: indented
|
20
|
+
|
21
|
+
Style/FrozenStringLiteralComment:
|
22
|
+
Enabled: false
|
23
|
+
|
24
|
+
Metrics/ModuleLength:
|
25
|
+
Exclude:
|
26
|
+
- "**/*_spec.rb"
|
27
|
+
|
28
|
+
Metrics/BlockLength:
|
29
|
+
Exclude:
|
30
|
+
- "**/*_spec.rb"
|
data/README.md
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/weka.svg)](http://badge.fury.io/rb/weka)
|
4
4
|
[![Travis Build](https://travis-ci.org/paulgoetze/weka-jruby.svg)](https://travis-ci.org/paulgoetze/weka-jruby)
|
5
|
+
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/9634a6709ef545198e079a8daddff100)](https://www.codacy.com/app/paul-christoph-goetze/weka-jruby?utm_source=github.com&utm_medium=referral&utm_content=paulgoetze/weka-jruby&utm_campaign=Badge_Grade)
|
5
6
|
|
6
7
|
Machine Learning & Data Mining with JRuby based on the [Weka](http://www.cs.waikato.ac.nz/~ml/weka/index.html) Java library.
|
7
8
|
|
@@ -60,7 +61,7 @@ Here’s how to contribute:
|
|
60
61
|
Please try to add RSpec tests along with your new features. This will ensure that your code does not break existing functionality and that your feature is working as expected.
|
61
62
|
|
62
63
|
We use [Rubocop](https://github.com/bbatsov/rubocop) for code style recommendations.
|
63
|
-
Please make sure your contributions comply with the
|
64
|
+
Please make sure your contributions comply with the project’s Rubocop config.
|
64
65
|
|
65
66
|
## Acknowledgement
|
66
67
|
|
data/lib/weka/class_builder.rb
CHANGED
@@ -7,14 +7,24 @@ module Weka
|
|
7
7
|
end
|
8
8
|
|
9
9
|
module ClassMethods
|
10
|
-
def build_class(class_name, weka_module: nil, include_concerns: true)
|
10
|
+
def build_class(class_name, weka_module: nil, include_concerns: true, additional_includes: [])
|
11
11
|
java_import java_class_path(class_name, weka_module)
|
12
|
-
define_class(
|
12
|
+
define_class(
|
13
|
+
class_name,
|
14
|
+
weka_module,
|
15
|
+
include_concerns: include_concerns,
|
16
|
+
additional_includes: additional_includes
|
17
|
+
)
|
13
18
|
end
|
14
19
|
|
15
|
-
def build_classes(*class_names, weka_module: nil, include_concerns: true)
|
20
|
+
def build_classes(*class_names, weka_module: nil, include_concerns: true, additional_includes: [])
|
16
21
|
class_names.each do |name|
|
17
|
-
build_class(
|
22
|
+
build_class(
|
23
|
+
name,
|
24
|
+
weka_module: weka_module,
|
25
|
+
include_concerns: include_concerns,
|
26
|
+
additional_includes: additional_includes
|
27
|
+
)
|
18
28
|
end
|
19
29
|
end
|
20
30
|
|
@@ -58,12 +68,13 @@ module Weka
|
|
58
68
|
name.scan('::').count == 1
|
59
69
|
end
|
60
70
|
|
61
|
-
def define_class(class_name, weka_module, include_concerns: true)
|
71
|
+
def define_class(class_name, weka_module, include_concerns: true, additional_includes: [])
|
62
72
|
module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
|
63
73
|
class #{class_name}
|
64
74
|
#{'include Concerns' if include_concerns}
|
65
75
|
#{include_serializable_for(class_name, weka_module)}
|
66
76
|
#{include_utils}
|
77
|
+
#{include_additionals(additional_includes)}
|
67
78
|
end
|
68
79
|
CLASS_DEFINITION
|
69
80
|
end
|
@@ -84,6 +95,13 @@ module Weka
|
|
84
95
|
constantize(utils_super_modules).const_defined?(:Utils)
|
85
96
|
end
|
86
97
|
|
98
|
+
def include_additionals(modules)
|
99
|
+
modules = Array(modules)
|
100
|
+
return if modules.empty?
|
101
|
+
|
102
|
+
modules.map { |name| "include #{name}" }.join("\n")
|
103
|
+
end
|
104
|
+
|
87
105
|
def constantize(module_names)
|
88
106
|
Object.module_eval("::#{module_names}")
|
89
107
|
end
|
@@ -1,8 +1,12 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
1
3
|
module Weka
|
2
4
|
module Classifiers
|
3
5
|
java_import 'weka.classifiers.Evaluation'
|
4
6
|
|
5
7
|
class Evaluation
|
8
|
+
include ClassBuilder
|
9
|
+
|
6
10
|
# Use both nomenclatures f_measure and fmeasure for consistency
|
7
11
|
# due to jruby's auto method generation of 'fMeasure' to 'f_measure' and
|
8
12
|
# 'weightedFMeasure' to 'weighted_fmeasure'.
|
@@ -29,8 +33,20 @@ module Weka
|
|
29
33
|
alias average_cost avg_cost
|
30
34
|
|
31
35
|
alias cumulative_margin_distribution to_cumulative_margin_distribution_string
|
32
|
-
end
|
33
36
|
|
34
|
-
|
37
|
+
module Curve
|
38
|
+
def self.included(base)
|
39
|
+
base.class_eval do
|
40
|
+
alias_method :curve, :get_curve
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
build_classes :CostCurve,
|
46
|
+
:MarginCurve,
|
47
|
+
:ThresholdCurve,
|
48
|
+
weka_module: 'weka.classifiers.evaluation',
|
49
|
+
additional_includes: Curve
|
50
|
+
end
|
35
51
|
end
|
36
52
|
end
|
@@ -5,128 +5,154 @@ module Weka
|
|
5
5
|
module Classifiers
|
6
6
|
module Utils
|
7
7
|
def self.included(base)
|
8
|
-
base.
|
9
|
-
|
8
|
+
base.include Buildable if base.instance_methods.include?(:build_classifier)
|
9
|
+
base.include Classifiable if base.instance_methods.include?(:classify_instance)
|
10
|
+
base.include Updatable if base.instance_methods.include?(:update_classifier)
|
11
|
+
base.include Distributable if base.instance_methods.include?(:distribution_for_instance)
|
12
|
+
end
|
10
13
|
|
11
|
-
|
12
|
-
|
14
|
+
module Checks
|
15
|
+
private
|
13
16
|
|
14
|
-
|
15
|
-
|
17
|
+
def ensure_class_attribute_assigned!(instances)
|
18
|
+
return if instances.class_attribute_defined?
|
16
19
|
|
17
|
-
|
18
|
-
|
20
|
+
error = 'Class attribute is not assigned for Instances.'
|
21
|
+
hint = 'You can assign a class attribute with #class_attribute=.'
|
22
|
+
message = "#{error} #{hint}"
|
19
23
|
|
20
|
-
|
21
|
-
|
24
|
+
raise UnassignedClassError, message
|
25
|
+
end
|
22
26
|
|
23
|
-
|
24
|
-
|
27
|
+
def ensure_trained_with_instances!
|
28
|
+
return unless training_instances.nil?
|
25
29
|
|
26
|
-
|
27
|
-
|
30
|
+
error = 'Classifier is not trained with Instances.'
|
31
|
+
hint = 'You can set the training instances with #train_with_instances.'
|
32
|
+
message = "#{error} #{hint}"
|
28
33
|
|
29
|
-
|
30
|
-
|
31
|
-
|
34
|
+
raise UnassignedTrainingInstancesError, message
|
35
|
+
end
|
36
|
+
end
|
32
37
|
|
33
|
-
|
34
|
-
|
35
|
-
ensure_class_attribute_assigned!(test_instances)
|
38
|
+
module Transformers
|
39
|
+
private
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
end
|
41
|
-
end
|
41
|
+
def classifiable_instance_from(instance_or_values)
|
42
|
+
attributes = training_instances.attributes
|
43
|
+
instances = Weka::Core::Instances.new(attributes: attributes)
|
42
44
|
|
43
|
-
|
44
|
-
|
45
|
-
|
45
|
+
class_attribute = training_instances.class_attribute
|
46
|
+
class_index = training_instances.class_index
|
47
|
+
instances.insert_attribute_at(class_attribute, class_index)
|
46
48
|
|
47
|
-
|
48
|
-
|
49
|
+
instances.class_index = training_instances.class_index
|
50
|
+
instances.add_instance(instance_or_values)
|
49
51
|
|
50
|
-
|
51
|
-
|
52
|
-
|
52
|
+
instance = instances.first
|
53
|
+
instance.set_class_missing
|
54
|
+
instance
|
55
|
+
end
|
56
|
+
end
|
53
57
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
update_classifier(instance)
|
58
|
+
module Buildable
|
59
|
+
java_import 'java.util.Random'
|
60
|
+
include Checks
|
58
61
|
|
59
|
-
|
60
|
-
end
|
62
|
+
attr_reader :training_instances
|
61
63
|
|
62
|
-
|
63
|
-
|
64
|
-
instance = Weka::Core::DenseInstance.new(values)
|
65
|
-
add_training_instance(instance)
|
66
|
-
end
|
67
|
-
end
|
64
|
+
def train_with_instances(instances)
|
65
|
+
ensure_class_attribute_assigned!(instances)
|
68
66
|
|
69
|
-
|
70
|
-
|
71
|
-
ensure_trained_with_instances!
|
67
|
+
@training_instances = instances
|
68
|
+
build_classifier(instances)
|
72
69
|
|
73
|
-
|
74
|
-
|
70
|
+
self
|
71
|
+
end
|
75
72
|
|
76
|
-
|
77
|
-
|
78
|
-
end
|
73
|
+
def cross_validate(folds: 3)
|
74
|
+
ensure_trained_with_instances!
|
79
75
|
|
80
|
-
|
76
|
+
evaluation = Evaluation.new(training_instances)
|
77
|
+
random = Java::JavaUtil::Random.new(1)
|
81
78
|
|
82
|
-
|
83
|
-
|
79
|
+
evaluation.cross_validate_model(
|
80
|
+
self,
|
81
|
+
training_instances,
|
82
|
+
folds.to_i,
|
83
|
+
random
|
84
|
+
)
|
84
85
|
|
85
|
-
|
86
|
-
|
87
|
-
message = "#{error} #{hint}"
|
86
|
+
evaluation
|
87
|
+
end
|
88
88
|
|
89
|
-
|
90
|
-
|
89
|
+
def evaluate(test_instances)
|
90
|
+
ensure_trained_with_instances!
|
91
|
+
ensure_class_attribute_assigned!(test_instances)
|
91
92
|
|
92
|
-
|
93
|
-
|
93
|
+
evaluation = Evaluation.new(training_instances)
|
94
|
+
evaluation.evaluate_model(self, test_instances)
|
95
|
+
evaluation
|
96
|
+
end
|
97
|
+
end
|
94
98
|
|
95
|
-
|
96
|
-
|
97
|
-
|
99
|
+
module Classifiable
|
100
|
+
include Checks
|
101
|
+
include Transformers
|
98
102
|
|
99
|
-
|
100
|
-
|
103
|
+
def classify(instance_or_values)
|
104
|
+
ensure_trained_with_instances!
|
101
105
|
|
102
|
-
|
103
|
-
|
104
|
-
instances = Weka::Core::Instances.new(attributes: attributes)
|
106
|
+
instance = classifiable_instance_from(instance_or_values)
|
107
|
+
index = classify_instance(instance)
|
105
108
|
|
106
|
-
|
107
|
-
|
108
|
-
instances.insert_attribute_at(class_attribute, class_index)
|
109
|
+
class_value_of_index(index)
|
110
|
+
end
|
109
111
|
|
110
|
-
|
111
|
-
instances.add_instance(instance_or_values)
|
112
|
+
private
|
112
113
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
114
|
+
def class_value_of_index(index)
|
115
|
+
training_instances.class_attribute.value(index)
|
116
|
+
end
|
117
|
+
end
|
117
118
|
|
118
|
-
|
119
|
-
|
120
|
-
|
119
|
+
module Updatable
|
120
|
+
def add_training_instance(instance)
|
121
|
+
training_instances.add(instance)
|
122
|
+
update_classifier(instance)
|
123
|
+
|
124
|
+
self
|
125
|
+
end
|
126
|
+
|
127
|
+
def add_training_data(data)
|
128
|
+
values = training_instances.internal_values_of(data)
|
129
|
+
instance = Weka::Core::DenseInstance.new(values)
|
130
|
+
add_training_instance(instance)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
module Distributable
|
135
|
+
include Checks
|
136
|
+
include Transformers
|
137
|
+
|
138
|
+
def distribution_for(instance_or_values)
|
139
|
+
ensure_trained_with_instances!
|
140
|
+
|
141
|
+
instance = classifiable_instance_from(instance_or_values)
|
142
|
+
distributions = distribution_for_instance(instance)
|
143
|
+
|
144
|
+
class_distributions_from(distributions)
|
145
|
+
end
|
146
|
+
|
147
|
+
private
|
121
148
|
|
122
|
-
|
123
|
-
|
149
|
+
def class_distributions_from(distributions)
|
150
|
+
class_values = training_instances.class_attribute.values
|
124
151
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
end
|
152
|
+
distributions.each_with_object({}).with_index do |(distribution, result), index|
|
153
|
+
class_value = class_values[index]
|
154
|
+
result[class_value] = distribution
|
155
|
+
result
|
130
156
|
end
|
131
157
|
end
|
132
158
|
end
|
@@ -5,96 +5,122 @@ module Weka
|
|
5
5
|
module Clusterers
|
6
6
|
module Utils
|
7
7
|
def self.included(base)
|
8
|
-
base.
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
attr_reader :training_instances
|
13
|
-
|
14
|
-
def train_with_instances(instances)
|
15
|
-
@training_instances = instances
|
16
|
-
build_clusterer(instances)
|
17
|
-
|
18
|
-
self
|
19
|
-
end
|
20
|
-
|
21
|
-
if ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
|
22
|
-
def cross_validate(folds: 3)
|
23
|
-
ensure_trained_with_instances!
|
24
|
-
|
25
|
-
ClusterEvaluation.cross_validate_model(
|
26
|
-
self,
|
27
|
-
training_instances,
|
28
|
-
folds.to_i,
|
29
|
-
Java::JavaUtil::Random.new(1)
|
30
|
-
)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def evaluate(test_instances)
|
35
|
-
ensure_trained_with_instances!
|
36
|
-
|
37
|
-
ClusterEvaluation.new.tap do |evaluation|
|
38
|
-
evaluation.clusterer = self
|
39
|
-
evaluation.evaluate_clusterer(test_instances)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
8
|
+
if base.instance_methods.include?(:build_clusterer)
|
9
|
+
base.include Buildable
|
10
|
+
base.include CrossValidatable if density_based?(base)
|
11
|
+
end
|
43
12
|
|
44
|
-
|
45
|
-
|
46
|
-
|
13
|
+
base.include Clusterable if base.instance_methods.include?(:cluster_instance)
|
14
|
+
base.include Updatable if base.instance_methods.include?(:update_clusterer)
|
15
|
+
base.include Distributable if base.instance_methods.include?(:distribution_for_instance)
|
16
|
+
end
|
47
17
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
18
|
+
def self.density_based?(base)
|
19
|
+
base.ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
|
20
|
+
end
|
52
21
|
|
53
|
-
|
54
|
-
|
55
|
-
training_instances.add(instance)
|
56
|
-
update_clusterer(instance)
|
22
|
+
module Checks
|
23
|
+
private
|
57
24
|
|
58
|
-
|
59
|
-
|
25
|
+
def ensure_trained_with_instances!
|
26
|
+
return unless training_instances.nil?
|
60
27
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
add_training_instance(instance)
|
65
|
-
end
|
66
|
-
end
|
28
|
+
error = 'Clusterer is not trained with Instances.'
|
29
|
+
hint = 'You can set the training instances with #train_with_instances.'
|
30
|
+
message = "#{error} #{hint}"
|
67
31
|
|
68
|
-
|
69
|
-
|
70
|
-
|
32
|
+
raise UnassignedTrainingInstancesError, message
|
33
|
+
end
|
34
|
+
end
|
71
35
|
|
72
|
-
|
73
|
-
|
74
|
-
end
|
75
|
-
end
|
36
|
+
module Transformers
|
37
|
+
private
|
76
38
|
|
77
|
-
|
39
|
+
def clusterable_instance_from(instance_or_values)
|
40
|
+
attributes = training_instances.attributes
|
41
|
+
instances = Weka::Core::Instances.new(attributes: attributes)
|
78
42
|
|
79
|
-
|
80
|
-
|
43
|
+
instances.add_instance(instance_or_values)
|
44
|
+
instances.first
|
45
|
+
end
|
46
|
+
end
|
81
47
|
|
82
|
-
|
83
|
-
|
84
|
-
message = "#{error} #{hint}"
|
48
|
+
module Buildable
|
49
|
+
include Checks
|
85
50
|
|
86
|
-
|
87
|
-
end
|
51
|
+
attr_reader :training_instances
|
88
52
|
|
89
|
-
|
90
|
-
|
91
|
-
|
53
|
+
def train_with_instances(instances)
|
54
|
+
@training_instances = instances
|
55
|
+
build_clusterer(instances)
|
56
|
+
|
57
|
+
self
|
58
|
+
end
|
92
59
|
|
93
|
-
|
94
|
-
|
60
|
+
def evaluate(test_instances)
|
61
|
+
ensure_trained_with_instances!
|
62
|
+
|
63
|
+
ClusterEvaluation.new.tap do |evaluation|
|
64
|
+
evaluation.clusterer = self
|
65
|
+
evaluation.evaluate_clusterer(test_instances)
|
95
66
|
end
|
96
67
|
end
|
97
68
|
end
|
69
|
+
|
70
|
+
module CrossValidatable
|
71
|
+
java_import 'java.util.Random'
|
72
|
+
include Checks
|
73
|
+
|
74
|
+
def cross_validate(folds: 3)
|
75
|
+
ensure_trained_with_instances!
|
76
|
+
|
77
|
+
ClusterEvaluation.cross_validate_model(
|
78
|
+
self,
|
79
|
+
training_instances,
|
80
|
+
folds.to_i,
|
81
|
+
Java::JavaUtil::Random.new(1)
|
82
|
+
)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
module Clusterable
|
87
|
+
include Checks
|
88
|
+
include Transformers
|
89
|
+
|
90
|
+
def cluster(instance_or_values)
|
91
|
+
ensure_trained_with_instances!
|
92
|
+
|
93
|
+
instance = clusterable_instance_from(instance_or_values)
|
94
|
+
cluster_instance(instance)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
module Updatable
|
99
|
+
def add_training_instance(instance)
|
100
|
+
training_instances.add(instance)
|
101
|
+
update_clusterer(instance)
|
102
|
+
|
103
|
+
self
|
104
|
+
end
|
105
|
+
|
106
|
+
def add_training_data(data)
|
107
|
+
values = training_instances.internal_values_of(data)
|
108
|
+
instance = Weka::Core::DenseInstance.new(values)
|
109
|
+
add_training_instance(instance)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
module Distributable
|
114
|
+
include Checks
|
115
|
+
include Transformers
|
116
|
+
|
117
|
+
def distribution_for(instance_or_values)
|
118
|
+
ensure_trained_with_instances!
|
119
|
+
|
120
|
+
instance = clusterable_instance_from(instance_or_values)
|
121
|
+
distribution_for_instance(instance).to_a
|
122
|
+
end
|
123
|
+
end
|
98
124
|
end
|
99
125
|
end
|
100
126
|
end
|
@@ -2,45 +2,46 @@ module Weka
|
|
2
2
|
module Concerns
|
3
3
|
module Optionizable
|
4
4
|
def self.included(base)
|
5
|
-
base.extend
|
5
|
+
base.extend ClassMethods
|
6
|
+
base.include InstanceMethods
|
7
|
+
end
|
6
8
|
|
7
|
-
|
8
|
-
|
9
|
+
module ClassMethods
|
10
|
+
def default_options
|
11
|
+
new.get_options.to_a.join(' ')
|
12
|
+
end
|
13
|
+
end
|
9
14
|
|
10
|
-
|
11
|
-
|
12
|
-
options = Java::WekaCore::Utils.split_options(joined_options)
|
15
|
+
module InstanceMethods
|
16
|
+
java_import 'weka.core.Utils'
|
13
17
|
|
14
|
-
|
15
|
-
|
16
|
-
|
18
|
+
def use_options(*single_options, **hash_options)
|
19
|
+
joined_options = join_options(single_options, hash_options)
|
20
|
+
options = Java::WekaCore::Utils.split_options(joined_options)
|
17
21
|
|
18
|
-
|
19
|
-
|
20
|
-
|
22
|
+
set_options(options)
|
23
|
+
@options = joined_options
|
24
|
+
end
|
21
25
|
|
22
|
-
|
26
|
+
def options
|
27
|
+
@options || self.class.default_options
|
28
|
+
end
|
23
29
|
|
24
|
-
|
25
|
-
[
|
26
|
-
join_single_options(*single_options),
|
27
|
-
join_hash_options(**hash_options)
|
28
|
-
].reject(&:empty?).join(' ')
|
29
|
-
end
|
30
|
+
private
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
def join_options(*single_options, **hash_options)
|
33
|
+
[
|
34
|
+
join_single_options(*single_options),
|
35
|
+
join_hash_options(**hash_options)
|
36
|
+
].reject(&:empty?).join(' ')
|
37
|
+
end
|
34
38
|
|
35
|
-
|
36
|
-
|
37
|
-
end
|
39
|
+
def join_single_options(options)
|
40
|
+
options.map { |option| "-#{option.to_s.sub(/^-/, '')}" }.join(' ')
|
38
41
|
end
|
39
|
-
end
|
40
42
|
|
41
|
-
|
42
|
-
|
43
|
-
new.get_options.to_a.join(' ')
|
43
|
+
def join_hash_options(options)
|
44
|
+
options.map { |key, value| "-#{key} #{value}" }.join(' ')
|
44
45
|
end
|
45
46
|
end
|
46
47
|
end
|
@@ -3,12 +3,8 @@ require 'weka/core/serialization_helper'
|
|
3
3
|
module Weka
|
4
4
|
module Concerns
|
5
5
|
module Serializable
|
6
|
-
def
|
7
|
-
|
8
|
-
def serialize(filename)
|
9
|
-
Weka::Core::SerializationHelper.write(filename, self)
|
10
|
-
end
|
11
|
-
end
|
6
|
+
def serialize(filename)
|
7
|
+
Weka::Core::SerializationHelper.write(filename, self)
|
12
8
|
end
|
13
9
|
end
|
14
10
|
end
|
data/lib/weka/core/attribute.rb
CHANGED
@@ -57,25 +57,17 @@ module Weka
|
|
57
57
|
|
58
58
|
attribute = attribute_at(index)
|
59
59
|
|
60
|
-
if attribute.date?
|
61
|
-
|
62
|
-
|
63
|
-
value
|
64
|
-
elsif attribute.nominal? || attribute.string?
|
65
|
-
attribute.value(value)
|
66
|
-
end
|
60
|
+
return format_date(value, attribute.date_format) if attribute.date?
|
61
|
+
return value if attribute.numeric?
|
62
|
+
return attribute.value(value) if attribute.nominal? || attribute.string?
|
67
63
|
end
|
68
64
|
|
69
65
|
def attribute_at(index)
|
70
|
-
return attributes[index]
|
66
|
+
return attributes[index] unless dataset.class_attribute_defined?
|
67
|
+
return class_attribute if dataset.class_index == index
|
68
|
+
return attributes[index - 1] if index > dataset.class_index
|
71
69
|
|
72
|
-
|
73
|
-
class_attribute
|
74
|
-
elsif index > dataset.class_index
|
75
|
-
attributes[index - 1]
|
76
|
-
else
|
77
|
-
attributes[index]
|
78
|
-
end
|
70
|
+
attributes[index]
|
79
71
|
end
|
80
72
|
|
81
73
|
def format_date(value, format)
|
data/lib/weka/core/instances.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'matrix'
|
1
2
|
require 'weka/core/converters'
|
2
3
|
require 'weka/core/loader'
|
3
4
|
require 'weka/core/saver'
|
@@ -10,6 +11,7 @@ module Weka
|
|
10
11
|
java_import 'weka.core.FastVector'
|
11
12
|
|
12
13
|
class Instances
|
14
|
+
include Weka::Concerns::Persistent
|
13
15
|
include Weka::Concerns::Serializable
|
14
16
|
|
15
17
|
DEFAULT_RELATION_NAME = 'Instances'.freeze
|
@@ -38,7 +40,7 @@ module Weka
|
|
38
40
|
end
|
39
41
|
end
|
40
42
|
|
41
|
-
def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: []
|
43
|
+
def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: [])
|
42
44
|
attribute_list = FastVector.new
|
43
45
|
attributes.each { |attribute| attribute_list.add_element(attribute) }
|
44
46
|
|
@@ -49,12 +51,18 @@ module Weka
|
|
49
51
|
enumerate_instances.to_a
|
50
52
|
end
|
51
53
|
|
52
|
-
def attributes
|
53
|
-
enumerate_attributes.to_a
|
54
|
+
def attributes(include_class_attribute: false)
|
55
|
+
attrs = enumerate_attributes.to_a
|
56
|
+
|
57
|
+
if include_class_attribute && class_attribute_defined?
|
58
|
+
attrs.insert(class_index, class_attribute)
|
59
|
+
end
|
60
|
+
|
61
|
+
attrs
|
54
62
|
end
|
55
63
|
|
56
|
-
def attribute_names
|
57
|
-
attributes.map(&:name)
|
64
|
+
def attribute_names(include_class_attribute: false)
|
65
|
+
attributes(include_class_attribute: include_class_attribute).map(&:name)
|
58
66
|
end
|
59
67
|
|
60
68
|
def add_attributes(&block)
|
@@ -67,7 +75,7 @@ module Weka
|
|
67
75
|
alias attributes_count num_attributes
|
68
76
|
alias has_string_attribute? check_for_string_attributes
|
69
77
|
|
70
|
-
|
78
|
+
# Check if the Instances object has any attribute of the given type
|
71
79
|
# @param [String, Symbol, Integer] type type of the attribute to check
|
72
80
|
# String and Symbol argument are converted to corresponding type
|
73
81
|
# defined in Weka::Core::Attribute
|
@@ -189,6 +197,24 @@ module Weka
|
|
189
197
|
class_index >= 0
|
190
198
|
end
|
191
199
|
|
200
|
+
# Add new instance
|
201
|
+
# @param [Instance, Array, Hash] instance_or_values the attribute values
|
202
|
+
# of the instance to be added. If passing an array, the attribute values
|
203
|
+
# must be in the same order as the attributes defined in Instances.
|
204
|
+
# If passing a hash, the keys are the names of the attributes and their
|
205
|
+
# values are the corresponding attribute values.
|
206
|
+
#
|
207
|
+
# @example Passing Instance
|
208
|
+
# instances.add_instance(instance)
|
209
|
+
#
|
210
|
+
# @example Passing an array of attribute values
|
211
|
+
# attr_values = [attr1_value, attr2_value, attr3_value]
|
212
|
+
# instances.add_instance(attr_values)
|
213
|
+
#
|
214
|
+
# @example Passing a hash of attribute values.
|
215
|
+
# attr_values = { attr1_name: attr1_value, attr2_name: attr2_value }
|
216
|
+
# instances.add_instance(attr_values)
|
217
|
+
#
|
192
218
|
def add_instance(instance_or_values, weight: 1.0)
|
193
219
|
instance = instance_from(instance_or_values, weight: weight)
|
194
220
|
add(instance)
|
@@ -198,10 +224,24 @@ module Weka
|
|
198
224
|
data.each { |values| add_instance(values, weight: weight) }
|
199
225
|
end
|
200
226
|
|
227
|
+
# Retrieve the internal floating point values used to represent
|
228
|
+
# the attributes.
|
229
|
+
#
|
230
|
+
# @param [Array, Hash] values the attribute values whose floating
|
231
|
+
# point representation should be retrieved.
|
232
|
+
#
|
233
|
+
# @return [Array, Hash] an array of the internal floating point
|
234
|
+
# representation if the input is an Array. Hash otherwise.
|
201
235
|
def internal_values_of(values)
|
202
|
-
values.
|
236
|
+
use_hash = values.is_a?(Hash)
|
237
|
+
values = attribute_values_from_hash(values) if use_hash
|
238
|
+
|
239
|
+
values = values.map.with_index do |value, index|
|
203
240
|
attribute(index).internal_value_of(value)
|
204
241
|
end
|
242
|
+
|
243
|
+
values = attribute_values_to_hash(values) if use_hash
|
244
|
+
values
|
205
245
|
end
|
206
246
|
|
207
247
|
def apply_filter(filter)
|
@@ -220,6 +260,13 @@ module Weka
|
|
220
260
|
end
|
221
261
|
end
|
222
262
|
|
263
|
+
# Get all instances' values as a Matrix.
|
264
|
+
#
|
265
|
+
# @return [Matrix] a Matrix holding each instance's values as a row.
|
266
|
+
def to_m
|
267
|
+
Matrix[*instances.map(&:values)]
|
268
|
+
end
|
269
|
+
|
223
270
|
private
|
224
271
|
|
225
272
|
def add_attribute(attribute)
|
@@ -227,7 +274,9 @@ module Weka
|
|
227
274
|
end
|
228
275
|
|
229
276
|
def ensure_attribute_defined!(name)
|
230
|
-
|
277
|
+
if attribute_names(include_class_attribute: true).include?(name.to_s)
|
278
|
+
return
|
279
|
+
end
|
231
280
|
|
232
281
|
error = "\"#{name}\" is not defined."
|
233
282
|
hint = 'Only defined attributes can be used as class attribute!'
|
@@ -237,25 +286,36 @@ module Weka
|
|
237
286
|
end
|
238
287
|
|
239
288
|
def attribute_with_name(name)
|
240
|
-
attributes.
|
289
|
+
attributes(include_class_attribute: true).find do |attribute|
|
290
|
+
attribute.name == name.to_s
|
291
|
+
end
|
241
292
|
end
|
242
293
|
|
294
|
+
# Wrap the attribute values for the instance to be added with
|
295
|
+
# an Instance object, if needed. The Instance object is
|
296
|
+
# assigned with the given weight.
|
297
|
+
#
|
298
|
+
# @param [Instance, Array, Hash] instance_or_values either the
|
299
|
+
# instance object to be added or the attribute values for it.
|
300
|
+
# For the latter case, it accepts an array or a hash.
|
301
|
+
#
|
302
|
+
# @param [Float] weight the weight for the Instance to be added
|
303
|
+
#
|
304
|
+
# @return [Instance] the object that contains the given
|
305
|
+
# attribute values.
|
243
306
|
def instance_from(instance_or_values, weight:)
|
244
307
|
if instance_or_values.is_a?(Java::WekaCore::Instance)
|
245
308
|
instance_or_values.weight = weight
|
246
309
|
instance_or_values
|
247
310
|
else
|
311
|
+
if instance_or_values.is_a?(Hash)
|
312
|
+
instance_or_values = attribute_values_from_hash(instance_or_values)
|
313
|
+
end
|
314
|
+
|
248
315
|
data = internal_values_of(instance_or_values)
|
249
316
|
|
250
|
-
|
251
|
-
|
252
|
-
# the attribute before creating the instance
|
253
|
-
data.map!.with_index do |value, index|
|
254
|
-
if value == -1 && attribute(index).string?
|
255
|
-
attribute(index).add_string_value(instance_or_values[index].to_s)
|
256
|
-
else
|
257
|
-
value
|
258
|
-
end
|
317
|
+
if has_string_attribute?
|
318
|
+
data = check_string_attributes(data, instance_or_values)
|
259
319
|
end
|
260
320
|
|
261
321
|
DenseInstance.new(data, weight: weight)
|
@@ -266,8 +326,48 @@ module Weka
|
|
266
326
|
return -1 unless Attribute::TYPES.include?(type.downcase.to_sym)
|
267
327
|
Attribute.const_get(type.upcase)
|
268
328
|
end
|
269
|
-
end
|
270
329
|
|
271
|
-
|
330
|
+
# Convert a hash whose keys are attribute names and values are attribute
|
331
|
+
# values into an array containing attribute values in the order
|
332
|
+
# of the Instances attributes.
|
333
|
+
#
|
334
|
+
# @param [Hash] hash a hash whose keys are attribute names and
|
335
|
+
# values are attribute values.
|
336
|
+
#
|
337
|
+
# @return [Array] an array containing attribute values in the
|
338
|
+
# correct order
|
339
|
+
def attribute_values_from_hash(hash)
|
340
|
+
names = attribute_names(include_class_attribute: true).map(&:to_sym)
|
341
|
+
hash.values_at(*names)
|
342
|
+
end
|
343
|
+
|
344
|
+
# Convert an array of attribute values in the same order as Instances
|
345
|
+
# attributes into a hash whose keys are attribute names and values
|
346
|
+
# are corresponding attribute values.
|
347
|
+
#
|
348
|
+
# @param [Array] values an array containing the attribute values
|
349
|
+
#
|
350
|
+
# @return [Hash] a hash as described above
|
351
|
+
def attribute_values_to_hash(values)
|
352
|
+
names = attribute_names(include_class_attribute: true).map(&:to_sym)
|
353
|
+
|
354
|
+
names.each_with_index.inject({}) do |hash, (name, index)|
|
355
|
+
hash.update(name => values[index])
|
356
|
+
end
|
357
|
+
end
|
358
|
+
|
359
|
+
def check_string_attributes(internal_values, attribute_values)
|
360
|
+
# A string attribute has an unlimited range of possible values.
|
361
|
+
# Check the returned index; if it is -1, add the value to
|
362
|
+
# the attribute before creating the instance
|
363
|
+
internal_values.map.with_index do |value, index|
|
364
|
+
if value == -1 && attribute(index).string?
|
365
|
+
attribute(index).add_string_value(attribute_values[index])
|
366
|
+
else
|
367
|
+
value
|
368
|
+
end
|
369
|
+
end
|
370
|
+
end
|
371
|
+
end
|
272
372
|
end
|
273
373
|
end
|
data/lib/weka/filters/utils.rb
CHANGED
@@ -1,13 +1,9 @@
|
|
1
1
|
module Weka
|
2
2
|
module Filters
|
3
3
|
module Utils
|
4
|
-
def
|
5
|
-
|
6
|
-
|
7
|
-
set_input_format(instances)
|
8
|
-
Filter.use_filter(instances, self)
|
9
|
-
end
|
10
|
-
end
|
4
|
+
def filter(instances)
|
5
|
+
set_input_format(instances)
|
6
|
+
Filter.use_filter(instances, self)
|
11
7
|
end
|
12
8
|
end
|
13
9
|
end
|
data/lib/weka/version.rb
CHANGED
data/weka.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: weka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Paul Götze
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -91,6 +91,7 @@ extra_rdoc_files: []
|
|
91
91
|
files:
|
92
92
|
- ".gitignore"
|
93
93
|
- ".rspec"
|
94
|
+
- ".rubocop.yml"
|
94
95
|
- ".travis.yml"
|
95
96
|
- CODE_OF_CONDUCT.md
|
96
97
|
- Gemfile
|
@@ -164,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
165
|
version: '0'
|
165
166
|
requirements: []
|
166
167
|
rubyforge_project:
|
167
|
-
rubygems_version: 2.6.
|
168
|
+
rubygems_version: 2.6.11
|
168
169
|
signing_key:
|
169
170
|
specification_version: 4
|
170
171
|
summary: Machine Learning & Data Mining with JRuby.
|