weka 0.4.0-java → 0.5.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +30 -0
- data/README.md +2 -1
- data/lib/weka/class_builder.rb +23 -5
- data/lib/weka/classifiers/evaluation.rb +18 -2
- data/lib/weka/classifiers/utils.rb +116 -90
- data/lib/weka/clusterers/utils.rb +100 -74
- data/lib/weka/concerns/optionizable.rb +30 -29
- data/lib/weka/concerns/serializable.rb +2 -6
- data/lib/weka/core/attribute.rb +1 -1
- data/lib/weka/core/dense_instance.rb +7 -15
- data/lib/weka/core/instances.rb +120 -20
- data/lib/weka/filters/utils.rb +3 -7
- data/lib/weka/version.rb +1 -1
- data/weka.gemspec +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 07d0cbed2d245de34e10101d0597017194804735e3b5ea6bc972201282c60d5d
|
4
|
+
data.tar.gz: b304c03a4552b766f56e60e52b765068fdb0de0350d1e5c541882f085e90824b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 942621fa83a7670384adccb717610ac19dc5d961fd9de5ab579a331c40119ef8e0e73cb0b0962aaff9fe937628335ee453be2ef1ed0669edf5421dadc075acdc
|
7
|
+
data.tar.gz: ee68c277c9a40f6349fdea303897d44fd93ded9311d11ffba8c1cd3909764ea51d22638cc7240bb375570615a9acaa4d98ba591b8de3102c41c3dcabf550283b
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.4
|
3
|
+
Exclude:
|
4
|
+
- 'bin/**/*'
|
5
|
+
- '*.gemspec'
|
6
|
+
- 'Gemfile'
|
7
|
+
- 'Gemfile.lock'
|
8
|
+
|
9
|
+
Style/Copyright:
|
10
|
+
Enabled: false
|
11
|
+
|
12
|
+
Style/Documentation:
|
13
|
+
Enabled: false
|
14
|
+
|
15
|
+
Metrics/LineLength:
|
16
|
+
Max: 80
|
17
|
+
|
18
|
+
Layout/MultilineMethodCallIndentation:
|
19
|
+
EnforcedStyle: indented
|
20
|
+
|
21
|
+
Style/FrozenStringLiteralComment:
|
22
|
+
Enabled: false
|
23
|
+
|
24
|
+
Metrics/ModuleLength:
|
25
|
+
Exclude:
|
26
|
+
- "**/*_spec.rb"
|
27
|
+
|
28
|
+
Metrics/BlockLength:
|
29
|
+
Exclude:
|
30
|
+
- "**/*_spec.rb"
|
data/README.md
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
[](http://badge.fury.io/rb/weka)
|
4
4
|
[](https://travis-ci.org/paulgoetze/weka-jruby)
|
5
|
+
[](https://www.codacy.com/app/paul-christoph-goetze/weka-jruby?utm_source=github.com&utm_medium=referral&utm_content=paulgoetze/weka-jruby&utm_campaign=Badge_Grade)
|
5
6
|
|
6
7
|
Machine Learning & Data Mining with JRuby based on the [Weka](http://www.cs.waikato.ac.nz/~ml/weka/index.html) Java library.
|
7
8
|
|
@@ -60,7 +61,7 @@ Here’s how to contribute:
|
|
60
61
|
Please try to add RSpec tests along with your new features. This will ensure that your code does not break existing functionality and that your feature is working as expected.
|
61
62
|
|
62
63
|
We use [Rubocop](https://github.com/bbatsov/rubocop) for code style recommendations.
|
63
|
-
Please make sure your contributions comply with the
|
64
|
+
Please make sure your contributions comply with the project’s Rubocop config.
|
64
65
|
|
65
66
|
## Acknowledgement
|
66
67
|
|
data/lib/weka/class_builder.rb
CHANGED
@@ -7,14 +7,24 @@ module Weka
|
|
7
7
|
end
|
8
8
|
|
9
9
|
module ClassMethods
|
10
|
-
def build_class(class_name, weka_module: nil, include_concerns: true)
|
10
|
+
def build_class(class_name, weka_module: nil, include_concerns: true, additional_includes: [])
|
11
11
|
java_import java_class_path(class_name, weka_module)
|
12
|
-
define_class(
|
12
|
+
define_class(
|
13
|
+
class_name,
|
14
|
+
weka_module,
|
15
|
+
include_concerns: include_concerns,
|
16
|
+
additional_includes: additional_includes
|
17
|
+
)
|
13
18
|
end
|
14
19
|
|
15
|
-
def build_classes(*class_names, weka_module: nil, include_concerns: true)
|
20
|
+
def build_classes(*class_names, weka_module: nil, include_concerns: true, additional_includes: [])
|
16
21
|
class_names.each do |name|
|
17
|
-
build_class(
|
22
|
+
build_class(
|
23
|
+
name,
|
24
|
+
weka_module: weka_module,
|
25
|
+
include_concerns: include_concerns,
|
26
|
+
additional_includes: additional_includes
|
27
|
+
)
|
18
28
|
end
|
19
29
|
end
|
20
30
|
|
@@ -58,12 +68,13 @@ module Weka
|
|
58
68
|
name.scan('::').count == 1
|
59
69
|
end
|
60
70
|
|
61
|
-
def define_class(class_name, weka_module, include_concerns: true)
|
71
|
+
def define_class(class_name, weka_module, include_concerns: true, additional_includes: [])
|
62
72
|
module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
|
63
73
|
class #{class_name}
|
64
74
|
#{'include Concerns' if include_concerns}
|
65
75
|
#{include_serializable_for(class_name, weka_module)}
|
66
76
|
#{include_utils}
|
77
|
+
#{include_additionals(additional_includes)}
|
67
78
|
end
|
68
79
|
CLASS_DEFINITION
|
69
80
|
end
|
@@ -84,6 +95,13 @@ module Weka
|
|
84
95
|
constantize(utils_super_modules).const_defined?(:Utils)
|
85
96
|
end
|
86
97
|
|
98
|
+
def include_additionals(modules)
|
99
|
+
modules = Array(modules)
|
100
|
+
return if modules.empty?
|
101
|
+
|
102
|
+
modules.map { |name| "include #{name}" }.join("\n")
|
103
|
+
end
|
104
|
+
|
87
105
|
def constantize(module_names)
|
88
106
|
Object.module_eval("::#{module_names}")
|
89
107
|
end
|
@@ -1,8 +1,12 @@
|
|
1
|
+
require 'weka/class_builder'
|
2
|
+
|
1
3
|
module Weka
|
2
4
|
module Classifiers
|
3
5
|
java_import 'weka.classifiers.Evaluation'
|
4
6
|
|
5
7
|
class Evaluation
|
8
|
+
include ClassBuilder
|
9
|
+
|
6
10
|
# Use both nomenclatures f_measure and fmeasure for consistency
|
7
11
|
# due to jruby's auto method generation of 'fMeasure' to 'f_measure' and
|
8
12
|
# 'weightedFMeasure' to 'weighted_fmeasure'.
|
@@ -29,8 +33,20 @@ module Weka
|
|
29
33
|
alias average_cost avg_cost
|
30
34
|
|
31
35
|
alias cumulative_margin_distribution to_cumulative_margin_distribution_string
|
32
|
-
end
|
33
36
|
|
34
|
-
|
37
|
+
module Curve
|
38
|
+
def self.included(base)
|
39
|
+
base.class_eval do
|
40
|
+
alias_method :curve, :get_curve
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
build_classes :CostCurve,
|
46
|
+
:MarginCurve,
|
47
|
+
:ThresholdCurve,
|
48
|
+
weka_module: 'weka.classifiers.evaluation',
|
49
|
+
additional_includes: Curve
|
50
|
+
end
|
35
51
|
end
|
36
52
|
end
|
@@ -5,128 +5,154 @@ module Weka
|
|
5
5
|
module Classifiers
|
6
6
|
module Utils
|
7
7
|
def self.included(base)
|
8
|
-
base.
|
9
|
-
|
8
|
+
base.include Buildable if base.instance_methods.include?(:build_classifier)
|
9
|
+
base.include Classifiable if base.instance_methods.include?(:classify_instance)
|
10
|
+
base.include Updatable if base.instance_methods.include?(:update_classifier)
|
11
|
+
base.include Distributable if base.instance_methods.include?(:distribution_for_instance)
|
12
|
+
end
|
10
13
|
|
11
|
-
|
12
|
-
|
14
|
+
module Checks
|
15
|
+
private
|
13
16
|
|
14
|
-
|
15
|
-
|
17
|
+
def ensure_class_attribute_assigned!(instances)
|
18
|
+
return if instances.class_attribute_defined?
|
16
19
|
|
17
|
-
|
18
|
-
|
20
|
+
error = 'Class attribute is not assigned for Instances.'
|
21
|
+
hint = 'You can assign a class attribute with #class_attribute=.'
|
22
|
+
message = "#{error} #{hint}"
|
19
23
|
|
20
|
-
|
21
|
-
|
24
|
+
raise UnassignedClassError, message
|
25
|
+
end
|
22
26
|
|
23
|
-
|
24
|
-
|
27
|
+
def ensure_trained_with_instances!
|
28
|
+
return unless training_instances.nil?
|
25
29
|
|
26
|
-
|
27
|
-
|
30
|
+
error = 'Classifier is not trained with Instances.'
|
31
|
+
hint = 'You can set the training instances with #train_with_instances.'
|
32
|
+
message = "#{error} #{hint}"
|
28
33
|
|
29
|
-
|
30
|
-
|
31
|
-
|
34
|
+
raise UnassignedTrainingInstancesError, message
|
35
|
+
end
|
36
|
+
end
|
32
37
|
|
33
|
-
|
34
|
-
|
35
|
-
ensure_class_attribute_assigned!(test_instances)
|
38
|
+
module Transformers
|
39
|
+
private
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
end
|
41
|
-
end
|
41
|
+
def classifiable_instance_from(instance_or_values)
|
42
|
+
attributes = training_instances.attributes
|
43
|
+
instances = Weka::Core::Instances.new(attributes: attributes)
|
42
44
|
|
43
|
-
|
44
|
-
|
45
|
-
|
45
|
+
class_attribute = training_instances.class_attribute
|
46
|
+
class_index = training_instances.class_index
|
47
|
+
instances.insert_attribute_at(class_attribute, class_index)
|
46
48
|
|
47
|
-
|
48
|
-
|
49
|
+
instances.class_index = training_instances.class_index
|
50
|
+
instances.add_instance(instance_or_values)
|
49
51
|
|
50
|
-
|
51
|
-
|
52
|
-
|
52
|
+
instance = instances.first
|
53
|
+
instance.set_class_missing
|
54
|
+
instance
|
55
|
+
end
|
56
|
+
end
|
53
57
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
update_classifier(instance)
|
58
|
+
module Buildable
|
59
|
+
java_import 'java.util.Random'
|
60
|
+
include Checks
|
58
61
|
|
59
|
-
|
60
|
-
end
|
62
|
+
attr_reader :training_instances
|
61
63
|
|
62
|
-
|
63
|
-
|
64
|
-
instance = Weka::Core::DenseInstance.new(values)
|
65
|
-
add_training_instance(instance)
|
66
|
-
end
|
67
|
-
end
|
64
|
+
def train_with_instances(instances)
|
65
|
+
ensure_class_attribute_assigned!(instances)
|
68
66
|
|
69
|
-
|
70
|
-
|
71
|
-
ensure_trained_with_instances!
|
67
|
+
@training_instances = instances
|
68
|
+
build_classifier(instances)
|
72
69
|
|
73
|
-
|
74
|
-
|
70
|
+
self
|
71
|
+
end
|
75
72
|
|
76
|
-
|
77
|
-
|
78
|
-
end
|
73
|
+
def cross_validate(folds: 3)
|
74
|
+
ensure_trained_with_instances!
|
79
75
|
|
80
|
-
|
76
|
+
evaluation = Evaluation.new(training_instances)
|
77
|
+
random = Java::JavaUtil::Random.new(1)
|
81
78
|
|
82
|
-
|
83
|
-
|
79
|
+
evaluation.cross_validate_model(
|
80
|
+
self,
|
81
|
+
training_instances,
|
82
|
+
folds.to_i,
|
83
|
+
random
|
84
|
+
)
|
84
85
|
|
85
|
-
|
86
|
-
|
87
|
-
message = "#{error} #{hint}"
|
86
|
+
evaluation
|
87
|
+
end
|
88
88
|
|
89
|
-
|
90
|
-
|
89
|
+
def evaluate(test_instances)
|
90
|
+
ensure_trained_with_instances!
|
91
|
+
ensure_class_attribute_assigned!(test_instances)
|
91
92
|
|
92
|
-
|
93
|
-
|
93
|
+
evaluation = Evaluation.new(training_instances)
|
94
|
+
evaluation.evaluate_model(self, test_instances)
|
95
|
+
evaluation
|
96
|
+
end
|
97
|
+
end
|
94
98
|
|
95
|
-
|
96
|
-
|
97
|
-
|
99
|
+
module Classifiable
|
100
|
+
include Checks
|
101
|
+
include Transformers
|
98
102
|
|
99
|
-
|
100
|
-
|
103
|
+
def classify(instance_or_values)
|
104
|
+
ensure_trained_with_instances!
|
101
105
|
|
102
|
-
|
103
|
-
|
104
|
-
instances = Weka::Core::Instances.new(attributes: attributes)
|
106
|
+
instance = classifiable_instance_from(instance_or_values)
|
107
|
+
index = classify_instance(instance)
|
105
108
|
|
106
|
-
|
107
|
-
|
108
|
-
instances.insert_attribute_at(class_attribute, class_index)
|
109
|
+
class_value_of_index(index)
|
110
|
+
end
|
109
111
|
|
110
|
-
|
111
|
-
instances.add_instance(instance_or_values)
|
112
|
+
private
|
112
113
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
114
|
+
def class_value_of_index(index)
|
115
|
+
training_instances.class_attribute.value(index)
|
116
|
+
end
|
117
|
+
end
|
117
118
|
|
118
|
-
|
119
|
-
|
120
|
-
|
119
|
+
module Updatable
|
120
|
+
def add_training_instance(instance)
|
121
|
+
training_instances.add(instance)
|
122
|
+
update_classifier(instance)
|
123
|
+
|
124
|
+
self
|
125
|
+
end
|
126
|
+
|
127
|
+
def add_training_data(data)
|
128
|
+
values = training_instances.internal_values_of(data)
|
129
|
+
instance = Weka::Core::DenseInstance.new(values)
|
130
|
+
add_training_instance(instance)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
module Distributable
|
135
|
+
include Checks
|
136
|
+
include Transformers
|
137
|
+
|
138
|
+
def distribution_for(instance_or_values)
|
139
|
+
ensure_trained_with_instances!
|
140
|
+
|
141
|
+
instance = classifiable_instance_from(instance_or_values)
|
142
|
+
distributions = distribution_for_instance(instance)
|
143
|
+
|
144
|
+
class_distributions_from(distributions)
|
145
|
+
end
|
146
|
+
|
147
|
+
private
|
121
148
|
|
122
|
-
|
123
|
-
|
149
|
+
def class_distributions_from(distributions)
|
150
|
+
class_values = training_instances.class_attribute.values
|
124
151
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
end
|
152
|
+
distributions.each_with_object({}).with_index do |(distribution, result), index|
|
153
|
+
class_value = class_values[index]
|
154
|
+
result[class_value] = distribution
|
155
|
+
result
|
130
156
|
end
|
131
157
|
end
|
132
158
|
end
|
@@ -5,96 +5,122 @@ module Weka
|
|
5
5
|
module Clusterers
|
6
6
|
module Utils
|
7
7
|
def self.included(base)
|
8
|
-
base.
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
attr_reader :training_instances
|
13
|
-
|
14
|
-
def train_with_instances(instances)
|
15
|
-
@training_instances = instances
|
16
|
-
build_clusterer(instances)
|
17
|
-
|
18
|
-
self
|
19
|
-
end
|
20
|
-
|
21
|
-
if ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
|
22
|
-
def cross_validate(folds: 3)
|
23
|
-
ensure_trained_with_instances!
|
24
|
-
|
25
|
-
ClusterEvaluation.cross_validate_model(
|
26
|
-
self,
|
27
|
-
training_instances,
|
28
|
-
folds.to_i,
|
29
|
-
Java::JavaUtil::Random.new(1)
|
30
|
-
)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def evaluate(test_instances)
|
35
|
-
ensure_trained_with_instances!
|
36
|
-
|
37
|
-
ClusterEvaluation.new.tap do |evaluation|
|
38
|
-
evaluation.clusterer = self
|
39
|
-
evaluation.evaluate_clusterer(test_instances)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
8
|
+
if base.instance_methods.include?(:build_clusterer)
|
9
|
+
base.include Buildable
|
10
|
+
base.include CrossValidatable if density_based?(base)
|
11
|
+
end
|
43
12
|
|
44
|
-
|
45
|
-
|
46
|
-
|
13
|
+
base.include Clusterable if base.instance_methods.include?(:cluster_instance)
|
14
|
+
base.include Updatable if base.instance_methods.include?(:update_clusterer)
|
15
|
+
base.include Distributable if base.instance_methods.include?(:distribution_for_instance)
|
16
|
+
end
|
47
17
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
18
|
+
def self.density_based?(base)
|
19
|
+
base.ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
|
20
|
+
end
|
52
21
|
|
53
|
-
|
54
|
-
|
55
|
-
training_instances.add(instance)
|
56
|
-
update_clusterer(instance)
|
22
|
+
module Checks
|
23
|
+
private
|
57
24
|
|
58
|
-
|
59
|
-
|
25
|
+
def ensure_trained_with_instances!
|
26
|
+
return unless training_instances.nil?
|
60
27
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
add_training_instance(instance)
|
65
|
-
end
|
66
|
-
end
|
28
|
+
error = 'Clusterer is not trained with Instances.'
|
29
|
+
hint = 'You can set the training instances with #train_with_instances.'
|
30
|
+
message = "#{error} #{hint}"
|
67
31
|
|
68
|
-
|
69
|
-
|
70
|
-
|
32
|
+
raise UnassignedTrainingInstancesError, message
|
33
|
+
end
|
34
|
+
end
|
71
35
|
|
72
|
-
|
73
|
-
|
74
|
-
end
|
75
|
-
end
|
36
|
+
module Transformers
|
37
|
+
private
|
76
38
|
|
77
|
-
|
39
|
+
def clusterable_instance_from(instance_or_values)
|
40
|
+
attributes = training_instances.attributes
|
41
|
+
instances = Weka::Core::Instances.new(attributes: attributes)
|
78
42
|
|
79
|
-
|
80
|
-
|
43
|
+
instances.add_instance(instance_or_values)
|
44
|
+
instances.first
|
45
|
+
end
|
46
|
+
end
|
81
47
|
|
82
|
-
|
83
|
-
|
84
|
-
message = "#{error} #{hint}"
|
48
|
+
module Buildable
|
49
|
+
include Checks
|
85
50
|
|
86
|
-
|
87
|
-
end
|
51
|
+
attr_reader :training_instances
|
88
52
|
|
89
|
-
|
90
|
-
|
91
|
-
|
53
|
+
def train_with_instances(instances)
|
54
|
+
@training_instances = instances
|
55
|
+
build_clusterer(instances)
|
56
|
+
|
57
|
+
self
|
58
|
+
end
|
92
59
|
|
93
|
-
|
94
|
-
|
60
|
+
def evaluate(test_instances)
|
61
|
+
ensure_trained_with_instances!
|
62
|
+
|
63
|
+
ClusterEvaluation.new.tap do |evaluation|
|
64
|
+
evaluation.clusterer = self
|
65
|
+
evaluation.evaluate_clusterer(test_instances)
|
95
66
|
end
|
96
67
|
end
|
97
68
|
end
|
69
|
+
|
70
|
+
module CrossValidatable
|
71
|
+
java_import 'java.util.Random'
|
72
|
+
include Checks
|
73
|
+
|
74
|
+
def cross_validate(folds: 3)
|
75
|
+
ensure_trained_with_instances!
|
76
|
+
|
77
|
+
ClusterEvaluation.cross_validate_model(
|
78
|
+
self,
|
79
|
+
training_instances,
|
80
|
+
folds.to_i,
|
81
|
+
Java::JavaUtil::Random.new(1)
|
82
|
+
)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
module Clusterable
|
87
|
+
include Checks
|
88
|
+
include Transformers
|
89
|
+
|
90
|
+
def cluster(instance_or_values)
|
91
|
+
ensure_trained_with_instances!
|
92
|
+
|
93
|
+
instance = clusterable_instance_from(instance_or_values)
|
94
|
+
cluster_instance(instance)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
module Updatable
|
99
|
+
def add_training_instance(instance)
|
100
|
+
training_instances.add(instance)
|
101
|
+
update_clusterer(instance)
|
102
|
+
|
103
|
+
self
|
104
|
+
end
|
105
|
+
|
106
|
+
def add_training_data(data)
|
107
|
+
values = training_instances.internal_values_of(data)
|
108
|
+
instance = Weka::Core::DenseInstance.new(values)
|
109
|
+
add_training_instance(instance)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
module Distributable
|
114
|
+
include Checks
|
115
|
+
include Transformers
|
116
|
+
|
117
|
+
def distribution_for(instance_or_values)
|
118
|
+
ensure_trained_with_instances!
|
119
|
+
|
120
|
+
instance = clusterable_instance_from(instance_or_values)
|
121
|
+
distribution_for_instance(instance).to_a
|
122
|
+
end
|
123
|
+
end
|
98
124
|
end
|
99
125
|
end
|
100
126
|
end
|
@@ -2,45 +2,46 @@ module Weka
|
|
2
2
|
module Concerns
|
3
3
|
module Optionizable
|
4
4
|
def self.included(base)
|
5
|
-
base.extend
|
5
|
+
base.extend ClassMethods
|
6
|
+
base.include InstanceMethods
|
7
|
+
end
|
6
8
|
|
7
|
-
|
8
|
-
|
9
|
+
module ClassMethods
|
10
|
+
def default_options
|
11
|
+
new.get_options.to_a.join(' ')
|
12
|
+
end
|
13
|
+
end
|
9
14
|
|
10
|
-
|
11
|
-
|
12
|
-
options = Java::WekaCore::Utils.split_options(joined_options)
|
15
|
+
module InstanceMethods
|
16
|
+
java_import 'weka.core.Utils'
|
13
17
|
|
14
|
-
|
15
|
-
|
16
|
-
|
18
|
+
def use_options(*single_options, **hash_options)
|
19
|
+
joined_options = join_options(single_options, hash_options)
|
20
|
+
options = Java::WekaCore::Utils.split_options(joined_options)
|
17
21
|
|
18
|
-
|
19
|
-
|
20
|
-
|
22
|
+
set_options(options)
|
23
|
+
@options = joined_options
|
24
|
+
end
|
21
25
|
|
22
|
-
|
26
|
+
def options
|
27
|
+
@options || self.class.default_options
|
28
|
+
end
|
23
29
|
|
24
|
-
|
25
|
-
[
|
26
|
-
join_single_options(*single_options),
|
27
|
-
join_hash_options(**hash_options)
|
28
|
-
].reject(&:empty?).join(' ')
|
29
|
-
end
|
30
|
+
private
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
def join_options(*single_options, **hash_options)
|
33
|
+
[
|
34
|
+
join_single_options(*single_options),
|
35
|
+
join_hash_options(**hash_options)
|
36
|
+
].reject(&:empty?).join(' ')
|
37
|
+
end
|
34
38
|
|
35
|
-
|
36
|
-
|
37
|
-
end
|
39
|
+
def join_single_options(options)
|
40
|
+
options.map { |option| "-#{option.to_s.sub(/^-/, '')}" }.join(' ')
|
38
41
|
end
|
39
|
-
end
|
40
42
|
|
41
|
-
|
42
|
-
|
43
|
-
new.get_options.to_a.join(' ')
|
43
|
+
def join_hash_options(options)
|
44
|
+
options.map { |key, value| "-#{key} #{value}" }.join(' ')
|
44
45
|
end
|
45
46
|
end
|
46
47
|
end
|
@@ -3,12 +3,8 @@ require 'weka/core/serialization_helper'
|
|
3
3
|
module Weka
|
4
4
|
module Concerns
|
5
5
|
module Serializable
|
6
|
-
def
|
7
|
-
|
8
|
-
def serialize(filename)
|
9
|
-
Weka::Core::SerializationHelper.write(filename, self)
|
10
|
-
end
|
11
|
-
end
|
6
|
+
def serialize(filename)
|
7
|
+
Weka::Core::SerializationHelper.write(filename, self)
|
12
8
|
end
|
13
9
|
end
|
14
10
|
end
|
data/lib/weka/core/attribute.rb
CHANGED
@@ -57,25 +57,17 @@ module Weka
|
|
57
57
|
|
58
58
|
attribute = attribute_at(index)
|
59
59
|
|
60
|
-
if attribute.date?
|
61
|
-
|
62
|
-
|
63
|
-
value
|
64
|
-
elsif attribute.nominal? || attribute.string?
|
65
|
-
attribute.value(value)
|
66
|
-
end
|
60
|
+
return format_date(value, attribute.date_format) if attribute.date?
|
61
|
+
return value if attribute.numeric?
|
62
|
+
return attribute.value(value) if attribute.nominal? || attribute.string?
|
67
63
|
end
|
68
64
|
|
69
65
|
def attribute_at(index)
|
70
|
-
return attributes[index]
|
66
|
+
return attributes[index] unless dataset.class_attribute_defined?
|
67
|
+
return class_attribute if dataset.class_index == index
|
68
|
+
return attributes[index - 1] if index > dataset.class_index
|
71
69
|
|
72
|
-
|
73
|
-
class_attribute
|
74
|
-
elsif index > dataset.class_index
|
75
|
-
attributes[index - 1]
|
76
|
-
else
|
77
|
-
attributes[index]
|
78
|
-
end
|
70
|
+
attributes[index]
|
79
71
|
end
|
80
72
|
|
81
73
|
def format_date(value, format)
|
data/lib/weka/core/instances.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'matrix'
|
1
2
|
require 'weka/core/converters'
|
2
3
|
require 'weka/core/loader'
|
3
4
|
require 'weka/core/saver'
|
@@ -10,6 +11,7 @@ module Weka
|
|
10
11
|
java_import 'weka.core.FastVector'
|
11
12
|
|
12
13
|
class Instances
|
14
|
+
include Weka::Concerns::Persistent
|
13
15
|
include Weka::Concerns::Serializable
|
14
16
|
|
15
17
|
DEFAULT_RELATION_NAME = 'Instances'.freeze
|
@@ -38,7 +40,7 @@ module Weka
|
|
38
40
|
end
|
39
41
|
end
|
40
42
|
|
41
|
-
def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: []
|
43
|
+
def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: [])
|
42
44
|
attribute_list = FastVector.new
|
43
45
|
attributes.each { |attribute| attribute_list.add_element(attribute) }
|
44
46
|
|
@@ -49,12 +51,18 @@ module Weka
|
|
49
51
|
enumerate_instances.to_a
|
50
52
|
end
|
51
53
|
|
52
|
-
def attributes
|
53
|
-
enumerate_attributes.to_a
|
54
|
+
def attributes(include_class_attribute: false)
|
55
|
+
attrs = enumerate_attributes.to_a
|
56
|
+
|
57
|
+
if include_class_attribute && class_attribute_defined?
|
58
|
+
attrs.insert(class_index, class_attribute)
|
59
|
+
end
|
60
|
+
|
61
|
+
attrs
|
54
62
|
end
|
55
63
|
|
56
|
-
def attribute_names
|
57
|
-
attributes.map(&:name)
|
64
|
+
def attribute_names(include_class_attribute: false)
|
65
|
+
attributes(include_class_attribute: include_class_attribute).map(&:name)
|
58
66
|
end
|
59
67
|
|
60
68
|
def add_attributes(&block)
|
@@ -67,7 +75,7 @@ module Weka
|
|
67
75
|
alias attributes_count num_attributes
|
68
76
|
alias has_string_attribute? check_for_string_attributes
|
69
77
|
|
70
|
-
|
78
|
+
# Check whether the Instances object has any attribute of the given type
|
71
79
|
# @param [String, Symbol, Integer] type type of the attribute to check
|
72
80
|
# String and Symbol argument are converted to corresponding type
|
73
81
|
# defined in Weka::Core::Attribute
|
@@ -189,6 +197,24 @@ module Weka
|
|
189
197
|
class_index >= 0
|
190
198
|
end
|
191
199
|
|
200
|
+
# Add new instance
|
201
|
+
# @param [Instance, Array, Hash] instance_or_values the attribute values
|
202
|
+
# of the instance to be added. If passing an array, the attribute values
|
203
|
+
# must be in the same order as the attributes defined in Instances.
|
204
|
+
# If passing a hash, the keys are the names of the attributes and their
|
205
|
+
# values are the corresponding attribute values.
|
206
|
+
#
|
207
|
+
# @example Passing Instance
|
208
|
+
# instances.add_instance(instance)
|
209
|
+
#
|
210
|
+
# @example Passing an array of attribute values
|
211
|
+
# attr_values = [attr1_value, attr2_value, attr3_value]
|
212
|
+
# instances.add_instance(attr_values)
|
213
|
+
#
|
214
|
+
# @example Passing a hash of attribute values.
|
215
|
+
# attr_values = { attr1_name: attr1_value, attr2_name: attr2_value }
|
216
|
+
# instances.add_instance(attr_values)
|
217
|
+
#
|
192
218
|
def add_instance(instance_or_values, weight: 1.0)
|
193
219
|
instance = instance_from(instance_or_values, weight: weight)
|
194
220
|
add(instance)
|
@@ -198,10 +224,24 @@ module Weka
|
|
198
224
|
data.each { |values| add_instance(values, weight: weight) }
|
199
225
|
end
|
200
226
|
|
227
|
+
# Retrieve the internal floating point values used to represent
|
228
|
+
# the attributes.
|
229
|
+
#
|
230
|
+
# @param [Array, Hash] values the attribute values whose floating
|
231
|
+
# point representation should be retrieved.
|
232
|
+
#
|
233
|
+
# @return [Array, Hash] an array of the internal floating point
|
234
|
+
# representation if the input is an Array. Hash otherwise.
|
201
235
|
def internal_values_of(values)
|
202
|
-
values.
|
236
|
+
use_hash = values.is_a?(Hash)
|
237
|
+
values = attribute_values_from_hash(values) if use_hash
|
238
|
+
|
239
|
+
values = values.map.with_index do |value, index|
|
203
240
|
attribute(index).internal_value_of(value)
|
204
241
|
end
|
242
|
+
|
243
|
+
values = attribute_values_to_hash(values) if use_hash
|
244
|
+
values
|
205
245
|
end
|
206
246
|
|
207
247
|
def apply_filter(filter)
|
@@ -220,6 +260,13 @@ module Weka
|
|
220
260
|
end
|
221
261
|
end
|
222
262
|
|
263
|
+
# Get the values of all instances as a Matrix.
|
264
|
+
#
|
265
|
+
# @return [Matrix] a Matrix holding each instance's values as a row.
|
266
|
+
def to_m
|
267
|
+
Matrix[*instances.map(&:values)]
|
268
|
+
end
|
269
|
+
|
223
270
|
private
|
224
271
|
|
225
272
|
def add_attribute(attribute)
|
@@ -227,7 +274,9 @@ module Weka
|
|
227
274
|
end
|
228
275
|
|
229
276
|
def ensure_attribute_defined!(name)
|
230
|
-
|
277
|
+
if attribute_names(include_class_attribute: true).include?(name.to_s)
|
278
|
+
return
|
279
|
+
end
|
231
280
|
|
232
281
|
error = "\"#{name}\" is not defined."
|
233
282
|
hint = 'Only defined attributes can be used as class attribute!'
|
@@ -237,25 +286,36 @@ module Weka
|
|
237
286
|
end
|
238
287
|
|
239
288
|
def attribute_with_name(name)
|
240
|
-
attributes.
|
289
|
+
attributes(include_class_attribute: true).find do |attribute|
|
290
|
+
attribute.name == name.to_s
|
291
|
+
end
|
241
292
|
end
|
242
293
|
|
294
|
+
# Wrap the attribute values for the instance to be added with
|
295
|
+
# an Instance object, if needed. The Instance object is
|
296
|
+
# assigned with the given weight.
|
297
|
+
#
|
298
|
+
# @param [Instance, Array, Hash] instance_or_values either the
|
299
|
+
# instance object to be added or the attribute values for it.
|
300
|
+
# For the latter case, it accepts an array or a hash.
|
301
|
+
#
|
302
|
+
# @param [Float] weight the weight for the Instance to be added
|
303
|
+
#
|
304
|
+
# @return [Instance] the object that contains the given
|
305
|
+
# attribute values.
|
243
306
|
def instance_from(instance_or_values, weight:)
|
244
307
|
if instance_or_values.is_a?(Java::WekaCore::Instance)
|
245
308
|
instance_or_values.weight = weight
|
246
309
|
instance_or_values
|
247
310
|
else
|
311
|
+
if instance_or_values.is_a?(Hash)
|
312
|
+
instance_or_values = attribute_values_from_hash(instance_or_values)
|
313
|
+
end
|
314
|
+
|
248
315
|
data = internal_values_of(instance_or_values)
|
249
316
|
|
250
|
-
|
251
|
-
|
252
|
-
# the attribute before creating the instance
|
253
|
-
data.map!.with_index do |value, index|
|
254
|
-
if value == -1 && attribute(index).string?
|
255
|
-
attribute(index).add_string_value(instance_or_values[index].to_s)
|
256
|
-
else
|
257
|
-
value
|
258
|
-
end
|
317
|
+
if has_string_attribute?
|
318
|
+
data = check_string_attributes(data, instance_or_values)
|
259
319
|
end
|
260
320
|
|
261
321
|
DenseInstance.new(data, weight: weight)
|
@@ -266,8 +326,48 @@ module Weka
|
|
266
326
|
return -1 unless Attribute::TYPES.include?(type.downcase.to_sym)
|
267
327
|
Attribute.const_get(type.upcase)
|
268
328
|
end
|
269
|
-
end
|
270
329
|
|
271
|
-
|
330
|
+
# Convert a hash whose keys are attribute names and values are attribute
|
331
|
+
# values into an array containing attribute values in the order
|
332
|
+
# of the Instances attributes.
|
333
|
+
#
|
334
|
+
# @param [Hash] hash a hash whose keys are attribute names and
|
335
|
+
# values are attribute values.
|
336
|
+
#
|
337
|
+
# @return [Array] an array containing attribute values in the
|
338
|
+
# correct order
|
339
|
+
def attribute_values_from_hash(hash)
|
340
|
+
names = attribute_names(include_class_attribute: true).map(&:to_sym)
|
341
|
+
hash.values_at(*names)
|
342
|
+
end
|
343
|
+
|
344
|
+
# Convert an array of attribute values in the same order as Instances
|
345
|
+
# attributes into a hash whose keys are attribute names and values
|
346
|
+
# are corresponding attribute values.
|
347
|
+
#
|
348
|
+
# @param [Array] values an array containing the attribute values
|
349
|
+
#
|
350
|
+
# @return [Hash] a hash as described above
|
351
|
+
def attribute_values_to_hash(values)
|
352
|
+
names = attribute_names(include_class_attribute: true).map(&:to_sym)
|
353
|
+
|
354
|
+
names.each_with_index.inject({}) do |hash, (name, index)|
|
355
|
+
hash.update(name => values[index])
|
356
|
+
end
|
357
|
+
end
|
358
|
+
|
359
|
+
def check_string_attributes(internal_values, attribute_values)
|
360
|
+
# A string attribute has an unlimited range of possible values.
|
361
|
+
# Check the returned index; if it is -1, add the value to
|
362
|
+
# the attribute before creating the instance
|
363
|
+
internal_values.map.with_index do |value, index|
|
364
|
+
if value == -1 && attribute(index).string?
|
365
|
+
attribute(index).add_string_value(attribute_values[index])
|
366
|
+
else
|
367
|
+
value
|
368
|
+
end
|
369
|
+
end
|
370
|
+
end
|
371
|
+
end
|
272
372
|
end
|
273
373
|
end
|
data/lib/weka/filters/utils.rb
CHANGED
@@ -1,13 +1,9 @@
|
|
1
1
|
module Weka
|
2
2
|
module Filters
|
3
3
|
module Utils
|
4
|
-
def
|
5
|
-
|
6
|
-
|
7
|
-
set_input_format(instances)
|
8
|
-
Filter.use_filter(instances, self)
|
9
|
-
end
|
10
|
-
end
|
4
|
+
def filter(instances)
|
5
|
+
set_input_format(instances)
|
6
|
+
Filter.use_filter(instances, self)
|
11
7
|
end
|
12
8
|
end
|
13
9
|
end
|
data/lib/weka/version.rb
CHANGED
data/weka.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: weka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Paul Götze
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -91,6 +91,7 @@ extra_rdoc_files: []
|
|
91
91
|
files:
|
92
92
|
- ".gitignore"
|
93
93
|
- ".rspec"
|
94
|
+
- ".rubocop.yml"
|
94
95
|
- ".travis.yml"
|
95
96
|
- CODE_OF_CONDUCT.md
|
96
97
|
- Gemfile
|
@@ -164,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
165
|
version: '0'
|
165
166
|
requirements: []
|
166
167
|
rubyforge_project:
|
167
|
-
rubygems_version: 2.6.
|
168
|
+
rubygems_version: 2.6.11
|
168
169
|
signing_key:
|
169
170
|
specification_version: 4
|
170
171
|
summary: Machine Learning & Data Mining with JRuby.
|