weka 0.4.0-java → 0.5.0-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 24673716e3c980b803c2c77172efbed98a8d7de6
4
- data.tar.gz: 9c611dba1492b943ceaea2ca9c375bbbe95ada85
2
+ SHA256:
3
+ metadata.gz: 07d0cbed2d245de34e10101d0597017194804735e3b5ea6bc972201282c60d5d
4
+ data.tar.gz: b304c03a4552b766f56e60e52b765068fdb0de0350d1e5c541882f085e90824b
5
5
  SHA512:
6
- metadata.gz: 54236c07fa7110a5260a587cd2a1e57016705275ea6eba8b624ad7a5bc8ed58c7e878cf4b61913368f27c9286ccdde777323e34278917d0b05ed185b87fbdbb2
7
- data.tar.gz: 245cc55a7abf2751ddea7a2f4dda479eab73b3aff2852fe915705944a2a0290a26b4bb1d8188e97009168c72c12775bf464d724ca84473a359685a9df7f13ae9
6
+ metadata.gz: 942621fa83a7670384adccb717610ac19dc5d961fd9de5ab579a331c40119ef8e0e73cb0b0962aaff9fe937628335ee453be2ef1ed0669edf5421dadc075acdc
7
+ data.tar.gz: ee68c277c9a40f6349fdea303897d44fd93ded9311d11ffba8c1cd3909764ea51d22638cc7240bb375570615a9acaa4d98ba591b8de3102c41c3dcabf550283b
@@ -0,0 +1,30 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.4
3
+ Exclude:
4
+ - 'bin/**/*'
5
+ - '*.gemspec'
6
+ - 'Gemfile'
7
+ - 'Gemfile.lock'
8
+
9
+ Style/Copyright:
10
+ Enabled: false
11
+
12
+ Style/Documentation:
13
+ Enabled: false
14
+
15
+ Metrics/LineLength:
16
+ Max: 80
17
+
18
+ Layout/MultilineMethodCallIndentation:
19
+ EnforcedStyle: indented
20
+
21
+ Style/FrozenStringLiteralComment:
22
+ Enabled: false
23
+
24
+ Metrics/ModuleLength:
25
+ Exclude:
26
+ - "**/*_spec.rb"
27
+
28
+ Metrics/BlockLength:
29
+ Exclude:
30
+ - "**/*_spec.rb"
data/README.md CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/weka.svg)](http://badge.fury.io/rb/weka)
4
4
  [![Travis Build](https://travis-ci.org/paulgoetze/weka-jruby.svg)](https://travis-ci.org/paulgoetze/weka-jruby)
5
+ [![Codacy Badge](https://api.codacy.com/project/badge/Grade/9634a6709ef545198e079a8daddff100)](https://www.codacy.com/app/paul-christoph-goetze/weka-jruby?utm_source=github.com&utm_medium=referral&utm_content=paulgoetze/weka-jruby&utm_campaign=Badge_Grade)
5
6
 
6
7
  Machine Learning & Data Mining with JRuby based on the [Weka](http://www.cs.waikato.ac.nz/~ml/weka/index.html) Java library.
7
8
 
@@ -60,7 +61,7 @@ Here’s how to contribute:
60
61
  Please try to add RSpec tests along with your new features. This will ensure that your code does not break existing functionality and that your feature is working as expected.
61
62
 
62
63
  We use [Rubocop](https://github.com/bbatsov/rubocop) for code style recommendations.
63
- Please make sure your contributions comply with the default config of Rubocop.
64
+ Please make sure your contributions comply with the project’s Rubocop config.
64
65
 
65
66
  ## Acknowledgement
66
67
 
@@ -7,14 +7,24 @@ module Weka
7
7
  end
8
8
 
9
9
  module ClassMethods
10
- def build_class(class_name, weka_module: nil, include_concerns: true)
10
+ def build_class(class_name, weka_module: nil, include_concerns: true, additional_includes: [])
11
11
  java_import java_class_path(class_name, weka_module)
12
- define_class(class_name, weka_module, include_concerns: include_concerns)
12
+ define_class(
13
+ class_name,
14
+ weka_module,
15
+ include_concerns: include_concerns,
16
+ additional_includes: additional_includes
17
+ )
13
18
  end
14
19
 
15
- def build_classes(*class_names, weka_module: nil, include_concerns: true)
20
+ def build_classes(*class_names, weka_module: nil, include_concerns: true, additional_includes: [])
16
21
  class_names.each do |name|
17
- build_class(name, weka_module: weka_module, include_concerns: include_concerns)
22
+ build_class(
23
+ name,
24
+ weka_module: weka_module,
25
+ include_concerns: include_concerns,
26
+ additional_includes: additional_includes
27
+ )
18
28
  end
19
29
  end
20
30
 
@@ -58,12 +68,13 @@ module Weka
58
68
  name.scan('::').count == 1
59
69
  end
60
70
 
61
- def define_class(class_name, weka_module, include_concerns: true)
71
+ def define_class(class_name, weka_module, include_concerns: true, additional_includes: [])
62
72
  module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
63
73
  class #{class_name}
64
74
  #{'include Concerns' if include_concerns}
65
75
  #{include_serializable_for(class_name, weka_module)}
66
76
  #{include_utils}
77
+ #{include_additionals(additional_includes)}
67
78
  end
68
79
  CLASS_DEFINITION
69
80
  end
@@ -84,6 +95,13 @@ module Weka
84
95
  constantize(utils_super_modules).const_defined?(:Utils)
85
96
  end
86
97
 
98
+ def include_additionals(modules)
99
+ modules = Array(modules)
100
+ return if modules.empty?
101
+
102
+ modules.map { |name| "include #{name}" }.join("\n")
103
+ end
104
+
87
105
  def constantize(module_names)
88
106
  Object.module_eval("::#{module_names}")
89
107
  end
@@ -1,8 +1,12 @@
1
+ require 'weka/class_builder'
2
+
1
3
  module Weka
2
4
  module Classifiers
3
5
  java_import 'weka.classifiers.Evaluation'
4
6
 
5
7
  class Evaluation
8
+ include ClassBuilder
9
+
6
10
  # Use both nomenclatures f_measure and fmeasure for consistency
7
11
  # due to jruby's auto method generation of 'fMeasure' to 'f_measure' and
8
12
  # 'weightedFMeasure' to 'weighted_fmeasure'.
@@ -29,8 +33,20 @@ module Weka
29
33
  alias average_cost avg_cost
30
34
 
31
35
  alias cumulative_margin_distribution to_cumulative_margin_distribution_string
32
- end
33
36
 
34
- Java::WekaClassifiers::Evaluation.__persistent__ = true
37
+ module Curve
38
+ def self.included(base)
39
+ base.class_eval do
40
+ alias_method :curve, :get_curve
41
+ end
42
+ end
43
+ end
44
+
45
+ build_classes :CostCurve,
46
+ :MarginCurve,
47
+ :ThresholdCurve,
48
+ weka_module: 'weka.classifiers.evaluation',
49
+ additional_includes: Curve
50
+ end
35
51
  end
36
52
  end
@@ -5,128 +5,154 @@ module Weka
5
5
  module Classifiers
6
6
  module Utils
7
7
  def self.included(base)
8
- base.class_eval do
9
- java_import 'java.util.Random'
8
+ base.include Buildable if base.instance_methods.include?(:build_classifier)
9
+ base.include Classifiable if base.instance_methods.include?(:classify_instance)
10
+ base.include Updatable if base.instance_methods.include?(:update_classifier)
11
+ base.include Distributable if base.instance_methods.include?(:distribution_for_instance)
12
+ end
10
13
 
11
- if instance_methods.include?(:build_classifier)
12
- attr_reader :training_instances
14
+ module Checks
15
+ private
13
16
 
14
- def train_with_instances(instances)
15
- ensure_class_attribute_assigned!(instances)
17
+ def ensure_class_attribute_assigned!(instances)
18
+ return if instances.class_attribute_defined?
16
19
 
17
- @training_instances = instances
18
- build_classifier(instances)
20
+ error = 'Class attribute is not assigned for Instances.'
21
+ hint = 'You can assign a class attribute with #class_attribute=.'
22
+ message = "#{error} #{hint}"
19
23
 
20
- self
21
- end
24
+ raise UnassignedClassError, message
25
+ end
22
26
 
23
- def cross_validate(folds: 3)
24
- ensure_trained_with_instances!
27
+ def ensure_trained_with_instances!
28
+ return unless training_instances.nil?
25
29
 
26
- evaluation = Evaluation.new(training_instances)
27
- random = Java::JavaUtil::Random.new(1)
30
+ error = 'Classifier is not trained with Instances.'
31
+ hint = 'You can set the training instances with #train_with_instances.'
32
+ message = "#{error} #{hint}"
28
33
 
29
- evaluation.cross_validate_model(self, training_instances, folds.to_i, random)
30
- evaluation
31
- end
34
+ raise UnassignedTrainingInstancesError, message
35
+ end
36
+ end
32
37
 
33
- def evaluate(test_instances)
34
- ensure_trained_with_instances!
35
- ensure_class_attribute_assigned!(test_instances)
38
+ module Transformers
39
+ private
36
40
 
37
- evaluation = Evaluation.new(training_instances)
38
- evaluation.evaluate_model(self, test_instances)
39
- evaluation
40
- end
41
- end
41
+ def classifiable_instance_from(instance_or_values)
42
+ attributes = training_instances.attributes
43
+ instances = Weka::Core::Instances.new(attributes: attributes)
42
44
 
43
- if instance_methods.include?(:classify_instance)
44
- def classify(instance_or_values)
45
- ensure_trained_with_instances!
45
+ class_attribute = training_instances.class_attribute
46
+ class_index = training_instances.class_index
47
+ instances.insert_attribute_at(class_attribute, class_index)
46
48
 
47
- instance = classifiable_instance_from(instance_or_values)
48
- index = classify_instance(instance)
49
+ instances.class_index = training_instances.class_index
50
+ instances.add_instance(instance_or_values)
49
51
 
50
- class_value_of_index(index)
51
- end
52
- end
52
+ instance = instances.first
53
+ instance.set_class_missing
54
+ instance
55
+ end
56
+ end
53
57
 
54
- if instance_methods.include?(:update_classifier)
55
- def add_training_instance(instance)
56
- training_instances.add(instance)
57
- update_classifier(instance)
58
+ module Buildable
59
+ java_import 'java.util.Random'
60
+ include Checks
58
61
 
59
- self
60
- end
62
+ attr_reader :training_instances
61
63
 
62
- def add_training_data(data)
63
- values = training_instances.internal_values_of(data)
64
- instance = Weka::Core::DenseInstance.new(values)
65
- add_training_instance(instance)
66
- end
67
- end
64
+ def train_with_instances(instances)
65
+ ensure_class_attribute_assigned!(instances)
68
66
 
69
- if instance_methods.include?(:distribution_for_instance)
70
- def distribution_for(instance_or_values)
71
- ensure_trained_with_instances!
67
+ @training_instances = instances
68
+ build_classifier(instances)
72
69
 
73
- instance = classifiable_instance_from(instance_or_values)
74
- distributions = distribution_for_instance(instance)
70
+ self
71
+ end
75
72
 
76
- class_distributions_from(distributions)
77
- end
78
- end
73
+ def cross_validate(folds: 3)
74
+ ensure_trained_with_instances!
79
75
 
80
- private
76
+ evaluation = Evaluation.new(training_instances)
77
+ random = Java::JavaUtil::Random.new(1)
81
78
 
82
- def ensure_class_attribute_assigned!(instances)
83
- return if instances.class_attribute_defined?
79
+ evaluation.cross_validate_model(
80
+ self,
81
+ training_instances,
82
+ folds.to_i,
83
+ random
84
+ )
84
85
 
85
- error = 'Class attribute is not assigned for Instances.'
86
- hint = 'You can assign a class attribute with #class_attribute=.'
87
- message = "#{error} #{hint}"
86
+ evaluation
87
+ end
88
88
 
89
- raise UnassignedClassError, message
90
- end
89
+ def evaluate(test_instances)
90
+ ensure_trained_with_instances!
91
+ ensure_class_attribute_assigned!(test_instances)
91
92
 
92
- def ensure_trained_with_instances!
93
- return unless training_instances.nil?
93
+ evaluation = Evaluation.new(training_instances)
94
+ evaluation.evaluate_model(self, test_instances)
95
+ evaluation
96
+ end
97
+ end
94
98
 
95
- error = 'Classifier is not trained with Instances.'
96
- hint = 'You can set the training instances with #train_with_instances.'
97
- message = "#{error} #{hint}"
99
+ module Classifiable
100
+ include Checks
101
+ include Transformers
98
102
 
99
- raise UnassignedTrainingInstancesError, message
100
- end
103
+ def classify(instance_or_values)
104
+ ensure_trained_with_instances!
101
105
 
102
- def classifiable_instance_from(instance_or_values)
103
- attributes = training_instances.attributes
104
- instances = Weka::Core::Instances.new(attributes: attributes)
106
+ instance = classifiable_instance_from(instance_or_values)
107
+ index = classify_instance(instance)
105
108
 
106
- class_attribute = training_instances.class_attribute
107
- class_index = training_instances.class_index
108
- instances.insert_attribute_at(class_attribute, class_index)
109
+ class_value_of_index(index)
110
+ end
109
111
 
110
- instances.class_index = training_instances.class_index
111
- instances.add_instance(instance_or_values)
112
+ private
112
113
 
113
- instance = instances.first
114
- instance.set_class_missing
115
- instance
116
- end
114
+ def class_value_of_index(index)
115
+ training_instances.class_attribute.value(index)
116
+ end
117
+ end
117
118
 
118
- def class_value_of_index(index)
119
- training_instances.class_attribute.value(index)
120
- end
119
+ module Updatable
120
+ def add_training_instance(instance)
121
+ training_instances.add(instance)
122
+ update_classifier(instance)
123
+
124
+ self
125
+ end
126
+
127
+ def add_training_data(data)
128
+ values = training_instances.internal_values_of(data)
129
+ instance = Weka::Core::DenseInstance.new(values)
130
+ add_training_instance(instance)
131
+ end
132
+ end
133
+
134
+ module Distributable
135
+ include Checks
136
+ include Transformers
137
+
138
+ def distribution_for(instance_or_values)
139
+ ensure_trained_with_instances!
140
+
141
+ instance = classifiable_instance_from(instance_or_values)
142
+ distributions = distribution_for_instance(instance)
143
+
144
+ class_distributions_from(distributions)
145
+ end
146
+
147
+ private
121
148
 
122
- def class_distributions_from(distributions)
123
- class_values = training_instances.class_attribute.values
149
+ def class_distributions_from(distributions)
150
+ class_values = training_instances.class_attribute.values
124
151
 
125
- distributions.each_with_index.reduce({}) do |result, (distribution, index)|
126
- class_value = class_values[index]
127
- result[class_value] = distribution
128
- result
129
- end
152
+ distributions.each_with_object({}).with_index do |(distribution, result), index|
153
+ class_value = class_values[index]
154
+ result[class_value] = distribution
155
+ result
130
156
  end
131
157
  end
132
158
  end
@@ -5,96 +5,122 @@ module Weka
5
5
  module Clusterers
6
6
  module Utils
7
7
  def self.included(base)
8
- base.class_eval do
9
- java_import 'java.util.Random'
10
-
11
- if instance_methods.include?(:build_clusterer)
12
- attr_reader :training_instances
13
-
14
- def train_with_instances(instances)
15
- @training_instances = instances
16
- build_clusterer(instances)
17
-
18
- self
19
- end
20
-
21
- if ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
22
- def cross_validate(folds: 3)
23
- ensure_trained_with_instances!
24
-
25
- ClusterEvaluation.cross_validate_model(
26
- self,
27
- training_instances,
28
- folds.to_i,
29
- Java::JavaUtil::Random.new(1)
30
- )
31
- end
32
- end
33
-
34
- def evaluate(test_instances)
35
- ensure_trained_with_instances!
36
-
37
- ClusterEvaluation.new.tap do |evaluation|
38
- evaluation.clusterer = self
39
- evaluation.evaluate_clusterer(test_instances)
40
- end
41
- end
42
- end
8
+ if base.instance_methods.include?(:build_clusterer)
9
+ base.include Buildable
10
+ base.include CrossValidatable if density_based?(base)
11
+ end
43
12
 
44
- if instance_methods.include?(:cluster_instance)
45
- def cluster(instance_or_values)
46
- ensure_trained_with_instances!
13
+ base.include Clusterable if base.instance_methods.include?(:cluster_instance)
14
+ base.include Updatable if base.instance_methods.include?(:update_clusterer)
15
+ base.include Distributable if base.instance_methods.include?(:distribution_for_instance)
16
+ end
47
17
 
48
- instance = clusterable_instance_from(instance_or_values)
49
- cluster_instance(instance)
50
- end
51
- end
18
+ def self.density_based?(base)
19
+ base.ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
20
+ end
52
21
 
53
- if instance_methods.include?(:update_clusterer)
54
- def add_training_instance(instance)
55
- training_instances.add(instance)
56
- update_clusterer(instance)
22
+ module Checks
23
+ private
57
24
 
58
- self
59
- end
25
+ def ensure_trained_with_instances!
26
+ return unless training_instances.nil?
60
27
 
61
- def add_training_data(data)
62
- values = training_instances.internal_values_of(data)
63
- instance = Weka::Core::DenseInstance.new(values)
64
- add_training_instance(instance)
65
- end
66
- end
28
+ error = 'Clusterer is not trained with Instances.'
29
+ hint = 'You can set the training instances with #train_with_instances.'
30
+ message = "#{error} #{hint}"
67
31
 
68
- if instance_methods.include?(:distribution_for_instance)
69
- def distribution_for(instance_or_values)
70
- ensure_trained_with_instances!
32
+ raise UnassignedTrainingInstancesError, message
33
+ end
34
+ end
71
35
 
72
- instance = clusterable_instance_from(instance_or_values)
73
- distribution_for_instance(instance).to_a
74
- end
75
- end
36
+ module Transformers
37
+ private
76
38
 
77
- private
39
+ def clusterable_instance_from(instance_or_values)
40
+ attributes = training_instances.attributes
41
+ instances = Weka::Core::Instances.new(attributes: attributes)
78
42
 
79
- def ensure_trained_with_instances!
80
- return unless training_instances.nil?
43
+ instances.add_instance(instance_or_values)
44
+ instances.first
45
+ end
46
+ end
81
47
 
82
- error = 'Clusterer is not trained with Instances.'
83
- hint = 'You can set the training instances with #train_with_instances.'
84
- message = "#{error} #{hint}"
48
+ module Buildable
49
+ include Checks
85
50
 
86
- raise UnassignedTrainingInstancesError, message
87
- end
51
+ attr_reader :training_instances
88
52
 
89
- def clusterable_instance_from(instance_or_values)
90
- attributes = training_instances.attributes
91
- instances = Weka::Core::Instances.new(attributes: attributes)
53
+ def train_with_instances(instances)
54
+ @training_instances = instances
55
+ build_clusterer(instances)
56
+
57
+ self
58
+ end
92
59
 
93
- instances.add_instance(instance_or_values)
94
- instances.first
60
+ def evaluate(test_instances)
61
+ ensure_trained_with_instances!
62
+
63
+ ClusterEvaluation.new.tap do |evaluation|
64
+ evaluation.clusterer = self
65
+ evaluation.evaluate_clusterer(test_instances)
95
66
  end
96
67
  end
97
68
  end
69
+
70
+ module CrossValidatable
71
+ java_import 'java.util.Random'
72
+ include Checks
73
+
74
+ def cross_validate(folds: 3)
75
+ ensure_trained_with_instances!
76
+
77
+ ClusterEvaluation.cross_validate_model(
78
+ self,
79
+ training_instances,
80
+ folds.to_i,
81
+ Java::JavaUtil::Random.new(1)
82
+ )
83
+ end
84
+ end
85
+
86
+ module Clusterable
87
+ include Checks
88
+ include Transformers
89
+
90
+ def cluster(instance_or_values)
91
+ ensure_trained_with_instances!
92
+
93
+ instance = clusterable_instance_from(instance_or_values)
94
+ cluster_instance(instance)
95
+ end
96
+ end
97
+
98
+ module Updatable
99
+ def add_training_instance(instance)
100
+ training_instances.add(instance)
101
+ update_clusterer(instance)
102
+
103
+ self
104
+ end
105
+
106
+ def add_training_data(data)
107
+ values = training_instances.internal_values_of(data)
108
+ instance = Weka::Core::DenseInstance.new(values)
109
+ add_training_instance(instance)
110
+ end
111
+ end
112
+
113
+ module Distributable
114
+ include Checks
115
+ include Transformers
116
+
117
+ def distribution_for(instance_or_values)
118
+ ensure_trained_with_instances!
119
+
120
+ instance = clusterable_instance_from(instance_or_values)
121
+ distribution_for_instance(instance).to_a
122
+ end
123
+ end
98
124
  end
99
125
  end
100
126
  end
@@ -2,45 +2,46 @@ module Weka
2
2
  module Concerns
3
3
  module Optionizable
4
4
  def self.included(base)
5
- base.extend(ClassMethods)
5
+ base.extend ClassMethods
6
+ base.include InstanceMethods
7
+ end
6
8
 
7
- base.class_eval do
8
- java_import 'weka.core.Utils'
9
+ module ClassMethods
10
+ def default_options
11
+ new.get_options.to_a.join(' ')
12
+ end
13
+ end
9
14
 
10
- def use_options(*single_options, **hash_options)
11
- joined_options = join_options(single_options, hash_options)
12
- options = Java::WekaCore::Utils.split_options(joined_options)
15
+ module InstanceMethods
16
+ java_import 'weka.core.Utils'
13
17
 
14
- set_options(options)
15
- @options = joined_options
16
- end
18
+ def use_options(*single_options, **hash_options)
19
+ joined_options = join_options(single_options, hash_options)
20
+ options = Java::WekaCore::Utils.split_options(joined_options)
17
21
 
18
- def options
19
- @options || self.class.default_options
20
- end
22
+ set_options(options)
23
+ @options = joined_options
24
+ end
21
25
 
22
- private
26
+ def options
27
+ @options || self.class.default_options
28
+ end
23
29
 
24
- def join_options(*single_options, **hash_options)
25
- [
26
- join_single_options(*single_options),
27
- join_hash_options(**hash_options)
28
- ].reject(&:empty?).join(' ')
29
- end
30
+ private
30
31
 
31
- def join_single_options(options)
32
- options.map { |option| "-#{option.to_s.sub(/^-/, '')}" }.join(' ')
33
- end
32
+ def join_options(*single_options, **hash_options)
33
+ [
34
+ join_single_options(*single_options),
35
+ join_hash_options(**hash_options)
36
+ ].reject(&:empty?).join(' ')
37
+ end
34
38
 
35
- def join_hash_options(options)
36
- options.map { |key, value| "-#{key} #{value}" }.join(' ')
37
- end
39
+ def join_single_options(options)
40
+ options.map { |option| "-#{option.to_s.sub(/^-/, '')}" }.join(' ')
38
41
  end
39
- end
40
42
 
41
- module ClassMethods
42
- def default_options
43
- new.get_options.to_a.join(' ')
43
+ def join_hash_options(options)
44
+ options.map { |key, value| "-#{key} #{value}" }.join(' ')
44
45
  end
45
46
  end
46
47
  end
@@ -3,12 +3,8 @@ require 'weka/core/serialization_helper'
3
3
  module Weka
4
4
  module Concerns
5
5
  module Serializable
6
- def self.included(base)
7
- base.class_eval do
8
- def serialize(filename)
9
- Weka::Core::SerializationHelper.write(filename, self)
10
- end
11
- end
6
+ def serialize(filename)
7
+ Weka::Core::SerializationHelper.write(filename, self)
12
8
  end
13
9
  end
14
10
  end
@@ -7,7 +7,7 @@ module Weka
7
7
  class Attribute
8
8
  include Weka::Concerns::Persistent
9
9
 
10
- TYPES = %i(numeric nominal string date).freeze
10
+ TYPES = %i[numeric nominal string date].freeze
11
11
 
12
12
  class << self
13
13
  def new_numeric(name)
@@ -57,25 +57,17 @@ module Weka
57
57
 
58
58
  attribute = attribute_at(index)
59
59
 
60
- if attribute.date?
61
- format_date(value, attribute.date_format)
62
- elsif attribute.numeric?
63
- value
64
- elsif attribute.nominal? || attribute.string?
65
- attribute.value(value)
66
- end
60
+ return format_date(value, attribute.date_format) if attribute.date?
61
+ return value if attribute.numeric?
62
+ return attribute.value(value) if attribute.nominal? || attribute.string?
67
63
  end
68
64
 
69
65
  def attribute_at(index)
70
- return attributes[index] unless dataset.class_attribute_defined?
66
+ return attributes[index] unless dataset.class_attribute_defined?
67
+ return class_attribute if dataset.class_index == index
68
+ return attributes[index - 1] if index > dataset.class_index
71
69
 
72
- if dataset.class_index == index
73
- class_attribute
74
- elsif index > dataset.class_index
75
- attributes[index - 1]
76
- else
77
- attributes[index]
78
- end
70
+ attributes[index]
79
71
  end
80
72
 
81
73
  def format_date(value, format)
@@ -1,3 +1,4 @@
1
+ require 'matrix'
1
2
  require 'weka/core/converters'
2
3
  require 'weka/core/loader'
3
4
  require 'weka/core/saver'
@@ -10,6 +11,7 @@ module Weka
10
11
  java_import 'weka.core.FastVector'
11
12
 
12
13
  class Instances
14
+ include Weka::Concerns::Persistent
13
15
  include Weka::Concerns::Serializable
14
16
 
15
17
  DEFAULT_RELATION_NAME = 'Instances'.freeze
@@ -38,7 +40,7 @@ module Weka
38
40
  end
39
41
  end
40
42
 
41
- def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: [], &block)
43
+ def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: [])
42
44
  attribute_list = FastVector.new
43
45
  attributes.each { |attribute| attribute_list.add_element(attribute) }
44
46
 
@@ -49,12 +51,18 @@ module Weka
49
51
  enumerate_instances.to_a
50
52
  end
51
53
 
52
- def attributes
53
- enumerate_attributes.to_a
54
+ def attributes(include_class_attribute: false)
55
+ attrs = enumerate_attributes.to_a
56
+
57
+ if include_class_attribute && class_attribute_defined?
58
+ attrs.insert(class_index, class_attribute)
59
+ end
60
+
61
+ attrs
54
62
  end
55
63
 
56
- def attribute_names
57
- attributes.map(&:name)
64
+ def attribute_names(include_class_attribute: false)
65
+ attributes(include_class_attribute: include_class_attribute).map(&:name)
58
66
  end
59
67
 
60
68
  def add_attributes(&block)
@@ -67,7 +75,7 @@ module Weka
67
75
  alias attributes_count num_attributes
68
76
  alias has_string_attribute? check_for_string_attributes
69
77
 
70
- ## Check if the instances has any attribute of the given type
78
+ # Check if the instances has any attribute of the given type
71
79
  # @param [String, Symbol, Integer] type type of the attribute to check
72
80
  # String and Symbol argument are converted to corresponding type
73
81
  # defined in Weka::Core::Attribute
@@ -189,6 +197,24 @@ module Weka
189
197
  class_index >= 0
190
198
  end
191
199
 
200
+ # Add new instance
201
+ # @param [Instance, Array, Hash] instance_or_values the attribute values
202
+ # of the instance to be added. If passing an array, the attribute values
203
+ # must be in the same order as the attributes defined in Instances.
204
+ # If passing a hash, the keys are the names of the attributes and their
205
+ # values are the corresponding attribute values.
206
+ #
207
+ # @example Passing Instance
208
+ # instances.add_instance(instance)
209
+ #
210
+ # @example Passing an array of attribute values
211
+ # attr_values = [attr1_value, attr2_value, attr3_value]
212
+ # instances.add_instance(attr_values)
213
+ #
214
+ # @example Passing a hash of attribute values.
215
+ # attr_values = { attr1_name: attr1_value, attr2_name: attr2_value }
216
+ # instances.add_instance(attr_values)
217
+ #
192
218
  def add_instance(instance_or_values, weight: 1.0)
193
219
  instance = instance_from(instance_or_values, weight: weight)
194
220
  add(instance)
@@ -198,10 +224,24 @@ module Weka
198
224
  data.each { |values| add_instance(values, weight: weight) }
199
225
  end
200
226
 
227
+ # Retrieve the internal floating point values used to represent
228
+ # the attributes.
229
+ #
230
+ # @param [Array, Hash] values the attribute values whose floating
231
+ # point representation should be retrieved.
232
+ #
233
+ # @return [Array, Hash] an array of the internal floating point
234
+ # representation if the input is an Array; a Hash otherwise.
201
235
  def internal_values_of(values)
202
- values.each_with_index.map do |value, index|
236
+ use_hash = values.is_a?(Hash)
237
+ values = attribute_values_from_hash(values) if use_hash
238
+
239
+ values = values.map.with_index do |value, index|
203
240
  attribute(index).internal_value_of(value)
204
241
  end
242
+
243
+ values = attribute_values_to_hash(values) if use_hash
244
+ values
205
245
  end
206
246
 
207
247
  def apply_filter(filter)
@@ -220,6 +260,13 @@ module Weka
220
260
  end
221
261
  end
222
262
 
263
+ # Get all instances' values as a Matrix.
264
+ #
265
+ # @return [Matrix] a Matrix holding each instance's values as a row.
266
+ def to_m
267
+ Matrix[*instances.map(&:values)]
268
+ end
269
+
223
270
  private
224
271
 
225
272
  def add_attribute(attribute)
@@ -227,7 +274,9 @@ module Weka
227
274
  end
228
275
 
229
276
  def ensure_attribute_defined!(name)
230
- return if attribute_names.include?(name.to_s)
277
+ if attribute_names(include_class_attribute: true).include?(name.to_s)
278
+ return
279
+ end
231
280
 
232
281
  error = "\"#{name}\" is not defined."
233
282
  hint = 'Only defined attributes can be used as class attribute!'
@@ -237,25 +286,36 @@ module Weka
237
286
  end
238
287
 
239
288
  def attribute_with_name(name)
240
- attributes.select { |attribute| attribute.name == name.to_s }.first
289
+ attributes(include_class_attribute: true).find do |attribute|
290
+ attribute.name == name.to_s
291
+ end
241
292
  end
242
293
 
294
+ # Wrap the attribute values for the instance to be added with
295
+ # an Instance object, if needed. The Instance object is
296
+ # assigned with the given weight.
297
+ #
298
+ # @param [Instance, Array, Hash] instance_or_values either the
299
+ # instance object to be added or the attribute values for it.
300
+ # For the latter case, it accepts an array or a hash.
301
+ #
302
+ # @param [Float] weight the weight for the Instance to be added
303
+ #
304
+ # @return [Instance] the object that contains the given
305
+ # attribute values.
243
306
  def instance_from(instance_or_values, weight:)
244
307
  if instance_or_values.is_a?(Java::WekaCore::Instance)
245
308
  instance_or_values.weight = weight
246
309
  instance_or_values
247
310
  else
311
+ if instance_or_values.is_a?(Hash)
312
+ instance_or_values = attribute_values_from_hash(instance_or_values)
313
+ end
314
+
248
315
  data = internal_values_of(instance_or_values)
249
316
 
250
- # string attribute has unlimited range of possible values.
251
- # Check the return index, if it is -1 then add the value to
252
- # the attribute before creating the instance
253
- data.map!.with_index do |value, index|
254
- if value == -1 && attribute(index).string?
255
- attribute(index).add_string_value(instance_or_values[index].to_s)
256
- else
257
- value
258
- end
317
+ if has_string_attribute?
318
+ data = check_string_attributes(data, instance_or_values)
259
319
  end
260
320
 
261
321
  DenseInstance.new(data, weight: weight)
@@ -266,8 +326,48 @@ module Weka
266
326
  return -1 unless Attribute::TYPES.include?(type.downcase.to_sym)
267
327
  Attribute.const_get(type.upcase)
268
328
  end
269
- end
270
329
 
271
- Java::WekaCore::Instances.__persistent__ = true
330
+ # Convert a hash whose keys are attribute names and values are attribute
331
+ # values into an array containing attribute values in the order
332
+ # of the Instances attributes.
333
+ #
334
+ # @param [Hash] hash a hash whose keys are attribute names and
335
+ # values are attribute values.
336
+ #
337
+ # @return [Array] an array containing attribute values in the
338
+ # correct order
339
+ def attribute_values_from_hash(hash)
340
+ names = attribute_names(include_class_attribute: true).map(&:to_sym)
341
+ hash.values_at(*names)
342
+ end
343
+
344
+ # Convert an array of attribute values in the same order as Instances
345
+ # attributes into a hash whose keys are attribute names and values
346
+ # are corresponding attribute values.
347
+ #
348
+ # @param [Array] values an array containing the attribute values
349
+ #
350
+ # @return [Hash] a hash as described above
351
+ def attribute_values_to_hash(values)
352
+ names = attribute_names(include_class_attribute: true).map(&:to_sym)
353
+
354
+ names.each_with_index.inject({}) do |hash, (name, index)|
355
+ hash.update(name => values[index])
356
+ end
357
+ end
358
+
359
+ def check_string_attributes(internal_values, attribute_values)
360
+ # string attribute has unlimited range of possible values.
361
+ # Check the return index, if it is -1 then add the value to
362
+ # the attribute before creating the instance
363
+ internal_values.map.with_index do |value, index|
364
+ if value == -1 && attribute(index).string?
365
+ attribute(index).add_string_value(attribute_values[index])
366
+ else
367
+ value
368
+ end
369
+ end
370
+ end
371
+ end
272
372
  end
273
373
  end
@@ -1,13 +1,9 @@
1
1
  module Weka
2
2
  module Filters
3
3
  module Utils
4
- def self.included(base)
5
- base.class_eval do
6
- def filter(instances)
7
- set_input_format(instances)
8
- Filter.use_filter(instances, self)
9
- end
10
- end
4
+ def filter(instances)
5
+ set_input_format(instances)
6
+ Filter.use_filter(instances, self)
11
7
  end
12
8
  end
13
9
  end
@@ -1,3 +1,3 @@
1
1
  module Weka
2
- VERSION = '0.4.0'.freeze
2
+ VERSION = '0.5.0'.freeze
3
3
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+
2
3
  lib = File.expand_path('../lib', __FILE__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
  require 'weka/version'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: java
6
6
  authors:
7
7
  - Paul Götze
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-22 00:00:00.000000000 Z
11
+ date: 2017-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -91,6 +91,7 @@ extra_rdoc_files: []
91
91
  files:
92
92
  - ".gitignore"
93
93
  - ".rspec"
94
+ - ".rubocop.yml"
94
95
  - ".travis.yml"
95
96
  - CODE_OF_CONDUCT.md
96
97
  - Gemfile
@@ -164,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
164
165
  version: '0'
165
166
  requirements: []
166
167
  rubyforge_project:
167
- rubygems_version: 2.6.6
168
+ rubygems_version: 2.6.11
168
169
  signing_key:
169
170
  specification_version: 4
170
171
  summary: Machine Learning & Data Mining with JRuby.