weka 0.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +15 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +4 -0
  7. data/Jarfile +1 -0
  8. data/Jarfile.lock +17 -0
  9. data/MIT-LICENSE.txt +19 -0
  10. data/README.md +687 -0
  11. data/Rakefile +21 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +7 -0
  14. data/lib/weka.rb +32 -0
  15. data/lib/weka/attribute_selection.rb +1 -0
  16. data/lib/weka/attribute_selection/attribute_selection.rb +11 -0
  17. data/lib/weka/attribute_selection/evaluator.rb +29 -0
  18. data/lib/weka/attribute_selection/search.rb +14 -0
  19. data/lib/weka/class_builder.rb +88 -0
  20. data/lib/weka/classifiers.rb +1 -0
  21. data/lib/weka/classifiers/bayes.rb +16 -0
  22. data/lib/weka/classifiers/evaluation.rb +37 -0
  23. data/lib/weka/classifiers/functions.rb +21 -0
  24. data/lib/weka/classifiers/lazy.rb +13 -0
  25. data/lib/weka/classifiers/meta.rb +29 -0
  26. data/lib/weka/classifiers/rules.rb +16 -0
  27. data/lib/weka/classifiers/trees.rb +18 -0
  28. data/lib/weka/classifiers/utils.rb +138 -0
  29. data/lib/weka/clusterers.rb +16 -0
  30. data/lib/weka/clusterers/cluster_evaluation.rb +14 -0
  31. data/lib/weka/clusterers/utils.rb +103 -0
  32. data/lib/weka/concerns.rb +18 -0
  33. data/lib/weka/concerns/buildable.rb +19 -0
  34. data/lib/weka/concerns/describable.rb +30 -0
  35. data/lib/weka/concerns/optionizable.rb +49 -0
  36. data/lib/weka/concerns/persistent.rb +16 -0
  37. data/lib/weka/core.rb +6 -0
  38. data/lib/weka/core/attribute.rb +24 -0
  39. data/lib/weka/core/converters.rb +17 -0
  40. data/lib/weka/core/dense_instance.rb +68 -0
  41. data/lib/weka/core/instances.rb +199 -0
  42. data/lib/weka/core/loader.rb +32 -0
  43. data/lib/weka/core/saver.rb +34 -0
  44. data/lib/weka/exceptions.rb +6 -0
  45. data/lib/weka/filters.rb +1 -0
  46. data/lib/weka/filters/filter.rb +9 -0
  47. data/lib/weka/filters/supervised/attribute.rb +26 -0
  48. data/lib/weka/filters/supervised/instance.rb +16 -0
  49. data/lib/weka/filters/unsupervised/attribute.rb +67 -0
  50. data/lib/weka/filters/unsupervised/instance.rb +25 -0
  51. data/lib/weka/filters/utils.rb +17 -0
  52. data/lib/weka/jars.rb +19 -0
  53. data/lib/weka/version.rb +3 -0
  54. data/weka.gemspec +32 -0
  55. metadata +183 -0
@@ -0,0 +1,16 @@
1
+ Weka.require_all :clusterers
2
+
3
+ require 'weka/class_builder'
4
+
5
module Weka
  # Namespace holding the clusterer classes exposed by this gem.
  module Clusterers
    include ClassBuilder

    # Clusterer class names handed to ClassBuilder's build_classes macro.
    build_classes(
      *%i[Canopy Cobweb EM FarthestFirst HierarchicalClusterer SimpleKMeans]
    )
  end
end
@@ -0,0 +1,14 @@
1
module Weka
  module Clusterers
    java_import 'weka.clusterers.ClusterEvaluation'

    # Reopens the imported Java class to add Ruby-style method names.
    class ClusterEvaluation
      # summary        -> cluster_results_to_string
      # clusters_count -> num_clusters
      alias_method :summary, :cluster_results_to_string
      alias_method :clusters_count, :num_clusters
    end

    # NOTE(review): __persistent__ is a JRuby proxy flag; presumably it keeps
    # Ruby-side state attached to the wrapped Java object - confirm against
    # the JRuby documentation.
    Java::WekaClusterers::ClusterEvaluation.__persistent__ = true
  end
end
@@ -0,0 +1,103 @@
1
+ require 'active_support/concern'
2
+ require 'weka/clusterers/cluster_evaluation'
3
+ require 'weka/core/instances'
4
+
5
module Weka
  module Clusterers
    # Mixin that adds Ruby-style helper methods to a clusterer class.
    #
    # The helpers are defined conditionally at include time: each group is
    # only added when the including class already provides the underlying
    # method (build_clusterer, cluster_instance, update_clusterer,
    # distribution_for_instance).
    module Utils
      extend ActiveSupport::Concern

      included do
        java_import 'java.util.Random'

        if instance_methods.include?(:build_clusterer)
          attr_reader :training_instances

          # Remembers +instances+ as the training data and builds the
          # clusterer from them.
          #
          # Returns self so that calls can be chained.
          def train_with_instances(instances)
            @training_instances = instances
            build_clusterer(instances)

            self
          end

          if ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
            # Cross-validates the clusterer on its training instances via
            # ClusterEvaluation.cross_validate_model.
            #
            # folds - number of folds, converted with to_i (default: 3).
            #
            # The random generator is always seeded with 1.
            # Raises UnassignedTrainingInstancesError if the clusterer has
            # not been trained.
            def cross_validate(folds: 3)
              ensure_trained_with_instances!

              ClusterEvaluation.cross_validate_model(
                self,
                training_instances,
                folds.to_i,
                Java::JavaUtil::Random.new(1)
              )
            end
          end

          # Evaluates the clusterer on +test_instances+.
          #
          # Returns the ClusterEvaluation object used for the evaluation.
          # Raises UnassignedTrainingInstancesError if the clusterer has not
          # been trained.
          def evaluate(test_instances)
            ensure_trained_with_instances!

            ClusterEvaluation.new.tap do |evaluation|
              evaluation.clusterer = self
              evaluation.evaluate_clusterer(test_instances)
            end
          end
        end

        if instance_methods.include?(:cluster_instance)
          # Clusters a single instance.
          #
          # instance_or_values - anything Instances#add_instance accepts.
          #
          # Returns the result of cluster_instance.
          # Raises UnassignedTrainingInstancesError if the clusterer has not
          # been trained.
          def cluster(instance_or_values)
            ensure_trained_with_instances!

            instance = clusterable_instance_from(instance_or_values)
            cluster_instance(instance)
          end
        end

        if instance_methods.include?(:update_clusterer)
          # Appends +instance+ to the training instances and updates the
          # clusterer with it.
          #
          # Returns self.
          # Raises UnassignedTrainingInstancesError if the clusterer has not
          # been trained; without this guard the call would fail with a
          # NoMethodError on nil instead.
          def add_training_instance(instance)
            ensure_trained_with_instances!

            training_instances.add(instance)
            update_clusterer(instance)

            self
          end

          # Converts +data+ (a list of raw values) into a DenseInstance
          # using the training instances' attributes and adds it via
          # add_training_instance.
          #
          # Returns self.
          # Raises UnassignedTrainingInstancesError if the clusterer has not
          # been trained.
          def add_training_data(data)
            ensure_trained_with_instances!

            values = training_instances.internal_values_of(data)
            instance = Weka::Core::DenseInstance.new(values)
            add_training_instance(instance)
          end
        end

        if instance_methods.include?(:distribution_for_instance)
          # Returns the result of distribution_for_instance for the given
          # instance (or raw values) as a Ruby Array.
          #
          # Raises UnassignedTrainingInstancesError if the clusterer has not
          # been trained.
          def distribution_for(instance_or_values)
            ensure_trained_with_instances!

            instance = clusterable_instance_from(instance_or_values)
            distribution_for_instance(instance).to_a
          end
        end

        private

        # Raises UnassignedTrainingInstancesError unless training instances
        # have been assigned.
        def ensure_trained_with_instances!
          return unless training_instances.nil?

          error = 'Clusterer is not trained with Instances.'
          hint = 'You can set the training instances with #train_with_instances.'
          message = "#{error} #{hint}"

          raise UnassignedTrainingInstancesError, message
        end

        # Builds a throw-away Instances object with the training
        # attributes, adds the given instance or values to it and returns
        # the resulting first (and only) instance.
        def clusterable_instance_from(instance_or_values)
          attributes = training_instances.attributes
          instances = Weka::Core::Instances.new(attributes: attributes)

          instances.add_instance(instance_or_values)
          instances.first
        end
      end
    end
  end
end
103
+
@@ -0,0 +1,18 @@
1
+ require 'active_support/concern'
2
+ require 'weka/concerns/buildable'
3
+ require 'weka/concerns/describable'
4
+ require 'weka/concerns/optionizable'
5
+ require 'weka/concerns/persistent'
6
+
7
module Weka
  # Umbrella concern: including it mixes Buildable, Describable,
  # Optionizable and Persistent into the including class, in that order.
  module Concerns
    extend ActiveSupport::Concern

    included do
      [Buildable, Describable, Optionizable, Persistent].each do |concern|
        include concern
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'active_support/concern'
2
+
3
module Weka
  module Concerns
    # Adds a +build+ class method for block-style construction.
    module Buildable
      extend ActiveSupport::Concern

      module ClassMethods
        # Creates an instance with +new+ (no arguments). When a block is
        # given it is evaluated in the context of that instance.
        #
        # Returns the new instance.
        def build(&block)
          new.tap do |built|
            built.instance_eval(&block) if block
          end
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'active_support/concern'
2
+
3
module Weka
  module Concerns
    # Adds class-level helpers that describe a class and its options.
    module Describable
      extend ActiveSupport::Concern

      module ClassMethods
        # Returns global_info of a freshly created instance.
        def description
          new.global_info
        end

        # Returns one line per entry of list_options (taken from a freshly
        # created instance), joined with newlines.
        def options
          lines = new.list_options.map { |entry| description_for_option(entry) }
          lines.join("\n")
        end

        private

        # Formats a single option as "<synopsis>\t<stripped description>".
        def description_for_option(option)
          synopsis = option.synopsis
          details = option.description.strip

          "#{synopsis}\t#{details}"
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'active_support/concern'
2
+
3
module Weka
  module Concerns
    # Adds Ruby-style option handling on top of set_options/get_options.
    module Optionizable
      extend ActiveSupport::Concern

      included do
        java_import 'weka.core.Utils'

        # Builds one option string from the given flags and key/value
        # pairs, applies it with set_options and remembers it.
        #
        # single_options - flags; a leading "-" is optional
        #                  (e.g. :B or '-B').
        # hash_options   - key/value options, rendered as "-key value".
        #
        # Returns the joined option string.
        def use_options(*single_options, **hash_options)
          joined_options = join_options(single_options, hash_options)
          options = Java::WekaCore::Utils.split_options(joined_options)

          set_options(options)
          @options = joined_options
        end

        # Returns the option string last set via use_options, or the
        # class' default options when none has been set.
        def options
          @options || self.class.default_options
        end

        private

        # Joins flags and key/value options into one space-separated
        # string, skipping whichever part is empty.
        #
        # Both arguments are plain positional parameters (an Array and a
        # Hash). The previous splat/double-splat signature depended on
        # implicit Hash-to-keyword conversion, which Ruby 3 removed.
        def join_options(single_options, hash_options)
          [
            join_single_options(single_options),
            join_hash_options(hash_options)
          ].reject(&:empty?).join(' ')
        end

        # Renders every flag with exactly one leading dash.
        def join_single_options(options)
          options.map { |option| "-#{option.to_s.sub(/\A-/, '')}" }.join(' ')
        end

        # Renders every pair as "-key value".
        def join_hash_options(options)
          options.map { |key, value| "-#{key} #{value}" }.join(' ')
        end
      end

      module ClassMethods
        # Returns get_options of a freshly created instance, joined with
        # spaces.
        def default_options
          new.get_options.to_a.join(' ')
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'active_support/concern'
2
+
3
module Weka
  module Concerns
    # Switches on the __persistent__ flag of the including class when the
    # class offers that setter.
    module Persistent
      extend ActiveSupport::Concern

      included do
        self.__persistent__ = true if respond_to?(:__persistent__=)
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ require 'weka/core/converters'
2
+ require 'weka/core/loader'
3
+ require 'weka/core/saver'
4
+ require 'weka/core/attribute'
5
+ require 'weka/core/dense_instance'
6
+ require 'weka/core/instances'
@@ -0,0 +1,24 @@
1
module Weka
  module Core
    java_import 'weka.core.Attribute'

    # Reopens the imported Java class to add Ruby-style helpers.
    class Attribute
      # Returns the attribute's enumerated values as a Ruby Array.
      def values
        enumerate_values.to_a
      end

      # Converts a Ruby +value+ into the number that represents it for
      # this attribute.
      #
      # The order of the branches is important here, because a date is
      # also a numeric.
      #
      # Returns the parsed date for date attributes, a Float for numeric
      # attributes, the value's index for nominal attributes and the index
      # returned by add_string_value for string attributes (previously
      # string attributes fell through and produced nil). Returns nil for
      # any other attribute type.
      def internal_value_of(value)
        if date?
          parse_date(value.to_s)
        elsif numeric?
          value.to_f
        elsif nominal?
          index_of_value(value.to_s)
        elsif string?
          # Registers the string with the attribute; the returned index is
          # its internal value.
          add_string_value(value.to_s)
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module Core
    # Namespace holding the loader and saver classes for ARFF, CSV and JSON.
    module Converters
      include ClassBuilder

      # Converter class names handed to ClassBuilder's build_classes macro;
      # the include_concerns option is switched off for them.
      build_classes(
        *%i[ArffLoader ArffSaver CSVLoader CSVSaver JSONLoader JSONSaver],
        include_concerns: false
      )
    end
  end
end
@@ -0,0 +1,68 @@
1
module Weka
  module Core
    java_import 'weka.core.DenseInstance'

    # Reopens the imported Java class to add Ruby-style helpers.
    class DenseInstance
      java_import 'java.util.Date'
      java_import 'java.text.SimpleDateFormat'

      # data   - list of internal values; converted to a Java double array.
      # weight - the instance's weight (default: 1.0).
      def initialize(data, weight: 1.0)
        super(weight, data.to_java(:double))
      end

      # Returns the result of enumerate_attributes as a Ruby Array.
      def attributes
        enumerate_attributes.to_a
      end

      # Yields every enumerated attribute. Without a block the attribute
      # enumeration itself is returned.
      def each_attribute
        if block_given?
          enumerate_attributes.each { |attribute| yield(attribute) }
        else
          enumerate_attributes
        end
      end

      # Yields every enumerated attribute together with its position in
      # the enumeration. Without a block nothing is yielded.
      def each_attribute_with_index
        enumerate_attributes.each_with_index do |attribute, index|
          yield(attribute, index) if block_given?
        end
      end

      # Returns the instance's values in their external representation:
      # formatted date strings for date attributes, the raw number for
      # numeric attributes and the stored label for nominal and string
      # attributes (string attributes previously produced nil).
      #
      # The date check has to come first, because a date is also a numeric.
      def to_a
        to_double_array.each_with_index.map do |value, index|
          attribute = attribute_at(index)

          if attribute.date?
            format_date(value, attribute.date_format)
          elsif attribute.numeric?
            value
          elsif attribute.nominal? || attribute.string?
            attribute.value(value)
          end
        end
      end

      alias :values :to_a
      alias :values_count :num_values

      private

      # Returns the attribute that belongs to the value at +index+.
      #
      # +attributes+ is used as if it did not contain the class attribute,
      # so positions after the class index are shifted by one and the
      # class index itself maps to class_attribute.
      def attribute_at(index)
        return attributes[index] unless dataset.class_attribute_defined?

        if dataset.class_index == index
          class_attribute
        elsif index > dataset.class_index
          attributes[index - 1]
        else
          attributes[index]
        end
      end

      # Formats the internal date +value+ with the given SimpleDateFormat
      # pattern.
      def format_date(value, format)
        formatter = SimpleDateFormat.new(format)
        formatter.format(Date.new(value))
      end
    end
  end
end
@@ -0,0 +1,199 @@
1
+ require 'weka/core/converters'
2
+ require 'weka/core/loader'
3
+ require 'weka/core/saver'
4
+ require 'weka/core/dense_instance'
5
+
6
module Weka
  module Core
    java_import 'weka.core.Instances'
    java_import 'weka.core.FastVector'

    # Reopens the imported Java class to add a Ruby-style API for building,
    # iterating, loading and saving datasets.
    class Instances
      DEFAULT_RELATION_NAME = 'Instances'

      class << self
        # Loads instances from an ARFF file via Loader.
        def from_arff(file)
          Loader.load_arff(file)
        end

        # Loads instances from a CSV file via Loader.
        def from_csv(file)
          Loader.load_csv(file)
        end

        # Loads instances from a JSON file via Loader.
        def from_json(file)
          Loader.load_json(file)
        end
      end

      # relation_name - name of the relation (converted with to_s).
      # attributes    - attributes the dataset starts with.
      #
      # A block is accepted for interface compatibility but is not
      # evaluated here; use add_attributes / with_attributes for that.
      def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: [], &block)
        attribute_list = FastVector.new
        attributes.each { |attribute| attribute_list.add_element(attribute) }

        super(relation_name.to_s, attribute_list, 0)
      end

      # Returns the result of enumerate_instances as a Ruby Array.
      def instances
        enumerate_instances.to_a
      end

      # Returns the result of enumerate_attributes as a Ruby Array.
      # NOTE(review): Weka's attribute enumeration leaves out the class
      # attribute when one is set - confirm against the Weka Javadoc.
      def attributes
        enumerate_attributes.to_a
      end

      # Returns the names of the attributes returned by #attributes.
      def attribute_names
        attributes.map(&:name)
      end

      # Evaluates the block (if any) in the context of this object, so
      # that numeric/nominal/string/date can be called without a receiver.
      #
      # Returns self.
      def add_attributes(&block)
        instance_eval(&block) if block
        self
      end

      alias :with_attributes :add_attributes
      alias :instances_count :num_instances
      alias :attributes_count :num_attributes

      # Yields every instance. Without a block the instance enumeration
      # itself is returned.
      def each
        if block_given?
          enumerate_instances.each { |instance| yield(instance) }
        else
          enumerate_instances
        end
      end

      # Yields every instance together with its index. Without a block
      # nothing is yielded.
      def each_with_index
        enumerate_instances.each_with_index do |instance, index|
          yield(instance, index) if block_given?
        end
      end

      # Yields every enumerated attribute. Without a block the attribute
      # enumeration itself is returned.
      def each_attribute
        if block_given?
          enumerate_attributes.each { |attribute| yield(attribute) }
        else
          enumerate_attributes
        end
      end

      # Yields every enumerated attribute together with its position in
      # the enumeration. Without a block nothing is yielded.
      def each_attribute_with_index
        enumerate_attributes.each_with_index do |attribute, index|
          yield(attribute, index) if block_given?
        end
      end

      # Saves the instances to an ARFF file via Saver.
      def to_arff(file)
        Saver.save_arff(file: file, instances: self)
      end

      # Saves the instances to a CSV file via Saver.
      def to_csv(file)
        Saver.save_csv(file: file, instances: self)
      end

      # Saves the instances to a JSON file via Saver.
      def to_json(file)
        Saver.save_json(file: file, instances: self)
      end

      # Appends a numeric attribute called +name+. With
      # class_attribute: true it also becomes the class attribute.
      def numeric(name, class_attribute: false)
        attribute = Attribute.new(name.to_s)
        add_attribute(attribute)
        self.class_attribute = name if class_attribute
      end

      # Appends a nominal attribute called +name+ with the given values
      # (each converted with to_s). With class_attribute: true it also
      # becomes the class attribute.
      def nominal(name, values:, class_attribute: false)
        attribute = Attribute.new(name.to_s, Array(values).map(&:to_s))
        add_attribute(attribute)
        self.class_attribute = name if class_attribute
      end

      # Appends an attribute called +name+ that is created with an empty
      # list of values. With class_attribute: true it also becomes the
      # class attribute.
      # NOTE(review): Weka's Javadoc describes a null value list, not an
      # empty one, as the marker for string attributes - verify that this
      # really yields a string attribute.
      def string(name, class_attribute: false)
        attribute = Attribute.new(name.to_s, [])
        add_attribute(attribute)
        self.class_attribute = name if class_attribute
      end

      # Appends a date attribute called +name+ using the given date
      # format pattern. With class_attribute: true it also becomes the
      # class attribute.
      def date(name, format: 'yyyy-MM-dd HH:mm', class_attribute: false)
        attribute = Attribute.new(name.to_s, format)
        add_attribute(attribute)
        self.class_attribute = name if class_attribute
      end

      # Makes the attribute called +name+ the class attribute; nil resets
      # the class attribute.
      #
      # Raises ArgumentError if no attribute with that name exists.
      def class_attribute=(name)
        if name.nil?
          reset_class_attribute
        else
          ensure_attribute_defined!(name)
          setClass(attribute_with_name(name))
        end
      end

      alias :add_numeric_attribute :numeric
      alias :add_string_attribute :string
      alias :add_nominal_attribute :nominal
      alias :add_date_attribute :date

      # Returns the class attribute, or nil when none is set.
      def class_attribute
        classAttribute if class_attribute_defined?
      end

      # Unsets the class attribute by setting the class index to -1.
      def reset_class_attribute
        set_class_index(-1)
      end

      # Returns true when the class index is not negative.
      def class_attribute_defined?
        class_index >= 0
      end

      # Adds one instance.
      #
      # instance_or_values - a Weka instance or a list of raw values.
      # weight             - weight assigned to the instance (default 1.0).
      def add_instance(instance_or_values, weight: 1.0)
        instance = instance_from(instance_or_values, weight: weight)
        add(instance)
      end

      # Adds every entry of +data+ with add_instance, all using the same
      # weight.
      def add_instances(data, weight: 1.0)
        data.each { |values| add_instance(values, weight: weight) }
      end

      # Converts a list of raw values into internal values, using the
      # attribute at the same index for each value.
      def internal_values_of(values)
        values.each_with_index.map do |value, index|
          attribute(index).internal_value_of(value)
        end
      end

      # Returns the result of running +filter+ on these instances.
      def apply_filter(filter)
        filter.filter(self)
      end

      private

      # Appends +attribute+ after all existing attributes.
      #
      # num_attributes is used rather than attributes.count because the
      # latter comes from the attribute enumeration, which would place the
      # new attribute before the last one once a class attribute is set.
      def add_attribute(attribute)
        insert_attribute_at(attribute, num_attributes)
      end

      # Raises ArgumentError unless an attribute called +name+ exists.
      def ensure_attribute_defined!(name)
        return if attribute_with_name(name)

        error = "\"#{name}\" is not defined."
        hint = 'Only defined attributes can be used as class attribute!'
        message = "#{error} #{hint}"

        raise ArgumentError, message
      end

      # Returns the attribute called +name+ or nil. The lookup goes
      # through Weka's own by-name accessor so that the current class
      # attribute is found as well.
      def attribute_with_name(name)
        attribute(name.to_s)
      end

      # Returns a Weka instance for the given input. An existing instance
      # gets +weight+ assigned and is returned as is; a list of raw values
      # is converted into a new DenseInstance.
      def instance_from(instance_or_values, weight:)
        if instance_or_values.kind_of?(Java::WekaCore::Instance)
          instance_or_values.weight = weight
          instance_or_values
        else
          data = internal_values_of(instance_or_values)
          DenseInstance.new(data, weight: weight)
        end
      end
    end

    Java::WekaCore::Instances.__persistent__ = true
  end
end