weka 0.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +15 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +4 -0
  7. data/Jarfile +1 -0
  8. data/Jarfile.lock +17 -0
  9. data/MIT-LICENSE.txt +19 -0
  10. data/README.md +687 -0
  11. data/Rakefile +21 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +7 -0
  14. data/lib/weka.rb +32 -0
  15. data/lib/weka/attribute_selection.rb +1 -0
  16. data/lib/weka/attribute_selection/attribute_selection.rb +11 -0
  17. data/lib/weka/attribute_selection/evaluator.rb +29 -0
  18. data/lib/weka/attribute_selection/search.rb +14 -0
  19. data/lib/weka/class_builder.rb +88 -0
  20. data/lib/weka/classifiers.rb +1 -0
  21. data/lib/weka/classifiers/bayes.rb +16 -0
  22. data/lib/weka/classifiers/evaluation.rb +37 -0
  23. data/lib/weka/classifiers/functions.rb +21 -0
  24. data/lib/weka/classifiers/lazy.rb +13 -0
  25. data/lib/weka/classifiers/meta.rb +29 -0
  26. data/lib/weka/classifiers/rules.rb +16 -0
  27. data/lib/weka/classifiers/trees.rb +18 -0
  28. data/lib/weka/classifiers/utils.rb +138 -0
  29. data/lib/weka/clusterers.rb +16 -0
  30. data/lib/weka/clusterers/cluster_evaluation.rb +14 -0
  31. data/lib/weka/clusterers/utils.rb +103 -0
  32. data/lib/weka/concerns.rb +18 -0
  33. data/lib/weka/concerns/buildable.rb +19 -0
  34. data/lib/weka/concerns/describable.rb +30 -0
  35. data/lib/weka/concerns/optionizable.rb +49 -0
  36. data/lib/weka/concerns/persistent.rb +16 -0
  37. data/lib/weka/core.rb +6 -0
  38. data/lib/weka/core/attribute.rb +24 -0
  39. data/lib/weka/core/converters.rb +17 -0
  40. data/lib/weka/core/dense_instance.rb +68 -0
  41. data/lib/weka/core/instances.rb +199 -0
  42. data/lib/weka/core/loader.rb +32 -0
  43. data/lib/weka/core/saver.rb +34 -0
  44. data/lib/weka/exceptions.rb +6 -0
  45. data/lib/weka/filters.rb +1 -0
  46. data/lib/weka/filters/filter.rb +9 -0
  47. data/lib/weka/filters/supervised/attribute.rb +26 -0
  48. data/lib/weka/filters/supervised/instance.rb +16 -0
  49. data/lib/weka/filters/unsupervised/attribute.rb +67 -0
  50. data/lib/weka/filters/unsupervised/instance.rb +25 -0
  51. data/lib/weka/filters/utils.rb +17 -0
  52. data/lib/weka/jars.rb +19 -0
  53. data/lib/weka/version.rb +3 -0
  54. data/weka.gemspec +32 -0
  55. metadata +183 -0
@@ -0,0 +1,16 @@
1
+ Weka.require_all :clusterers
2
+
3
+ require 'weka/class_builder'
4
+
5
module Weka
  # Namespace for the Ruby-side clusterer classes.
  module Clusterers
    include ClassBuilder

    # Names of the clusterers handed to build_classes
    # (presumably supplied by ClassBuilder -- verify against class_builder.rb).
    build_classes(
      *%i[
        Canopy
        Cobweb
        EM
        FarthestFirst
        HierarchicalClusterer
        SimpleKMeans
      ]
    )
  end
end
@@ -0,0 +1,14 @@
1
module Weka
  module Clusterers
    java_import 'weka.clusterers.ClusterEvaluation'

    # Reopens the imported ClusterEvaluation class to expose shorter,
    # Ruby-flavoured names for two of its methods.
    class ClusterEvaluation
      # #summary is another name for #cluster_results_to_string.
      alias_method :summary, :cluster_results_to_string

      # #clusters_count is another name for #num_clusters.
      alias_method :clusters_count, :num_clusters
    end

    # Flag the Java proxy class as persistent so the additions made above
    # are kept on it.
    Java::WekaClusterers::ClusterEvaluation.__persistent__ = true
  end
end
@@ -0,0 +1,103 @@
1
+ require 'active_support/concern'
2
+ require 'weka/clusterers/cluster_evaluation'
3
+ require 'weka/core/instances'
4
+
5
module Weka
  module Clusterers
    # Mixin that adds training, evaluation and clustering helpers to a
    # clusterer class. Each helper is only defined when the including class
    # offers the underlying method (build_clusterer, cluster_instance,
    # update_clusterer, distribution_for_instance).
    module Utils
      extend ActiveSupport::Concern

      included do
        java_import 'java.util.Random'

        if instance_methods.include?(:build_clusterer)
          attr_reader :training_instances

          # Remembers the given instances as training data and builds the
          # clusterer from them.
          #
          # @param instances the training instances
          # @return [self]
          def train_with_instances(instances)
            @training_instances = instances
            build_clusterer(instances)

            self
          end

          if ancestors.include?(Java::WekaClusterers::DensityBasedClusterer)
            # Cross-validates the clusterer on its training instances using
            # a Random seeded with 1.
            #
            # @param folds number of folds, converted with #to_i (default 3)
            # @raise [UnassignedTrainingInstancesError] when not trained
            def cross_validate(folds: 3)
              ensure_trained_with_instances!

              ClusterEvaluation.cross_validate_model(
                self,
                training_instances,
                folds.to_i,
                Java::JavaUtil::Random.new(1)
              )
            end
          end

          # Evaluates the clusterer against the given test instances.
          #
          # @param test_instances the instances to evaluate on
          # @return [ClusterEvaluation] the populated evaluation object
          # @raise [UnassignedTrainingInstancesError] when not trained
          def evaluate(test_instances)
            ensure_trained_with_instances!

            ClusterEvaluation.new.tap do |evaluation|
              evaluation.clusterer = self
              evaluation.evaluate_clusterer(test_instances)
            end
          end
        end

        if instance_methods.include?(:cluster_instance)
          # Assigns a single instance (or a list of raw values) to a cluster.
          #
          # @param instance_or_values an instance or its attribute values
          # @return the result of cluster_instance
          # @raise [UnassignedTrainingInstancesError] when not trained
          def cluster(instance_or_values)
            ensure_trained_with_instances!

            instance = clusterable_instance_from(instance_or_values)
            cluster_instance(instance)
          end
        end

        if instance_methods.include?(:update_clusterer)
          # Appends an instance to the training data and updates the
          # clusterer with it.
          #
          # @param instance the instance to add
          # @return [self]
          # @raise [UnassignedTrainingInstancesError] when not trained
          def add_training_instance(instance)
            # Guard like the other public methods so an untrained clusterer
            # reports a descriptive error instead of failing on nil.
            ensure_trained_with_instances!

            training_instances.add(instance)
            update_clusterer(instance)

            self
          end

          # Converts raw values into a DenseInstance and adds it as
          # training instance.
          #
          # @param data the attribute values of the new instance
          # @return [self]
          # @raise [UnassignedTrainingInstancesError] when not trained
          def add_training_data(data)
            # The training instances are needed for the value conversion,
            # so check for them before touching them.
            ensure_trained_with_instances!

            values   = training_instances.internal_values_of(data)
            instance = Weka::Core::DenseInstance.new(values)
            add_training_instance(instance)
          end
        end

        if instance_methods.include?(:distribution_for_instance)
          # Returns the result of distribution_for_instance for a single
          # instance (or a list of raw values) as a Ruby array.
          #
          # @param instance_or_values an instance or its attribute values
          # @return [Array]
          # @raise [UnassignedTrainingInstancesError] when not trained
          def distribution_for(instance_or_values)
            ensure_trained_with_instances!

            instance = clusterable_instance_from(instance_or_values)
            distribution_for_instance(instance).to_a
          end
        end

        private

        # Raises unless training instances have been assigned.
        def ensure_trained_with_instances!
          return unless training_instances.nil?

          error   = 'Clusterer is not trained with Instances.'
          hint    = 'You can set the training instances with #train_with_instances.'
          message = "#{error} #{hint}"

          raise UnassignedTrainingInstancesError, message
        end

        # Builds a throw-away Instances object with the attributes of the
        # training data, adds the given instance/values to it and returns
        # the resulting first instance.
        def clusterable_instance_from(instance_or_values)
          attributes = training_instances.attributes
          instances  = Weka::Core::Instances.new(attributes: attributes)

          instances.add_instance(instance_or_values)
          instances.first
        end
      end
    end
  end
end
103
+
@@ -0,0 +1,18 @@
1
+ require 'active_support/concern'
2
+ require 'weka/concerns/buildable'
3
+ require 'weka/concerns/describable'
4
+ require 'weka/concerns/optionizable'
5
+ require 'weka/concerns/persistent'
6
+
7
module Weka
  # Umbrella concern: including it pulls in all individual concerns.
  module Concerns
    extend ActiveSupport::Concern

    included do
      # Included one by one, in this order, so the resulting ancestor chain
      # matches four separate include statements.
      [Buildable, Describable, Optionizable, Persistent].each do |concern|
        include concern
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'active_support/concern'
2
+
3
module Weka
  module Concerns
    # Adds a block-based constructor to the including class.
    module Buildable
      extend ActiveSupport::Concern

      module ClassMethods
        # Creates a new object and, when a block is passed, evaluates the
        # block in the context of that object.
        #
        # @return the newly created object
        def build(&block)
          new.tap do |built|
            built.instance_eval(&block) if block
          end
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'active_support/concern'
2
+
3
module Weka
  module Concerns
    # Adds class-level helpers that describe the including class and its
    # options.
    module Describable
      extend ActiveSupport::Concern

      module ClassMethods
        # Returns global_info of a freshly created object.
        def description
          new.global_info
        end

        # Returns one line per entry of list_options (taken from a freshly
        # created object), each made of synopsis and description separated
        # by a tab.
        #
        # @return [String] the lines joined with newlines
        def options
          new.list_options
             .map { |option| description_for_option(option) }
             .join("\n")
        end

        private

        # Formats a single option as "<synopsis>\t<stripped description>".
        def description_for_option(option)
          synopsis = option.synopsis
          details  = option.description.strip

          "#{synopsis}\t#{details}"
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'active_support/concern'
2
+
3
module Weka
  module Concerns
    # Adds option handling to the including class: options can be set from
    # Ruby values and read back as a single option string.
    module Optionizable
      extend ActiveSupport::Concern

      included do
        java_import "weka.core.Utils"

        # Sets the options of the object.
        #
        # @param single_options flags without a value, e.g. :I or '-I';
        #   a leading dash is optional
        # @param hash_options options with a value, e.g. M: 3, rendered as
        #   "-M 3"
        # @return [String] the joined option string that was applied
        #
        # @example
        #   use_options(:I, '-D', M: 3) # applies "-I -D -M 3"
        def use_options(*single_options, **hash_options)
          joined_options = join_options(single_options, hash_options)
          options        = Java::WekaCore::Utils.split_options(joined_options)

          set_options(options)
          @options = joined_options
        end

        # Returns the option string set via #use_options or, when none was
        # set, the default options of the class.
        def options
          @options || self.class.default_options
        end

        private

        # Joins flags and key/value options into one option string.
        #
        # Both collections are taken as plain positional arguments. Passing
        # them through splat/double-splat parameters relied on the implicit
        # Hash-to-keyword conversion, which Ruby 3 no longer performs.
        #
        # @param single_options [Array] flags without a value
        # @param hash_options [Hash] options with a value
        # @return [String]
        def join_options(single_options, hash_options)
          [
            join_single_options(single_options),
            join_hash_options(hash_options)
          ].reject(&:empty?).join(' ')
        end

        # Renders each flag with exactly one leading dash.
        def join_single_options(options)
          # \A anchors at the start of the string, so only a leading dash
          # is stripped before the dash is re-added.
          options.map { |option| "-#{option.to_s.sub(/\A-/, '')}" }.join(' ')
        end

        # Renders each key/value pair as "-key value".
        def join_hash_options(options)
          options.map { |key, value| "-#{key} #{value}" }.join(' ')
        end
      end

      module ClassMethods
        # Returns get_options of a freshly created object, joined with
        # spaces.
        def default_options
          new.get_options.to_a.join(' ')
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'active_support/concern'
2
+
3
module Weka
  module Concerns
    # Marks the including class as persistent when it supports that flag.
    module Persistent
      extend ActiveSupport::Concern

      included do
        # Only classes that respond to the setter get the flag.
        self.__persistent__ = true if respond_to?(:__persistent__=)
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ require 'weka/core/converters'
2
+ require 'weka/core/loader'
3
+ require 'weka/core/saver'
4
+ require 'weka/core/attribute'
5
+ require 'weka/core/dense_instance'
6
+ require 'weka/core/instances'
@@ -0,0 +1,24 @@
1
module Weka
  module Core
    java_import "weka.core.Attribute"

    # Reopens the imported Attribute class to add Ruby conveniences.
    class Attribute
      # Returns the values of the attribute as a Ruby array.
      def values
        enumerate_values.to_a
      end

      # Converts a Ruby value into the numeric value stored for this
      # attribute.
      #
      # Missing values (nil, the ARFF marker '?', or NaN) are returned as
      # NaN, the value weka.core.Utils.missingValue() uses for them.
      # Without this check nil would silently become 0.0 for numeric
      # attributes, -1 for nominal ones, and would be passed to parse_date
      # for date attributes.
      #
      # The order of the type checks is important here, because a date is
      # also a numeric.
      #
      # @param value the Ruby value to convert
      # @return the parsed date for date attributes, a Float for numeric
      #   attributes, the value's index for nominal attributes, NaN for
      #   missing values, nil for any other attribute type
      def internal_value_of(value)
        return Float::NAN if value.nil? || value == '?'
        return Float::NAN if value.respond_to?(:nan?) && value.nan?

        if date?
          parse_date(value.to_s)
        elsif numeric?
          value.to_f
        elsif nominal?
          index_of_value(value.to_s)
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'weka/class_builder'
2
+
3
module Weka
  module Core
    # Namespace for the loader and saver classes.
    module Converters
      include ClassBuilder

      # Loader/saver names handed to build_classes; the concerns are
      # switched off for them via include_concerns: false.
      build_classes(
        *%i[
          ArffLoader
          ArffSaver
          CSVLoader
          CSVSaver
          JSONLoader
          JSONSaver
        ],
        include_concerns: false
      )
    end
  end
end
@@ -0,0 +1,68 @@
1
module Weka
  module Core
    java_import "weka.core.DenseInstance"

    # Reopens the imported DenseInstance class to add Ruby conveniences.
    class DenseInstance
      java_import "java.util.Date"
      java_import "java.text.SimpleDateFormat"

      # @param data the internal values of the instance; converted to a
      #   Java double array
      # @param weight the weight of the instance (default 1.0)
      def initialize(data, weight: 1.0)
        super(weight, data.to_java(:double))
      end

      # Returns the result of enumerate_attributes as a Ruby array.
      def attributes
        enumerate_attributes.to_a
      end

      # Yields each attribute; without a block the attribute enumeration
      # itself is returned.
      def each_attribute
        if block_given?
          enumerate_attributes.each { |attribute| yield(attribute) }
        else
          enumerate_attributes
        end
      end

      # Yields each attribute together with its index. Without a block an
      # enumerator is returned, so calls can be chained instead of running
      # an empty loop.
      def each_attribute_with_index
        return enumerate_attributes.each_with_index unless block_given?

        enumerate_attributes.each_with_index do |attribute, index|
          yield(attribute, index)
        end
      end

      # Returns the values of the instance converted back to Ruby values:
      # formatted date strings for date attributes, the raw number for
      # numeric attributes and the label for nominal and string attributes.
      #
      # @return [Array]
      def to_a
        to_double_array.each_with_index.map do |value, index|
          attribute = attribute_at(index)

          if attribute.date?
            format_date(value, attribute.date_format)
          elsif attribute.numeric?
            value
          elsif attribute.nominal? || attribute.string?
            # Attribute#value resolves the stored index for both nominal
            # and string attributes.
            attribute.value(value)
          end
        end
      end

      alias :values :to_a
      alias :values_count :num_values

      private

      # Returns the attribute at the given position, class attribute
      # included. Uses the Java lookup by index rather than rebuilding the
      # whole attribute array (which skips the class attribute) for every
      # single value.
      def attribute_at(index)
        attribute(index)
      end

      # Formats the given internal date value with the given pattern.
      def format_date(value, format)
        formatter = SimpleDateFormat.new(format)
        formatter.format(Date.new(value))
      end
    end
  end
end
@@ -0,0 +1,199 @@
1
+ require 'weka/core/converters'
2
+ require 'weka/core/loader'
3
+ require 'weka/core/saver'
4
+ require 'weka/core/dense_instance'
5
+
6
module Weka
  module Core
    java_import "weka.core.Instances"
    java_import "weka.core.FastVector"

    # Reopens the imported Instances class to add a Ruby-style API for
    # defining attributes, adding instances and loading/saving data.
    class Instances
      DEFAULT_RELATION_NAME = 'Instances'.freeze

      class << self
        # Loads instances from the given ARFF file via Loader.
        def from_arff(file)
          Loader.load_arff(file)
        end

        # Loads instances from the given CSV file via Loader.
        def from_csv(file)
          Loader.load_csv(file)
        end

        # Loads instances from the given JSON file via Loader.
        def from_json(file)
          Loader.load_json(file)
        end
      end

      # @param relation_name name of the relation (converted with #to_s)
      # @param attributes attributes the new object starts with
      def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: [], &block)
        attribute_list = FastVector.new
        attributes.each { |attribute| attribute_list.add_element(attribute) }

        super(relation_name.to_s, attribute_list, 0)
      end

      # Returns the result of enumerate_instances as a Ruby array.
      def instances
        enumerate_instances.to_a
      end

      # Returns the result of enumerate_attributes as a Ruby array.
      def attributes
        enumerate_attributes.to_a
      end

      # Returns the names of the attributes returned by #attributes.
      def attribute_names
        attributes.map(&:name)
      end

      # Evaluates the block in the context of this object, so attribute
      # definitions such as `numeric :age` can be written without a
      # receiver.
      #
      # @return [self]
      def add_attributes(&block)
        instance_eval(&block) if block
        self
      end

      alias :with_attributes :add_attributes
      alias :instances_count :num_instances
      alias :attributes_count :num_attributes

      # Yields each instance; without a block the instance enumeration
      # itself is returned.
      def each
        if block_given?
          enumerate_instances.each { |instance| yield(instance) }
        else
          enumerate_instances
        end
      end

      # Yields each instance together with its index. Without a block an
      # enumerator is returned instead of running an empty loop.
      def each_with_index
        return enumerate_instances.each_with_index unless block_given?

        enumerate_instances.each_with_index do |instance, index|
          yield(instance, index)
        end
      end

      # Yields each attribute; without a block the attribute enumeration
      # itself is returned.
      def each_attribute
        if block_given?
          enumerate_attributes.each { |attribute| yield(attribute) }
        else
          enumerate_attributes
        end
      end

      # Yields each attribute together with its index. Without a block an
      # enumerator is returned instead of running an empty loop.
      def each_attribute_with_index
        return enumerate_attributes.each_with_index unless block_given?

        enumerate_attributes.each_with_index do |attribute, index|
          yield(attribute, index)
        end
      end

      # Saves the instances to the given file in ARFF format via Saver.
      def to_arff(file)
        Saver.save_arff(file: file, instances: self)
      end

      # Saves the instances to the given file in CSV format via Saver.
      def to_csv(file)
        Saver.save_csv(file: file, instances: self)
      end

      # Saves the instances to the given file in JSON format via Saver.
      def to_json(file)
        Saver.save_json(file: file, instances: self)
      end

      # Appends a numeric attribute.
      #
      # @param name the attribute name
      # @param class_attribute whether to use it as class attribute
      def numeric(name, class_attribute: false)
        define_attribute(Attribute.new(name.to_s), name, class_attribute)
      end

      # Appends a nominal attribute.
      #
      # @param name the attribute name
      # @param values the allowed values (each converted with #to_s)
      # @param class_attribute whether to use it as class attribute
      def nominal(name, values:, class_attribute: false)
        attribute = Attribute.new(name.to_s, Array(values).map(&:to_s))
        define_attribute(attribute, name, class_attribute)
      end

      # Appends a string attribute.
      #
      # NOTE(review): the attribute is built from an empty value list.
      # Weka's Java API creates string attributes from a null value list,
      # so this presumably yields a nominal attribute without values --
      # verify against weka.core.Attribute before relying on it.
      #
      # @param name the attribute name
      # @param class_attribute whether to use it as class attribute
      def string(name, class_attribute: false)
        define_attribute(Attribute.new(name.to_s, []), name, class_attribute)
      end

      # Appends a date attribute.
      #
      # @param name the attribute name
      # @param format the date pattern (default 'yyyy-MM-dd HH:mm')
      # @param class_attribute whether to use it as class attribute
      def date(name, format: 'yyyy-MM-dd HH:mm', class_attribute: false)
        define_attribute(Attribute.new(name.to_s, format), name, class_attribute)
      end

      # Uses the attribute with the given name as class attribute; nil
      # resets the class attribute.
      #
      # @raise [ArgumentError] when no attribute with that name is defined
      def class_attribute=(name)
        if name.nil?
          reset_class_attribute
        else
          ensure_attribute_defined!(name)
          setClass(attribute_with_name(name))
        end
      end

      alias :add_numeric_attribute :numeric
      alias :add_string_attribute :string
      alias :add_nominal_attribute :nominal
      alias :add_date_attribute :date

      # Returns the class attribute, or nil when none is defined.
      def class_attribute
        classAttribute if class_attribute_defined?
      end

      # Unsets the class attribute by setting the class index to -1.
      def reset_class_attribute
        set_class_index(-1)
      end

      # True when the class index is not negative.
      def class_attribute_defined?
        class_index >= 0
      end

      # Adds an instance, or builds one from the given raw values first.
      #
      # @param instance_or_values an instance or a list of raw values
      # @param weight the weight assigned to the instance (default 1.0);
      #   note that it is also assigned to an instance passed in
      def add_instance(instance_or_values, weight: 1.0)
        instance = instance_from(instance_or_values, weight: weight)
        add(instance)
      end

      # Adds each entry of data via #add_instance with the same weight.
      def add_instances(data, weight: 1.0)
        data.each { |values| add_instance(values, weight: weight) }
      end

      # Converts raw values into internal values; each value is converted
      # by the attribute at the same position.
      def internal_values_of(values)
        values.each_with_index.map do |value, index|
          attribute(index).internal_value_of(value)
        end
      end

      # Passes this object to the filter's #filter method.
      def apply_filter(filter)
        filter.filter(self)
      end

      private

      # Appends the attribute and, when requested, makes it the class
      # attribute. Shared by #numeric, #nominal, #string and #date.
      def define_attribute(attribute, name, as_class_attribute)
        add_attribute(attribute)
        self.class_attribute = name if as_class_attribute
      end

      # Inserts the attribute after the attributes returned by #attributes.
      def add_attribute(attribute)
        insert_attribute_at(attribute, attributes.count)
      end

      # Raises ArgumentError unless an attribute with that name exists.
      def ensure_attribute_defined!(name)
        return if attribute_names.include?(name.to_s)

        error   = "\"#{name}\" is not defined."
        hint    = "Only defined attributes can be used as class attribute!"
        message = "#{error} #{hint}"

        raise ArgumentError, message
      end

      # Returns the first attribute with the given name, or nil.
      def attribute_with_name(name)
        attributes.find { |attribute| attribute.name == name.to_s }
      end

      # Returns the given instance with the weight applied, or a new
      # DenseInstance built from the given raw values.
      def instance_from(instance_or_values, weight:)
        if instance_or_values.is_a?(Java::WekaCore::Instance)
          instance_or_values.weight = weight
          instance_or_values
        else
          data = internal_values_of(instance_or_values)
          DenseInstance.new(data, weight: weight)
        end
      end
    end

    Java::WekaCore::Instances.__persistent__ = true
  end
end