weka 0.3.0-java → 0.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/.travis.yml +1 -1
- data/README.md +5 -2
- data/Rakefile +5 -5
- data/bin/console +4 -4
- data/lib/weka/attribute_selection/attribute_selection.rb +2 -3
- data/lib/weka/class_builder.rb +21 -12
- data/lib/weka/classifiers/evaluation.rb +18 -19
- data/lib/weka/classifiers/utils.rb +87 -90
- data/lib/weka/clusterers/cluster_evaluation.rb +2 -4
- data/lib/weka/clusterers/utils.rb +63 -66
- data/lib/weka/concerns.rb +5 -8
- data/lib/weka/concerns/buildable.rb +3 -5
- data/lib/weka/concerns/describable.rb +3 -4
- data/lib/weka/concerns/optionizable.rb +26 -27
- data/lib/weka/concerns/persistent.rb +3 -8
- data/lib/weka/concerns/serializable.rb +6 -8
- data/lib/weka/core/attribute.rb +62 -4
- data/lib/weka/core/converters.rb +2 -0
- data/lib/weka/core/dense_instance.rb +7 -7
- data/lib/weka/core/instances.rb +77 -17
- data/lib/weka/core/loader.rb +10 -1
- data/lib/weka/core/saver.rb +21 -1
- data/lib/weka/core/serialization_helper.rb +2 -3
- data/lib/weka/filters/filter.rb +0 -1
- data/lib/weka/filters/supervised/attribute.rb +2 -3
- data/lib/weka/filters/utils.rb +6 -9
- data/lib/weka/jars.rb +9 -12
- data/lib/weka/version.rb +1 -1
- data/weka.gemspec +2 -3
- metadata +17 -31
data/lib/weka/core/converters.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
module Weka
|
2
2
|
module Core
|
3
|
-
java_import
|
3
|
+
java_import 'weka.core.DenseInstance'
|
4
4
|
|
5
5
|
class DenseInstance
|
6
|
-
java_import
|
7
|
-
java_import
|
6
|
+
java_import 'java.util.Date'
|
7
|
+
java_import 'java.text.SimpleDateFormat'
|
8
8
|
|
9
9
|
def initialize(data, weight: 1.0)
|
10
|
-
if data.
|
10
|
+
if data.is_a?(Integer)
|
11
11
|
super(data)
|
12
12
|
else
|
13
13
|
super(weight, to_java_double(data))
|
@@ -38,8 +38,8 @@ module Weka
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
alias
|
42
|
-
alias
|
41
|
+
alias values to_a
|
42
|
+
alias values_count num_values
|
43
43
|
|
44
44
|
private
|
45
45
|
|
@@ -61,7 +61,7 @@ module Weka
|
|
61
61
|
format_date(value, attribute.date_format)
|
62
62
|
elsif attribute.numeric?
|
63
63
|
value
|
64
|
-
elsif attribute.nominal?
|
64
|
+
elsif attribute.nominal? || attribute.string?
|
65
65
|
attribute.value(value)
|
66
66
|
end
|
67
67
|
end
|
data/lib/weka/core/instances.rb
CHANGED
@@ -6,13 +6,13 @@ require 'weka/concerns/serializable'
|
|
6
6
|
|
7
7
|
module Weka
|
8
8
|
module Core
|
9
|
-
java_import
|
10
|
-
java_import
|
9
|
+
java_import 'weka.core.Instances'
|
10
|
+
java_import 'weka.core.FastVector'
|
11
11
|
|
12
12
|
class Instances
|
13
13
|
include Weka::Concerns::Serializable
|
14
14
|
|
15
|
-
DEFAULT_RELATION_NAME = 'Instances'
|
15
|
+
DEFAULT_RELATION_NAME = 'Instances'.freeze
|
16
16
|
|
17
17
|
class << self
|
18
18
|
def from_arff(file)
|
@@ -26,6 +26,16 @@ module Weka
|
|
26
26
|
def from_json(file)
|
27
27
|
Loader.load_json(file)
|
28
28
|
end
|
29
|
+
|
30
|
+
# Loads instances based on a given *.names file (holding the attribute
|
31
|
+
# values) or a given *.data file (holding the actual data).
|
32
|
+
# The respective other file is loaded from the same directory.
|
33
|
+
#
|
34
|
+
# See http://www.cs.washington.edu/dm/vfml/appendixes/c45.htm for more
|
35
|
+
# information about the C4.5 file format.
|
36
|
+
def from_c45(file)
|
37
|
+
Loader.load_c45(file)
|
38
|
+
end
|
29
39
|
end
|
30
40
|
|
31
41
|
def initialize(relation_name: DEFAULT_RELATION_NAME, attributes: [], &block)
|
@@ -48,13 +58,33 @@ module Weka
|
|
48
58
|
end
|
49
59
|
|
50
60
|
def add_attributes(&block)
|
51
|
-
|
61
|
+
instance_eval(&block) if block
|
52
62
|
self
|
53
63
|
end
|
54
64
|
|
55
|
-
alias
|
56
|
-
alias
|
57
|
-
alias
|
65
|
+
alias with_attributes add_attributes
|
66
|
+
alias instances_count num_instances
|
67
|
+
alias attributes_count num_attributes
|
68
|
+
alias has_string_attribute? check_for_string_attributes
|
69
|
+
|
70
|
+
## Check if the instances have any attribute of the given type
|
71
|
+
# @param [String, Symbol, Integer] type type of the attribute to check
|
72
|
+
# String and Symbol arguments are converted to the corresponding type
|
73
|
+
# defined in Weka::Core::Attribute
|
74
|
+
#
|
75
|
+
# @example Passing String
|
76
|
+
# instances.has_attribute_type?('string')
|
77
|
+
# instances.has_attribute_type?('String')
|
78
|
+
#
|
79
|
+
# @example Passing Symbol
|
80
|
+
# instances.has_attribute_type?(:String)
|
81
|
+
#
|
82
|
+
# @example Passing Integer
|
83
|
+
# instances.has_attribute_type?(Attribute::STRING)
|
84
|
+
def has_attribute_type?(type)
|
85
|
+
type = map_attribute_type(type) unless type.is_a?(Integer)
|
86
|
+
check_for_attribute_type(type)
|
87
|
+
end
|
58
88
|
|
59
89
|
def each
|
60
90
|
if block_given?
|
@@ -96,26 +126,39 @@ module Weka
|
|
96
126
|
Saver.save_json(file: file, instances: self)
|
97
127
|
end
|
98
128
|
|
129
|
+
# Creates a file with the instances' attribute values and a *.data file
|
130
|
+
# with the actual data.
|
131
|
+
#
|
132
|
+
# You should choose another file extension than .data (preferably
|
133
|
+
# *.names) for the file, else it will just be overwritten with the
|
134
|
+
# automatically created *.data file.
|
135
|
+
#
|
136
|
+
# See http://www.cs.washington.edu/dm/vfml/appendixes/c45.htm for more
|
137
|
+
# information about the C4.5 file format.
|
138
|
+
def to_c45(file)
|
139
|
+
Saver.save_c45(file: file, instances: self)
|
140
|
+
end
|
141
|
+
|
99
142
|
def numeric(name, class_attribute: false)
|
100
|
-
attribute = Attribute.
|
143
|
+
attribute = Attribute.new_numeric(name)
|
101
144
|
add_attribute(attribute)
|
102
145
|
self.class_attribute = name if class_attribute
|
103
146
|
end
|
104
147
|
|
105
148
|
def nominal(name, values:, class_attribute: false)
|
106
|
-
attribute = Attribute.
|
149
|
+
attribute = Attribute.new_nominal(name, values)
|
107
150
|
add_attribute(attribute)
|
108
151
|
self.class_attribute = name if class_attribute
|
109
152
|
end
|
110
153
|
|
111
154
|
def string(name, class_attribute: false)
|
112
|
-
attribute = Attribute.
|
155
|
+
attribute = Attribute.new_string(name)
|
113
156
|
add_attribute(attribute)
|
114
157
|
self.class_attribute = name if class_attribute
|
115
158
|
end
|
116
159
|
|
117
160
|
def date(name, format: 'yyyy-MM-dd HH:mm', class_attribute: false)
|
118
|
-
attribute = Attribute.
|
161
|
+
attribute = Attribute.new_date(name, format)
|
119
162
|
add_attribute(attribute)
|
120
163
|
self.class_attribute = name if class_attribute
|
121
164
|
end
|
@@ -129,10 +172,10 @@ module Weka
|
|
129
172
|
end
|
130
173
|
end
|
131
174
|
|
132
|
-
alias
|
133
|
-
alias
|
134
|
-
alias
|
135
|
-
alias
|
175
|
+
alias add_numeric_attribute numeric
|
176
|
+
alias add_string_attribute string
|
177
|
+
alias add_nominal_attribute nominal
|
178
|
+
alias add_date_attribute date
|
136
179
|
|
137
180
|
def class_attribute
|
138
181
|
classAttribute if class_attribute_defined?
|
@@ -187,7 +230,7 @@ module Weka
|
|
187
230
|
return if attribute_names.include?(name.to_s)
|
188
231
|
|
189
232
|
error = "\"#{name}\" is not defined."
|
190
|
-
hint =
|
233
|
+
hint = 'Only defined attributes can be used as class attribute!'
|
191
234
|
message = "#{error} #{hint}"
|
192
235
|
|
193
236
|
raise ArgumentError, message
|
@@ -198,14 +241,31 @@ module Weka
|
|
198
241
|
end
|
199
242
|
|
200
243
|
def instance_from(instance_or_values, weight:)
|
201
|
-
if instance_or_values.
|
244
|
+
if instance_or_values.is_a?(Java::WekaCore::Instance)
|
202
245
|
instance_or_values.weight = weight
|
203
246
|
instance_or_values
|
204
247
|
else
|
205
248
|
data = internal_values_of(instance_or_values)
|
249
|
+
|
250
|
+
# string attribute has unlimited range of possible values.
|
251
|
+
# Check the return index, if it is -1 then add the value to
|
252
|
+
# the attribute before creating the instance
|
253
|
+
data.map!.with_index do |value, index|
|
254
|
+
if value == -1 && attribute(index).string?
|
255
|
+
attribute(index).add_string_value(instance_or_values[index].to_s)
|
256
|
+
else
|
257
|
+
value
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|
206
261
|
DenseInstance.new(data, weight: weight)
|
207
262
|
end
|
208
263
|
end
|
264
|
+
|
265
|
+
def map_attribute_type(type)
|
266
|
+
return -1 unless Attribute::TYPES.include?(type.downcase.to_sym)
|
267
|
+
Attribute.const_get(type.upcase)
|
268
|
+
end
|
209
269
|
end
|
210
270
|
|
211
271
|
Java::WekaCore::Instances.__persistent__ = true
|
data/lib/weka/core/loader.rb
CHANGED
@@ -18,6 +18,16 @@ module Weka
|
|
18
18
|
load_with(Converters::JSONLoader, file: file)
|
19
19
|
end
|
20
20
|
|
21
|
+
# Takes either a *.names or a *.data file and loads the respective other
|
22
|
+
# file from the same directory automatically.
|
23
|
+
# Returns a Weka::Core::Instances object.
|
24
|
+
#
|
25
|
+
# See http://www.cs.washington.edu/dm/vfml/appendixes/c45.htm for more
|
26
|
+
# information about the C4.5 file format.
|
27
|
+
def load_c45(file)
|
28
|
+
load_with(Converters::C45Loader, file: file)
|
29
|
+
end
|
30
|
+
|
21
31
|
private
|
22
32
|
|
23
33
|
def load_with(loader_class, file:)
|
@@ -27,6 +37,5 @@ module Weka
|
|
27
37
|
end
|
28
38
|
end
|
29
39
|
end
|
30
|
-
|
31
40
|
end
|
32
41
|
end
|
data/lib/weka/core/saver.rb
CHANGED
@@ -18,6 +18,27 @@ module Weka
|
|
18
18
|
save_with(Converters::JSONSaver, file: file, instances: instances)
|
19
19
|
end
|
20
20
|
|
21
|
+
# Saves the given `instances` into a file with the given name and a
|
22
|
+
# *.data file in the same directory.
|
23
|
+
# The file with the given file name includes the instances' attribute
|
24
|
+
# values, the *.data file holds the actual data.
|
25
|
+
#
|
26
|
+
# Example:
|
27
|
+
#
|
28
|
+
# Weka::Core::Saver.save_c45(
|
29
|
+
# file: './path/to/example.names',
|
30
|
+
# instances: instances
|
31
|
+
# )
|
32
|
+
#
|
33
|
+
# creates an example.names file and an example.data file in the
|
34
|
+
# ./path/to/ directory.
|
35
|
+
#
|
36
|
+
# See: http://www.cs.washington.edu/dm/vfml/appendixes/c45.htm for more
|
37
|
+
# information about the C4.5 file format.
|
38
|
+
def save_c45(file:, instances:)
|
39
|
+
save_with(Converters::C45Saver, file: file, instances: instances)
|
40
|
+
end
|
41
|
+
|
21
42
|
private
|
22
43
|
|
23
44
|
def save_with(saver_class, file:, instances:)
|
@@ -29,6 +50,5 @@ module Weka
|
|
29
50
|
end
|
30
51
|
end
|
31
52
|
end
|
32
|
-
|
33
53
|
end
|
34
54
|
end
|
data/lib/weka/filters/filter.rb
CHANGED
data/lib/weka/filters/utils.rb
CHANGED
@@ -1,17 +1,14 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Filters
|
5
3
|
module Utils
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
4
|
+
def self.included(base)
|
5
|
+
base.class_eval do
|
6
|
+
def filter(instances)
|
7
|
+
set_input_format(instances)
|
8
|
+
Filter.use_filter(instances, self)
|
9
|
+
end
|
12
10
|
end
|
13
11
|
end
|
14
|
-
|
15
12
|
end
|
16
13
|
end
|
17
14
|
end
|
data/lib/weka/jars.rb
CHANGED
@@ -1,19 +1,16 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
1
|
module Weka
|
4
2
|
module Jars
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
require 'lock_jar'
|
3
|
+
def self.included(base)
|
4
|
+
base.class_eval do
|
5
|
+
require 'lock_jar'
|
9
6
|
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
lib_path = File.expand_path('../../', File.dirname(__FILE__))
|
8
|
+
lockfile = File.join(lib_path, 'Jarfile.lock')
|
9
|
+
jars_dir = File.join(lib_path, 'jars')
|
13
10
|
|
14
|
-
|
15
|
-
|
11
|
+
LockJar.install(lockfile, local_repo: jars_dir)
|
12
|
+
LockJar.load(lockfile, local_repo: jars_dir)
|
13
|
+
end
|
16
14
|
end
|
17
|
-
|
18
15
|
end
|
19
16
|
end
|
data/lib/weka/version.rb
CHANGED
data/weka.gemspec
CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ['Paul Götze']
|
10
10
|
spec.email = ['paul.christoph.goetze@gmail.com']
|
11
11
|
|
12
|
-
spec.summary =
|
13
|
-
spec.description =
|
12
|
+
spec.summary = 'Machine Learning & Data Mining with JRuby.'
|
13
|
+
spec.description = 'A JRuby wrapper for the Weka library (http://www.cs.waikato.ac.nz/ml/weka/)'
|
14
14
|
spec.homepage = 'https://github.com/paulgoetze/weka-jruby'
|
15
15
|
spec.license = 'MIT'
|
16
16
|
|
@@ -23,7 +23,6 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.require_paths = ['lib']
|
24
24
|
|
25
25
|
spec.add_runtime_dependency 'lock_jar', '~> 0.13'
|
26
|
-
spec.add_runtime_dependency 'activesupport', '~> 4.0'
|
27
26
|
|
28
27
|
spec.add_development_dependency 'bundler', '~> 1.6'
|
29
28
|
spec.add_development_dependency 'rake', '~> 10.0'
|
metadata
CHANGED
@@ -1,99 +1,85 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: weka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Paul Götze
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: lock_jar
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.13'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - "~>"
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '0.13'
|
19
|
+
name: lock_jar
|
25
20
|
prerelease: false
|
26
21
|
type: :runtime
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: activesupport
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
24
|
- - "~>"
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
26
|
+
version: '0.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - "~>"
|
37
31
|
- !ruby/object:Gem::Version
|
38
|
-
version: '
|
39
|
-
prerelease: false
|
40
|
-
type: :runtime
|
41
|
-
- !ruby/object:Gem::Dependency
|
32
|
+
version: '1.6'
|
42
33
|
name: bundler
|
34
|
+
prerelease: false
|
35
|
+
type: :development
|
43
36
|
version_requirements: !ruby/object:Gem::Requirement
|
44
37
|
requirements:
|
45
38
|
- - "~>"
|
46
39
|
- !ruby/object:Gem::Version
|
47
40
|
version: '1.6'
|
41
|
+
- !ruby/object:Gem::Dependency
|
48
42
|
requirement: !ruby/object:Gem::Requirement
|
49
43
|
requirements:
|
50
44
|
- - "~>"
|
51
45
|
- !ruby/object:Gem::Version
|
52
|
-
version: '
|
46
|
+
version: '10.0'
|
47
|
+
name: rake
|
53
48
|
prerelease: false
|
54
49
|
type: :development
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rake
|
57
50
|
version_requirements: !ruby/object:Gem::Requirement
|
58
51
|
requirements:
|
59
52
|
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
62
56
|
requirement: !ruby/object:Gem::Requirement
|
63
57
|
requirements:
|
64
58
|
- - "~>"
|
65
59
|
- !ruby/object:Gem::Version
|
66
|
-
version: '
|
60
|
+
version: '3.0'
|
61
|
+
name: rspec
|
67
62
|
prerelease: false
|
68
63
|
type: :development
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rspec
|
71
64
|
version_requirements: !ruby/object:Gem::Requirement
|
72
65
|
requirements:
|
73
66
|
- - "~>"
|
74
67
|
- !ruby/object:Gem::Version
|
75
68
|
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
76
70
|
requirement: !ruby/object:Gem::Requirement
|
77
71
|
requirements:
|
78
72
|
- - "~>"
|
79
73
|
- !ruby/object:Gem::Version
|
80
74
|
version: '3.0'
|
75
|
+
name: shoulda-matchers
|
81
76
|
prerelease: false
|
82
77
|
type: :development
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: shoulda-matchers
|
85
78
|
version_requirements: !ruby/object:Gem::Requirement
|
86
79
|
requirements:
|
87
80
|
- - "~>"
|
88
81
|
- !ruby/object:Gem::Version
|
89
82
|
version: '3.0'
|
90
|
-
requirement: !ruby/object:Gem::Requirement
|
91
|
-
requirements:
|
92
|
-
- - "~>"
|
93
|
-
- !ruby/object:Gem::Version
|
94
|
-
version: '3.0'
|
95
|
-
prerelease: false
|
96
|
-
type: :development
|
97
83
|
description: A JRuby wrapper for the Weka library (http://www.cs.waikato.ac.nz/ml/weka/)
|
98
84
|
email:
|
99
85
|
- paul.christoph.goetze@gmail.com
|
@@ -178,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
164
|
version: '0'
|
179
165
|
requirements: []
|
180
166
|
rubyforge_project:
|
181
|
-
rubygems_version: 2.
|
167
|
+
rubygems_version: 2.6.6
|
182
168
|
signing_key:
|
183
169
|
specification_version: 4
|
184
170
|
summary: Machine Learning & Data Mining with JRuby.
|