weka 0.1.0-java → 0.2.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +47 -0
- data/lib/weka/class_builder.rb +10 -2
- data/lib/weka/concerns.rb +1 -0
- data/lib/weka/concerns/serializable.rb +17 -0
- data/lib/weka/core.rb +1 -0
- data/lib/weka/core/instances.rb +8 -0
- data/lib/weka/core/serialization_helper.rb +13 -0
- data/lib/weka/version.rb +1 -1
- data/weka.gemspec +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e635e174779d58d634091b4a7d0f523cd56e8548
|
4
|
+
data.tar.gz: 5695cfe2a3b173e8d5b96c4b9234a10da7d9205e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e081b08900c2d77b9070ed4d963b4a5dcc9e2a52acc1bbc3f32cefd168abc1630f6a9f03331896be298b394bb222c5d258a9d8c11fe6e5d34251ca4697f5fbe
|
7
|
+
data.tar.gz: 56b23d5ba46094ebd5f4f74eb90cbaca74887c06726aea639616520dc13f6c910b2dc4edb2601f914d87c1a35b374a06fd6591a9d5b1791e793380261844bc27
|
data/README.md
CHANGED
@@ -23,6 +23,13 @@ Or install it yourself as:
|
|
23
23
|
|
24
24
|
## Usage
|
25
25
|
|
26
|
+
* [Instances](#instances)
|
27
|
+
* [Filters](#filters)
|
28
|
+
* [Attribute selection](#attribute-selection)
|
29
|
+
* [Classifiers](#classifiers)
|
30
|
+
* [Clusterers](#clusterers)
|
31
|
+
* [Serializing objects](#serializing-objects)
|
32
|
+
|
26
33
|
Start using Weka's Machine Learning and Data Mining algorithms by requiring the gem:
|
27
34
|
|
28
35
|
```ruby
|
@@ -233,6 +240,9 @@ discretize = Discretize.new
|
|
233
240
|
|
234
241
|
# apply a filter chain on instances
|
235
242
|
filtered_data = instances.apply_filter(normalize).apply_filter(discretize)
|
243
|
+
|
244
|
+
# or even shorter
|
245
|
+
filtered_data = instances.apply_filters(normalize, discretize)
|
236
246
|
```
|
237
247
|
|
238
248
|
#### Setting Filter options
|
@@ -655,6 +665,43 @@ sunny,80,90,TRUE,cluster1
|
|
655
665
|
...
|
656
666
|
```
|
657
667
|
|
668
|
+
### Serializing Objects
|
669
|
+
|
670
|
+
You can serialize objects with the `Weka::Core::SerializationHelper` class:
|
671
|
+
|
672
|
+
```ruby
|
673
|
+
# writing an Object to a file:
|
674
|
+
Weka::Core::SerializationHelper.write('path/to/file.model', classifier)
|
675
|
+
|
676
|
+
# load an Object from a serialized file:
|
677
|
+
object = Weka::Core::SerializationHelper.read('path/to/file.model')
|
678
|
+
```
|
679
|
+
|
680
|
+
Instead of `.write` and `.read` you can also call the aliases `.serialize` and `.deserialize`.
|
681
|
+
|
682
|
+
Serialization can be helpful if the training of e.g. a classifier model takes
|
683
|
+
some minutes. Instead of rerunning the whole training every time you instantiate a classifier, you
|
684
|
+
can speed up this process tremendously by serializing a classifier once it has been trained and loading it from the file again later.
|
685
|
+
|
686
|
+
Classifiers, Clusterers, Instances and Filters also have a `#serialize` method
|
687
|
+
which you can use to directly serialize an instance of these, e.g. for a Classifier:
|
688
|
+
|
689
|
+
```ruby
|
690
|
+
instances = Weka::Core::Instances.from_arff('weather.arff')
|
691
|
+
instances.class_attribute = :play
|
692
|
+
|
693
|
+
classifier = Weka::Classifiers::Trees::RandomForest.build do
|
694
|
+
train_with_instances instances
|
695
|
+
end
|
696
|
+
|
697
|
+
# store trained model as binary file
|
698
|
+
classifier.serialize('randomforest.model')
|
699
|
+
|
700
|
+
# load Classifier from binary file
|
701
|
+
loaded_classifier = Weka::Core::SerializationHelper.deserialize('randomforest.model')
|
702
|
+
# => #<Java::WekaClassifiersTrees::RandomForest:0x197db331>
|
703
|
+
```
|
704
|
+
|
658
705
|
## Development
|
659
706
|
|
660
707
|
After checking out the repo, run `bin/setup` to install dependencies.
|
data/lib/weka/class_builder.rb
CHANGED
@@ -11,7 +11,7 @@ module Weka
|
|
11
11
|
|
12
12
|
def build_class(class_name, weka_module: nil, include_concerns: true)
|
13
13
|
java_import java_class_path(class_name, weka_module)
|
14
|
-
define_class(class_name, include_concerns: include_concerns)
|
14
|
+
define_class(class_name, weka_module, include_concerns: include_concerns)
|
15
15
|
end
|
16
16
|
|
17
17
|
def build_classes(*class_names, weka_module: nil, include_concerns: true)
|
@@ -52,15 +52,23 @@ module Weka
|
|
52
52
|
self.name.scan('::').count == 1
|
53
53
|
end
|
54
54
|
|
55
|
-
def define_class(class_name, include_concerns: true)
|
55
|
+
def define_class(class_name, weka_module, include_concerns: true)
|
56
56
|
module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
|
57
57
|
class #{class_name}
|
58
58
|
#{'include Concerns' if include_concerns}
|
59
|
+
#{include_serializable_for(class_name, weka_module)}
|
59
60
|
#{include_utils}
|
60
61
|
end
|
61
62
|
CLASS_DEFINITION
|
62
63
|
end
|
63
64
|
|
65
|
+
def include_serializable_for(class_name, weka_module)
|
66
|
+
class_path = java_class_path(class_name, weka_module)
|
67
|
+
serializable = Weka::Core::SerializationHelper.serializable?(class_path)
|
68
|
+
|
69
|
+
"include Weka::Concerns::Serializable" if serializable
|
70
|
+
end
|
71
|
+
|
64
72
|
def include_utils
|
65
73
|
return unless utils_defined?
|
66
74
|
"include #{utils}"
|
data/lib/weka/concerns.rb
CHANGED
data/lib/weka/concerns/serializable.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'weka/core/serialization_helper'
|
3
|
+
|
4
|
+
module Weka
|
5
|
+
module Concerns
|
6
|
+
module Serializable
|
7
|
+
extend ActiveSupport::Concern
|
8
|
+
|
9
|
+
included do
|
10
|
+
def serialize(filename)
|
11
|
+
Weka::Core::SerializationHelper.write(filename, self)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/weka/core.rb
CHANGED
data/lib/weka/core/instances.rb
CHANGED
@@ -2,6 +2,7 @@ require 'weka/core/converters'
|
|
2
2
|
require 'weka/core/loader'
|
3
3
|
require 'weka/core/saver'
|
4
4
|
require 'weka/core/dense_instance'
|
5
|
+
require 'weka/concerns/serializable'
|
5
6
|
|
6
7
|
module Weka
|
7
8
|
module Core
|
@@ -9,6 +10,7 @@ module Weka
|
|
9
10
|
java_import "weka.core.FastVector"
|
10
11
|
|
11
12
|
class Instances
|
13
|
+
include Weka::Concerns::Serializable
|
12
14
|
|
13
15
|
DEFAULT_RELATION_NAME = 'Instances'
|
14
16
|
|
@@ -163,6 +165,12 @@ module Weka
|
|
163
165
|
filter.filter(self)
|
164
166
|
end
|
165
167
|
|
168
|
+
def apply_filters(*filters)
|
169
|
+
filters.inject(self) do |filtered_instances, filter|
|
170
|
+
filter.filter(filtered_instances)
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
166
174
|
private
|
167
175
|
|
168
176
|
def add_attribute(attribute)
|
data/lib/weka/version.rb
CHANGED
data/weka.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ['paul.christoph.goetze@gmail.com']
|
11
11
|
|
12
12
|
spec.summary = %q{Machine Learning & Data Mining with JRuby.}
|
13
|
-
spec.description = %q{A wrapper for the Weka library (http://www.cs.waikato.ac.nz/ml/weka/)}
|
13
|
+
spec.description = %q{A JRuby wrapper for the Weka library (http://www.cs.waikato.ac.nz/ml/weka/)}
|
14
14
|
spec.homepage = 'https://github.com/paulgoetze/weka-jruby'
|
15
15
|
spec.license = 'MIT'
|
16
16
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: weka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Paul Götze
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lock_jar
|
@@ -94,7 +94,7 @@ dependencies:
|
|
94
94
|
version: '3.0'
|
95
95
|
prerelease: false
|
96
96
|
type: :development
|
97
|
-
description: A wrapper for the Weka library (http://www.cs.waikato.ac.nz/ml/weka/)
|
97
|
+
description: A JRuby wrapper for the Weka library (http://www.cs.waikato.ac.nz/ml/weka/)
|
98
98
|
email:
|
99
99
|
- paul.christoph.goetze@gmail.com
|
100
100
|
executables:
|
@@ -138,6 +138,7 @@ files:
|
|
138
138
|
- lib/weka/concerns/describable.rb
|
139
139
|
- lib/weka/concerns/optionizable.rb
|
140
140
|
- lib/weka/concerns/persistent.rb
|
141
|
+
- lib/weka/concerns/serializable.rb
|
141
142
|
- lib/weka/core.rb
|
142
143
|
- lib/weka/core/attribute.rb
|
143
144
|
- lib/weka/core/converters.rb
|
@@ -145,6 +146,7 @@ files:
|
|
145
146
|
- lib/weka/core/instances.rb
|
146
147
|
- lib/weka/core/loader.rb
|
147
148
|
- lib/weka/core/saver.rb
|
149
|
+
- lib/weka/core/serialization_helper.rb
|
148
150
|
- lib/weka/exceptions.rb
|
149
151
|
- lib/weka/filters.rb
|
150
152
|
- lib/weka/filters/filter.rb
|