weka 0.5.0-java → 0.7.3-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 07d0cbed2d245de34e10101d0597017194804735e3b5ea6bc972201282c60d5d
4
- data.tar.gz: b304c03a4552b766f56e60e52b765068fdb0de0350d1e5c541882f085e90824b
3
+ metadata.gz: c2dee943ecd6d4df29a0339de43d88b1d72171dcf84db300270fbc31cf4cedcf
4
+ data.tar.gz: d6bec02306fbccec16ac31089dfb65d544673c4c6ebec870bb8cfa19cdc9c486
5
5
  SHA512:
6
- metadata.gz: 942621fa83a7670384adccb717610ac19dc5d961fd9de5ab579a331c40119ef8e0e73cb0b0962aaff9fe937628335ee453be2ef1ed0669edf5421dadc075acdc
7
- data.tar.gz: ee68c277c9a40f6349fdea303897d44fd93ded9311d11ffba8c1cd3909764ea51d22638cc7240bb375570615a9acaa4d98ba591b8de3102c41c3dcabf550283b
6
+ metadata.gz: 5b2f624be137dd9fe055d495f2fd356880d14da95ca8923f823f20f7a8cce4d54c09c6b173a4e7a17173f488a2c99b1cad73c8c86720a3c3eca80487a9413208
7
+ data.tar.gz: 7d33b187f59f8de850cf23909728a6154c30490cf329cea8b50c26407906f90bd07a232b295217a63567529a17ef1b980fcf6da1125f79b8d0d29aa12c793d87
data/.gitignore CHANGED
@@ -1,10 +1,11 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
1
+ .bundle
2
+ .yardoc
3
+ Gemfile.lock
4
+ _yardoc
5
+ coverage
6
+ doc
7
+ pkg
8
+ spec/reports
9
+ tmp
10
10
  jars
11
+ lib/*_jars.rb
@@ -5,6 +5,7 @@ AllCops:
5
5
  - '*.gemspec'
6
6
  - 'Gemfile'
7
7
  - 'Gemfile.lock'
8
+ - 'lib/weka_jars.rb'
8
9
 
9
10
  Style/Copyright:
10
11
  Enabled: false
@@ -13,7 +14,10 @@ Style/Documentation:
13
14
  Enabled: false
14
15
 
15
16
  Metrics/LineLength:
16
- Max: 80
17
+ Max: 105
18
+
19
+ Metrics/ClassLength:
20
+ Max: 250
17
21
 
18
22
  Layout/MultilineMethodCallIndentation:
19
23
  EnforcedStyle: indented
@@ -28,3 +32,28 @@ Metrics/ModuleLength:
28
32
  Metrics/BlockLength:
29
33
  Exclude:
30
34
  - "**/*_spec.rb"
35
+
36
+ Lint/ConstantDefinitionInBlock:
37
+ Exclude:
38
+ - "**/*_spec.rb"
39
+
40
+ Layout/EmptyLineBetweenDefs:
41
+ Enabled: false
42
+
43
+ Layout/HashAlignment:
44
+ Enabled: false
45
+
46
+ Lint/BooleanSymbol:
47
+ Enabled: false
48
+
49
+ Style/HashEachMethods:
50
+ Exclude:
51
+ - lib/weka/core/instances.rb
52
+ - spec/classifiers/evaluation_spec.rb
53
+
54
+ Naming/PredicateName:
55
+ Exclude:
56
+ - lib/weka/core/instances.rb
57
+
58
+ Metrics/CyclomaticComplexity:
59
+ Max: 8
@@ -0,0 +1,104 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
6
+ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.7.3] – 2021-01-21
11
+ ### Changed
12
+ - Update Weka Jar dependency to weka-dev v3.9.5
13
+ Please also refer to the weka changelog for included updates & fixes:
14
+ https://www.cs.waikato.ac.nz/~ml/weka/CHANGELOG-3-9-5
15
+ - Update dev dependencies
16
+
17
+ ## [0.7.2] – 2020-03-20
18
+ ### Changed
19
+ - Update Weka Jar dependency to weka-dev v3.9.4
20
+ Please also refer to the weka changelog for included updates & fixes:
21
+ https://www.cs.waikato.ac.nz/~ml/weka/CHANGELOG-3-9-4
22
+ - Drop shoulda-matchers as dev-dependency
23
+
24
+ ## [0.7.1] – 2018-11-09
25
+ ### Changed
26
+ - Update Weka Jar dependency to weka-dev v3.9.3
27
+ - Update rake to \~>12.0, jar-dependencies to \~>0.4
28
+
29
+ ### Fixed
30
+ - Float::NAN check in Weka::Core::Attributes#internal_value_of
31
+
32
+ ## [0.7.0] – 2018-01-01
33
+ ### Added
34
+ - Add new unsupervised attribute filters added in weka-dev v3.9.2
35
+ - Make classes from weka.filters module available
36
+ - Make WeightedInstancesHandlerWrapper available in Weka::Classifiers::Meta
37
+ - Make FilteredClusterer & MakeDensityBasedClusterer available in Weka::Clusterers
38
+
39
+ ### Changed
40
+ - Update Weka Jar dependency to weka-dev v3.9.2
41
+
42
+ ## [0.6.0] – 2017-12-17
43
+ ### Added
44
+ - #copy method for Weka::Core::Instances
45
+
46
+ ### Changed
47
+ - Load Jars with jar-dependencies instead of lock_jar gem
48
+ - Make Weka::Core::Instances#instance_from public
49
+
50
+ ### Fixed
51
+ - Weka::UnassignedTrainingInstancesError when running #classify/#cluster and
52
+ #distribution_for on deserialized classifiers and clusterers
53
+
54
+
55
+ ## [0.5.0] – 2017-06-17
56
+ ### Added
57
+ - #to_m (to Matrix) method for Weka::Core::Instances
58
+ - Curve classes in Weka::Classifiers::Evaluation module
59
+ - Allow including additional modules on class building
60
+ - Rubocop config for project
61
+
62
+ ### Changed
63
+ - Allow passing a hash to Weka::Core::Instances#add_instance
64
+
65
+ ### Removed
66
+ - Block argument in Weka::Core::Instances#initialize
67
+
68
+
69
+ ## [0.4.0] – 2016-12-22
70
+ ### Added
71
+ - C45 converters
72
+ - Full support for string attributes
73
+
74
+ ### Removed
75
+ - ActiveSupport as dependency
76
+
77
+
78
+ ## [0.3.0] – 2016-02-10
79
+ ### Added
80
+ - Allow adding Instances with missing values
81
+ - Allow creating DenseInstances with missing values
82
+ #merge method for Weka::Core::Instances
83
+
84
+
85
+ ## [0.2.0] – 2016-01-19
86
+ ### Added
87
+ - Serialization/deserialization functionality
88
+ - #apply_filters method for Weka::Core::Instances
89
+
90
+
91
+ ## [0.1.0] – 2015-12-26
92
+ Initial release
93
+
94
+ [Unreleased]: https://github.com/paulgoetze/weka-jruby/compare/v0.7.3...HEAD
95
+ [0.7.3]: https://github.com/paulgoetze/weka-jruby/compare/v0.7.2...v0.7.3
96
+ [0.7.2]: https://github.com/paulgoetze/weka-jruby/compare/v0.7.1...v0.7.2
97
+ [0.7.1]: https://github.com/paulgoetze/weka-jruby/compare/v0.7.0...v0.7.1
98
+ [0.7.0]: https://github.com/paulgoetze/weka-jruby/compare/v0.6.0...v0.7.0
99
+ [0.6.0]: https://github.com/paulgoetze/weka-jruby/compare/v0.5.0...v0.6.0
100
+ [0.5.0]: https://github.com/paulgoetze/weka-jruby/compare/v0.4.0...v0.5.0
101
+ [0.4.0]: https://github.com/paulgoetze/weka-jruby/compare/v0.3.0...v0.4.0
102
+ [0.3.0]: https://github.com/paulgoetze/weka-jruby/compare/v0.2.0...v0.3.0
103
+ [0.2.0]: https://github.com/paulgoetze/weka-jruby/compare/v0.1.0...v0.2.0
104
+ [0.1.0]: https://github.com/paulgoetze/weka-jruby/compare/ce6a985017c28ea755290a9baba4d81acddc2d20...v0.1.0
@@ -1,13 +1,76 @@
1
- # Contributor Code of Conduct
1
+ # Contributor Covenant Code of Conduct
2
2
 
3
- As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
3
+ ## Our Pledge
4
4
 
5
- We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
5
+ In the interest of fostering an open and welcoming environment, we as
6
+ contributors and maintainers pledge to make participation in our project and
7
+ our community a harassment-free experience for everyone, regardless of age, body
8
+ size, disability, ethnicity, sex characteristics, gender identity and expression,
9
+ level of experience, education, socio-economic status, nationality, personal
10
+ appearance, race, religion, or sexual identity and orientation.
6
11
 
7
- Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.
12
+ ## Our Standards
8
13
 
9
- Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.
14
+ Examples of behavior that contributes to creating a positive environment
15
+ include:
10
16
 
11
- Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
17
+ * Using welcoming and inclusive language
18
+ * Being respectful of differing viewpoints and experiences
19
+ * Gracefully accepting constructive criticism
20
+ * Focusing on what is best for the community
21
+ * Showing empathy towards other community members
12
22
 
13
- This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)
23
+ Examples of unacceptable behavior by participants include:
24
+
25
+ * The use of sexualized language or imagery and unwelcome sexual attention or
26
+ advances
27
+ * Trolling, insulting/derogatory comments, and personal or political attacks
28
+ * Public or private harassment
29
+ * Publishing others' private information, such as a physical or electronic
30
+ address, without explicit permission
31
+ * Other conduct which could reasonably be considered inappropriate in a
32
+ professional setting
33
+
34
+ ## Our Responsibilities
35
+
36
+ Project maintainers are responsible for clarifying the standards of acceptable
37
+ behavior and are expected to take appropriate and fair corrective action in
38
+ response to any instances of unacceptable behavior.
39
+
40
+ Project maintainers have the right and responsibility to remove, edit, or
41
+ reject comments, commits, code, wiki edits, issues, and other contributions
42
+ that are not aligned to this Code of Conduct, or to ban temporarily or
43
+ permanently any contributor for other behaviors that they deem inappropriate,
44
+ threatening, offensive, or harmful.
45
+
46
+ ## Scope
47
+
48
+ This Code of Conduct applies within all project spaces, and it also applies when
49
+ an individual is representing the project or its community in public spaces.
50
+ Examples of representing a project or community include using an official
51
+ project e-mail address, posting via an official social media account, or acting
52
+ as an appointed representative at an online or offline event. Representation of
53
+ a project may be further defined and clarified by project maintainers.
54
+
55
+ ## Enforcement
56
+
57
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
+ reported by contacting the project team at paul.christoph.goetze@gmail.com. All
59
+ complaints will be reviewed and investigated and will result in a response that
60
+ is deemed necessary and appropriate to the circumstances. The project team is
61
+ obligated to maintain confidentiality with regard to the reporter of an incident.
62
+ Further details of specific enforcement policies may be posted separately.
63
+
64
+ Project maintainers who do not follow or enforce the Code of Conduct in good
65
+ faith may face temporary or permanent repercussions as determined by other
66
+ members of the project's leadership.
67
+
68
+ ## Attribution
69
+
70
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71
+ available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72
+
73
+ [homepage]: https://www.contributor-covenant.org
74
+
75
+ For answers to common questions about this code of conduct, see
76
+ https://www.contributor-covenant.org/faq
@@ -1,4 +1,4 @@
1
- Copyright (c) 2015 Paul Götze
1
+ Copyright (c) 2015-2021 Paul Götze
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining a copy
4
4
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -1,8 +1,9 @@
1
1
  # Weka
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/weka.svg)](http://badge.fury.io/rb/weka)
4
- [![Travis Build](https://travis-ci.org/paulgoetze/weka-jruby.svg)](https://travis-ci.org/paulgoetze/weka-jruby)
4
+ [![Build Status](https://github.com/paulgoetze/weka-jruby/workflows/JRuby%20CI/badge.svg)](https://github.com/paulgoetze/weka-jruby/workflows/JRuby%20CI/badge.svg)
5
5
  [![Codacy Badge](https://api.codacy.com/project/badge/Grade/9634a6709ef545198e079a8daddff100)](https://www.codacy.com/app/paul-christoph-goetze/weka-jruby?utm_source=github.com&utm_medium=referral&utm_content=paulgoetze/weka-jruby&utm_campaign=Badge_Grade)
6
+ [![Open Source Helpers](https://www.codetriage.com/paulgoetze/weka-jruby/badges/users.svg)](https://www.codetriage.com/paulgoetze/weka-jruby)
6
7
 
7
8
  Machine Learning & Data Mining with JRuby based on the [Weka](http://www.cs.waikato.ac.nz/~ml/weka/index.html) Java library.
8
9
 
@@ -39,26 +40,30 @@ detailed information about how to use weka with JRuby and some examplary code sn
39
40
 
40
41
  ## Development
41
42
 
42
- After checking out the repo, run `bin/setup` to install dependencies.
43
- To install this gem onto your local machine, run `bundle exec rake install`.
43
+ 1. Check out the repo with `git clone git@github.com:paulgoetze/weka-jruby.git`.
44
+ 2. Set a local environment variable `export JARS_VENDOR=false`.
45
+ This will prevent
46
+ compiling the jars into your repo’s /lib directory and will load them from your local Maven repository instead.
47
+ See the [jar-dependencies README](https://github.com/mkristian/jar-dependencies#for-development-you-do-not-need-to-vendor-the-jars-at-all) for more information.
48
+ 3. Run `bin/setup` or `bundle install` to install the dependencies.
44
49
 
45
50
  Then, run `rake spec` to run the tests. You can also run `bin/console` or `rake irb` for an interactive prompt that will allow you to experiment.
46
51
 
47
52
  ## Contributing
48
53
 
49
- Bug reports and pull requests are welcome on GitHub at https://github.com/paulgoetze/weka-jruby. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](http://contributor-covenant.org/version/1/2/0).
54
+ Bug reports and pull requests are welcome on GitHub at https://github.com/paulgoetze/weka-jruby. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant Code of Conduct](https://github.com/paulgoetze/weka-jruby/blob/main/CODE_OF_CONDUCT.md).
50
55
 
51
56
  For development we use the [git branching model](http://nvie.com/posts/a-successful-git-branching-model/) described by [nvie](https://github.com/nvie).
52
57
 
53
58
  Here’s how to contribute:
54
59
 
55
- 1. Fork it ( https://github.com/paulgoetze/weka-jruby/fork )
60
+ 1. Fork it (https://github.com/paulgoetze/weka-jruby/fork)
56
61
  2. Create your feature branch (`git checkout -b feature/my-new-feature develop`)
57
62
  3. Commit your changes (`git commit -am 'Add some feature'`)
58
63
  4. Push to the branch (`git push origin feature/my-new-feature`)
59
64
  5. Create a new Pull Request
60
65
 
61
- Please try to add RSpec tests along with your new features. This will ensure that your code does not break existing functionality and that your feature is working as expected.
66
+ Please try to add RSpec tests along with your new feature. This will ensure that your code does not break existing functionality and that your feature is working as expected.
62
67
 
63
68
  We use [Rubocop](https://github.com/bbatsov/rubocop) for code style recommendations.
64
69
  Please make sure your contributions comply with the project’s Rubocop config.
@@ -69,4 +74,4 @@ The original ideas for wrapping Weka in JRuby come from [@arrigonialberto86](htt
69
74
 
70
75
  ## License
71
76
 
72
- The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
77
+ The gem is available as open source under the terms of the [MIT License](https://github.com/paulgoetze/weka-jruby/blob/main/MIT-LICENSE.txt).
data/Rakefile CHANGED
@@ -3,17 +3,7 @@ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task default: :prepare
7
- task install: :prepare
8
-
9
- desc 'Install weka jars & dependencies'
10
- task :prepare do
11
- require 'lock_jar'
12
- lib_path = File.expand_path('.', File.dirname(__FILE__))
13
- jars_dir = File.join(lib_path, 'jars')
14
-
15
- LockJar.install('Jarfile.lock', local_repo: jars_dir)
16
- end
6
+ task default: :spec
17
7
 
18
8
  desc 'Start an irb session with the gem loaded'
19
9
  task :irb do
@@ -1,11 +1,9 @@
1
1
  require 'java'
2
- require 'weka/jars'
3
2
  require 'weka/version'
4
3
  require 'weka/exceptions'
4
+ require Dir[File.join(File.dirname(__FILE__), '*_jars.rb')].first
5
5
 
6
6
  module Weka
7
- include Jars
8
-
9
7
  class << self
10
8
  def require_all(type)
11
9
  files = Dir[File.expand_path("../weka/#{type}/**/*.rb", __FILE__)]
@@ -88,6 +88,7 @@ module Weka
88
88
 
89
89
  def include_utils
90
90
  return unless utils_defined?
91
+
91
92
  "include #{utils}"
92
93
  end
93
94
 
@@ -103,7 +104,7 @@ module Weka
103
104
  end
104
105
 
105
106
  def constantize(module_names)
106
- Object.module_eval("::#{module_names}")
107
+ Object.module_eval("::#{module_names}", __FILE__, __LINE__)
107
108
  end
108
109
 
109
110
  def utils
@@ -116,6 +117,7 @@ module Weka
116
117
 
117
118
  def downcase_first_char(string)
118
119
  return if string.nil? || string.empty?
120
+
119
121
  string[0].downcase + string[1..-1]
120
122
  end
121
123
  end
@@ -1,4 +1,5 @@
1
1
  require 'weka/class_builder'
2
+ require 'weka/concerns'
2
3
 
3
4
  module Weka
4
5
  module Classifiers
@@ -6,6 +7,7 @@ module Weka
6
7
 
7
8
  class Evaluation
8
9
  include ClassBuilder
10
+ include Weka::Concerns::Persistent
9
11
 
10
12
  # Use both nomenclatures f_measure and fmeasure for consistency
11
13
  # due to jruby's auto method generation of 'fMeasure' to 'f_measure' and
@@ -23,7 +23,8 @@ module Weka
23
23
  :RandomSubSpace,
24
24
  :RegressionByDiscretization,
25
25
  :Stacking,
26
- :Vote
26
+ :Vote,
27
+ :WeightedInstancesHandlerWrapper
27
28
  end
28
29
  end
29
30
  end
@@ -19,7 +19,7 @@ module Weka
19
19
 
20
20
  error = 'Class attribute is not assigned for Instances.'
21
21
  hint = 'You can assign a class attribute with #class_attribute=.'
22
- message = "#{error} #{hint}"
22
+ message = "#{error}\n#{hint}"
23
23
 
24
24
  raise UnassignedClassError, message
25
25
  end
@@ -29,27 +29,49 @@ module Weka
29
29
 
30
30
  error = 'Classifier is not trained with Instances.'
31
31
  hint = 'You can set the training instances with #train_with_instances.'
32
- message = "#{error} #{hint}"
32
+ message = "#{error}\n#{hint}"
33
33
 
34
34
  raise UnassignedTrainingInstancesError, message
35
35
  end
36
+
37
+ def ensure_valid_instances_structure!(instances)
38
+ unless instances.is_a?(Weka::Core::Instances)
39
+ message = 'Instances has to be a Weka::Core::Instances object.'
40
+ raise ArgumentError, message
41
+ end
42
+
43
+ return if training_instances.nil?
44
+ return if instances.equal_headers(training_instances)
45
+
46
+ message = 'The passed instances need to have the same structure as ' \
47
+ 'the classifier’s training instances.'
48
+
49
+ raise InvalidInstancesStructureError, message
50
+ end
51
+
52
+ def ensure_instances_structure_available!
53
+ return unless instances_structure.nil?
54
+
55
+ error = 'Classifier does not have any instances structure info.'
56
+ hint = 'You probably tried to classify values with a ' \
57
+ 'classifier that is untrained or doesn’t have an ' \
58
+ 'instances_structure set. Please run ' \
59
+ '#train_with_instances, try serializing and ' \
60
+ 'deserializing your classifier again in case you used a ' \
61
+ 'deserialized classifier or set its instances_structure.'
62
+ message = "#{error}\n#{hint}"
63
+
64
+ raise MissingInstancesStructureError, message
65
+ end
36
66
  end
37
67
 
38
68
  module Transformers
39
69
  private
40
70
 
41
71
  def classifiable_instance_from(instance_or_values)
42
- attributes = training_instances.attributes
43
- instances = Weka::Core::Instances.new(attributes: attributes)
72
+ ensure_instances_structure_available!
44
73
 
45
- class_attribute = training_instances.class_attribute
46
- class_index = training_instances.class_index
47
- instances.insert_attribute_at(class_attribute, class_index)
48
-
49
- instances.class_index = training_instances.class_index
50
- instances.add_instance(instance_or_values)
51
-
52
- instance = instances.first
74
+ instance = instances_structure.instance_from(instance_or_values)
53
75
  instance.set_class_missing
54
76
  instance
55
77
  end
@@ -59,17 +81,24 @@ module Weka
59
81
  java_import 'java.util.Random'
60
82
  include Checks
61
83
 
62
- attr_reader :training_instances
84
+ attr_reader :training_instances, :instances_structure
63
85
 
64
86
  def train_with_instances(instances)
65
87
  ensure_class_attribute_assigned!(instances)
66
88
 
67
89
  @training_instances = instances
90
+ @instances_structure = instances.string_free_structure
91
+
68
92
  build_classifier(instances)
69
93
 
70
94
  self
71
95
  end
72
96
 
97
+ def instances_structure=(instances)
98
+ ensure_valid_instances_structure!(instances)
99
+ @instances_structure = instances.string_free_structure
100
+ end
101
+
73
102
  def cross_validate(folds: 3)
74
103
  ensure_trained_with_instances!
75
104
 
@@ -101,8 +130,6 @@ module Weka
101
130
  include Transformers
102
131
 
103
132
  def classify(instance_or_values)
104
- ensure_trained_with_instances!
105
-
106
133
  instance = classifiable_instance_from(instance_or_values)
107
134
  index = classify_instance(instance)
108
135
 
@@ -112,7 +139,7 @@ module Weka
112
139
  private
113
140
 
114
141
  def class_value_of_index(index)
115
- training_instances.class_attribute.value(index)
142
+ instances_structure.class_attribute.value(index)
116
143
  end
117
144
  end
118
145
 
@@ -136,8 +163,6 @@ module Weka
136
163
  include Transformers
137
164
 
138
165
  def distribution_for(instance_or_values)
139
- ensure_trained_with_instances!
140
-
141
166
  instance = classifiable_instance_from(instance_or_values)
142
167
  distributions = distribution_for_instance(instance)
143
168
 
@@ -147,7 +172,7 @@ module Weka
147
172
  private
148
173
 
149
174
  def class_distributions_from(distributions)
150
- class_values = training_instances.class_attribute.values
175
+ class_values = instances_structure.class_attribute.values
151
176
 
152
177
  distributions.each_with_object({}).with_index do |(distribution, result), index|
153
178
  class_value = class_values[index]
@@ -10,7 +10,9 @@ module Weka
10
10
  :Cobweb,
11
11
  :EM,
12
12
  :FarthestFirst,
13
+ :FilteredClusterer,
13
14
  :HierarchicalClusterer,
15
+ :MakeDensityBasedClusterer,
14
16
  :SimpleKMeans
15
17
  end
16
18
  end
@@ -1,12 +1,14 @@
1
+ require 'weka/concerns'
2
+
1
3
  module Weka
2
4
  module Clusterers
3
5
  java_import 'weka.clusterers.ClusterEvaluation'
4
6
 
5
7
  class ClusterEvaluation
8
+ include Concerns::Persistent
9
+
6
10
  alias summary cluster_results_to_string
7
11
  alias clusters_count num_clusters
8
12
  end
9
-
10
- Java::WekaClusterers::ClusterEvaluation.__persistent__ = true
11
13
  end
12
14
  end
@@ -27,36 +27,61 @@ module Weka
27
27
 
28
28
  error = 'Clusterer is not trained with Instances.'
29
29
  hint = 'You can set the training instances with #train_with_instances.'
30
- message = "#{error} #{hint}"
30
+ message = "#{error}\n#{hint}"
31
31
 
32
32
  raise UnassignedTrainingInstancesError, message
33
33
  end
34
- end
35
34
 
36
- module Transformers
37
- private
35
+ def ensure_valid_instances_structure!(instances)
36
+ unless instances.is_a?(Weka::Core::Instances)
37
+ message = 'Instances has to be a Weka::Core::Instances object.'
38
+ raise ArgumentError, message
39
+ end
40
+
41
+ return if training_instances.nil?
42
+ return if instances.equal_headers(training_instances)
38
43
 
39
- def clusterable_instance_from(instance_or_values)
40
- attributes = training_instances.attributes
41
- instances = Weka::Core::Instances.new(attributes: attributes)
44
+ message = 'The passed instances need to have the same structure as ' \
45
+ 'the clusterers training instances.'
42
46
 
43
- instances.add_instance(instance_or_values)
44
- instances.first
47
+ raise InvalidInstancesStructureError, message
48
+ end
49
+
50
+ def ensure_instances_structure_available!
51
+ return unless instances_structure.nil?
52
+
53
+ error = 'Clusterer does not have any instances structure info.'
54
+ hint = 'You probably tried to cluster values with a clusterer ' \
55
+ 'that is untrained or doesn’t have an ' \
56
+ 'instances_structure set. Please run ' \
57
+ '#train_with_instances, try serializing and ' \
58
+ 'deserializing your clusterer again in case you used a ' \
59
+ 'deserialized clusterer or set its instances_structure.'
60
+ message = "#{error}\n#{hint}"
61
+
62
+ raise MissingInstancesStructureError, message
45
63
  end
46
64
  end
47
65
 
48
66
  module Buildable
49
67
  include Checks
50
68
 
51
- attr_reader :training_instances
69
+ attr_reader :training_instances, :instances_structure
52
70
 
53
71
  def train_with_instances(instances)
54
72
  @training_instances = instances
73
+ @instances_structure = instances.string_free_structure
74
+
55
75
  build_clusterer(instances)
56
76
 
57
77
  self
58
78
  end
59
79
 
80
+ def instances_structure=(instances)
81
+ ensure_valid_instances_structure!(instances)
82
+ @instances_structure = instances.string_free_structure
83
+ end
84
+
60
85
  def evaluate(test_instances)
61
86
  ensure_trained_with_instances!
62
87
 
@@ -85,12 +110,11 @@ module Weka
85
110
 
86
111
  module Clusterable
87
112
  include Checks
88
- include Transformers
89
113
 
90
114
  def cluster(instance_or_values)
91
- ensure_trained_with_instances!
115
+ ensure_instances_structure_available!
92
116
 
93
- instance = clusterable_instance_from(instance_or_values)
117
+ instance = instances_structure.instance_from(instance_or_values)
94
118
  cluster_instance(instance)
95
119
  end
96
120
  end
@@ -112,12 +136,11 @@ module Weka
112
136
 
113
137
  module Distributable
114
138
  include Checks
115
- include Transformers
116
139
 
117
140
  def distribution_for(instance_or_values)
118
- ensure_trained_with_instances!
141
+ ensure_instances_structure_available!
119
142
 
120
- instance = clusterable_instance_from(instance_or_values)
143
+ instance = instances_structure.instance_from(instance_or_values)
121
144
  distribution_for_instance(instance).to_a
122
145
  end
123
146
  end
@@ -71,7 +71,7 @@ module Weka
71
71
  # The order of the if statements is important here, because a date is also
72
72
  # a numeric.
73
73
  def internal_value_of(value)
74
- return value if value === Float::NAN
74
+ return value if value.respond_to?(:nan?) && value.nan?
75
75
  return Float::NAN if [nil, '?'].include?(value)
76
76
  return parse_date(value.to_s) if date?
77
77
  return value.to_f if numeric?
@@ -18,17 +18,19 @@ module Weka
18
18
  enumerate_attributes.to_a
19
19
  end
20
20
 
21
- def each_attribute
21
+ def each_attribute(&block)
22
22
  if block_given?
23
- enumerate_attributes.each { |attribute| yield(attribute) }
23
+ enumerate_attributes.each(&block)
24
24
  else
25
25
  enumerate_attributes
26
26
  end
27
27
  end
28
28
 
29
- def each_attribute_with_index
30
- enumerate_attributes.each_with_index do |attribute, index|
31
- yield(attribute, index) if block_given?
29
+ def each_attribute_with_index(&block)
30
+ if block_given?
31
+ enumerate_attributes.each_with_index(&block)
32
+ else
33
+ enumerate_attributes
32
34
  end
33
35
  end
34
36
 
@@ -47,6 +47,11 @@ module Weka
47
47
  super(relation_name.to_s, attribute_list, 0)
48
48
  end
49
49
 
50
+ def copy
51
+ constructor = Instances.java_class.declared_constructor(Instances)
52
+ constructor.new_instance(self).to_java(Instances)
53
+ end
54
+
50
55
  def instances
51
56
  enumerate_instances.to_a
52
57
  end
@@ -54,9 +59,8 @@ module Weka
54
59
  def attributes(include_class_attribute: false)
55
60
  attrs = enumerate_attributes.to_a
56
61
 
57
- if include_class_attribute && class_attribute_defined?
58
- attrs.insert(class_index, class_attribute)
59
- end
62
+ class_available = include_class_attribute && class_attribute_defined?
63
+ attrs.insert(class_index, class_attribute) if class_available
60
64
 
61
65
  attrs
62
66
  end
@@ -94,31 +98,35 @@ module Weka
94
98
  check_for_attribute_type(type)
95
99
  end
96
100
 
97
- def each
101
+ def each(&block)
98
102
  if block_given?
99
- enumerate_instances.each { |instance| yield(instance) }
103
+ enumerate_instances.each(&block)
100
104
  else
101
105
  enumerate_instances
102
106
  end
103
107
  end
104
108
 
105
- def each_with_index
106
- enumerate_instances.each_with_index do |instance, index|
107
- yield(instance, index) if block_given?
109
+ def each_with_index(&block)
110
+ if block_given?
111
+ enumerate_instances.each_with_index(&block)
112
+ else
113
+ enumerate_instances
108
114
  end
109
115
  end
110
116
 
111
- def each_attribute
117
+ def each_attribute(&block)
112
118
  if block_given?
113
- enumerate_attributes.each { |attribute| yield(attribute) }
119
+ enumerate_attributes.each(&block)
114
120
  else
115
121
  enumerate_attributes
116
122
  end
117
123
  end
118
124
 
119
- def each_attribute_with_index
120
- enumerate_attributes.each_with_index do |attribute, index|
121
- yield(attribute, index) if block_given?
125
+ def each_attribute_with_index(&block)
126
+ if block_given?
127
+ enumerate_attributes.each_with_index(&block)
128
+ else
129
+ enumerate_attributes
122
130
  end
123
131
  end
124
132
 
@@ -267,6 +275,33 @@ module Weka
267
275
  Matrix[*instances.map(&:values)]
268
276
  end
269
277
 
278
+ # Wrap the attribute values for the instance to be added with
279
+ # an Instance object, if needed. The Instance object is
280
+ # assigned with the given weight.
281
+ #
282
+ # @param [Instance, Array, Hash] instance_or_values either the
283
+ # instance object to be added or the attribute values for it.
284
+ # For the latter case, it accepts an array or a hash.
285
+ #
286
+ # @param [Float] weight the weight for the Instance to be added
287
+ #
288
+ # @return [Instance] the object that contains the given
289
+ # attribute values.
290
+ def instance_from(instance_or_values, weight: 1.0)
291
+ dataset = string_free_structure
292
+
293
+ if instance_or_values.is_a?(Java::WekaCore::Instance)
294
+ instance = instance_or_values
295
+ instance.weight = weight
296
+ else
297
+ data = instance_data(instance_or_values)
298
+ instance = DenseInstance.new(data, weight: weight)
299
+ end
300
+
301
+ dataset.add(instance)
302
+ dataset.first
303
+ end
304
+
270
305
  private
271
306
 
272
307
  def add_attribute(attribute)
@@ -274,9 +309,7 @@ module Weka
274
309
  end
275
310
 
276
311
  def ensure_attribute_defined!(name)
277
- if attribute_names(include_class_attribute: true).include?(name.to_s)
278
- return
279
- end
312
+ return if attribute_names(include_class_attribute: true).include?(name.to_s)
280
313
 
281
314
  error = "\"#{name}\" is not defined."
282
315
  hint = 'Only defined attributes can be used as class attribute!'
@@ -291,42 +324,19 @@ module Weka
291
324
  end
292
325
  end
293
326
 
294
- # Wrap the attribute values for the instance to be added with
295
- # an Instance object, if needed. The Instance object is
296
- # assigned with the given weight.
297
- #
298
- # @param [Instance, Array, Hash] instance_or_values either the
299
- # instance object to be added or the attribute values for it.
300
- # For the latter case, it accepts an array or a hash.
301
- #
302
- # @param [Float] weight the weight for the Instance to be added
303
- #
304
- # @return [Instance] the object that contains the given
305
- # attribute values.
306
- def instance_from(instance_or_values, weight:)
307
- if instance_or_values.is_a?(Java::WekaCore::Instance)
308
- instance_or_values.weight = weight
309
- instance_or_values
310
- else
311
- if instance_or_values.is_a?(Hash)
312
- instance_or_values = attribute_values_from_hash(instance_or_values)
313
- end
314
-
315
- data = internal_values_of(instance_or_values)
316
-
317
- if has_string_attribute?
318
- data = check_string_attributes(data, instance_or_values)
319
- end
320
-
321
- DenseInstance.new(data, weight: weight)
322
- end
323
- end
324
-
325
327
  def map_attribute_type(type)
326
328
  return -1 unless Attribute::TYPES.include?(type.downcase.to_sym)
329
+
327
330
  Attribute.const_get(type.upcase)
328
331
  end
329
332
 
333
+ def instance_data(values)
334
+ values = attribute_values_from_hash(values) if values.is_a?(Hash)
335
+ data = internal_values_of(values)
336
+ data = check_string_attributes(data, values) if has_string_attribute?
337
+ data
338
+ end
339
+
330
340
  # Convert a hash whose keys are attribute names and values are attribute
331
341
  # values into an array containing attribute values in the order
332
342
  # of the Instances attributes.
@@ -3,9 +3,47 @@ module Weka
3
3
  java_import 'weka.core.SerializationHelper'
4
4
 
5
5
  class SerializationHelper
6
+ STRUCTURE_FILE_EXTENSION = 'structure'.freeze
7
+
6
8
  class << self
9
+ original_read = instance_method(:read)
10
+ original_write = instance_method(:write)
11
+
12
+ define_method(:read) do |filename|
13
+ object = original_read.bind(self).call(filename)
14
+
15
+ structure_filename = structure_file(filename)
16
+ structure_needed = object.respond_to?(:instances_structure)
17
+ structure_available = File.exist?(structure_filename)
18
+
19
+ if structure_needed && structure_available
20
+ structure = original_read.bind(self).call(structure_filename)
21
+ object.instances_structure = structure
22
+ end
23
+
24
+ object
25
+ end
26
+
27
+ define_method(:write) do |filename, object|
28
+ structure_needed = object.respond_to?(:instances_structure)
29
+
30
+ if structure_needed && object.instances_structure
31
+ structure_filename = structure_file(filename)
32
+ structure = object.instances_structure
33
+ original_write.bind(self).call(structure_filename, structure)
34
+ end
35
+
36
+ original_write.bind(self).call(filename, object)
37
+ end
38
+
7
39
  alias deserialize read
8
40
  alias serialize write
41
+
42
+ private
43
+
44
+ def structure_file(filename)
45
+ "#{filename}.#{STRUCTURE_FILE_EXTENSION}"
46
+ end
9
47
  end
10
48
  end
11
49
  end
@@ -3,4 +3,6 @@ module Weka
3
3
 
4
4
  class UnassignedClassError < Error; end
5
5
  class UnassignedTrainingInstancesError < Error; end
6
+ class MissingInstancesStructureError < Error; end
7
+ class InvalidInstancesStructureError < Error; end
6
8
  end
@@ -1 +1,17 @@
1
1
  Weka.require_all :filters
2
+
3
+ require 'weka/class_builder'
4
+
5
+ module Weka
6
+ module Filters
7
+ include ClassBuilder
8
+
9
+ build_classes :CheckSource,
10
+ :Filter,
11
+ :MultiFilter,
12
+ :RenameRelation,
13
+ :SimpleBatchFilter,
14
+ :SimpleFilter,
15
+ :SimpleStreamFilter
16
+ end
17
+ end
@@ -15,13 +15,16 @@ module Weka
15
15
  :AddUserFields,
16
16
  :AddUserFieldsBeanInfo,
17
17
  :AddValues,
18
+ :CartesianProduct,
18
19
  :Center,
19
20
  :ChangeDateFormat,
20
21
  :ClassAssigner,
21
22
  :ClusterMembership,
22
23
  :Copy,
23
24
  :Discretize,
25
+ :DateToNumeric,
24
26
  :FirstOrder,
27
+ :FixedDictionaryStringToWordVector,
25
28
  :InterquartileRange,
26
29
  :KernelFilter,
27
30
  :MakeIndicator,
@@ -34,9 +37,11 @@ module Weka
34
37
  :Normalize,
35
38
  :NumericCleaner,
36
39
  :NumericToBinary,
40
+ :NumericToDate,
37
41
  :NumericToNominal,
38
42
  :NumericTransform,
39
43
  :Obfuscate,
44
+ :OrdinalToNumeric,
40
45
  :PartitionedMultiFilter,
41
46
  :PKIDiscretize,
42
47
  :PotentialClassIgnorer,
@@ -1,3 +1,3 @@
1
1
  module Weka
2
- VERSION = '0.5.0'.freeze
2
+ VERSION = '0.7.3'.freeze
3
3
  end
@@ -18,15 +18,15 @@ Gem::Specification.new do |spec|
18
18
  spec.platform = 'java'
19
19
  spec.required_ruby_version = '~> 2.0'
20
20
 
21
- spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
+ spec.files = Dir['**/{.*,*}'].reject { |f| f.match(%r{^((spec|jars|pkg)/|.*\.lock|lib/.*_jars\.rb)}) || File.directory?(f) }
22
22
  spec.bindir = 'bin'
23
23
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
24
24
  spec.require_paths = ['lib']
25
25
 
26
- spec.add_runtime_dependency 'lock_jar', '~> 0.13'
26
+ spec.add_development_dependency 'bundler', '~> 2.0'
27
+ spec.add_development_dependency 'rake', '~> 13.0'
28
+ spec.add_development_dependency 'rspec', '~> 3.10'
27
29
 
28
- spec.add_development_dependency 'bundler', '~> 1.6'
29
- spec.add_development_dependency 'rake', '~> 10.0'
30
- spec.add_development_dependency 'rspec', '~> 3.0'
31
- spec.add_development_dependency 'shoulda-matchers', '~> 3.0'
30
+ spec.add_runtime_dependency 'jar-dependencies', '~> 0.4.1'
31
+ spec.requirements << 'jar nz.ac.waikato.cms.weka, weka-dev, 3.9.5'
32
32
  end
metadata CHANGED
@@ -1,85 +1,71 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.7.3
5
5
  platform: java
6
6
  authors:
7
7
  - Paul Götze
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-17 00:00:00.000000000 Z
11
+ date: 2021-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
16
  - - "~>"
17
17
  - !ruby/object:Gem::Version
18
- version: '0.13'
19
- name: lock_jar
20
- prerelease: false
21
- type: :runtime
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0.13'
27
- - !ruby/object:Gem::Dependency
28
- requirement: !ruby/object:Gem::Requirement
29
- requirements:
30
- - - "~>"
31
- - !ruby/object:Gem::Version
32
- version: '1.6'
18
+ version: '2.0'
33
19
  name: bundler
34
- prerelease: false
35
20
  type: :development
21
+ prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
38
24
  - - "~>"
39
25
  - !ruby/object:Gem::Version
40
- version: '1.6'
26
+ version: '2.0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  requirement: !ruby/object:Gem::Requirement
43
29
  requirements:
44
30
  - - "~>"
45
31
  - !ruby/object:Gem::Version
46
- version: '10.0'
32
+ version: '13.0'
47
33
  name: rake
48
- prerelease: false
49
34
  type: :development
35
+ prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
38
  - - "~>"
53
39
  - !ruby/object:Gem::Version
54
- version: '10.0'
40
+ version: '13.0'
55
41
  - !ruby/object:Gem::Dependency
56
42
  requirement: !ruby/object:Gem::Requirement
57
43
  requirements:
58
44
  - - "~>"
59
45
  - !ruby/object:Gem::Version
60
- version: '3.0'
46
+ version: '3.10'
61
47
  name: rspec
62
- prerelease: false
63
48
  type: :development
49
+ prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '3.0'
54
+ version: '3.10'
69
55
  - !ruby/object:Gem::Dependency
70
56
  requirement: !ruby/object:Gem::Requirement
71
57
  requirements:
72
58
  - - "~>"
73
59
  - !ruby/object:Gem::Version
74
- version: '3.0'
75
- name: shoulda-matchers
60
+ version: 0.4.1
61
+ name: jar-dependencies
62
+ type: :runtime
76
63
  prerelease: false
77
- type: :development
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '3.0'
68
+ version: 0.4.1
83
69
  description: A JRuby wrapper for the Weka library (http://www.cs.waikato.ac.nz/ml/weka/)
84
70
  email:
85
71
  - paul.christoph.goetze@gmail.com
@@ -92,11 +78,9 @@ files:
92
78
  - ".gitignore"
93
79
  - ".rspec"
94
80
  - ".rubocop.yml"
95
- - ".travis.yml"
81
+ - CHANGELOG.md
96
82
  - CODE_OF_CONDUCT.md
97
83
  - Gemfile
98
- - Jarfile
99
- - Jarfile.lock
100
84
  - MIT-LICENSE.txt
101
85
  - README.md
102
86
  - Rakefile
@@ -142,7 +126,6 @@ files:
142
126
  - lib/weka/filters/unsupervised/attribute.rb
143
127
  - lib/weka/filters/unsupervised/instance.rb
144
128
  - lib/weka/filters/utils.rb
145
- - lib/weka/jars.rb
146
129
  - lib/weka/version.rb
147
130
  - weka.gemspec
148
131
  homepage: https://github.com/paulgoetze/weka-jruby
@@ -163,9 +146,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
146
  - - ">="
164
147
  - !ruby/object:Gem::Version
165
148
  version: '0'
166
- requirements: []
167
- rubyforge_project:
168
- rubygems_version: 2.6.11
149
+ requirements:
150
+ - jar nz.ac.waikato.cms.weka, weka-dev, 3.9.5
151
+ rubygems_version: 3.0.6
169
152
  signing_key:
170
153
  specification_version: 4
171
154
  summary: Machine Learning & Data Mining with JRuby.
@@ -1,15 +0,0 @@
1
- sudo: false
2
- language: ruby
3
-
4
- rvm:
5
- - jruby-9000
6
-
7
- cache:
8
- - bundler
9
-
10
- before_install:
11
- - rvm get head
12
- - rvm use jruby-9.0.1.0 --install
13
- - gem install bundler
14
-
15
- script: bundle exec rake spec
data/Jarfile DELETED
@@ -1 +0,0 @@
1
- jar 'nz.ac.waikato.cms.weka:weka-dev:jar:3.7.13'
@@ -1,17 +0,0 @@
1
- ---
2
- version: 0.13.0
3
- groups:
4
- default:
5
- dependencies:
6
- - nz.ac.waikato.cms.weka.thirdparty:java-cup-11b-runtime:jar:2015.03.26
7
- - nz.ac.waikato.cms.weka.thirdparty:java-cup-11b:jar:2015.03.26
8
- - nz.ac.waikato.cms.weka:weka-dev:jar:3.7.13
9
- - org.pentaho.pentaho-commons:pentaho-package-manager:jar:1.0.11
10
- artifacts:
11
- - jar:nz.ac.waikato.cms.weka:weka-dev:jar:3.7.13:
12
- transitive:
13
- nz.ac.waikato.cms.weka.thirdparty:java-cup-11b:jar:2015.03.26: {}
14
- org.pentaho.pentaho-commons:pentaho-package-manager:jar:1.0.11: {}
15
- nz.ac.waikato.cms.weka.thirdparty:java-cup-11b-runtime:jar:2015.03.26: {}
16
- remote_repositories:
17
- - http://repo1.maven.org/maven2/
@@ -1,16 +0,0 @@
1
- module Weka
2
- module Jars
3
- def self.included(base)
4
- base.class_eval do
5
- require 'lock_jar'
6
-
7
- lib_path = File.expand_path('../../', File.dirname(__FILE__))
8
- lockfile = File.join(lib_path, 'Jarfile.lock')
9
- jars_dir = File.join(lib_path, 'jars')
10
-
11
- LockJar.install(lockfile, local_repo: jars_dir)
12
- LockJar.load(lockfile, local_repo: jars_dir)
13
- end
14
- end
15
- end
16
- end