machine_learner 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ae7126878df4103a2ee33bd433c109970ec39c3e
4
+ data.tar.gz: 8f33eb69cd5b5f86bebe1659c3e529c789180217
5
+ SHA512:
6
+ metadata.gz: 73bd4e5130971a2de7766f6c044d1e77792dfcb48e32eb7d16314a63aebfa22b3bf6e69c85c1d646e252e913e45a7637e692e838691ed29ce0719544bbe91616
7
+ data.tar.gz: 22439d8d45b57399efe48fb4addd0b9411b5b53be0d0c84c46b347719d666328c8be528c86acabe17d23ce116eb14e49bc4063f10f6cf101c002514067d49f5c
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ machine_learner-*.gem
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.1
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in machine_learner.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 YutaTanaka
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # MachineLearner
2
+
3
+ A library for machine learning. You can use AdaBoost and Naive Bayes easily.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'machine_learner'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install machine_learner
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/84q/machine_learning/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Define the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the spec suite.
task default: :spec
7
+
@@ -0,0 +1,115 @@
1
+ # coding: utf-8
2
+
3
+ require_relative 'dataset.rb'
4
+ require_relative 'learner.rb'
5
+
6
module MachineLearner
  # Learner that tries several candidate classifiers and keeps the one with
  # the lowest weighted training error.
  #
  # Subclasses must override {#classifiers} to yield each candidate.
  class SimpleLearner < Learner
    # Trains on the given data.
    #
    # Every classifier yielded by {#classifiers} is tested against all
    # training samples; the one with the smallest weighted error is adopted.
    #
    # @param datas [Array<DataSet>] training samples
    # @param ds [Array<Float>] weight of each training sample
    # @return [Array<Boolean>] per-sample outcome of the adopted classifier
    #   (true when the sample was classified correctly)
    # @raise [ArgumentError] if {#classifiers} yields no candidate
    def learn(datas, ds)
      @log = []
      # The enumeration hook defined by this class is `classifiers`; the
      # previously called `enum_classifier` does not exist.
      classifiers(datas, ds) do |candidate|
        results = datas.map { |data| candidate.test(data) }
        # epsilon: total weight of the misclassified samples.
        epsilon = results.zip(ds).inject(0) { |sum, (ok, w)| ok ? sum : sum + w }
        @log << { epsilon: epsilon, classifier: candidate, results: results }
      end
      raise ArgumentError, "no classifiers were yielded" if @log.empty?

      @best = @log.min_by { |entry| entry[:epsilon] }
      @best[:results]
    end

    # Enumerates the candidate classifiers, passing each one to the block.
    # Must be overridden by subclasses.
    #
    # @param datas [Array<DataSet>] training samples
    # @param ds [Array<Float>] weight of each training sample
    # @raise [NotImplementedError] always, in this base implementation
    def classifiers(datas, ds)
      raise NotImplementedError, "#{self.class} must implement #classifiers"
    end

    # @return [String] string form of the adopted classifier
    def to_s
      cl.to_s
    end

    # Classifies a feature vector with the adopted classifier.
    #
    # @param x feature vector
    # @return [Integer] predicted label
    def classify(x)
      cl.classify(x)
    end

    private

    # @return [Classifier] the classifier with the lowest weighted error
    def cl
      @best[:classifier]
    end

    # Evaluation record of every candidate tried by the last {#learn} call.
    # Each hash has the keys :epsilon, :classifier and :results.
    #
    # @return [Array<Hash>] one entry per candidate classifier
    def log
      @log
    end
  end

  # AdaBoost: combines several learners into a weighted vote.
  class AdaBoost < Learner
    # @param learners [Array<Learner>] learners to be boosted, in training order
    def initialize(learners)
      @learners = learners
      @alphas = []
    end

    # Trains every learner in turn, re-weighting the samples after each one.
    #
    # @param datas [Array<DataSet>] training samples
    # @param ds [Array<Float>, nil] initial sample weights; uniform when nil
    # @return [Array<Boolean>] per-sample outcome of the combined classifier
    #   (true when the sample was classified correctly)
    def learn(datas, ds = nil)
      ds ||= [1.0 / datas.size] * datas.size
      # Reset so that a second call does not leave more alphas than learners,
      # which would make the transpose in classify/to_s fail.
      @alphas = []

      @learners.each do |learner|
        # results: true/false per training sample for this learner.
        results = learner.learn(datas, ds)
        # epsilon: weighted error rate (total weight of the failed samples).
        epsilon = results.zip(ds).inject(0) { |sum, (ok, w)| ok ? sum : sum + w }
        # Keep epsilon strictly inside (0, 1): 0 would divide by zero and 1
        # would give log(0) = -Infinity, turning every weight into NaN.
        floor = ds.min * 0.1
        epsilon = floor if epsilon == 0
        epsilon = 1 - floor if epsilon >= 1
        # alpha: vote weight of this learner (larger when epsilon is smaller).
        alpha = Math.log((1 - epsilon) / epsilon) / 2
        @alphas << alpha
        # Shrink the weight of correctly classified samples, grow the rest,
        # then normalise so the weights sum to 1.
        ds = ds.map.with_index { |w, i| w * Math.exp(alpha * (results[i] ? -1 : 1)) }
        z = ds.inject(:+)
        ds = ds.map { |w| w / z }
      end

      datas.map { |data| test(data) }
    end

    # Classifies a feature vector by the sign of the alpha-weighted vote.
    #
    # @param x feature vector
    # @return [Integer] 1 when the weighted vote is positive, otherwise -1
    def classify(x)
      vote = [@learners, @alphas].transpose.inject(0) do |sum, (learner, alpha)|
        sum + learner.classify(x) * alpha
      end
      vote > 0 ? 1 : -1
    end

    # @return [String] the ensemble written as "(alpha * learner) + ..."
    def to_s
      [@learners, @alphas].transpose.map { |l, a| "(#{a.round(3)} * #{l})" }.join(" + ")
    end
  end
end
114
+
115
+ # vim: set et ts=2 sts=2 sw=2:
@@ -0,0 +1,72 @@
1
+ # coding: utf-8
2
+
3
+ require_relative 'dataset.rb'
4
+ require_relative 'learner.rb'
5
+
6
module MachineLearner
  # Naive Bayes learner over discrete feature values.
  class BayesLearner < Learner
    # Stores the training data and derives the candidate labels.
    #
    # @param datas [Array<DataSet>] training samples
    # @param ds [Array<Float>, nil] sample weights; accepted for interface
    #   compatibility and not used by this learner
    # @return [Array<Boolean>] per-sample outcome on the training data
    #   (true when the sample was classified correctly)
    def learn(datas, ds = nil)
      @training = datas
      @ys = @training.map { |data| data.y }
      @candidate_y = @ys.uniq.compact
      # Drop feature values cached from a previous training run.
      @candidate_x = []
      # Return the documented result array so this learner can be driven by
      # AdaBoost, which zips the result with the sample weights.
      datas.map { |data| test(data) }
    end

    # Picks the candidate label with the highest log-likelihood.
    #
    # @param xs [Array] feature vector; nil entries are skipped
    # @return [Integer] predicted label (0 when there are no candidates)
    def classify(xs)
      best_y = 0
      best_score = -Float::INFINITY
      @candidate_y.each do |y|
        score = p_y_x(y, xs)
        # Strict comparison: on a tie the earlier candidate wins.
        best_y, best_score = y, score if best_score < score
      end
      best_y
    end

    private

    # Log of P(Y) * product of P(Xi | Y) over the non-nil features.
    def p_y_x(y, xs)
      score = Math.log(p_y(y))
      xs.each_with_index do |xi, i|
        next if xi.nil?
        score += Math.log(p_xi_y(xi, i, y))
      end
      score
    end

    # Smoothed estimate of P(Xi = x | Y = y) from the training data.
    def p_xi_y(x, i, y)
      total = 0
      count = 0
      @ys.each_with_index do |label, j|
        next if label != y
        total += 1
        count += 1 if @training[j].x[i] == x
      end
      # Add-one smoothing; the denominator reserves one extra slot beyond the
      # feature values seen in training.
      (count + 1).to_f / (total + candidate_x(i).size + 1).to_f
    end

    # Smoothed estimate of P(Y = y) from the training labels.
    def p_y(y)
      count = @ys.count(y)
      (count + 1).to_f / (@ys.size + @candidate_y.size + 1).to_f
    end

    # Distinct non-nil values seen for the given feature index (memoised).
    def candidate_x(field)
      @candidate_x ||= []
      @candidate_x[field] ||= @training.map { |data| data.x[field] }.uniq.compact
    end
  end
end
70
+
71
+ # vim: set et ts=2 sts=2 sw=2:
72
+
@@ -0,0 +1,24 @@
1
# coding: utf-8

module MachineLearner
  # A single sample handed to a learner: a feature vector x together with
  # its label y.
  class DataSet
    attr_reader :x, :y

    # @param x feature vector
    # @param y label (classification result)
    def initialize(x, y)
      @x = x
      @y = y
    end

    # @return [String] the sample rendered as "(x=>y)"
    def to_s
      "(#{@x}=>#{@y})"
    end
  end
end
23
+
24
+ # vim: set et ts=2 sts=2 sw=2:
@@ -0,0 +1,44 @@
1
# coding: utf-8

module MachineLearner
  # Base class of every classifier.
  class Classifier
    # Classifies a feature vector. Must be overridden by subclasses.
    #
    # @param x feature vector
    # @return [Integer] predicted label
    # @raise [NotImplementedError] always, in this base implementation
    def classify(x)
      raise NotImplementedError, "#{self.class} must implement #classify"
    end

    # Checks the classifier against one labelled sample.
    #
    # @param data [DataSet] sample to check
    # @return [Boolean] true when the predicted label equals data.y
    def test(data)
      classify(data.x) == data.y
    end

    # Measures accuracy over a set of labelled samples.
    #
    # @param datas [Array<DataSet>] test samples
    # @return [Float] fraction classified correctly, between 0 and 1;
    #   0.0 for an empty set (avoids the NaN produced by 0 / 0.0)
    def evaluate(datas)
      return 0.0 if datas.empty?

      datas.count { |data| test(data) } / datas.size.to_f
    end
  end

  # Base class of every learner (a classifier that can be trained).
  class Learner < Classifier
    # Trains on the given data. Must be overridden by subclasses.
    #
    # @param datas [Array<DataSet>] training samples
    # @param ds [Array<Float>, nil] weight of each sample; optional, matching
    #   the subclasses in this library that default it to nil
    # @return [Array<Boolean>] per-sample outcome after training
    # @raise [NotImplementedError] always, in this base implementation
    def learn(datas, ds = nil)
      raise NotImplementedError, "#{self.class} must implement #learn"
    end
  end
end
43
+
44
+ # vim: set et ts=2 sts=2 sw=2:
@@ -0,0 +1,3 @@
1
module MachineLearner
  # Version of the machine_learner gem. Frozen so the constant cannot be
  # mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,3 @@
1
+ require "machine_learner/version"
2
+ require "machine_learner/adaboost"
3
+ require "machine_learner/bayes"
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'machine_learner/version'
5
+
6
# Gem specification for machine_learner.
Gem::Specification.new do |spec|
  spec.name          = "machine_learner"
  spec.version       = MachineLearner::VERSION
  spec.authors       = ["YutaTanaka"]
  spec.email         = ["yuta84q.ihcarok@gmail.com"]
  spec.summary       = %q{Library for machine learning.}
  spec.description   = "This is a library for machine learning.\n" +
                       "You can use AdaBoost and Naive Bayes easily.\n"
  spec.homepage      = "https://github.com/84q/machine_learning"
  spec.license       = "MIT"

  # Package every file tracked by git.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake", "~> 10.0"
  # '~> 0' only allows RSpec 0.x, which predates the `expect` syntax used by
  # the spec suite; require the 3.x series instead.
  spec.add_development_dependency "rspec", "~> 3.0"
end
26
+
27
+ # vim: set et ts=2 sts=2 sw=2:
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
describe MachineLearner do
  it 'has a version number' do
    expect(MachineLearner::VERSION).not_to be nil
  end

  # Replaces the generator's always-failing placeholder example
  # (`expect(false).to eq(true)`) with a real assertion.
  it 'renders a DataSet as "(x=>y)"' do
    sample = MachineLearner::DataSet.new([1, 0], 1)
    expect(sample.to_s).to eq('([1, 0]=>1)')
  end
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'machine_learner'
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: machine_learner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - YutaTanaka
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: |
56
+ This is a library for machine learning.
57
+ You can use AdaBoost and Naive Bayes easily.
58
+ email:
59
+ - yuta84q.ihcarok@gmail.com
60
+ executables: []
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - ".gitignore"
65
+ - ".rspec"
66
+ - ".travis.yml"
67
+ - Gemfile
68
+ - LICENSE.txt
69
+ - README.md
70
+ - Rakefile
71
+ - lib/machine_learner.rb
72
+ - lib/machine_learner/adaboost.rb
73
+ - lib/machine_learner/bayes.rb
74
+ - lib/machine_learner/dataset.rb
75
+ - lib/machine_learner/learner.rb
76
+ - lib/machine_learner/version.rb
77
+ - machine_learner.gemspec
78
+ - spec/machine_learner_spec.rb
79
+ - spec/spec_helper.rb
80
+ homepage: https://github.com/84q/machine_learning
81
+ licenses:
82
+ - MIT
83
+ metadata: {}
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project:
100
+ rubygems_version: 2.2.2
101
+ signing_key:
102
+ specification_version: 4
103
+ summary: Library for machine learning.
104
+ test_files:
105
+ - spec/machine_learner_spec.rb
106
+ - spec/spec_helper.rb
107
+ has_rdoc: