fisher_classifier 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OGYyYTEwNDI3NjdhNjYwNmYxMTllYzhlZWJiNDE1ODhjNGQ5OGE0Yw==
5
+ data.tar.gz: !binary |-
6
+ NjAzYTczODQzZWIwZjhjNTRjM2I1OTFiMTk2MDk1ZjMxNWI4ODEyMg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NWQ3NjQyN2U1MzI4MjExMzIyNjAwNDFiYzQxMmMyMTdjMTNjNTE0N2FmMDU3
10
+ OWQ2NDc5OGIxMzVlOGZjNDUwMzY2MTliNjg5YjU2ZTkxZWYwYTUwM2QxZTgy
11
+ M2FhOWVhYWYwZjgzZTI5ZTM4YWYzMThmMDZlYzE0YjEwMGM2N2Q=
12
+ data.tar.gz: !binary |-
13
+ NGMwNjFlZDEwZjlhZDZkNjY0M2E4MTJlY2Q3YTdhNzVhOGUzNmYwMWQ5MzA5
14
+ OTIzOGRiMjZlNjZmMjE4YTIxYjg0ZjhkMTQwMjA3MjUxNTNhNTdkMGRkYzkw
15
+ MTgwOWZmNTZkYWE3ZTg4Y2RjMDYxYWY1Y2ZiNGFlZDZmZWFkNmQ=
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ rvm:
2
+ - 1.9.2
3
+ - 1.9.3
4
+ - 2.0.0
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
source 'https://rubygems.org'

# Pull in the dependencies declared in fisher_classifier.gemspec.
gemspec

# Tooling used to build the gem and run its specs.
gem 'rake'
gem 'rspec'
# Not auto-required by Bundler; the spec helper requires it itself.
gem 'coveralls', require: false
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Andrew8xx8
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # FisherClassifier
2
+
3
+ Light document classifier based on Fisher method.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'fisher_classifier'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install fisher_classifier
18
+
19
+ ## Usage
20
+
21
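+ ```ruby
+ classifier = FisherClassifier.create_in_memory
+
+ classifier.train 'the quick rabbit jumps fences', :good
+ classifier.train 'buy pharmaceuticals now', :bad
+
+ classifier.classify('buy now')          # => :bad
+ classifier.classify('rabbit jumps now') # => :good
+ ```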
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+
4
# Defines the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# `rake` (default) and `rake test` both depend on `spec`.
%w[default test].each { |alias_name| task alias_name => :spec }
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
# Make lib/ loadable so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'fisher_classifier/version'

Gem::Specification.new do |s|
  s.name          = 'fisher_classifier'
  s.version       = FisherClassifier::VERSION
  s.authors       = ['Andrew8xx8']
  s.email         = ['avk@8xx8.ru']
  s.description   = 'Light document classifier based on Fisher method'
  s.summary       = 'Light document classifier based on Fisher method'
  s.homepage      = ''
  s.license       = 'MIT'

  # Package every file tracked by git; executables and test files are
  # picked out of that list by path prefix.
  s.files         = `git ls-files`.split($/)
  s.executables   = s.files.grep(%r{^bin/}) { |path| File.basename(path) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ['lib']

  s.add_development_dependency 'bundler', '~> 1.3'
  %w[rake awesome_print].each do |dev_gem|
    s.add_development_dependency dev_gem
  end
end
@@ -0,0 +1,70 @@
1
+ require "fisher_classifier/version"
2
+
3
# Entry points for building classifiers.
module FisherClassifier
  autoload :Classifier, 'fisher_classifier/classifier'
  autoload :Config, 'fisher_classifier/config'
  autoload :Meta, 'fisher_classifier/meta'

  class << self
    # Builds a classifier from a configuration block.
    #
    # The block is handed to Config, which evaluates it against the config
    # instance; the resulting config is wrapped in a Classifier.
    #
    # @return [FisherClassifier::Classifier]
    def create(&block)
      Classifier.new(Config.new(block))
    end

    # Builds a classifier whose counters live in plain hashes held by the
    # config instance.
    #
    # Features are the space-separated words of the text, the known
    # categories are :good and :bad, and the default category is :none.
    #
    # @return [FisherClassifier::Classifier]
    def create_in_memory
      create do
        # category => { feature => count }
        @features = Hash.new { |by_category, category| by_category[category] = Hash.new(0) }
        # category => count
        @categories = Hash.new(0)

        inc_feature { |feature, category| @features[category][feature] += 1 }

        inc_category { |category| @categories[category] += 1 }

        get_features { |text| text.split(' ') }

        categories { [:good, :bad] }

        # Read the category counter itself; an unseen category counts as 0.
        category_count { |category| @categories.fetch(category, 0) }

        # key? guards the lookup so reading an unseen category does not
        # create an empty entry for it.
        features_count do |feature, category|
          @features.key?(category) ? @features[category].fetch(feature, 0) : 0
        end

        default_category { :none }
      end
    end
  end
end
@@ -0,0 +1,137 @@
1
+ # encoding: utf-8
2
module FisherClassifier
  # Text classifier that scores each category with Fisher's method.
  #
  # Storage and tokenisation are delegated to the config object given to
  # the constructor: named callbacks are invoked through +config.call+ and
  # plain settings (+:weight+, +:ap+) are read through +config.get+.
  class Classifier
    include FisherClassifier::Meta

    # @param config [#call, #get] configuration holding callbacks and settings
    def initialize(config)
      @config = config
    end

    # Records every feature of +text+ as an observation of +category+.
    #
    # NOTE(review): +inc_category+ runs once per feature, not once per
    # trained text, so +category_count+ grows with the number of features.
    # Left as is because existing stored counts were built this way;
    # confirm whether a per-text count was intended.
    def train(text, category)
      get_features(text).each do |feature|
        inc_feature(feature, category)
        inc_category(category)
      end
    end

    # Returns the category with the highest Fisher probability for +text+,
    # or the configured default category when no category scores above 0.0.
    def classify(text)
      features = get_features(text)
      best = default_category
      max = 0.0

      categories.each do |category|
        prob = fisher_prob(category, features)
        next unless prob > max

        best = category
        max = prob
      end

      best
    end

    private

    # Combined probability that +features+ belong to +category+.
    def fisher_prob(category, features)
      invchi2(
        fisher_factor(probs_multiply(features, category)),
        features.size * 2
      )
    end

    # Product of the weighted probabilities of all features; 0 when the
    # feature list is empty.
    def probs_multiply(features, category)
      features.map { |feature| weighted_prob(feature, category) }.inject(:*) || 0
    end

    # -2 * ln(p), the statistic fed to invchi2.
    def fisher_factor(probs_multiply)
      -2 * Math.log(probs_multiply)
    end

    # Feature count in the category divided by the category count; returns
    # the zero count itself when the category has no observations.
    def feature_prob(feature, category)
      cc = category_count(category)
      return cc if cc.zero?

      features_count(feature, category) / cc.to_f
    end

    # Blends the assumed probability +ap+ (with strength +weight+) with the
    # observed category probability, weighted by how often the feature was
    # seen across all categories.
    def weighted_prob(feature, category)
      current_prob = category_prob(feature, category)
      totals = feature_in_all_categories(feature)

      (weight * ap + totals * current_prob) / (weight + totals).to_f
    end

    # Total count of +feature+ over all categories (0 when there are none).
    def feature_in_all_categories(feature)
      categories.map { |c| features_count(feature, c) }.inject(0, :+)
    end

    # Share of this category's feature probability in the sum over all
    # categories; a zero feature probability is returned unchanged.
    def category_prob(feature, category)
      fp = feature_prob(feature, category)
      return fp if fp.zero?

      fp / feature_freqsum(feature, category)
    end

    # Sum of feature_prob over all categories. +category+ is unused; the
    # parameter is kept so existing callers keep working.
    def feature_freqsum(feature, category)
      categories.map { |c| feature_prob(feature, c) }.inject(0, :+)
    end

    # Upper-tail probability of a chi-square value for an even number of
    # degrees of freedom:
    #
    #   exp(-m) * (1 + m + m^2/2! + ... + m^(df/2 - 1)/(df/2 - 1)!),  m = chi / 2
    #
    # The series has exactly df/2 terms. The first one seeds +sum+, so the
    # loop must stop before df/2 (exclusive range); an inclusive range adds
    # one term too many. The result is capped at 1.0.
    def invchi2(chi, df)
      m = chi / 2.0
      sum = term = Math.exp(-m)

      (1...(df / 2)).each do |i|
        term *= m / i
        sum += term
      end

      [sum, 1.0].min
    end

    # --- thin delegators to the config object ---

    def default_category
      @config.call(:default_category)
    end

    def category_threshold(category)
      @config.call(:category_threshold, category)
    end

    def weight
      @config.get(:weight)
    end

    def ap
      @config.get(:ap)
    end

    def get_features(text)
      @config.call(:get_features, text)
    end

    def categories
      @config.call(:categories)
    end

    def category_count(category)
      @config.call(:category_count, category)
    end

    def features_count(feature, category)
      @config.call(:features_count, feature, category)
    end

    def inc_feature(feature, category)
      @config.call(:inc_feature, feature, category)
    end

    def inc_category(category)
      @config.call(:inc_category, category)
    end
  end
end
@@ -0,0 +1,34 @@
1
module FisherClassifier
  # Holds classifier settings and callbacks, filled in by evaluating a
  # configuration block against the instance.
  #
  # Inside the block every unknown message is captured by method_missing:
  #
  #   name value      # stores a plain setting, read back with #get
  #   name { ... }    # stores a callback, invoked with #call
  #
  # Defaults: weight 1.0, ap 0.5.
  class Config
    # @param block [Proc, nil] configuration block, evaluated with self set
    #   to the new instance; nil keeps only the defaults
    def initialize(block)
      @config = {
        weight: 1.0,
        ap: 0.5
      }
      @methods = {}
      instance_eval(&block) if block
    end

    # Returns the setting stored under +key+.
    #
    # @raise [RuntimeError] when no such setting was stored
    def get(key)
      raise "'#{key}' value is not defined in config" unless @config.key?(key)

      @config[key]
    end

    # Invokes the callback stored under +name+ with +args+ and returns its
    # result.
    #
    # @raise [RuntimeError] when no such callback was stored
    def call(name, *args)
      raise "'#{name}' method is not defined in config" unless @methods.key?(name)

      @methods[name].call(*args)
    end

    # DSL hook: a message with a block registers a callback, a message
    # without one stores +value+ (nil when omitted) as a setting.
    def method_missing(key, value = nil, &block)
      if block_given?
        @methods[key] = block
      else
        @config[key] = value
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module FisherClassifier
  # Mixin that reports the intermediate values behind a classification.
  #
  # The including class must provide get_features, default_category,
  # classify, categories, probs_multiply, fisher_factor, fisher_prob,
  # feature_in_all_categories, category_prob, feature_prob, weighted_prob
  # and feature_freqsum.
  module Meta
    # Returns a hash with the input text, the default and selected
    # categories, and per-feature and per-category breakdowns.
    def meta_classify(text)
      tokens = get_features(text)

      report = { text: text }
      report[:default_category] = default_category
      report[:selected_category] = classify(text)
      report[:features] = meta_features(tokens)
      report[:categories] = meta_categories(tokens)
      report
    end

    private

    # One entry per category: probability product, Fisher factor and
    # Fisher probability for the given features.
    def meta_categories(features)
      categories.each_with_object([]) do |cat, rows|
        product = probs_multiply(features, cat)
        factor = fisher_factor(product)
        rows << {
          name: cat,
          probs_multiply: product,
          fisher_factor: factor,
          fisher_prob: fisher_prob(cat, features)
        }
      end
    end

    # One entry per feature: its total count and its per-category details.
    def meta_features(features)
      features.map do |token|
        total = feature_in_all_categories(token)
        { name: token, feature_in_all_categories: total, categories: meta_feature_categories(token) }
      end
    end

    # One entry per category with every probability computed for +feature+.
    def meta_feature_categories(feature)
      categories.map do |cat|
        row = { name: cat }
        row[:category_prob] = category_prob(feature, cat)
        row[:feature_prob] = feature_prob(feature, cat)
        row[:weighted_prob] = weighted_prob(feature, cat)
        row[:freqsum] = feature_freqsum(feature, cat)
        row
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module FisherClassifier
  # Release number of the gem; the gemspec reads it as the gem version.
  VERSION = '0.0.2'
end
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+
3
describe FisherClassifier::Config do
  before do
    # One plain setting (key) and one callback (features_count).
    config = Proc.new do
      key :value

      features_count do
        3
      end
    end

    @instance = FisherClassifier::Config.new config
  end

  it 'should be initialized with configuration block' do
    expect(@instance).to be
  end

  it 'should raise error if config value missing' do
    expect { @instance.get(:missing_value) }.to raise_error(RuntimeError)
  end

  # Exercises #call; the previous version repeated the #get example, so
  # the missing-callback error path was never covered.
  it 'should raise error if config method missing' do
    expect { @instance.call(:missing_method) }.to raise_error(RuntimeError)
  end

  it 'should save value for key' do
    value = @instance.get(:key)

    expect(value).to eq :value
  end

  it 'should execute block stored in config' do
    result = @instance.call(:features_count)

    expect(result).to eq 3
  end
end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
describe FisherClassifier do
  # Each example gets a fresh in-memory classifier.
  before { @classifier = FisherClassifier.create_in_memory }

  it 'should calculate feature prob' do
    expect(@classifier).to be

    # Training corpus: one text per category.
    [
      ['the quick rabbit jumps fences', :good],
      ['buy pharmaceuticals now', :bad]
    ].each { |text, category| @classifier.train text, category }

    # Text to classify => expected category, checked in this order.
    [
      ['buy now', :bad],
      ['buy stuff now', :bad],
      ['rabbit jumps now', :good]
    ].each do |text, expected|
      cat = @classifier.classify(text)
      expect(cat).to eq expected
    end
  end
end
@@ -0,0 +1,7 @@
1
require 'bundler/setup'
Bundler.require

# Start Coveralls only when the TRAVIS environment variable is set.
unless ENV['TRAVIS'].nil?
  require 'coveralls'
  Coveralls.wear!
end
data/spec_helper.rb ADDED
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+
3
require 'sfk'
require 'bundler/setup'
Bundler.require(:default, :development, :test)

require 'simplecov'
# With COVERAGE set, start SimpleCov and then require every file under
# lib/ explicitly.
if ENV['COVERAGE']
  SimpleCov.start
  lib_glob = File.join(File.dirname(__FILE__), '../lib/**/*.rb')
  Dir[lib_glob].each { |path| require path }
end

require 'webmock/rspec'
require File.expand_path('../support/stub_helpers', __FILE__)

SFK::RSpec.configure do |config|
  spec_dir = File.dirname(__FILE__)

  config.root_path = Pathname(File.join(spec_dir, '../'))
  config.apps_path = Pathname(File.join(spec_dir, 'support'))
  config.shared_app_contexts [:main] do
    let(:base) { app.view('base') }
  end

  include StubHelpers
end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fisher_classifier
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Andrew8xx8
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: awesome_print
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Light document classifier based on Fisher method
56
+ email:
57
+ - avk@8xx8.ru
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - .gitignore
63
+ - .travis.yml
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - fisher_classifier.gemspec
69
+ - lib/fisher_classifier.rb
70
+ - lib/fisher_classifier/classifier.rb
71
+ - lib/fisher_classifier/config.rb
72
+ - lib/fisher_classifier/meta.rb
73
+ - lib/fisher_classifier/version.rb
74
+ - spec/fisher_classifier/config_spec.rb
75
+ - spec/fisher_classifier_spec.rb
76
+ - spec/spec_helper.rb
77
+ - spec_helper.rb
78
+ homepage: ''
79
+ licenses:
80
+ - MIT
81
+ metadata: {}
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ! '>='
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubyforge_project:
98
+ rubygems_version: 2.1.11
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: Light document classifier based on Fisher method
102
+ test_files:
103
+ - spec/fisher_classifier/config_spec.rb
104
+ - spec/fisher_classifier_spec.rb
105
+ - spec/spec_helper.rb