fisher_classifier 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/README.md +134 -12
- data/README_EN.md +29 -0
- data/examples/classifier_initializer.rb +39 -0
- data/lib/fisher_classifier.rb +8 -15
- data/lib/fisher_classifier/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OWYzNGVmOGRhZGYzYjg5ZGQ3ZGU5OGY2NWI3YTk1MjllM2IyZDNkYw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YWRlNDQxZWE0M2E2ZjdiNTA1NWM2YTE0ZDhhM2MyNDQxZWEwNzJlMQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ODJiMjE4NjNiOWVjOTc5NjQyY2M0MGY1ZTJlOTZkNjNhNTc3YmQxMTI5OGQx
|
10
|
+
NGU2ZTcxZjI3YTM0YjI2NjY0NWJmMGUwODllZDEyNDYzZDNlYzhlMDFmMWRi
|
11
|
+
YTI0MzYyOWM4N2JlOTIzZDUwNTU3YWE0NDA5YWVhZjAwMTBlZWU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZTMyNDEyODYxYTAyYmExYzk0MDE1NDllMWU0NjNmM2RlMjFmMWFlMGI4YWNi
|
14
|
+
YjRiOGYzOWUwMTA4NWZlZDhlOGFlZGI2YjE2NWYwMzM0ZDFiZGNhYTFlMGFm
|
15
|
+
N2FiZmRlODIzNDg3NTY4ZDAzYzQ4OGEwYWI2ZTM0MWEwMjA5OTY=
|
data/README.md
CHANGED
@@ -1,29 +1,151 @@
|
|
1
1
|
# FisherClassifier
|
2
2
|
|
3
|
-
|
3
|
+
Реализация статистического классификатора документов на основе линейного дискриминанта Фишера.
|
4
4
|
|
5
|
-
|
5
|
+
Предоставляет прозрачный DSL для конфигурирования с возможностью определить:
|
6
6
|
|
7
|
-
|
7
|
+
* набор категорий;
|
8
|
+
* способ определения признаков;
|
9
|
+
* коэффициент для подсчета взвешенной вероятности;
|
10
|
+
* минимальный порог для определения принадлежности к категории;
|
11
|
+
* любое хранилище статистики.
|
12
|
+
|
13
|
+
Подробнее с теорией:
|
14
|
+
* [Байесовский классификатор](http://www.machinelearning.ru/wiki/index.php?title=%D0%91%D0%B0%D0%B9%D0%B5%D1%81%D0%BE%D0%B2%D1%81%D0%BA%D0%B8%D0%B9_%D0%BA%D0%BB%D0%B0%D1%81%D1%81%D0%B8%D1%84%D0%B8%D0%BA%D0%B0%D1%82%D0%BE%D1%80)
|
15
|
+
* [Линейный дискриминант Фишера](http://www.machinelearning.ru/wiki/index.php?title=%D0%9B%D0%B8%D0%BD%D0%B5%D0%B9%D0%BD%D1%8B%D0%B9_%D0%B4%D0%B8%D1%81%D0%BA%D1%80%D0%B8%D0%BC%D0%B8%D0%BD%D0%B0%D0%BD%D1%82_%D0%A4%D0%B8%D1%88%D0%B5%D1%80%D0%B0)
|
16
|
+
|
17
|
+
## Установка
|
18
|
+
|
19
|
+
Добавить в Gemfile:
|
8
20
|
|
9
21
|
gem 'fisher_classifier'
|
10
22
|
|
11
|
-
|
23
|
+
Выполнить:
|
12
24
|
|
13
25
|
$ bundle
|
14
26
|
|
15
|
-
|
27
|
+
Или поставить как гем:
|
16
28
|
|
17
29
|
$ gem install fisher_classifier
|
18
30
|
|
19
|
-
##
|
31
|
+
## Try Before You Buy™
|
32
|
+
|
33
|
+
Попробовать можно в консоли, вот так:
|
34
|
+
|
35
|
+
$ irb
|
36
|
+
|
37
|
+
1.9.3-p448 :002 > require 'fisher_classifier'
|
38
|
+
1.9.3-p448 :003 > cl = FisherClassifier.create_in_memory
|
39
|
+
1.9.3-p448 :005 > cl.train('Nobody owns the water.','good')
|
40
|
+
=> ["Nobody", "owns", "the", "water."]
|
41
|
+
1.9.3-p448 :006 > cl.train('the quick rabbit jumps fences','good')
|
42
|
+
=> ["the", "quick", "rabbit", "jumps", "fences"]
|
43
|
+
1.9.3-p448 :007 > cl.train('buy pharmaceuticals now','bad')
|
44
|
+
=> ["buy", "pharmaceuticals", "now"]
|
45
|
+
1.9.3-p448 :008 > cl.train('make quick money at the online casino','bad')
|
46
|
+
=> ["make", "quick", "money", "at", "the", "online", "casino"]
|
47
|
+
1.9.3-p448 :009 > cl.train('the quick brown fox jumps','good')
|
48
|
+
=> ["the", "quick", "brown", "fox", "jumps"]
|
49
|
+
1.9.3-p448 :015 > cl.train('online trading with forex','bad')
|
50
|
+
=> ["online", "trading", "with", "forex"]
|
51
|
+
1.9.3-p448 :008 > cl.classify('the quick money with forex now')
|
52
|
+
=> :bad
|
53
|
+
1.9.3-p448 :009 > cl.classify('quck mouse runs from fox')
|
54
|
+
=> :good
|
55
|
+
|
56
|
+
В данном примере в качестве хранилища используется оперативная память.
|
57
|
+
|
58
|
+
## DSL
|
59
|
+
|
60
|
+
### Определение признаков
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
get_features do |text|
|
64
|
+
# Выделить набор признаков из текста
|
65
|
+
end
|
66
|
+
```
|
67
|
+
|
68
|
+
### Обучение
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
inc_feature do |feature, category|
|
72
|
+
# Увеличить счетчик кол-ва использований признака в категории
|
73
|
+
end
|
74
|
+
|
75
|
+
inc_category do |category|
|
76
|
+
# Увеличить счетчик кол-ва использований категории
|
77
|
+
end
|
78
|
+
```
|
79
|
+
|
80
|
+
### Классификация
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
# Предполагаемая вероятность (Вероятность признака, если он ни разу не появлялся)
|
84
|
+
assumed_prob 0.4
|
85
|
+
|
86
|
+
# Порог. Минимальное значение вероятности принадлежности текста к категории
|
87
|
+
fisher_threshold 0.1
|
88
|
+
|
89
|
+
categories do
|
90
|
+
# Возможные категории
|
91
|
+
end
|
92
|
+
|
93
|
+
category_count do |category|
|
94
|
+
# Кол-во использований категории
|
95
|
+
end
|
96
|
+
|
97
|
+
features_count do |feature, category|
|
98
|
+
# Кол-во использований признака в категории
|
99
|
+
end
|
100
|
+
|
101
|
+
default_category do
|
102
|
+
# Категория по умолчанию
|
103
|
+
end
|
104
|
+
```
|
105
|
+
|
106
|
+
## Rails (Active Record)
|
107
|
+
|
108
|
+
Миграция (db/migrate/20131106143644_create_classifier_features.rb):
|
109
|
+
|
110
|
+
```ruby
|
111
|
+
class CreateClassifierFeatures < ActiveRecord::Migration
|
112
|
+
def change
|
113
|
+
create_table :classifier_features do |t|
|
114
|
+
t.string :name
|
115
|
+
t.string :category
|
116
|
+
t.integer :count, default: 1
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
```
|
121
|
+
|
122
|
+
Модель (app/models/classifier_feature.rb):
|
123
|
+
|
124
|
+
```ruby
|
125
|
+
class ClassifierFeature < ActiveRecord::Base
|
126
|
+
validates :category, presence: true
|
127
|
+
validates :name, presence: true, uniqueness: {:scope => :category}
|
128
|
+
|
129
|
+
def self.categories
|
130
|
+
[:good, :bad]
|
131
|
+
end
|
132
|
+
end
|
133
|
+
```
|
134
|
+
|
135
|
+
Инициалайзер:
|
136
|
+
|
137
|
+
[config/initializers/classifier.rb](https://github.com/Andrew8xx8/fisher_classifier/blob/master/examples/classifier_initializer.rb)
|
138
|
+
|
139
|
+
Использование:
|
20
140
|
|
21
|
-
|
141
|
+
$ rails c
|
142
|
+
1.9.3-p448 :009 > Classifier.train('the quick brown fox jumps', :good)
|
143
|
+
1.9.3-p448 :010 > Classifier.classify('the quick brown fox jumps')
|
22
144
|
|
23
|
-
##
|
145
|
+
## Если хочется что-то исправить
|
24
146
|
|
25
|
-
1.
|
26
|
-
2.
|
27
|
-
3.
|
28
|
-
4.
|
147
|
+
1. Форкни
|
148
|
+
2. Зафигач фиче-ветку (`git checkout -b my-new-feature`)
|
149
|
+
3. Коммить изменения (`git commit -am 'Add some feature'`)
|
150
|
+
4. Пуш ветку (`git push origin my-new-feature`)
|
29
151
|
5. Create new Pull Request
|
data/README_EN.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# FisherClassifier
|
2
|
+
|
3
|
+
TODO: Write a gem description
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'fisher_classifier'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install fisher_classifier
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
TODO: Write usage instructions here
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
@@ -0,0 +1,39 @@
|
|
1
|
+
Classifier = FisherClassifier.create do
|
2
|
+
assumed_prob 0.4
|
3
|
+
fisher_threshold 0.1
|
4
|
+
|
5
|
+
inc_feature do |feature, category|
|
6
|
+
feature = ClassifierFeature.find_or_initialize_by(name: feature, category: category)
|
7
|
+
feature.count += 1 if feature
|
8
|
+
feature.save
|
9
|
+
end
|
10
|
+
|
11
|
+
get_features do |text|
|
12
|
+
if text
|
13
|
+
text.to_s.split(' ').map { |s| s.downcase }
|
14
|
+
else
|
15
|
+
[]
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
categories do
|
20
|
+
ClassifierFeature.categories
|
21
|
+
end
|
22
|
+
|
23
|
+
category_count do |category|
|
24
|
+
ClassifierFeature.where(category: category).count
|
25
|
+
end
|
26
|
+
|
27
|
+
features_count do |feature, category|
|
28
|
+
f = ClassifierFeature.find_by(name: feature, category: category)
|
29
|
+
if f
|
30
|
+
f.count
|
31
|
+
else
|
32
|
+
0
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
default_category do
|
37
|
+
"bad"
|
38
|
+
end
|
39
|
+
end
|
data/lib/fisher_classifier.rb
CHANGED
@@ -19,24 +19,17 @@ module FisherClassifier
|
|
19
19
|
|
20
20
|
inc_feature do |feature, category|
|
21
21
|
@features[category] ||= {}
|
22
|
-
|
23
|
-
|
24
|
-
@features[category][feature] += 1
|
25
|
-
else
|
26
|
-
@features[category][feature] = 1
|
27
|
-
end
|
22
|
+
@features[category][feature] ||= 0
|
23
|
+
@features[category][feature] += 1
|
28
24
|
end
|
29
25
|
|
30
26
|
inc_category do |category|
|
31
|
-
|
32
|
-
|
33
|
-
else
|
34
|
-
@categories[category] = 1
|
35
|
-
end
|
27
|
+
@categories[category] ||= 0
|
28
|
+
@categories[category] += 1
|
36
29
|
end
|
37
30
|
|
38
31
|
get_features do |text|
|
39
|
-
text.split(' ')
|
32
|
+
text.split(' ').map { |s| s.downcase }
|
40
33
|
end
|
41
34
|
|
42
35
|
categories do
|
@@ -44,8 +37,8 @@ module FisherClassifier
|
|
44
37
|
end
|
45
38
|
|
46
39
|
category_count do |category|
|
47
|
-
if @
|
48
|
-
@categories[category]
|
40
|
+
if @categories.has_key?(category)
|
41
|
+
@categories[category]
|
49
42
|
else
|
50
43
|
0
|
51
44
|
end
|
@@ -53,7 +46,7 @@ module FisherClassifier
|
|
53
46
|
|
54
47
|
features_count do |feature, category|
|
55
48
|
if @features.has_key?(category) && @features[category].has_key?(feature)
|
56
|
-
@features[category][feature]
|
49
|
+
@features[category][feature]
|
57
50
|
else
|
58
51
|
0
|
59
52
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fisher_classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew8xx8
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -64,7 +64,9 @@ files:
|
|
64
64
|
- Gemfile
|
65
65
|
- LICENSE.txt
|
66
66
|
- README.md
|
67
|
+
- README_EN.md
|
67
68
|
- Rakefile
|
69
|
+
- examples/classifier_initializer.rb
|
68
70
|
- fisher_classifier.gemspec
|
69
71
|
- lib/fisher_classifier.rb
|
70
72
|
- lib/fisher_classifier/classifier.rb
|