fisher_classifier 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZmMyNjg5M2FhZmM5YjBhNTRhNzQ1Y2U0ZGNlZTY1MjgyZTRlMjc0YQ==
4
+ OWYzNGVmOGRhZGYzYjg5ZGQ3ZGU5OGY2NWI3YTk1MjllM2IyZDNkYw==
5
5
  data.tar.gz: !binary |-
6
- MTQ4M2E0YjgwMzUwZGU2NGIwNjAwZGQwNzZhODdlNWYwYjUyZWFkMQ==
6
+ YWRlNDQxZWE0M2E2ZjdiNTA1NWM2YTE0ZDhhM2MyNDQxZWEwNzJlMQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- M2M0NGI4Njg1MjdkZDRiZWJiYmJiYTJlNTE3YmY5YTM3MWEyNTQwMGVkOTlm
10
- OGRiNDgzNTIyNjZiMWFkZGFkM2Q2ZDJkOTgyNDdlMjc5OGNkOTMxNWEyMTM4
11
- YjNhMWIzYWIxMmM0NWEyZjFmNzVkNjczODcxMjQ0YTlkN2ZhNDE=
9
+ ODJiMjE4NjNiOWVjOTc5NjQyY2M0MGY1ZTJlOTZkNjNhNTc3YmQxMTI5OGQx
10
+ NGU2ZTcxZjI3YTM0YjI2NjY0NWJmMGUwODllZDEyNDYzZDNlYzhlMDFmMWRi
11
+ YTI0MzYyOWM4N2JlOTIzZDUwNTU3YWE0NDA5YWVhZjAwMTBlZWU=
12
12
  data.tar.gz: !binary |-
13
- YTNlM2M5MzZlOWQyY2FmN2NjOTA0NzUzNTlhMTBhMDNhYWMwYjQwZGUyMjVl
14
- OGZlM2Y1MWYyMzg5NTUyYjM2YWFhZTk4Zjg0MjViYWM5NGNhNGE1YWIyYjBk
15
- OWE5MjFlZGY4YTY4YmExYzU5ZTIwNmYzMzQ5M2RkMmQzMTA3NTE=
13
+ ZTMyNDEyODYxYTAyYmExYzk0MDE1NDllMWU0NjNmM2RlMjFmMWFlMGI4YWNi
14
+ YjRiOGYzOWUwMTA4NWZlZDhlOGFlZGI2YjE2NWYwMzM0ZDFiZGNhYTFlMGFm
15
+ N2FiZmRlODIzNDg3NTY4ZDAzYzQ4OGEwYWI2ZTM0MWEwMjA5OTY=
data/README.md CHANGED
@@ -1,29 +1,151 @@
1
1
  # FisherClassifier
2
2
 
3
- TODO: Write a gem description
3
+ Реализация статистического классификатора документов на основе линейного дискриминанта Фишера.
4
4
 
5
- ## Installation
5
+ Предоставляет прозрачный DSL для конфигурирования с возможностью определить:
6
6
 
7
- Add this line to your application's Gemfile:
7
+ * набор категорий;
8
+ * способ определения признаков;
9
+ * коэффициент для подсчета взвешенной вероятности;
10
+ * минимальный порог для определения принадлежности к категории;
11
+ * любое хранилище статистики.
12
+
13
+ Подробнее с теорией:
14
+ * [Байесовский классификатор](http://www.machinelearning.ru/wiki/index.php?title=%D0%91%D0%B0%D0%B9%D0%B5%D1%81%D0%BE%D0%B2%D1%81%D0%BA%D0%B8%D0%B9_%D0%BA%D0%BB%D0%B0%D1%81%D1%81%D0%B8%D1%84%D0%B8%D0%BA%D0%B0%D1%82%D0%BE%D1%80)
15
+ * [Линейный дискриминант Фишера](http://www.machinelearning.ru/wiki/index.php?title=%D0%9B%D0%B8%D0%BD%D0%B5%D0%B9%D0%BD%D1%8B%D0%B9_%D0%B4%D0%B8%D1%81%D0%BA%D1%80%D0%B8%D0%BC%D0%B8%D0%BD%D0%B0%D0%BD%D1%82_%D0%A4%D0%B8%D1%88%D0%B5%D1%80%D0%B0)
16
+
17
+ ## Установка
18
+
19
+ Добавить в Gemfile:
8
20
 
9
21
  gem 'fisher_classifier'
10
22
 
11
- And then execute:
23
+ Выполнить:
12
24
 
13
25
  $ bundle
14
26
 
15
- Or install it yourself as:
27
+ Или поставить как гем:
16
28
 
17
29
  $ gem install fisher_classifier
18
30
 
19
- ## Usage
31
+ ## Try Before You Buy™
32
+
33
+ Попробовать можно в консоли, вот так:
34
+
35
+ $ irb
36
+
37
+ 1.9.3-p448 :002 > require 'fisher_classifier'
38
+ 1.9.3-p448 :003 > cl = FisherClassifier.create_in_memory
39
+ 1.9.3-p448 :005 > cl.train('Nobody owns the water.','good')
40
+ => ["Nobody", "owns", "the", "water."]
41
+ 1.9.3-p448 :006 > cl.train('the quick rabbit jumps fences','good')
42
+ => ["the", "quick", "rabbit", "jumps", "fences"]
43
+ 1.9.3-p448 :007 > cl.train('buy pharmaceuticals now','bad')
44
+ => ["buy", "pharmaceuticals", "now"]
45
+ 1.9.3-p448 :008 > cl.train('make quick money at the online casino','bad')
46
+ => ["make", "quick", "money", "at", "the", "online", "casino"]
47
+ 1.9.3-p448 :009 > cl.train('the quick brown fox jumps','good')
48
+ => ["the", "quick", "brown", "fox", "jumps"]
49
+ 1.9.3-p448 :015 > cl.train('online trading with forex','bad')
50
+ => ["online", "trading", "with", "forex"]
51
+ 1.9.3-p448 :008 > cl.classify('the quick money with forex now')
52
+ => :bad
53
+ 1.9.3-p448 :009 > cl.classify('quck mouse runs from fox')
54
+ => :good
55
+
56
+ В данном примере в качестве хранилища используется оперативная память.
57
+
58
+ ## DSL
59
+
60
+ ### Определение признаков
61
+
62
+ ```ruby
63
+ get_features do |text|
64
+ # Выделить набор признаков из текста
65
+ end
66
+ ```
67
+
68
+ ### Обучение
69
+
70
+ ```ruby
71
+ inc_feature do |feature, category|
72
+ # Увеличить счетчик кол-ва использований признака в категории
73
+ end
74
+
75
+ inc_category do |category|
76
+ # Увеличить счетчик кол-ва использований категории
77
+ end
78
+ ```
79
+
80
+ ### Классификация
81
+
82
+ ```ruby
83
+ # Предполагаемая вероятность (Вероятность признака, если он ни разу не появлялся)
84
+ assumed_prob 0.4
85
+
86
+ # Порог. Минимальное значение вероятности принадлежности текста к категории
87
+ fisher_threshold 0.1
88
+
89
+ categories do
90
+ # Возможные категории
91
+ end
92
+
93
+ category_count do |category|
94
+ # Кол-во использований категории
95
+ end
96
+
97
+ features_count do |feature, category|
98
+ # Кол-во использований признака в категории
99
+ end
100
+
101
+ default_category do
102
+ # Категория по умолчанию
103
+ end
104
+ ```
105
+
106
+ ## Rails (Active Record)
107
+
108
+ Миграция (db/migrate/20131106143644_create_classifier_features.rb):
109
+
110
+ ```ruby
111
+ class CreateClassifierFeatures < ActiveRecord::Migration
112
+ def change
113
+ create_table :classifier_features do |t|
114
+ t.string :name
115
+ t.string :category
116
+ t.integer :count, default: 1
117
+ end
118
+ end
119
+ end
120
+ ```
121
+
122
+ Модель (app/models/classifier_feature.rb):
123
+
124
+ ```ruby
125
+ class ClassifierFeature < ActiveRecord::Base
126
+ validates :category, presence: true
127
+ validates :name, presence: true, uniqueness: {:scope => :category}
128
+
129
+ def self.categories
130
+ [:good, :bad]
131
+ end
132
+ end
133
+ ```
134
+
135
+ Инициалайзер:
136
+
137
+ [config/initializers/classifier.rb](https://github.com/Andrew8xx8/fisher_classifier/blob/master/examples/classifier_initializer.rb)
138
+
139
+ Использование:
20
140
 
21
- TODO: Write usage instructions here
141
+ $ rails c
142
+ 1.9.3-p448 :009 > Classifier.train('the quick brown fox jumps', :good)
143
+ 1.9.3-p448 :009 > Classifier.classify('the quick brown fox jumps')
22
144
 
23
- ## Contributing
145
+ ## Если хочется что-то исправить
24
146
 
25
- 1. Fork it
26
- 2. Create your feature branch (`git checkout -b my-new-feature`)
27
- 3. Commit your changes (`git commit -am 'Add some feature'`)
28
- 4. Push to the branch (`git push origin my-new-feature`)
147
+ 1. Форкни
148
+ 2. Зафигач фиче-ветку (`git checkout -b my-new-feature`)
149
+ 3. Коммить изменения (`git commit -am 'Add some feature'`)
150
+ 4. Пуш ветку (`git push origin my-new-feature`)
29
151
  5. Create new Pull Request
@@ -0,0 +1,29 @@
1
+ # FisherClassifier
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'fisher_classifier'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install fisher_classifier
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
@@ -0,0 +1,39 @@
1
+ Classifier = FisherClassifier.create do
2
+ assumed_prob 0.4
3
+ fisher_threshold 0.1
4
+
5
+ inc_feature do |feature, category|
6
+ feature = ClassifierFeature.find_or_initialize_by(name: feature, category: category)
7
+ feature.count += 1 if feature
8
+ feature.save
9
+ end
10
+
11
+ get_features do |text|
12
+ if text
13
+ text.to_s.split(' ').map { |s| s.downcase }
14
+ else
15
+ []
16
+ end
17
+ end
18
+
19
+ categories do
20
+ ClassifierFeature.categories
21
+ end
22
+
23
+ category_count do |category|
24
+ ClassifierFeature.where(category: category).count
25
+ end
26
+
27
+ features_count do |feature, category|
28
+ f = ClassifierFeature.find_by(name: feature, category: category)
29
+ if f
30
+ f.count
31
+ else
32
+ 0
33
+ end
34
+ end
35
+
36
+ default_category do
37
+ "bad"
38
+ end
39
+ end
@@ -19,24 +19,17 @@ module FisherClassifier
19
19
 
20
20
  inc_feature do |feature, category|
21
21
  @features[category] ||= {}
22
-
23
- if @features[category].has_key? feature
24
- @features[category][feature] += 1
25
- else
26
- @features[category][feature] = 1
27
- end
22
+ @features[category][feature] ||= 0
23
+ @features[category][feature] += 1
28
24
  end
29
25
 
30
26
  inc_category do |category|
31
- if @categories.has_key? category
32
- @categories[category] += 1
33
- else
34
- @categories[category] = 1
35
- end
27
+ @categories[category] ||= 0
28
+ @categories[category] += 1
36
29
  end
37
30
 
38
31
  get_features do |text|
39
- text.split(' ')
32
+ text.split(' ').map { |s| s.downcase }
40
33
  end
41
34
 
42
35
  categories do
@@ -44,8 +37,8 @@ module FisherClassifier
44
37
  end
45
38
 
46
39
  category_count do |category|
47
- if @features.has_key?(category)
48
- @categories[category] || 0
40
+ if @categories.has_key?(category)
41
+ @categories[category]
49
42
  else
50
43
  0
51
44
  end
@@ -53,7 +46,7 @@ module FisherClassifier
53
46
 
54
47
  features_count do |feature, category|
55
48
  if @features.has_key?(category) && @features[category].has_key?(feature)
56
- @features[category][feature] || 0
49
+ @features[category][feature]
57
50
  else
58
51
  0
59
52
  end
@@ -1,3 +1,3 @@
1
1
  module FisherClassifier
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fisher_classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew8xx8
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-14 00:00:00.000000000 Z
11
+ date: 2013-12-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -64,7 +64,9 @@ files:
64
64
  - Gemfile
65
65
  - LICENSE.txt
66
66
  - README.md
67
+ - README_EN.md
67
68
  - Rakefile
69
+ - examples/classifier_initializer.rb
68
70
  - fisher_classifier.gemspec
69
71
  - lib/fisher_classifier.rb
70
72
  - lib/fisher_classifier/classifier.rb