yury-classifier 1.3.2 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -2
- data/classifier.gemspec +5 -5
- data/lib/classifier/base.rb +1 -1
- data/test/bayes/bayesian_test.rb +10 -0
- metadata +4 -4
data/Rakefile
CHANGED
@@ -2,14 +2,14 @@ require 'rubygems'
|
|
2
2
|
require 'rake'
|
3
3
|
require 'echoe'
|
4
4
|
|
5
|
-
Echoe.new('classifier', '1.3.2') do |p|
|
5
|
+
Echoe.new('classifier', '1.3.3') do |p|
|
6
6
|
p.description = "A general classifier module to allow Bayesian and other types of classifications."
|
7
7
|
p.url = "http://github.com/yury/classifier"
|
8
8
|
p.author = "Yury Korolev"
|
9
9
|
p.email = "yury.korolev@gmail.com"
|
10
10
|
p.ignore_pattern = ["tmp/*", "script/*"]
|
11
11
|
p.development_dependencies = []
|
12
|
-
p.runtime_dependencies = ["activesupport >= 2.2.2", "
|
12
|
+
p.runtime_dependencies = ["activesupport >= 2.2.2", "ruby-stemmer >= 0.5.1"]
|
13
13
|
end
|
14
14
|
|
15
15
|
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
data/classifier.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{classifier}
|
5
|
-
s.version = "1.3.2"
|
5
|
+
s.version = "1.3.3"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Yury Korolev"]
|
9
|
-
s.date = %q{2009-02-
|
9
|
+
s.date = %q{2009-02-05}
|
10
10
|
s.description = %q{A general classifier module to allow Bayesian and other types of classifications.}
|
11
11
|
s.email = %q{yury.korolev@gmail.com}
|
12
12
|
s.extra_rdoc_files = ["lib/classifier/base.rb", "lib/classifier/bayes.rb", "lib/classifier/extensions/vector.rb", "lib/classifier/extensions/vector_serialize.rb", "lib/classifier/lsi/content_node.rb", "lib/classifier/lsi/summary.rb", "lib/classifier/lsi/word_list.rb", "lib/classifier/lsi.rb", "lib/classifier.rb", "lib/init.rb", "LICENSE", "README"]
|
@@ -26,13 +26,13 @@ Gem::Specification.new do |s|
|
|
26
26
|
|
27
27
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
28
28
|
s.add_runtime_dependency(%q<activesupport>, [">= 0", "= 2.2.2"])
|
29
|
-
s.add_runtime_dependency(%q<
|
29
|
+
s.add_runtime_dependency(%q<ruby-stemmer>, [">= 0", "= 0.5.1"])
|
30
30
|
else
|
31
31
|
s.add_dependency(%q<activesupport>, [">= 0", "= 2.2.2"])
|
32
|
-
s.add_dependency(%q<
|
32
|
+
s.add_dependency(%q<ruby-stemmer>, [">= 0", "= 0.5.1"])
|
33
33
|
end
|
34
34
|
else
|
35
35
|
s.add_dependency(%q<activesupport>, [">= 0", "= 2.2.2"])
|
36
|
-
s.add_dependency(%q<
|
36
|
+
s.add_dependency(%q<ruby-stemmer>, [">= 0", "= 0.5.1"])
|
37
37
|
end
|
38
38
|
end
|
data/lib/classifier/base.rb
CHANGED
@@ -40,7 +40,7 @@ module Classifier
|
|
40
40
|
d = Hash.new
|
41
41
|
skip_words = SKIP_WORDS[@options[:language]] || []
|
42
42
|
words.each do |word|
|
43
|
-
word.mb_chars.downcase
|
43
|
+
word = word.mb_chars.downcase.to_s if word =~ /[\w]+/
|
44
44
|
key = stemmer.stem(word).intern
|
45
45
|
if word =~ /[^\w]/ || ! skip_words.include?(word) && word.length > 2
|
46
46
|
d[key] ||= 0
|
data/test/bayes/bayesian_test.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding:utf-8
|
2
|
+
$KCODE = 'utf8'
|
2
3
|
|
3
4
|
require File.dirname(__FILE__) + '/../test_helper'
|
4
5
|
class BayesianTest < Test::Unit::TestCase
|
@@ -39,4 +40,13 @@ class BayesianTest < Test::Unit::TestCase
|
|
39
40
|
c.train_uninteresting "вот несколько плохих слов. Я тебя ненавижу"
|
40
41
|
assert_equal 'Uninteresting', c.classify("Я ненавижу плохие слова и тебя")
|
41
42
|
end
|
43
|
+
|
44
|
+
def test_case_insensitive
|
45
|
+
c = Classifier::Bayes.new :categories => [:good, :bad], :language => "ru"
|
46
|
+
c.train_good "Хорошо"
|
47
|
+
c.train_bad "Плохо"
|
48
|
+
|
49
|
+
assert_equal c.classifications("ХОРОШО"), c.classifications("хорошо")
|
50
|
+
assert_equal c.classifications("плОХО"), c.classifications("плохо")
|
51
|
+
end
|
42
52
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yury-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.2
|
4
|
+
version: 1.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yury Korolev
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-02-
|
12
|
+
date: 2009-02-05 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -25,7 +25,7 @@ dependencies:
|
|
25
25
|
version: 2.2.2
|
26
26
|
version:
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: ruby-stemmer
|
29
29
|
version_requirement:
|
30
30
|
version_requirements: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
version: "0"
|
35
35
|
- - "="
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 0.5.
|
37
|
+
version: 0.5.1
|
38
38
|
version:
|
39
39
|
description: A general classifier module to allow Bayesian and other types of classifications.
|
40
40
|
email: yury.korolev@gmail.com
|