luisparravicini-classifier 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +2 -2
- data/lib/classifier/base.rb +5 -3
- data/luisparravicini-classifier.gemspec +2 -2
- data/test/base_test.rb +2 -2
- data/test/lsi/lsi_test.rb +2 -2
- metadata +2 -2
data/VERSION.yml
CHANGED
data/lib/classifier/base.rb
CHANGED
@@ -10,7 +10,7 @@ module Classifier
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def prepare_category_name val
|
13
|
-
val.to_s.gsub("_"," ").capitalize
|
13
|
+
val.to_s.gsub("_"," ").capitalize
|
14
14
|
end
|
15
15
|
|
16
16
|
# Removes common punctuation symbols, returning a new string.
|
@@ -22,7 +22,7 @@ module Classifier
|
|
22
22
|
end
|
23
23
|
|
24
24
|
# Return a Hash of strings => ints. Each word in the string is stemmed,
|
25
|
-
#
|
25
|
+
# and indexes to its frequency in the document.
|
26
26
|
def word_hash str
|
27
27
|
word_hash_for_words(str.gsub(/[^\w\s]/,"").split + str.gsub(/[\w]/," ").split)
|
28
28
|
end
|
@@ -50,9 +50,11 @@ module Classifier
|
|
50
50
|
def word_hash_for_words(words)
|
51
51
|
d = Hash.new
|
52
52
|
skip_words = StopWords.for(@options[:language], @options[:lang_dir])
|
53
|
+
encoding_name = @options[:encoding].gsub(/_/, '-')
|
53
54
|
words.each do |word|
|
54
55
|
word = word.mb_chars.downcase.to_s if word =~ /[\w]+/
|
55
|
-
key = stemmer.stem(word)
|
56
|
+
key = stemmer.stem(word)
|
57
|
+
key.force_encoding(encoding_name)
|
56
58
|
if word =~ /[^\w]/ || ! skip_words.include?(word) && word.length > 2
|
57
59
|
d[key] ||= 0
|
58
60
|
d[key] += 1
|
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{luisparravicini-classifier}
|
8
|
-
s.version = "1.4.1"
|
8
|
+
s.version = "1.4.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Luis Parravicini"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-02-02}
|
13
13
|
s.description = %q{Bayesian classifier and others.}
|
14
14
|
s.email = %q{lparravi@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/base_test.rb
CHANGED
@@ -3,14 +3,14 @@ class HelpersTest < Test::Unit::TestCase
|
|
3
3
|
|
4
4
|
def test_word_hash
|
5
5
|
c = Classifier::Base.new
|
6
|
-
hash = {
|
6
|
+
hash = {'good'=>1, "!"=>1, 'hope'=>1, "'"=>1, "."=>1, 'love'=>1, 'word'=>1, 'them'=>1, 'test'=>1}
|
7
7
|
assert_equal hash, c.word_hash("here are some good words of test's. I hope you love them!")
|
8
8
|
end
|
9
9
|
|
10
10
|
|
11
11
|
def test_clean_word_hash
|
12
12
|
c = Classifier::Base.new
|
13
|
-
hash = {
|
13
|
+
hash = {'good'=>1, 'word'=>1, 'hope'=>1, 'love'=>1, 'them'=>1, 'test'=>1}
|
14
14
|
assert_equal hash, c.clean_word_hash("here are some good words of test's. I hope you love them!")
|
15
15
|
end
|
16
16
|
|
data/test/lsi/lsi_test.rb
CHANGED
@@ -157,11 +157,11 @@ class LSITest < Test::Unit::TestCase
|
|
157
157
|
lsi.add_item @str4, "Cat"
|
158
158
|
lsi.add_item @str5, "Bird"
|
159
159
|
|
160
|
-
assert_equal [
|
160
|
+
assert_equal ['dog', 'text', 'deal'], lsi.highest_ranked_stems(@str1)
|
161
161
|
end
|
162
162
|
|
163
163
|
def test_summary
|
164
164
|
assert_equal "This text involves dogs too [...] This text also involves cats", [@str1, @str2, @str3, @str4, @str5].join.summary(2)
|
165
165
|
end
|
166
166
|
|
167
|
-
end
|
167
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: luisparravicini-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1
|
4
|
+
version: 1.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis Parravicini
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-02-02 00:00:00 -02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|