luisparravicini-classifier 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +2 -2
- data/lib/classifier/base.rb +5 -3
- data/luisparravicini-classifier.gemspec +2 -2
- data/test/base_test.rb +2 -2
- data/test/lsi/lsi_test.rb +2 -2
- metadata +2 -2
data/VERSION.yml
CHANGED
data/lib/classifier/base.rb
CHANGED
@@ -10,7 +10,7 @@ module Classifier
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def prepare_category_name val
|
13
|
-
val.to_s.gsub("_"," ").capitalize
|
13
|
+
val.to_s.gsub("_"," ").capitalize
|
14
14
|
end
|
15
15
|
|
16
16
|
# Removes common punctuation symbols, returning a new string.
|
@@ -22,7 +22,7 @@ module Classifier
|
|
22
22
|
end
|
23
23
|
|
24
24
|
# Return a Hash of strings => ints. Each word in the string is stemmed,
|
25
|
-
#
|
25
|
+
# and indexes to its frequency in the document.
|
26
26
|
def word_hash str
|
27
27
|
word_hash_for_words(str.gsub(/[^\w\s]/,"").split + str.gsub(/[\w]/," ").split)
|
28
28
|
end
|
@@ -50,9 +50,11 @@ module Classifier
|
|
50
50
|
def word_hash_for_words(words)
|
51
51
|
d = Hash.new
|
52
52
|
skip_words = StopWords.for(@options[:language], @options[:lang_dir])
|
53
|
+
encoding_name = @options[:encoding].gsub(/_/, '-')
|
53
54
|
words.each do |word|
|
54
55
|
word = word.mb_chars.downcase.to_s if word =~ /[\w]+/
|
55
|
-
key = stemmer.stem(word)
|
56
|
+
key = stemmer.stem(word)
|
57
|
+
key.force_encoding(encoding_name)
|
56
58
|
if word =~ /[^\w]/ || ! skip_words.include?(word) && word.length > 2
|
57
59
|
d[key] ||= 0
|
58
60
|
d[key] += 1
|
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{luisparravicini-classifier}
|
8
|
-
s.version = "1.4.1"
|
8
|
+
s.version = "1.4.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Luis Parravicini"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-02-02}
|
13
13
|
s.description = %q{Bayesian classifier and others.}
|
14
14
|
s.email = %q{lparravi@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/base_test.rb
CHANGED
@@ -3,14 +3,14 @@ class HelpersTest < Test::Unit::TestCase
|
|
3
3
|
|
4
4
|
def test_word_hash
|
5
5
|
c = Classifier::Base.new
|
6
|
-
hash = {
|
6
|
+
hash = {'good'=>1, "!"=>1, 'hope'=>1, "'"=>1, "."=>1, 'love'=>1, 'word'=>1, 'them'=>1, 'test'=>1}
|
7
7
|
assert_equal hash, c.word_hash("here are some good words of test's. I hope you love them!")
|
8
8
|
end
|
9
9
|
|
10
10
|
|
11
11
|
def test_clean_word_hash
|
12
12
|
c = Classifier::Base.new
|
13
|
-
hash = {
|
13
|
+
hash = {'good'=>1, 'word'=>1, 'hope'=>1, 'love'=>1, 'them'=>1, 'test'=>1}
|
14
14
|
assert_equal hash, c.clean_word_hash("here are some good words of test's. I hope you love them!")
|
15
15
|
end
|
16
16
|
|
data/test/lsi/lsi_test.rb
CHANGED
@@ -157,11 +157,11 @@ class LSITest < Test::Unit::TestCase
|
|
157
157
|
lsi.add_item @str4, "Cat"
|
158
158
|
lsi.add_item @str5, "Bird"
|
159
159
|
|
160
|
-
assert_equal [
|
160
|
+
assert_equal ['dog', 'text', 'deal'], lsi.highest_ranked_stems(@str1)
|
161
161
|
end
|
162
162
|
|
163
163
|
def test_summary
|
164
164
|
assert_equal "This text involves dogs too [...] This text also involves cats", [@str1, @str2, @str3, @str4, @str5].join.summary(2)
|
165
165
|
end
|
166
166
|
|
167
|
-
end
|
167
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: luisparravicini-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1
|
4
|
+
version: 1.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis Parravicini
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-02-02 00:00:00 -02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|