luisparravicini-classifier 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  ---
2
- :build:
3
- :patch: 1
4
2
  :major: 1
5
3
  :minor: 4
4
+ :patch: 2
5
+ :build:
@@ -10,7 +10,7 @@ module Classifier
10
10
  end
11
11
 
12
12
  def prepare_category_name val
13
- val.to_s.gsub("_"," ").capitalize.intern
13
+ val.to_s.gsub("_"," ").capitalize
14
14
  end
15
15
 
16
16
  # Removes common punctuation symbols, returning a new string.
@@ -22,7 +22,7 @@ module Classifier
22
22
  end
23
23
 
24
24
  # Return a Hash of strings => ints. Each word in the string is stemmed,
25
- # interned, and indexes to its frequency in the document.
25
+ # and indexes to its frequency in the document.
26
26
  def word_hash str
27
27
  word_hash_for_words(str.gsub(/[^\w\s]/,"").split + str.gsub(/[\w]/," ").split)
28
28
  end
@@ -50,9 +50,11 @@ module Classifier
50
50
  def word_hash_for_words(words)
51
51
  d = Hash.new
52
52
  skip_words = StopWords.for(@options[:language], @options[:lang_dir])
53
+ encoding_name = @options[:encoding].gsub(/_/, '-')
53
54
  words.each do |word|
54
55
  word = word.mb_chars.downcase.to_s if word =~ /[\w]+/
55
- key = stemmer.stem(word).intern
56
+ key = stemmer.stem(word)
57
+ key.force_encoding(encoding_name)
56
58
  if word =~ /[^\w]/ || ! skip_words.include?(word) && word.length > 2
57
59
  d[key] ||= 0
58
60
  d[key] += 1
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{luisparravicini-classifier}
8
- s.version = "1.4.1"
8
+ s.version = "1.4.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Luis Parravicini"]
12
- s.date = %q{2010-01-29}
12
+ s.date = %q{2010-02-02}
13
13
  s.description = %q{Bayesian classifier and others.}
14
14
  s.email = %q{lparravi@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -3,14 +3,14 @@ class HelpersTest < Test::Unit::TestCase
3
3
 
4
4
  def test_word_hash
5
5
  c = Classifier::Base.new
6
- hash = {:good=>1, :"!"=>1, :hope=>1, :"'"=>1, :"."=>1, :love=>1, :word=>1, :them=>1, :test=>1}
6
+ hash = {'good'=>1, "!"=>1, 'hope'=>1, "'"=>1, "."=>1, 'love'=>1, 'word'=>1, 'them'=>1, 'test'=>1}
7
7
  assert_equal hash, c.word_hash("here are some good words of test's. I hope you love them!")
8
8
  end
9
9
 
10
10
 
11
11
  def test_clean_word_hash
12
12
  c = Classifier::Base.new
13
- hash = {:good=>1, :word=>1, :hope=>1, :love=>1, :them=>1, :test=>1}
13
+ hash = {'good'=>1, 'word'=>1, 'hope'=>1, 'love'=>1, 'them'=>1, 'test'=>1}
14
14
  assert_equal hash, c.clean_word_hash("here are some good words of test's. I hope you love them!")
15
15
  end
16
16
 
@@ -157,11 +157,11 @@ class LSITest < Test::Unit::TestCase
157
157
  lsi.add_item @str4, "Cat"
158
158
  lsi.add_item @str5, "Bird"
159
159
 
160
- assert_equal [:dog, :text, :deal], lsi.highest_ranked_stems(@str1)
160
+ assert_equal ['dog', 'text', 'deal'], lsi.highest_ranked_stems(@str1)
161
161
  end
162
162
 
163
163
  def test_summary
164
164
  assert_equal "This text involves dogs too [...] This text also involves cats", [@str1, @str2, @str3, @str4, @str5].join.summary(2)
165
165
  end
166
166
 
167
- end
167
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: luisparravicini-classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 1.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis Parravicini
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-29 00:00:00 -02:00
12
+ date: 2010-02-02 00:00:00 -02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency