luisparravicini-classifier 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  ---
2
- :build:
3
- :patch: 1
4
2
  :major: 1
5
3
  :minor: 4
4
+ :patch: 2
5
+ :build:
@@ -10,7 +10,7 @@ module Classifier
10
10
  end
11
11
 
12
12
  def prepare_category_name val
13
- val.to_s.gsub("_"," ").capitalize.intern
13
+ val.to_s.gsub("_"," ").capitalize
14
14
  end
15
15
 
16
16
  # Removes common punctuation symbols, returning a new string.
@@ -22,7 +22,7 @@ module Classifier
22
22
  end
23
23
 
24
24
  # Return a Hash of strings => ints. Each word in the string is stemmed,
25
- # interned, and indexes to its frequency in the document.
25
+ # and indexes to its frequency in the document.
26
26
  def word_hash str
27
27
  word_hash_for_words(str.gsub(/[^\w\s]/,"").split + str.gsub(/[\w]/," ").split)
28
28
  end
@@ -50,9 +50,11 @@ module Classifier
50
50
  def word_hash_for_words(words)
51
51
  d = Hash.new
52
52
  skip_words = StopWords.for(@options[:language], @options[:lang_dir])
53
+ encoding_name = @options[:encoding].gsub(/_/, '-')
53
54
  words.each do |word|
54
55
  word = word.mb_chars.downcase.to_s if word =~ /[\w]+/
55
- key = stemmer.stem(word).intern
56
+ key = stemmer.stem(word)
57
+ key.force_encoding(encoding_name)
56
58
  if word =~ /[^\w]/ || ! skip_words.include?(word) && word.length > 2
57
59
  d[key] ||= 0
58
60
  d[key] += 1
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{luisparravicini-classifier}
8
- s.version = "1.4.1"
8
+ s.version = "1.4.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Luis Parravicini"]
12
- s.date = %q{2010-01-29}
12
+ s.date = %q{2010-02-02}
13
13
  s.description = %q{Bayesian classifier and others.}
14
14
  s.email = %q{lparravi@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -3,14 +3,14 @@ class HelpersTest < Test::Unit::TestCase
3
3
 
4
4
  def test_word_hash
5
5
  c = Classifier::Base.new
6
- hash = {:good=>1, :"!"=>1, :hope=>1, :"'"=>1, :"."=>1, :love=>1, :word=>1, :them=>1, :test=>1}
6
+ hash = {'good'=>1, "!"=>1, 'hope'=>1, "'"=>1, "."=>1, 'love'=>1, 'word'=>1, 'them'=>1, 'test'=>1}
7
7
  assert_equal hash, c.word_hash("here are some good words of test's. I hope you love them!")
8
8
  end
9
9
 
10
10
 
11
11
  def test_clean_word_hash
12
12
  c = Classifier::Base.new
13
- hash = {:good=>1, :word=>1, :hope=>1, :love=>1, :them=>1, :test=>1}
13
+ hash = {'good'=>1, 'word'=>1, 'hope'=>1, 'love'=>1, 'them'=>1, 'test'=>1}
14
14
  assert_equal hash, c.clean_word_hash("here are some good words of test's. I hope you love them!")
15
15
  end
16
16
 
@@ -157,11 +157,11 @@ class LSITest < Test::Unit::TestCase
157
157
  lsi.add_item @str4, "Cat"
158
158
  lsi.add_item @str5, "Bird"
159
159
 
160
- assert_equal [:dog, :text, :deal], lsi.highest_ranked_stems(@str1)
160
+ assert_equal ['dog', 'text', 'deal'], lsi.highest_ranked_stems(@str1)
161
161
  end
162
162
 
163
163
  def test_summary
164
164
  assert_equal "This text involves dogs too [...] This text also involves cats", [@str1, @str2, @str3, @str4, @str5].join.summary(2)
165
165
  end
166
166
 
167
- end
167
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: luisparravicini-classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 1.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis Parravicini
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-29 00:00:00 -02:00
12
+ date: 2010-02-02 00:00:00 -02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency