reclassifier 0.1.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,3 @@
1
1
  module Reclassifier
2
- VERSION = "0.1.4"
2
+ VERSION = "0.2.5"
3
3
  end
@@ -1,84 +1,8 @@
1
1
  module Reclassifier::WordHash
2
- CORPUS_SKIP_WORDS = ["a",
3
- "again",
4
- "all",
5
- "along",
6
- "are",
7
- "also",
8
- "an",
9
- "and",
10
- "as",
11
- "at",
12
- "but",
13
- "by",
14
- "came",
15
- "can",
16
- "cant",
17
- "couldnt",
18
- "did",
19
- "didn",
20
- "didnt",
21
- "do",
22
- "doesnt",
23
- "dont",
24
- "ever",
25
- "first",
26
- "from",
27
- "have",
28
- "her",
29
- "here",
30
- "him",
31
- "how",
32
- "i",
33
- "if",
34
- "in",
35
- "into",
36
- "is",
37
- "isnt",
38
- "it",
39
- "itll",
40
- "just",
41
- "last",
42
- "least",
43
- "like",
44
- "most",
45
- "my",
46
- "new",
47
- "no",
48
- "not",
49
- "now",
50
- "of",
51
- "on",
52
- "or",
53
- "should",
54
- "sinc",
55
- "so",
56
- "some",
57
- "th",
58
- "than",
59
- "this",
60
- "that",
61
- "the",
62
- "their",
63
- "then",
64
- "those",
65
- "to",
66
- "told",
67
- "too",
68
- "true",
69
- "try",
70
- "until",
71
- "url",
72
- "us",
73
- "were",
74
- "when",
75
- "whether",
76
- "while",
77
- "with",
78
- "within",
79
- "yes",
80
- "you",
81
- "youll"]
2
+ CORPUS_SKIP_WORDS = %w(a again all along are also an and as at but by came can cant couldnt did didn didnt do
3
+ doesnt dont ever first from have her here him how i if in into is isnt it itll just last
4
+ least like most my new no not now of on or should sinc so some th than this that the
5
+ their then those to told too true try until url us were when whether while with within yes you youll)
82
6
 
83
7
  # Return a Hash of symbols => ints. Each word in the string is stemmed,
84
8
  # symbolized, and indexed to its frequency in the document.
@@ -88,7 +12,7 @@ module Reclassifier::WordHash
88
12
 
89
13
  # Return a word hash without extra punctuation or short symbols, just stemmed words
90
14
  def clean_word_hash(string)
91
- word_hash_for_words(string.gsub(/[^\w\s]/,"").split)
15
+ word_hash_for_words(string.gsub(/[^\w\s]/," ").split)
92
16
  end
93
17
 
94
18
  def word_hash_for_words(words)
@@ -15,5 +15,15 @@ describe Reclassifier::Bayes do
15
15
 
16
16
  subject.clean_word_hash("here are some good words of test's. I hope you love them!").should eq(hash)
17
17
  end
18
+
19
+ it 'should convert non-word characters to spaces' do
20
+ subject.clean_word_hash('Payment-Transfer').should eq(:payment => 1, :transfer => 1)
21
+ end
22
+ end
23
+
24
+ [:word_hash, :clean_word_hash].each do |method|
25
+ it "#{method} should trim each word" do
26
+ subject.send(method, "test test123 \t\t\t aaa").should eq(:test => 1, :test123 => 1, :aaa => 1)
27
+ end
18
28
  end
19
29
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reclassifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: