groupie 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -19,17 +19,17 @@ rescue LoadError
19
19
  end
20
20
 
21
21
  require 'rake/testtask'
22
- Rake::TestTask.new(:test) do |test|
23
- test.libs << 'lib' << 'test'
24
- test.pattern = 'test/**/*_test.rb'
22
+ Rake::TestTask.new(:spec) do |test|
23
+ test.libs << 'lib' << 'spec'
24
+ test.pattern = 'spec/**/*_spec.rb'
25
25
  test.verbose = true
26
26
  end
27
27
 
28
28
  begin
29
29
  require 'rcov/rcovtask'
30
30
  Rcov::RcovTask.new do |test|
31
- test.libs << 'test'
32
- test.pattern = 'test/**/*_test.rb'
31
+ test.libs << 'spec'
32
+ test.pattern = 'spec/**/*_spec.rb'
33
33
  test.verbose = true
34
34
  end
35
35
  rescue LoadError
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{groupie}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Wes Oldenbeuving"]
@@ -25,12 +25,15 @@ Gem::Specification.new do |s|
25
25
  "lib/groupie/core_ext/string.rb",
26
26
  "lib/groupie/group.rb",
27
27
  "readme.rdoc",
28
- "test/fixtures/ham/spam.la-44116217.txt",
29
- "test/fixtures/spam/spam.la-44118014.txt",
30
- "test/groupie/core_ext/string_test.rb",
31
- "test/groupie/group_test.rb",
32
- "test/groupie_test.rb",
33
- "test/test_helper.rb"
28
+ "spec/fixtures/ham/email_ham1.txt",
29
+ "spec/fixtures/ham/spam.la-44116217.txt",
30
+ "spec/fixtures/spam/email_spam1.txt",
31
+ "spec/fixtures/spam/email_spam2.txt",
32
+ "spec/fixtures/spam/spam.la-44118014.txt",
33
+ "spec/groupie/core_ext/string_spec.rb",
34
+ "spec/groupie/group_spec.rb",
35
+ "spec/groupie_spec.rb",
36
+ "spec/spec_helper.rb"
34
37
  ]
35
38
  s.homepage = %q{http://github.com/Narnach/groupie}
36
39
  s.rdoc_options = ["--charset=UTF-8"]
@@ -38,10 +41,10 @@ Gem::Specification.new do |s|
38
41
  s.rubygems_version = %q{1.3.7}
39
42
  s.summary = %q{Group and classify text}
40
43
  s.test_files = [
41
- "test/groupie/core_ext/string_test.rb",
42
- "test/groupie/group_test.rb",
43
- "test/groupie_test.rb",
44
- "test/test_helper.rb"
44
+ "spec/groupie/core_ext/string_spec.rb",
45
+ "spec/groupie/group_spec.rb",
46
+ "spec/groupie_spec.rb",
47
+ "spec/spec_helper.rb"
45
48
  ]
46
49
 
47
50
  if s.respond_to? :specification_version then
@@ -6,7 +6,7 @@ class Groupie
6
6
  gsub(/\s/," ").
7
7
  gsub(/[$']/,'').
8
8
  gsub(/<[^>]+?>|[^\w -.,]/,'').
9
- split(" ").map {|str| str.gsub(/[,.]+\Z/,'')}
9
+ split(" ").map {|str| str.gsub(/\A['"]+|[!,."']+\Z/,'')}
10
10
  end
11
11
  end
12
12
  end
@@ -9,7 +9,6 @@ The eventual goal is to have Groupie work as a sort of bayesian spam filter, whe
9
9
  Groupie is a 'fun' project that has the following goals, in descending order of importance:
10
10
  * Have fun playing with code
11
11
  * Play with Bayesian-like (spam) filtering
12
- * Check out the Testy BDD framework. It's pretty good for 60 lines of code!
13
12
 
14
13
  == Current functionality
15
14
 
@@ -0,0 +1,13 @@
1
+ Re: [ubuntu-art] [Breathe] Network Manager-icons
2
+ Am Sonntag, den 31.05.2009, 17:53 +0200 schrieb Steve Dodier:
3
+ > Hello,
4
+ >
5
+ > I think the notify-osd icons have a completely different style, which
6
+ > is looking great within the notification bubbles, but i doubt it'd
7
+ > look great to have the notify-osd wifi icons in the panel. I think the
8
+ > drawing of the notification- wifi icons should be done afterwards, and
9
+ > if they should be based on those of the icon set, they could be made
10
+ > smoother, and possibly desaturated for some of them, to avoid drawing
11
+ > too much attention from the user when popping up.
12
+ >
13
+ > Cordially, SD.
@@ -0,0 +1,5 @@
1
+ I noticed your flirt
2
+ If you cannot see the pictures and links below, please click here to view them.
3
+ PHARMACY CLUB | UNSUBSCRIBE | YOUR PRIVACY RIGHTS
4
+ Copyright 2009 Zjfqq, all rights reserved
5
+ Customer Service Dept., 87 Hizq Iveox Street, Isahaylo, VS 25270
@@ -0,0 +1,7 @@
1
+ Re: Your subscribe #976589
2
+ Tell a friend · Download latest version See this email as a webpage
3
+ Hello!
4
+ Shipped Privately And Discreetly To Your Door!
5
+ We want to put a great big grin on your face in 2009. You'll be to rejoice all year.
6
+ Unsubscribe · Lost Password · Account Settings · Help · Terms of Service · Privacy
7
+ Ottho Heldringstraat 2, 31719 AZ Amsterdam, The Netherlands
@@ -0,0 +1,37 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe String do
4
+ context "tokenize" do
5
+ it 'should split words' do
6
+ "hello world".tokenize.should == %w[hello world]
7
+ end
8
+
9
+ it 'should downcase words' do
10
+ "Hello World".tokenize.should == %w[hello world]
11
+ end
12
+
13
+ it 'should strip special characters' do
14
+ "blah, bla!".tokenize.should == %w[blah bla]
15
+ end
16
+
17
+ it 'should preserve infix hyphens and underscores' do
18
+ "hyphen-ated under_score".tokenize.should == %w[hyphen-ated under_score]
19
+ end
20
+
21
+ it 'should sanitize html tags' do
22
+ '<a href="http://example.org">example</a>'.tokenize.should == %w[example]
23
+ end
24
+
25
+ it 'should preserve infix periods' do
26
+ 'example.org rocks. read it...'.tokenize.should == %w[example.org rocks read it]
27
+ end
28
+
29
+ it "should preserve infix commas" do
30
+ '$1,000,000.00 or $1.000.000,00'.tokenize.should == %w[1,000,000.00 or 1.000.000,00]
31
+ end
32
+
33
+ it "should strip quotes around tokens" do
34
+ '"first last"'.tokenize.should == %w[first last]
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,12 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. spec_helper])
2
+ require 'yaml'
3
+
4
+ describe Groupie::Group do
5
+ it "can be serialized and loaded through YAML" do
6
+ group = Groupie::Group.new 'group'
7
+ group.add %w[buy flowers]
8
+ loaded_group = YAML.load(group.to_yaml)
9
+ loaded_group.add %w[buy candy]
10
+ loaded_group.count("candy").should == 1
11
+ end
12
+ end
@@ -0,0 +1,75 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
+ describe Groupie do
4
+ describe "classify" do
5
+ it 'should work when 100% certain' do
6
+ g = Groupie.new
7
+ g[:spam].add %w[viagra]
8
+ g[:ham].add %w[flowers]
9
+ g.classify('viagra').should == {:spam => 1.0, :ham => 0.0}
10
+ end
11
+
12
+ it 'should work when split 50/50 between two groups' do
13
+ g = Groupie.new
14
+ g[:spam].add %w[buy viagra now]
15
+ g[:ham].add %w[buy flowers for your mom]
16
+ g.classify('buy').should == {:spam => 0.5, :ham => 0.5}
17
+ end
18
+
19
+ it 'should work when weighed more towards one group' do
20
+ g = Groupie.new
21
+ g[:spam].add %w[buy viagra now]
22
+ g[:spam].add %w[buy cialis now]
23
+ g[:ham].add %w[buy flowers for your mom]
24
+ g.classify('buy').should == {:spam => 2 / 3.0, :ham => 1 / 3.0}
25
+ end
26
+
27
+ it 'should work with more than two groups' do
28
+ g = Groupie.new
29
+ g[:weight].add 'pound'
30
+ g[:currency].add 'pound'
31
+ g[:phone_key].add 'pound'
32
+ g.classify('pound').should == {:weight => 1/3.0, :currency => 1/3.0, :phone_key => 1/3.0}
33
+ end
34
+
35
+ it 'should tokenize and classify emails' do
36
+ email = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam email_spam1.txt]))
37
+ email2 = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam email_spam2.txt]))
38
+ email3 = File.read(File.join(File.dirname(__FILE__), %w[fixtures ham email_ham1.txt]))
39
+ g = Groupie.new
40
+ g[:spam].add email.tokenize
41
+ g[:spam].add email2.tokenize
42
+ g[:ham].add email3.tokenize
43
+ c = g.classify('discreetly')
44
+ c[:spam].should > c[:ham]
45
+ c2 = g.classify('user')
46
+ c2[:ham].should > c2[:spam]
47
+ end
48
+ end
49
+ context "classify_text" do
50
+ it 'should tokenize html emails' do
51
+ g = Groupie.new
52
+ spam_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam spam.la-44118014.txt])).tokenize
53
+ ham_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures ham spam.la-44116217.txt])).tokenize
54
+ g[:spam].add spam_tokens
55
+ g[:ham].add ham_tokens
56
+
57
+ c = g.classify 'user'
58
+ c[:ham].should > c[:spam]
59
+
60
+ c = g.classify_text(spam_tokens)
61
+ c[:spam].should > c[:ham]
62
+ end
63
+
64
+ it 'should classify a text' do
65
+ g = Groupie.new
66
+ g[:spam].add %w[buy viagra now to grow fast]
67
+ g[:spam].add %w[buy cialis on our website]
68
+ g[:ham].add %w[buy flowers for your mom]
69
+ result = g.classify_text "Grow flowers to sell on our website".tokenize
70
+ result[:spam].should > result[:ham]
71
+ result2 = g.classify_text "Grow flowers to give to your mom".tokenize
72
+ result2[:ham].should == result2[:spam]
73
+ end
74
+ end
75
+ end
@@ -0,0 +1 @@
1
+ require 'lib/groupie'
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: groupie
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 0
10
- version: 0.1.0
9
+ - 1
10
+ version: 0.1.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Wes Oldenbeuving
@@ -50,12 +50,15 @@ files:
50
50
  - lib/groupie/core_ext/string.rb
51
51
  - lib/groupie/group.rb
52
52
  - readme.rdoc
53
- - test/fixtures/ham/spam.la-44116217.txt
54
- - test/fixtures/spam/spam.la-44118014.txt
55
- - test/groupie/core_ext/string_test.rb
56
- - test/groupie/group_test.rb
57
- - test/groupie_test.rb
58
- - test/test_helper.rb
53
+ - spec/fixtures/ham/email_ham1.txt
54
+ - spec/fixtures/ham/spam.la-44116217.txt
55
+ - spec/fixtures/spam/email_spam1.txt
56
+ - spec/fixtures/spam/email_spam2.txt
57
+ - spec/fixtures/spam/spam.la-44118014.txt
58
+ - spec/groupie/core_ext/string_spec.rb
59
+ - spec/groupie/group_spec.rb
60
+ - spec/groupie_spec.rb
61
+ - spec/spec_helper.rb
59
62
  has_rdoc: true
60
63
  homepage: http://github.com/Narnach/groupie
61
64
  licenses: []
@@ -91,7 +94,7 @@ signing_key:
91
94
  specification_version: 3
92
95
  summary: Group and classify text
93
96
  test_files:
94
- - test/groupie/core_ext/string_test.rb
95
- - test/groupie/group_test.rb
96
- - test/groupie_test.rb
97
- - test/test_helper.rb
97
+ - spec/groupie/core_ext/string_spec.rb
98
+ - spec/groupie/group_spec.rb
99
+ - spec/groupie_spec.rb
100
+ - spec/spec_helper.rb
@@ -1,45 +0,0 @@
1
- require File.join(File.dirname(__FILE__), %w[.. .. test_helper])
2
-
3
- Testy.testing 'String' do
4
- context 'tokenize' do
5
- test 'split words' do |t|
6
- tokens = "hello world".tokenize
7
- t.check 'words are split',
8
- :expect => %w[hello world],
9
- :actual => tokens
10
- end
11
-
12
- test 'downcase words' do |t|
13
- tokens = "Hello World".tokenize
14
- t.check 'words are downcased',
15
- :expect => %w[hello world],
16
- :actual => tokens
17
- end
18
-
19
- test 'most symbols are stripped' do |t|
20
- tokens = "hyphen-ated, under_score!".tokenize
21
- t.check 'some symbols are left',
22
- :expect => %w[hyphen-ated under_score],
23
- :actual => tokens
24
- end
25
-
26
- test 'html tags are sanitized' do |t|
27
- tokens = '<a href="http://example.org">example</a>'.tokenize
28
- t.check 'only content of tags is retained',
29
- :expect => %w[example],
30
- :actual => tokens
31
- end
32
-
33
- test 'some dots are ok' do |t|
34
- tokens = 'example.org rocks. read it...'.tokenize
35
- t.check 'infix dots are kept',
36
- :expect => %w[example.org rocks read it],
37
- :actual => tokens
38
-
39
- tokens2 = '$1,000,000.00 or $1.000.000,00'.tokenize
40
- t.check 'infix commas are kept',
41
- :expect => %w[1,000,000.00 or 1.000.000,00],
42
- :actual => tokens2
43
- end
44
- end
45
- end
@@ -1,15 +0,0 @@
1
- require File.join(File.dirname(__FILE__), %w[.. test_helper])
2
-
3
- Testy.testing 'Groupie::Group' do
4
- test 'can be serialized and loaded through YAML' do |t|
5
- require 'yaml'
6
-
7
- g = Groupie::Group.new 'group'
8
- g.add %w[buy flowers]
9
- g2 = YAML.load(g.to_yaml)
10
- g2.add %w[buy candy]
11
- t.check 'default value works for new entries',
12
- :expect => 1,
13
- :actual => g2.count('candy')
14
- end
15
- end
@@ -1,124 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'test_helper')
2
-
3
- Testy.testing 'Groupie' do
4
- test 'classification is certain' do |t|
5
- g = Groupie.new
6
- g[:spam].add %w[viagra]
7
- g[:ham].add %w[flowers]
8
- classification = g.classify 'viagra'
9
- t.check 'viagra is',
10
- :expect => {:spam => 1.0, :ham => 0.0},
11
- :actual => classification
12
- end
13
-
14
- test 'classification is split between two groups' do |t|
15
- g = Groupie.new
16
- g[:spam].add %w[buy viagra now]
17
- g[:ham].add %w[buy flowers for your mom]
18
- classification = g.classify 'buy'
19
- t.check 'buy is classified as',
20
- :expect => {:spam => 0.5, :ham => 0.5},
21
- :actual => classification
22
- end
23
-
24
- test 'classification is weighed more heavy in one group' do |t|
25
- g = Groupie.new
26
- g[:spam].add %w[buy viagra now]
27
- g[:spam].add %w[buy cialis now]
28
- g[:ham].add %w[buy flowers for your mom]
29
- t.check 'buy is classified as',
30
- :expect => {:spam => 2 / 3.0, :ham => 1 / 3.0},
31
- :actual => g.classify('buy')
32
- end
33
-
34
- test 'classification works fine with more than two groups' do |t|
35
- g = Groupie.new
36
- g[:weight].add 'pound'
37
- g[:currency].add 'pound'
38
- g[:phone_key].add 'pound'
39
- t.check 'pound is classified as',
40
- :expect => {:weight => 1/3.0, :currency => 1/3.0, :phone_key => 1/3.0},
41
- :actual => g.classify('pound')
42
- end
43
-
44
- test 'tokenized emails' do |t|
45
- email = <<-EMAIL
46
- I noticed your flirt
47
- If you cannot see the pictures and links below, please click here to view them.
48
- PHARMACY CLUB | UNSUBSCRIBE | YOUR PRIVACY RIGHTS
49
- Copyright 2009 Zjfqq, all rights reserved
50
- Customer Service Dept., 87 Hizq Iveox Street, Isahaylo, VS 25270
51
- EMAIL
52
- email2 = <<-EMAIL
53
- Re: Your subscribe #976589
54
- Tell a friend · Download latest version See this email as a webpage
55
- Hello!
56
- Shipped Privately And Discreetly To Your Door!
57
- We want to put a great big grin on your face in 2009. You'll be to rejoice all year.
58
- Unsubscribe · Lost Password · Account Settings · Help · Terms of Service · Privacy
59
- Ottho Heldringstraat 2, 31719 AZ Amsterdam, The Netherlands
60
- EMAIL
61
- email3 = <<-EMAIL
62
- Re: [ubuntu-art] [Breathe] Network Manager-icons
63
- Am Sonntag, den 31.05.2009, 17:53 +0200 schrieb Steve Dodier:
64
- > Hello,
65
- >
66
- > I think the notify-osd icons have a completely different style, which
67
- > is looking great within the notification bubbles, but i doubt it'd
68
- > look great to have the notify-osd wifi icons in the panel. I think the
69
- > drawing of the notification- wifi icons should be done afterwards, and
70
- > if they should be based on those of the icon set, they could be made
71
- > smoother, and possibly desaturated for some of them, to avoid drawing
72
- > too much attention from the user when popping up.
73
- >
74
- > Cordially, SD.
75
- EMAIL
76
- g = Groupie.new
77
- g[:spam].add email.tokenize
78
- g[:spam].add email2.tokenize
79
- g[:ham].add email3.tokenize
80
- c = g.classify('discreetly')
81
- t.check 'classification of "discreetly" is spam',
82
- :expect => true,
83
- :actual => c[:spam] > c[:ham]
84
- c2 = g.classify('user')
85
- t.check 'classification of "user" is ham',
86
- :expect => true,
87
- :actual => c2[:ham] > c2[:spam]
88
- end
89
-
90
- test 'tokenized html emails' do |t|
91
- g = Groupie.new
92
- spam_tokens = File.read(File.join(File.dirname(__FILE__),
93
- %w[fixtures spam spam.la-44118014.txt])).tokenize
94
- ham_tokens = File.read(File.join(File.dirname(__FILE__),
95
- %w[fixtures ham spam.la-44116217.txt])).tokenize
96
- g[:spam].add spam_tokens
97
- g[:ham].add ham_tokens
98
-
99
- c = g.classify 'user'
100
- t.check 'classification of the word "user" is ham',
101
- :expect => true,
102
- :actual => (c[:ham] > c[:spam])
103
-
104
- c = g.classify_text(spam_tokens)
105
- t.check 'classification of spam email is spam',
106
- :expect => true,
107
- :actual => (c[:spam] > c[:ham])
108
- end
109
-
110
- test 'classify a text' do |t|
111
- g = Groupie.new
112
- g[:spam].add %w[buy viagra now to grow fast]
113
- g[:spam].add %w[buy cialis on our website]
114
- g[:ham].add %w[buy flowers for your mom]
115
- result = g.classify_text "Grow flowers to sell on our website".tokenize
116
- t.check 'classification of a spammy text is spam',
117
- :expect => true,
118
- :actual => result[:spam] > result[:ham]
119
- result2 = g.classify_text "Grow flowers to give to your mom".tokenize
120
- t.check 'classification of a non-spammy text is ham',
121
- :expect => true,
122
- :actual => result2[:ham] > result2[:spam]
123
- end
124
- end
@@ -1,3 +0,0 @@
1
- require 'rubygems'
2
- require 'testy'
3
- require 'lib/groupie'