groupie 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -19,17 +19,17 @@ rescue LoadError
19
19
  end
20
20
 
21
21
  require 'rake/testtask'
22
- Rake::TestTask.new(:test) do |test|
23
- test.libs << 'lib' << 'test'
24
- test.pattern = 'test/**/*_test.rb'
22
+ Rake::TestTask.new(:spec) do |test|
23
+ test.libs << 'lib' << 'spec'
24
+ test.pattern = 'spec/**/*_spec.rb'
25
25
  test.verbose = true
26
26
  end
27
27
 
28
28
  begin
29
29
  require 'rcov/rcovtask'
30
30
  Rcov::RcovTask.new do |test|
31
- test.libs << 'test'
32
- test.pattern = 'test/**/*_test.rb'
31
+ test.libs << 'spec'
32
+ test.pattern = 'spec/**/*_spec.rb'
33
33
  test.verbose = true
34
34
  end
35
35
  rescue LoadError
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{groupie}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Wes Oldenbeuving"]
@@ -25,12 +25,15 @@ Gem::Specification.new do |s|
25
25
  "lib/groupie/core_ext/string.rb",
26
26
  "lib/groupie/group.rb",
27
27
  "readme.rdoc",
28
- "test/fixtures/ham/spam.la-44116217.txt",
29
- "test/fixtures/spam/spam.la-44118014.txt",
30
- "test/groupie/core_ext/string_test.rb",
31
- "test/groupie/group_test.rb",
32
- "test/groupie_test.rb",
33
- "test/test_helper.rb"
28
+ "spec/fixtures/ham/email_ham1.txt",
29
+ "spec/fixtures/ham/spam.la-44116217.txt",
30
+ "spec/fixtures/spam/email_spam1.txt",
31
+ "spec/fixtures/spam/email_spam2.txt",
32
+ "spec/fixtures/spam/spam.la-44118014.txt",
33
+ "spec/groupie/core_ext/string_spec.rb",
34
+ "spec/groupie/group_spec.rb",
35
+ "spec/groupie_spec.rb",
36
+ "spec/spec_helper.rb"
34
37
  ]
35
38
  s.homepage = %q{http://github.com/Narnach/groupie}
36
39
  s.rdoc_options = ["--charset=UTF-8"]
@@ -38,10 +41,10 @@ Gem::Specification.new do |s|
38
41
  s.rubygems_version = %q{1.3.7}
39
42
  s.summary = %q{Group and classify text}
40
43
  s.test_files = [
41
- "test/groupie/core_ext/string_test.rb",
42
- "test/groupie/group_test.rb",
43
- "test/groupie_test.rb",
44
- "test/test_helper.rb"
44
+ "spec/groupie/core_ext/string_spec.rb",
45
+ "spec/groupie/group_spec.rb",
46
+ "spec/groupie_spec.rb",
47
+ "spec/spec_helper.rb"
45
48
  ]
46
49
 
47
50
  if s.respond_to? :specification_version then
@@ -6,7 +6,7 @@ class Groupie
6
6
  gsub(/\s/," ").
7
7
  gsub(/[$']/,'').
8
8
  gsub(/<[^>]+?>|[^\w -.,]/,'').
9
- split(" ").map {|str| str.gsub(/[,.]+\Z/,'')}
9
+ split(" ").map {|str| str.gsub(/\A['"]+|[!,."']+\Z/,'')}
10
10
  end
11
11
  end
12
12
  end
@@ -9,7 +9,6 @@ The eventual goal is to have Groupie work as a sort of bayesian spam filter, whe
9
9
  Groupie is a 'fun' project that has the following goals, in descending order of importance:
10
10
  * Have fun playing with code
11
11
  * Play with Bayesian-like (spam) filtering
12
- * Check out the Testy BDD framework. It's pretty good for 60 lines of code!
13
12
 
14
13
  == Current functionality
15
14
 
@@ -0,0 +1,13 @@
1
+ Re: [ubuntu-art] [Breathe] Network Manager-icons
2
+ Am Sonntag, den 31.05.2009, 17:53 +0200 schrieb Steve Dodier:
3
+ > Hello,
4
+ >
5
+ > I think the notify-osd icons have a completely different style, which
6
+ > is looking great within the notification bubbles, but i doubt it'd
7
+ > look great to have the notify-osd wifi icons in the panel. I think the
8
+ > drawing of the notification- wifi icons should be done afterwards, and
9
+ > if they should be based on those of the icon set, they could be made
10
+ > smoother, and possibly desaturated for some of them, to avoid drawing
11
+ > too much attention from the user when popping up.
12
+ >
13
+ > Cordially, SD.
@@ -0,0 +1,5 @@
1
+ I noticed your flirt
2
+ If you cannot see the pictures and links below, please click here to view them.
3
+ PHARMACY CLUB | UNSUBSCRIBE | YOUR PRIVACY RIGHTS
4
+ Copyright 2009 Zjfqq, all rights reserved
5
+ Customer Service Dept., 87 Hizq Iveox Street, Isahaylo, VS 25270
@@ -0,0 +1,7 @@
1
+ Re: Your subscribe #976589
2
+ Tell a friend · Download latest version See this email as a webpage
3
+ Hello!
4
+ Shipped Privately And Discreetly To Your Door!
5
+ We want to put a great big grin on your face in 2009. You'll be to rejoice all year.
6
+ Unsubscribe · Lost Password · Account Settings · Help · Terms of Service · Privacy
7
+ Ottho Heldringstraat 2, 31719 AZ Amsterdam, The Netherlands
@@ -0,0 +1,37 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe String do
4
+ context "tokenize" do
5
+ it 'should split words' do
6
+ "hello world".tokenize.should == %w[hello world]
7
+ end
8
+
9
+ it 'should downcase words' do
10
+ "Hello World".tokenize.should == %w[hello world]
11
+ end
12
+
13
+ it 'should strip special characters' do
14
+ "blah, bla!".tokenize.should == %w[blah bla]
15
+ end
16
+
17
+ it 'should prserve infix hyphens and underscores' do
18
+ "hyphen-ated under_score".tokenize.should == %w[hyphen-ated under_score]
19
+ end
20
+
21
+ it 'should sanitize html tags' do
22
+ '<a href="http://example.org">example</a>'.tokenize.should == %w[example]
23
+ end
24
+
25
+ it 'should preserve infix periods' do
26
+ 'example.org rocks. read it...'.tokenize.should == %w[example.org rocks read it]
27
+ end
28
+
29
+ it "should preserve infix commas" do
30
+ '$1,000,000.00 or $1.000.000,00'.tokenize.should == %w[1,000,000.00 or 1.000.000,00]
31
+ end
32
+
33
+ it "should strip quotes around tokens" do
34
+ '"first last"'.tokenize.should == %w[first last]
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,12 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. spec_helper])
2
+ require 'yaml'
3
+
4
+ describe Groupie::Group do
5
+ it "can be serialized and loaded through YAML" do
6
+ group = Groupie::Group.new 'group'
7
+ group.add %w[buy flowers]
8
+ loaded_group = YAML.load(group.to_yaml)
9
+ loaded_group.add %w[buy candy]
10
+ loaded_group.count("candy").should == 1
11
+ end
12
+ end
@@ -0,0 +1,75 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
+ describe Groupie do
4
+ describe "classify" do
5
+ it 'should work when 100% certaint' do
6
+ g = Groupie.new
7
+ g[:spam].add %w[viagra]
8
+ g[:ham].add %w[flowers]
9
+ g.classify('viagra').should == {:spam => 1.0, :ham => 0.0}
10
+ end
11
+
12
+ it 'should work when split 50/50 between two groups' do
13
+ g = Groupie.new
14
+ g[:spam].add %w[buy viagra now]
15
+ g[:ham].add %w[buy flowers for your mom]
16
+ g.classify('buy').should == {:spam => 0.5, :ham => 0.5}
17
+ end
18
+
19
+ it 'should work when weighed more towards one group' do
20
+ g = Groupie.new
21
+ g[:spam].add %w[buy viagra now]
22
+ g[:spam].add %w[buy cialis now]
23
+ g[:ham].add %w[buy flowers for your mom]
24
+ g.classify('buy').should == {:spam => 2 / 3.0, :ham => 1 / 3.0}
25
+ end
26
+
27
+ it 'should work with more than two groups' do
28
+ g = Groupie.new
29
+ g[:weight].add 'pound'
30
+ g[:currency].add 'pound'
31
+ g[:phone_key].add 'pound'
32
+ g.classify('pound').should == {:weight => 1/3.0, :currency => 1/3.0, :phone_key => 1/3.0}
33
+ end
34
+
35
+ it 'should tokenize and classify emails' do
36
+ email = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam email_spam1.txt]))
37
+ email2 = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam email_spam2.txt]))
38
+ email3 = File.read(File.join(File.dirname(__FILE__), %w[fixtures ham email_ham1.txt]))
39
+ g = Groupie.new
40
+ g[:spam].add email.tokenize
41
+ g[:spam].add email2.tokenize
42
+ g[:ham].add email3.tokenize
43
+ c = g.classify('discreetly')
44
+ c[:spam].should > c[:ham]
45
+ c2 = g.classify('user')
46
+ c2[:ham].should > c2[:spam]
47
+ end
48
+ end
49
+ context "classify_text" do
50
+ it 'should tokenized html emails' do
51
+ g = Groupie.new
52
+ spam_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam spam.la-44118014.txt])).tokenize
53
+ ham_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures ham spam.la-44116217.txt])).tokenize
54
+ g[:spam].add spam_tokens
55
+ g[:ham].add ham_tokens
56
+
57
+ c = g.classify 'user'
58
+ c[:ham].should > c[:spam]
59
+
60
+ c = g.classify_text(spam_tokens)
61
+ c[:spam].should > c[:ham]
62
+ end
63
+
64
+ it 'should classify a text' do
65
+ g = Groupie.new
66
+ g[:spam].add %w[buy viagra now to grow fast]
67
+ g[:spam].add %w[buy cialis on our website]
68
+ g[:ham].add %w[buy flowers for your mom]
69
+ result = g.classify_text "Grow flowers to sell on our website".tokenize
70
+ result[:spam].should > result[:ham]
71
+ result2 = g.classify_text "Grow flowers to give to your mom".tokenize
72
+ result2[:ham].should == result2[:spam]
73
+ end
74
+ end
75
+ end
@@ -0,0 +1 @@
1
+ require 'lib/groupie'
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: groupie
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 0
10
- version: 0.1.0
9
+ - 1
10
+ version: 0.1.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Wes Oldenbeuving
@@ -50,12 +50,15 @@ files:
50
50
  - lib/groupie/core_ext/string.rb
51
51
  - lib/groupie/group.rb
52
52
  - readme.rdoc
53
- - test/fixtures/ham/spam.la-44116217.txt
54
- - test/fixtures/spam/spam.la-44118014.txt
55
- - test/groupie/core_ext/string_test.rb
56
- - test/groupie/group_test.rb
57
- - test/groupie_test.rb
58
- - test/test_helper.rb
53
+ - spec/fixtures/ham/email_ham1.txt
54
+ - spec/fixtures/ham/spam.la-44116217.txt
55
+ - spec/fixtures/spam/email_spam1.txt
56
+ - spec/fixtures/spam/email_spam2.txt
57
+ - spec/fixtures/spam/spam.la-44118014.txt
58
+ - spec/groupie/core_ext/string_spec.rb
59
+ - spec/groupie/group_spec.rb
60
+ - spec/groupie_spec.rb
61
+ - spec/spec_helper.rb
59
62
  has_rdoc: true
60
63
  homepage: http://github.com/Narnach/groupie
61
64
  licenses: []
@@ -91,7 +94,7 @@ signing_key:
91
94
  specification_version: 3
92
95
  summary: Group and classify text
93
96
  test_files:
94
- - test/groupie/core_ext/string_test.rb
95
- - test/groupie/group_test.rb
96
- - test/groupie_test.rb
97
- - test/test_helper.rb
97
+ - spec/groupie/core_ext/string_spec.rb
98
+ - spec/groupie/group_spec.rb
99
+ - spec/groupie_spec.rb
100
+ - spec/spec_helper.rb
@@ -1,45 +0,0 @@
1
- require File.join(File.dirname(__FILE__), %w[.. .. test_helper])
2
-
3
- Testy.testing 'String' do
4
- context 'tokenize' do
5
- test 'split words' do |t|
6
- tokens = "hello world".tokenize
7
- t.check 'words are split',
8
- :expect => %w[hello world],
9
- :actual => tokens
10
- end
11
-
12
- test 'downcase words' do |t|
13
- tokens = "Hello World".tokenize
14
- t.check 'words are downcased',
15
- :expect => %w[hello world],
16
- :actual => tokens
17
- end
18
-
19
- test 'most symbols are stripped' do |t|
20
- tokens = "hyphen-ated, under_score!".tokenize
21
- t.check 'some symbols are left',
22
- :expect => %w[hyphen-ated under_score],
23
- :actual => tokens
24
- end
25
-
26
- test 'html tags are sanitized' do |t|
27
- tokens = '<a href="http://example.org">example</a>'.tokenize
28
- t.check 'only content of tags is retained',
29
- :expect => %w[example],
30
- :actual => tokens
31
- end
32
-
33
- test 'some dots are ok' do |t|
34
- tokens = 'example.org rocks. read it...'.tokenize
35
- t.check 'infix dots are kept',
36
- :expect => %w[example.org rocks read it],
37
- :actual => tokens
38
-
39
- tokens2 = '$1,000,000.00 or $1.000.000,00'.tokenize
40
- t.check 'infix commas are kept',
41
- :expect => %w[1,000,000.00 or 1.000.000,00],
42
- :actual => tokens2
43
- end
44
- end
45
- end
@@ -1,15 +0,0 @@
1
- require File.join(File.dirname(__FILE__), %w[.. test_helper])
2
-
3
- Testy.testing 'Groupie::Group' do
4
- test 'can be serialized and loaded through YAML' do |t|
5
- require 'yaml'
6
-
7
- g = Groupie::Group.new 'group'
8
- g.add %w[buy flowers]
9
- g2 = YAML.load(g.to_yaml)
10
- g2.add %w[buy candy]
11
- t.check 'default value works for new entries',
12
- :expect => 1,
13
- :actual => g2.count('candy')
14
- end
15
- end
@@ -1,124 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'test_helper')
2
-
3
- Testy.testing 'Groupie' do
4
- test 'classification is certain' do |t|
5
- g = Groupie.new
6
- g[:spam].add %w[viagra]
7
- g[:ham].add %w[flowers]
8
- classification = g.classify 'viagra'
9
- t.check 'viagra is',
10
- :expect => {:spam => 1.0, :ham => 0.0},
11
- :actual => classification
12
- end
13
-
14
- test 'classification is split between two groups' do |t|
15
- g = Groupie.new
16
- g[:spam].add %w[buy viagra now]
17
- g[:ham].add %w[buy flowers for your mom]
18
- classification = g.classify 'buy'
19
- t.check 'buy is classified as',
20
- :expect => {:spam => 0.5, :ham => 0.5},
21
- :actual => classification
22
- end
23
-
24
- test 'classification is weighed more heavy in one group' do |t|
25
- g = Groupie.new
26
- g[:spam].add %w[buy viagra now]
27
- g[:spam].add %w[buy cialis now]
28
- g[:ham].add %w[buy flowers for your mom]
29
- t.check 'buy is classified as',
30
- :expect => {:spam => 2 / 3.0, :ham => 1 / 3.0},
31
- :actual => g.classify('buy')
32
- end
33
-
34
- test 'classification works fine with more than two groups' do |t|
35
- g = Groupie.new
36
- g[:weight].add 'pound'
37
- g[:currency].add 'pound'
38
- g[:phone_key].add 'pound'
39
- t.check 'pound is classified as',
40
- :expect => {:weight => 1/3.0, :currency => 1/3.0, :phone_key => 1/3.0},
41
- :actual => g.classify('pound')
42
- end
43
-
44
- test 'tokenized emails' do |t|
45
- email = <<-EMAIL
46
- I noticed your flirt
47
- If you cannot see the pictures and links below, please click here to view them.
48
- PHARMACY CLUB | UNSUBSCRIBE | YOUR PRIVACY RIGHTS
49
- Copyright 2009 Zjfqq, all rights reserved
50
- Customer Service Dept., 87 Hizq Iveox Street, Isahaylo, VS 25270
51
- EMAIL
52
- email2 = <<-EMAIL
53
- Re: Your subscribe #976589
54
- Tell a friend · Download latest version See this email as a webpage
55
- Hello!
56
- Shipped Privately And Discreetly To Your Door!
57
- We want to put a great big grin on your face in 2009. You'll be to rejoice all year.
58
- Unsubscribe · Lost Password · Account Settings · Help · Terms of Service · Privacy
59
- Ottho Heldringstraat 2, 31719 AZ Amsterdam, The Netherlands
60
- EMAIL
61
- email3 = <<-EMAIL
62
- Re: [ubuntu-art] [Breathe] Network Manager-icons
63
- Am Sonntag, den 31.05.2009, 17:53 +0200 schrieb Steve Dodier:
64
- > Hello,
65
- >
66
- > I think the notify-osd icons have a completely different style, which
67
- > is looking great within the notification bubbles, but i doubt it'd
68
- > look great to have the notify-osd wifi icons in the panel. I think the
69
- > drawing of the notification- wifi icons should be done afterwards, and
70
- > if they should be based on those of the icon set, they could be made
71
- > smoother, and possibly desaturated for some of them, to avoid drawing
72
- > too much attention from the user when popping up.
73
- >
74
- > Cordially, SD.
75
- EMAIL
76
- g = Groupie.new
77
- g[:spam].add email.tokenize
78
- g[:spam].add email2.tokenize
79
- g[:ham].add email3.tokenize
80
- c = g.classify('discreetly')
81
- t.check 'classification of "discreetly" is spam',
82
- :expect => true,
83
- :actual => c[:spam] > c[:ham]
84
- c2 = g.classify('user')
85
- t.check 'classification of "user" is ham',
86
- :expect => true,
87
- :actual => c2[:ham] > c2[:spam]
88
- end
89
-
90
- test 'tokenized html emails' do |t|
91
- g = Groupie.new
92
- spam_tokens = File.read(File.join(File.dirname(__FILE__),
93
- %w[fixtures spam spam.la-44118014.txt])).tokenize
94
- ham_tokens = File.read(File.join(File.dirname(__FILE__),
95
- %w[fixtures ham spam.la-44116217.txt])).tokenize
96
- g[:spam].add spam_tokens
97
- g[:ham].add ham_tokens
98
-
99
- c = g.classify 'user'
100
- t.check 'classification of the word "user" is ham',
101
- :expect => true,
102
- :actual => (c[:ham] > c[:spam])
103
-
104
- c = g.classify_text(spam_tokens)
105
- t.check 'classification of spam email is spam',
106
- :expect => true,
107
- :actual => (c[:spam] > c[:ham])
108
- end
109
-
110
- test 'classify a text' do |t|
111
- g = Groupie.new
112
- g[:spam].add %w[buy viagra now to grow fast]
113
- g[:spam].add %w[buy cialis on our website]
114
- g[:ham].add %w[buy flowers for your mom]
115
- result = g.classify_text "Grow flowers to sell on our website".tokenize
116
- t.check 'classification of a spammy text is spam',
117
- :expect => true,
118
- :actual => result[:spam] > result[:ham]
119
- result2 = g.classify_text "Grow flowers to give to your mom".tokenize
120
- t.check 'classification of a non-spammy text is ham',
121
- :expect => true,
122
- :actual => result2[:ham] > result2[:spam]
123
- end
124
- end
@@ -1,3 +0,0 @@
1
- require 'rubygems'
2
- require 'testy'
3
- require 'lib/groupie'