groupie 0.2.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +32 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +24 -0
- data/.github/dependabot.yml +13 -0
- data/.github/workflows/gem.yml +16 -0
- data/.github/workflows/rspec.yml +22 -0
- data/.github/workflows/rubocop.yml +26 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.rubocop.yml +53 -0
- data/CHANGELOG.md +107 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +71 -0
- data/LICENSE.txt +21 -0
- data/README.md +140 -0
- data/Rakefile +7 -48
- data/SECURITY.md +18 -0
- data/bin/console +15 -0
- data/bin/rubocop +2 -0
- data/bin/setup +9 -0
- data/groupie.gemspec +32 -57
- data/lib/groupie/group.rb +19 -5
- data/lib/groupie/version.rb +10 -0
- data/lib/groupie.rb +145 -51
- metadata +56 -85
- data/.document +0 -5
- data/LICENSE +0 -20
- data/VERSION +0 -1
- data/lib/groupie/core_ext/string.rb +0 -17
- data/readme.rdoc +0 -27
- data/spec/fixtures/ham/email_ham1.txt +0 -13
- data/spec/fixtures/ham/spam.la-44116217.txt +0 -79
- data/spec/fixtures/spam/email_spam1.txt +0 -5
- data/spec/fixtures/spam/email_spam2.txt +0 -7
- data/spec/fixtures/spam/spam.la-44118014.txt +0 -73
- data/spec/groupie/core_ext/string_spec.rb +0 -37
- data/spec/groupie/group_spec.rb +0 -12
- data/spec/groupie_spec.rb +0 -130
- data/spec/spec_helper.rb +0 -1
@@ -1,79 +0,0 @@
|
|
1
|
-
From **HIDDEN**@lists.ubuntu.com Sun May 31 10:22:46 2009
|
2
|
-
Return-Path: <**HIDDEN**@lists.ubuntu.com>
|
3
|
-
X-Original-To: **HIDDEN**@spam.la
|
4
|
-
Delivered-To: **HIDDEN**@speedo.dreamhost.com
|
5
|
-
Received: from chlorine.canonical.com (chlorine.canonical.com [91.189.94.204])
|
6
|
-
by speedo.dreamhost.com (Postfix) with ESMTP id 18A4F145730
|
7
|
-
for <**HIDDEN**@spam.la>; Sun, 31 May 2009 10:22:47 -0700 (PDT)
|
8
|
-
Received: from localhost ([127.0.0.1] helo=chlorine.canonical.com)
|
9
|
-
by chlorine.canonical.com with esmtp (Exim 4.60)
|
10
|
-
(envelope-from <**HIDDEN**@lists.ubuntu.com>)
|
11
|
-
id 1MAoKV-0000Uv-RB; Sun, 31 May 2009 17:56:15 +0100
|
12
|
-
Received: from smtp104.mail.ukl.yahoo.com ([77.238.184.36])
|
13
|
-
by chlorine.canonical.com with smtp (Exim 4.60)
|
14
|
-
(envelope-from <**HIDDEN**@yahoo.de>) id 1MAoKO-0000T5-0J
|
15
|
-
for **HIDDEN**@lists.ubuntu.com; Sun, 31 May 2009 17:56:08 +0100
|
16
|
-
Received: (qmail 95693 invoked from network); 31 May 2009 16:56:07 -0000
|
17
|
-
Received: from unknown (HELO ?192.168.1.33?) **HIDDEN**@88.5.92.30 with plain)
|
18
|
-
by smtp104.mail.ukl.yahoo.com with SMTP; 31 May 2009 16:56:07 -0000
|
19
|
-
X-Yahoo-SMTP: omQsrMiswBC_IZdIQhRgQAA3Gn6tTTc-
|
20
|
-
X-YMail-OSG: e1ihGm4VM1ljqtG.6IPGOps5aG8IYZJEPQLptGPSxphH174zk4rRTWYQJmj9MMc2nJwZjNEqUnYAjErWKypElvLWu0n.v8baMMlcOOELQK2IZfFaV5Ij3HUpUDWRbd0n6PCV5iFLHlyruq5CSGsiZvfME6HpngIO0RuAcin3rePXdzWpmPnTlZwuC3qjSE9N8wC4pdBdwfmYHy4EKSKFRCXUNzdy9DPgfwqrjiCTP_tqaWmpeUOqA2Os13l0j5d6acIpgo9DcW8P_1ENNVGjJ2Lk4XbZ0oc51M_BJ2n6DHMxoazT
|
21
|
-
X-Yahoo-Newman-Property: ymail-3
|
22
|
-
From: Oliver Scholtz 1 <**HIDDEN**@yahoo.de>
|
23
|
-
To: Discussion on Ubuntu artwork <**HIDDEN**@lists.ubuntu.com>
|
24
|
-
In-Reply-To: <**HIDDEN**@mail.gmail.com>
|
25
|
-
References: <**HIDDEN**@yahoo.com>
|
26
|
-
<**HIDDEN**@jws141-laptop> <**HIDDEN**@yahoo.com>
|
27
|
-
<**HIDDEN**@isabel-desktop>
|
28
|
-
<**HIDDEN**@web95411.mail.in2.yahoo.com>
|
29
|
-
<**HIDDEN**@dani-desktop> <**HIDDEN**@yahoo.com>
|
30
|
-
<**HIDDEN**@mail.gmail.com>
|
31
|
-
<**HIDDEN**@mail.gmail.com>
|
32
|
-
Date: Sun, 31 May 2009 18:56:05 +0200
|
33
|
-
Message-Id: <**HIDDEN**@oliver-ubuntu>
|
34
|
-
Mime-Version: 1.0
|
35
|
-
X-Mailer: Evolution 2.26.1
|
36
|
-
Subject: Re: [ubuntu-art] [Breathe] Network Manager-icons
|
37
|
-
X-BeenThere: **HIDDEN**@lists.ubuntu.com
|
38
|
-
X-Mailman-Version: 2.1.8
|
39
|
-
Precedence: list
|
40
|
-
Reply-To: Discussion on Ubuntu artwork <**HIDDEN**@lists.ubuntu.com>
|
41
|
-
List-Id: Discussion on Ubuntu artwork <ubuntu-art.lists.ubuntu.com>
|
42
|
-
List-Unsubscribe: <https://lists.ubuntu.com/mailman/listinfo/ubuntu-art>,
|
43
|
-
<**HIDDEN**@lists.ubuntu.com?subject=unsubscribe>
|
44
|
-
List-Archive: <https://lists.ubuntu.com/archives/ubuntu-art>
|
45
|
-
List-Post: <**HIDDEN**@lists.ubuntu.com>
|
46
|
-
List-Help: <**HIDDEN**@lists.ubuntu.com?subject=help>
|
47
|
-
List-Subscribe: <https://lists.ubuntu.com/mailman/listinfo/ubuntu-art>,
|
48
|
-
<**HIDDEN**@lists.ubuntu.com?subject=subscribe>
|
49
|
-
Content-Type: text/plain; charset="us-ascii"
|
50
|
-
Content-Transfer-Encoding: 7bit
|
51
|
-
Sender: **HIDDEN**@lists.ubuntu.com
|
52
|
-
Errors-To: **HIDDEN**@lists.ubuntu.com
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
Am Sonntag, den 31.05.2009, 17:53 +0200 schrieb Steve Dodier:
|
57
|
-
> Hello,
|
58
|
-
>
|
59
|
-
> I think the notify-osd icons have a completely different style, which
|
60
|
-
> is looking great within the notification bubbles, but i doubt it'd
|
61
|
-
> look great to have the notify-osd wifi icons in the panel. I think the
|
62
|
-
> drawing of the notification- wifi icons should be done afterwards, and
|
63
|
-
> if they should be based on those of the icon set, they could be made
|
64
|
-
> smoother, and possibly desaturated for some of them, to avoid drawing
|
65
|
-
> too much attention from the user when popping up.
|
66
|
-
>
|
67
|
-
> Cordially, SD.
|
68
|
-
|
69
|
-
+1
|
70
|
-
---
|
71
|
-
And Mac ... much better! Maybe 22 and 16 without pale. ;)
|
72
|
-
|
73
|
-
Oliver
|
74
|
-
|
75
|
-
|
76
|
-
--
|
77
|
-
ubuntu-art mailing list
|
78
|
-
**HIDDEN**@lists.ubuntu.com
|
79
|
-
https://lists.ubuntu.com/mailman/listinfo/ubuntu-art
|
@@ -1,7 +0,0 @@
|
|
1
|
-
Re: Your subscribe #976589
|
2
|
-
Tell a friend · Download latest version See this email as a webpage
|
3
|
-
Hello!
|
4
|
-
Shipped Privately And Discreetly To Your Door!
|
5
|
-
We want to put a great big grin on your face in 2009. You'll be to rejoice all year.
|
6
|
-
Unsubscribe · Lost Password · Account Settings · Help · Terms of Service · Privacy
|
7
|
-
Ottho Heldringstraat 2, 31719 AZ Amsterdam, The Netherlands
|
@@ -1,73 +0,0 @@
|
|
1
|
-
From **HIDDEN**@manpoints.net Sun May 31 10:34:01 2009
|
2
|
-
Return-Path: <**HIDDEN**@manpoints.net>
|
3
|
-
X-Original-To: **HIDDEN**@spam.la
|
4
|
-
Delivered-To: **HIDDEN**@speedo.dreamhost.com
|
5
|
-
Received: from 201-40-49-243.bsace702.dsl.brasiltelecom.net.br (201-40-49-243.bsace702.dsl.brasiltelecom.net.br [201.40.49.243])
|
6
|
-
by speedo.dreamhost.com (Postfix) with ESMTP id 4BDC714572F
|
7
|
-
for <**HIDDEN**@spam.la>; Sun, 31 May 2009 10:33:56 -0700 (PDT)
|
8
|
-
Message-Id: <**HIDDEN**@201-40-49-243.bsace702.dsl.brasiltelecom.net.br>
|
9
|
-
From: "Leskovar L. Golda" <**HIDDEN**@manpoints.net>
|
10
|
-
To: **HIDDEN**@spam.la
|
11
|
-
Subject: My official mail blocked
|
12
|
-
Content-Type: text/html; charset="iso-8859-1"
|
13
|
-
Content-Transfer-Encoding: 7bit
|
14
|
-
MIME-Version: 1.0
|
15
|
-
Date: Sun, 31 May 2009 10:33:56 -0700 (PDT)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
20
|
-
<html>
|
21
|
-
<head>
|
22
|
-
<title></title>
|
23
|
-
<meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type">
|
24
|
-
</head>
|
25
|
-
<body bgcolor="#FFFFFF" topmargin="0" leftmargin="0" marginwidth="0" marginheight="0">
|
26
|
-
|
27
|
-
|
28
|
-
<table width="646" cellspacing="0" border="0" align="center" cellpadding="4">
|
29
|
-
<tr>
|
30
|
-
<td align="center"><font face="Arial" size="1" color="#000000">If you cannot see
|
31
|
-
the pictures and links below, please <a href="http://www.qicweman.cn/?abo=0C8C72B3E1C8648676158B">
|
32
|
-
click here</a> to view them.<br></font></td>
|
33
|
-
</tr>
|
34
|
-
|
35
|
-
</table>
|
36
|
-
|
37
|
-
<table width="742" cellspacing="0" border="0" align="center" cellpadding="0">
|
38
|
-
|
39
|
-
<tr valign="top">
|
40
|
-
<td width="475" style="border-left:1px solid #371E96;">
|
41
|
-
<a href="http://www.qicweman.cn/?ex=0C8C72B3E1C8648676158B">
|
42
|
-
<img alt="click to see the full version" src="http://www.qicweman.cn/d.jpg" style="border-width: 0px" /></a></td>
|
43
|
-
|
44
|
-
</tr>
|
45
|
-
|
46
|
-
<tr>
|
47
|
-
<td><br>
|
48
|
-
|
49
|
-
<div style="padding:10px;">
|
50
|
-
|
51
|
-
<span style="font-size:10px;color:#666666;font-family:arial;">You may also
|
52
|
-
respond to this email and subscribe to <i>Hizqru</i> by calling
|
53
|
-
1-085-417-9085, Monday-Friday, 8 a.m.-6 p.m. ET. Outside the U.S. and in
|
54
|
-
Canada, please call 1-254-403-7409.<br><br>
|
55
|
-
|
56
|
-
To opt out from receiving any future marketing-related emails from
|
57
|
-
Dqpjnjp, please <a style="color:#666666;" href="http://www.qicweman.cn/?jr=0C8C72B3E1C8648676158B&email=**HIDDEN**@spam.la">
|
58
|
-
click here</a>.<br />
|
59
|
-
Please be assured that we respect the privacy of our subscribers. To view
|
60
|
-
our privacy policy, please <a style="color:#666666;" href="http://www.qicweman.cn/?sj=0C8C72B3E1C8648676158B">
|
61
|
-
click here</a>.<br><br>
|
62
|
-
|
63
|
-
© 2009 Jdqofypy, Inc., 14 Poze Iylqod, 08th Floor, New York, NY 74172.<br></span>
|
64
|
-
|
65
|
-
</div>
|
66
|
-
</td>
|
67
|
-
</tr>
|
68
|
-
</table>
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
</body>
|
73
|
-
</html>
|
@@ -1,37 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
|
-
|
3
|
-
describe String do
|
4
|
-
context "tokenize" do
|
5
|
-
it 'should split words' do
|
6
|
-
"hello world".tokenize.should == %w[hello world]
|
7
|
-
end
|
8
|
-
|
9
|
-
it 'should downcase words' do
|
10
|
-
"Hello World".tokenize.should == %w[hello world]
|
11
|
-
end
|
12
|
-
|
13
|
-
it 'should strip special characters' do
|
14
|
-
"blah, bla!".tokenize.should == %w[blah bla]
|
15
|
-
end
|
16
|
-
|
17
|
-
it 'should prserve infix hyphens and underscores' do
|
18
|
-
"hyphen-ated under_score".tokenize.should == %w[hyphen-ated under_score]
|
19
|
-
end
|
20
|
-
|
21
|
-
it 'should sanitize html tags' do
|
22
|
-
'<a href="http://example.org">example</a>'.tokenize.should == %w[example]
|
23
|
-
end
|
24
|
-
|
25
|
-
it 'should preserve infix periods' do
|
26
|
-
'example.org rocks. read it...'.tokenize.should == %w[example.org rocks read it]
|
27
|
-
end
|
28
|
-
|
29
|
-
it "should preserve infix commas" do
|
30
|
-
'$1,000,000.00 or $1.000.000,00'.tokenize.should == %w[1,000,000.00 or 1.000.000,00]
|
31
|
-
end
|
32
|
-
|
33
|
-
it "should strip quotes around tokens" do
|
34
|
-
'"first last"'.tokenize.should == %w[first last]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
data/spec/groupie/group_spec.rb
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), %w[.. spec_helper])
|
2
|
-
require 'yaml'
|
3
|
-
|
4
|
-
describe Groupie::Group do
|
5
|
-
it "can be serialized and loaded through YAML" do
|
6
|
-
group = Groupie::Group.new 'group'
|
7
|
-
group.add %w[buy flowers]
|
8
|
-
loaded_group = YAML.load(group.to_yaml)
|
9
|
-
loaded_group.add %w[buy candy]
|
10
|
-
loaded_group.count("candy").should == 1
|
11
|
-
end
|
12
|
-
end
|
data/spec/groupie_spec.rb
DELETED
@@ -1,130 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'spec_helper')
|
2
|
-
|
3
|
-
describe Groupie do
|
4
|
-
describe "classify" do
|
5
|
-
it 'should work when 100% certaint' do
|
6
|
-
g = Groupie.new
|
7
|
-
g[:spam].add %w[viagra]
|
8
|
-
g[:ham].add %w[flowers]
|
9
|
-
g.classify('viagra').should == {:spam => 1.0, :ham => 0.0}
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'should work when split 50/50 between two groups' do
|
13
|
-
g = Groupie.new
|
14
|
-
g[:spam].add %w[buy viagra now]
|
15
|
-
g[:ham].add %w[buy flowers for your mom]
|
16
|
-
g.classify('buy').should == {:spam => 0.5, :ham => 0.5}
|
17
|
-
end
|
18
|
-
|
19
|
-
it 'should work when weighed more towards one group' do
|
20
|
-
g = Groupie.new
|
21
|
-
g[:spam].add %w[buy viagra now]
|
22
|
-
g[:spam].add %w[buy cialis now]
|
23
|
-
g[:ham].add %w[buy flowers for your mom]
|
24
|
-
g.classify('buy').should == {:spam => 2 / 3.0, :ham => 1 / 3.0}
|
25
|
-
end
|
26
|
-
|
27
|
-
it 'should work with more than two groups' do
|
28
|
-
g = Groupie.new
|
29
|
-
g[:weight].add 'pound'
|
30
|
-
g[:currency].add 'pound'
|
31
|
-
g[:phone_key].add 'pound'
|
32
|
-
g.classify('pound').should == {:weight => 1/3.0, :currency => 1/3.0, :phone_key => 1/3.0}
|
33
|
-
end
|
34
|
-
|
35
|
-
it 'should tokenize and classify emails' do
|
36
|
-
email = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam email_spam1.txt]))
|
37
|
-
email2 = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam email_spam2.txt]))
|
38
|
-
email3 = File.read(File.join(File.dirname(__FILE__), %w[fixtures ham email_ham1.txt]))
|
39
|
-
g = Groupie.new
|
40
|
-
g[:spam].add email.tokenize
|
41
|
-
g[:spam].add email2.tokenize
|
42
|
-
g[:ham].add email3.tokenize
|
43
|
-
c = g.classify('discreetly')
|
44
|
-
c[:spam].should > c[:ham]
|
45
|
-
c2 = g.classify('user')
|
46
|
-
c2[:ham].should > c2[:spam]
|
47
|
-
end
|
48
|
-
|
49
|
-
describe "strategies" do
|
50
|
-
describe "sum" do
|
51
|
-
it "should weigh words for the sum of their occurances" do
|
52
|
-
g = Groupie.new
|
53
|
-
g[:spam].add %w[word] * 9
|
54
|
-
g[:ham].add %w[word]
|
55
|
-
g.classify('word', :sum).should == {:spam=>0.9, :ham=>0.1}
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
describe "sqrt" do
|
60
|
-
it "should weigh words for the square root of the sum of ocurances" do
|
61
|
-
g = Groupie.new
|
62
|
-
g[:spam].add %w[word] * 9
|
63
|
-
g[:ham].add %w[word]
|
64
|
-
g.classify('word', :sqrt).should == {:spam=>0.75, :ham=>0.25}
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
describe "log" do
|
69
|
-
it "should weigh words for log10 of their sum of occurances" do
|
70
|
-
g = Groupie.new
|
71
|
-
g[:spam].add %w[word] * 1000
|
72
|
-
g[:ham].add %w[word] * 10
|
73
|
-
g.classify('word', :log).should == {:spam=>0.75, :ham=>0.25}
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
context "classify_text" do
|
80
|
-
it 'should tokenized html emails' do
|
81
|
-
g = Groupie.new
|
82
|
-
spam_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam spam.la-44118014.txt])).tokenize
|
83
|
-
ham_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures ham spam.la-44116217.txt])).tokenize
|
84
|
-
g[:spam].add spam_tokens
|
85
|
-
g[:ham].add ham_tokens
|
86
|
-
|
87
|
-
c = g.classify 'user'
|
88
|
-
c[:ham].should > c[:spam]
|
89
|
-
|
90
|
-
c = g.classify_text(spam_tokens)
|
91
|
-
c[:spam].should > c[:ham]
|
92
|
-
end
|
93
|
-
|
94
|
-
it 'should classify a text' do
|
95
|
-
g = Groupie.new
|
96
|
-
g[:spam].add %w[buy viagra now to grow fast]
|
97
|
-
g[:spam].add %w[buy cialis on our website]
|
98
|
-
g[:ham].add %w[buy flowers for your mom]
|
99
|
-
result = g.classify_text "Grow flowers to sell on our website".tokenize
|
100
|
-
result[:spam].should > result[:ham]
|
101
|
-
result2 = g.classify_text "Grow flowers to give to your mom".tokenize
|
102
|
-
result2[:ham].should == result2[:spam]
|
103
|
-
end
|
104
|
-
|
105
|
-
it "should skip unknown tokens" do
|
106
|
-
g = Groupie.new
|
107
|
-
g[:spam].add %w[buy viagra now]
|
108
|
-
g[:ham].add %w[buy flowers now]
|
109
|
-
g.classify_text(%w[buy buckets now]).should == {:spam=>0.5, :ham=>0.5}
|
110
|
-
end
|
111
|
-
|
112
|
-
it "should support the sqrt strategy" do
|
113
|
-
g = Groupie.new
|
114
|
-
g[:spam].add %w[one] * 9
|
115
|
-
g[:ham].add %w[one]
|
116
|
-
g[:spam].add %w[two] * 9
|
117
|
-
g[:ham].add %w[two]
|
118
|
-
g.classify_text(%w[one two three], :sqrt).should == {:spam=>0.75, :ham=>0.25}
|
119
|
-
end
|
120
|
-
|
121
|
-
it "should support the log strategy" do
|
122
|
-
g = Groupie.new
|
123
|
-
g[:spam].add %w[one] * 100
|
124
|
-
g[:ham].add %w[one]
|
125
|
-
g[:spam].add %w[two]
|
126
|
-
g[:ham].add %w[two] * 100
|
127
|
-
g.classify_text(%w[one two three], :log).should == {:spam=>0.5, :ham=>0.5}
|
128
|
-
end
|
129
|
-
end
|
130
|
-
end
|
data/spec/spec_helper.rb
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
require 'lib/groupie'
|