sad_panda 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,155 +6,170 @@ require 'lingua/stemmer'
6
6
 
7
7
  module SadPanda
8
8
 
9
- # this method reads the text of the status message
10
- # inputed by the user, removes common english words,
11
- # strips punctuation and capitalized letters, isolates
12
- # the stem of the word, and ultimately produces a hash
13
- # where the keys are the stems of the remaining words,
14
- # and the values are their respective frequencies within
15
- # the status message
16
- def self.build_term_frequencies message
17
-
18
- @message = message
19
-
20
- # create empty term_frequencies
21
- term_frequencies = {}
22
-
23
- # clean the text of the status message
24
- if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]"))
25
- @happy_que = true
26
- end
27
- if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-["))
28
- @sad_que = true
29
- end
30
-
31
- message_text = @message.gsub(/[^a-z ]/i, '').downcase
32
- message_text.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
33
- message_text.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
34
- message_text.gsub!(/\s\s+/,' ')
35
- words = message_text.split(" ")
36
-
37
- #filter for english stopwords
38
- stopwords = Stopwords.stopwords
39
- words = words - stopwords
40
-
41
- #get word stems
42
- word_stems = SadPanda.get_word_stems words
43
-
44
- #create term_frequencies
45
- word_stems.each do |stem|
46
- term_frequencies[stem] = word_stems.count(stem)
47
- end
48
-
49
- #return term frequency matrix
50
- term_frequencies
51
- end
52
-
53
- # this method takes an array of words an returns an array of word stems
54
- def self.get_word_stems words
55
- @stemmer = Lingua::Stemmer.new(:language => "en")
56
- output = []
57
- words.each do |word|
58
- output << @stemmer.stem(word)
59
- end
60
- output
61
- end
62
-
63
- # this method takes an emotion-words hash and a hash containing word
64
- # frequencies for the status message, calculates a numerical score
65
- # for each possble emotion, and returns the emotion with the highest
66
- # "score"
67
- def self.get_emotion_score(emotions, term_frequencies, verbose = false)
68
- emotion_score = {}
69
- term_frequencies.each do |key,value|
70
- emotions.keys.each do |k|
71
- if emotions[k].include?(key)
72
- emotion_score[k] ||= 0
73
- emotion_score[k] += value
74
- end
75
- end
76
- end
77
- if @verbose
78
- emotion_score.keys.each do |key|
79
- puts "EMOTION: "+key
80
- puts "SCORE: "+emotion_score[key].to_s
81
- end
82
- end
83
- # return an emotion_score_hash to be processed by emotion
84
- # get clue from any emoticons present
85
- if (@happy_que && @sad_que)
86
- return "ambiguous"
87
- elsif @happy_que
88
- return "joy"
89
- elsif @sad_que
90
- return "sadness"
91
- else
92
- ## 0 if unable to detect emotion
93
- if emotion_score == {}
94
- return "ambiguous"
9
# Returns the best-fit emotion for a status message.
#
# message - the raw status text
#
# Delegates to SadPanda.get_emotion_score, passing it the message, the table
# from EmotionBank.get_term_emotions and the term frequencies built from the
# message. The return value is whatever get_emotion_score returns.
def self.emotion(message)
  # Term frequencies are built from a copy of the message so that any in-place
  # cleaning done along the way can neither strip the emoticons that
  # get_emotion_score inspects in `message` nor alter the caller's string.
  term_frequencies = build_term_frequencies(message.dup)
  SadPanda.get_emotion_score(message, EmotionBank.get_term_emotions, term_frequencies)
end
14
+
15
# Returns the polarity value for a status message.
#
# message - the raw status text
#
# Delegates to SadPanda.get_polarity_score, passing it the message, the table
# from TermPolarities.get_term_polarities and the term frequencies built from
# the message. The return value is whatever get_polarity_score returns.
def self.polarity(message)
  # Term frequencies are built from a copy of the message so that any in-place
  # cleaning done along the way can neither strip the emoticons that
  # get_polarity_score inspects in `message` nor alter the caller's string.
  term_frequencies = SadPanda.build_term_frequencies(message.dup)
  SadPanda.get_polarity_score(message, TermPolarities.get_term_polarities, term_frequencies)
end
22
+
23
+
24
+ private
25
+
26
# Builds a stem => frequency hash for a status message.
#
# The message is turned into cleaned words (words_from_message_text), the
# entries of Stopwords.stopwords are removed, the remaining words are stemmed
# (SadPanda.get_word_stems) and the stems are counted into term_frequencies
# (create_term_frequencies).
#
# message          - the status text
# term_frequencies - hash to fill; a new hash per call by default
#
# Returns the result of create_term_frequencies.
def self.build_term_frequencies(message, term_frequencies = {})
  # filter out English stopwords before stemming
  words = words_from_message_text(message) - Stopwords.stopwords
  word_stems = SadPanda.get_word_stems(words)
  create_term_frequencies(word_stems, term_frequencies)
end
47
+
48
# Stems every word in `words` with an English Lingua::Stemmer.
#
# words  - the words to stem
# output - array the stems are appended to (a new array by default)
#
# Returns output, with one stem appended per word, in input order.
def self.get_word_stems(words, output=[])
  english_stemmer = Lingua::Stemmer.new(:language => "en")
  words.each_with_object(output) do |word, stems|
    stems << english_stemmer.stem(word)
  end
end
56
+
57
# Scores a message against an emotion-words table and picks an emotion.
#
# message          - the status text, handed on to the emoticon check
# emotions         - the emotion-words table handed to set_emotions
# term_frequencies - term => frequency pairs for the message
# emotion_score    - hash the per-emotion scores are accumulated in
#
# Every term/frequency pair is passed to set_emotions; the final answer is
# whatever check_emoticon_for_emotion returns for the accumulated scores and
# the message.
def self.get_emotion_score(message, emotions, term_frequencies, emotion_score = {})
  term_frequencies.each { |term, frequency| set_emotions(emotions, emotion_score, term, frequency) }
  check_emoticon_for_emotion(emotion_score, message)
end
69
+
70
# Collects polarity values for a message's terms and derives its polarity.
#
# message          - the status text, handed on to the emoticon check
# polarity_hash    - the polarity table handed to set_polarities
# term_frequencies - term => frequency pairs for the message
# polarity_scores  - array the matching polarity values are collected in
#
# Every term/frequency pair is passed to set_polarities; the final answer is
# whatever check_emoticon_for_polarity returns for the collected scores and
# the message.
def self.get_polarity_score(message, polarity_hash, term_frequencies, polarity_scores = [])
  term_frequencies.each { |term, frequency| set_polarities(term, frequency, polarity_hash, polarity_scores) }
  check_emoticon_for_polarity(polarity_scores, message)
end
82
+
83
# Reports whether the message contains a happy emoticon.
#
# Returns true when message includes any of ":)", ":-)", ":]" or ":-]",
# false otherwise.
def self.happy_emoticon(message)
  [":)", ":-)", ":]", ":-]"].any? { |emoticon| message.include?(emoticon) }
end
86
+
87
# Reports whether the message contains a sad emoticon.
#
# Returns true when message includes any of ":(", ":-(", ":[" or ":-[",
# false otherwise.
def self.sad_emoticon(message)
  [":(", ":-(", ":[", ":-["].any? { |emoticon| message.include?(emoticon) }
end
90
+
91
+ def self.words_from_message_text(message)
92
+ message.gsub!(/[^a-z ]/i, '')
93
+ message.downcase!
94
+ message.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
95
+ message.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
96
+ message.gsub!(/\s\s+/,' ')
97
+ message.split(" ")
98
+ end
99
+
100
# Offers one term and its frequency to every emotion in the table.
#
# emotions      - the emotion-words table; each of its keys is visited
# emotion_score - hash the scores are accumulated in (by store_emotions)
# term          - the term being scored
# frequency     - how often the term occurs
#
# store_emotions is called once per key of emotions.
def self.set_emotions(emotions, emotion_score, term, frequency)
  emotions.keys.each { |emotion| store_emotions(emotions, emotion_score, emotion, term, frequency) }
end
105
+
106
# Records the polarity of one term, if the polarity table knows it.
#
# term            - the term to look up
# frequency       - accepted for signature compatibility; not used
# polarity_hash   - the polarity table, keyed by term
# polarity_scores - array the polarity value is appended to
#
# The term is looked up directly instead of comparing it against every key
# of polarity_hash; at most one key can equal the term, so the collected
# scores are the same. The return value is not meaningful.
def self.set_polarities(term, frequency, polarity_hash, polarity_scores)
  store_polarities(term, term, polarity_hash, polarity_scores) if polarity_hash.key?(term)
end
111
+
112
# Adds a term's frequency to one emotion's score when the emotion lists it.
#
# emotions      - the emotion-words table
# emotion_score - hash of running scores, updated in place
# emotion       - the key of emotions to test
# term          - the term being scored
# frequency     - amount added to the emotion's score
#
# Returns the emotion's new score, or nil when emotions[emotion] does not
# include the term (in which case emotion_score is left untouched).
def self.store_emotions(emotions, emotion_score, emotion, term, frequency)
  return unless emotions[emotion].include?(term)

  # a missing score counts as 0
  emotion_score[emotion] = (emotion_score[emotion] || 0) + frequency
end
118
+
119
# Appends a word's polarity to polarity_scores when it equals the term.
#
# term            - the term from the message
# word            - a key of polarity_hash to compare against
# polarity_hash   - the polarity table; the value is converted with to_f
# polarity_scores - array the polarity value is appended to
#
# Returns polarity_scores after appending, or nil when term != word.
def self.store_polarities(term, word, polarity_hash, polarity_scores)
  polarity_scores << polarity_hash[word].to_f if term == word
end
124
+
125
# Picks an emotion, letting emoticons in the message override word scores.
#
# emotion_score - hash of per-emotion scores, used only without emoticons
# message       - the status text scanned for emoticons
#
# Returns "ambiguous" when both a happy and a sad emoticon are present,
# "joy" for a happy one only, "sadness" for a sad one only; otherwise the
# result of return_emotion_score(emotion_score).
def self.check_emoticon_for_emotion(emotion_score, message)
  # scan the message once per emoticon family instead of once per branch
  happy = happy_emoticon(message)
  sad = sad_emoticon(message)

  return "ambiguous" if happy && sad
  return "joy" if happy
  return "sadness" if sad

  return_emotion_score(emotion_score)
end
100
- end
101
136
 
102
- # this method returns the best-fit emotion for the status message
103
- def self.emotion message
104
- # get the emotion for which the emotion score value is highest
105
- if @emotions
106
- SadPanda.get_emotion_score(@emotions, SadPanda.build_term_frequencies(message))
107
- else
108
- SadPanda.get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies(message))
137
# Picks the highest-scoring emotion from a score hash.
#
# emotion_score - hash of emotion => score
#
# Returns "ambiguous" when emotion_score is empty (no emotion detected),
# otherwise the key with the largest score.
def self.return_emotion_score(emotion_score)
  return "ambiguous" if emotion_score == {}

  top_emotion, _top_score = emotion_score.max_by { |_emotion, score| score }
  top_emotion
end
110
- end
111
-
112
- # this method gives the status method a normalized polarity
113
- # value based on the words it contains
114
- def self.get_polarity_score (polarity_hash, term_frequencies, verbose = false)
115
- polarity_scores = []
116
- term_frequencies.each do |key, value|
117
- polarity_hash.keys.each do |k|
118
- if key == k
119
- polarity_scores << (polarity_hash[k].to_f)
120
- end
121
- end
122
- end
123
-
124
- # return an polarity_score_hash to be processed by polarity method
125
- # return an emotion_score_hash to be processed by emotion
126
- # get clue from any emoticons present
127
- if (@happy_que && @sad_que)
145
+
146
# Picks a polarity, letting emoticons in the message override word scores.
#
# polarity_scores - collected polarity values, used only without emoticons
# message         - the status text scanned for emoticons
#
# Returns 5 when both a happy and a sad emoticon are present, 8 for a happy
# one only, 2 for a sad one only; otherwise the result of
# return_polarity_scores(polarity_scores).
def self.check_emoticon_for_polarity(polarity_scores, message)
  # scan the message once per emoticon family instead of once per branch
  happy = happy_emoticon(message)
  sad = sad_emoticon(message)

  return 5 if happy && sad
  return 8 if happy
  return 2 if sad

  return_polarity_scores(polarity_scores)
end
157
+
158
# Averages the collected polarity values.
#
# polarity_scores - array of numeric polarity values
#
# Returns the neutral score 5 when the array is empty (polarity unreadable),
# otherwise the floating-point mean of the values.
def self.return_polarity_scores(polarity_scores)
  return 5 if polarity_scores == []

  polarity_scores.reduce(0.0, :+) / polarity_scores.length
end
166
+
167
# Records how often each stem occurs in word_stems.
#
# word_stems       - array of stems (duplicates expected)
# term_frequencies - hash that receives stem => count; an existing entry for
#                    a stem is overwritten with the count from word_stems
#
# Returns term_frequencies.
def self.create_term_frequencies(word_stems, term_frequencies)
  # count in a single pass rather than re-scanning the array for every stem
  counts = Hash.new(0)
  word_stems.each { |stem| counts[stem] += 1 }
  counts.each { |stem, count| term_frequencies[stem] = count }
  term_frequencies
end
173
+
159
174
 
160
175
  end
@@ -1,3 +1,3 @@
1
1
  module SadPanda
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
@@ -21,5 +21,6 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
23
  spec.add_runtime_dependency "ruby-stemmer"
24
+ spec.add_development_dependency "pry"
24
25
  spec.add_development_dependency "rspec"
25
26
  end
@@ -6,6 +6,249 @@ describe SadPanda do
6
6
  let(:emotions) {EmotionBank.get_term_emotions}
7
7
  let(:polarities) {TermPolarities.get_term_polarities}
8
8
  let(:term_frequencies) {SadPanda.build_term_frequencies("My cactus collection makes me happy.")}
9
+ let(:emotion_score) { {} }
10
+ let(:polarity_scores) { [] }
11
+ let(:polarity_hash) { TermPolarities.get_term_polarities }
12
+
13
+ context "methods" do
14
+ describe "#happy_emoticon" do
15
+
16
+ context "when true" do
17
+ it "returns true" do
18
+ message = ":)"
19
+ expect(SadPanda.happy_emoticon(message)).to be_true
20
+ end
21
+ end
22
+
23
+ context "when false" do
24
+ it "returns true" do
25
+ message = "stuff"
26
+ expect(SadPanda.happy_emoticon(message)).to be_false
27
+ end
28
+ end
29
+
30
+
31
+ end
32
+
33
+ describe "#sad_emoticon" do
34
+
35
+ context "when true" do
36
+ it "returns true" do
37
+ message = ":("
38
+ expect(SadPanda.sad_emoticon(message)).to be_true
39
+ end
40
+ end
41
+
42
+ context "when false" do
43
+ it "returns true" do
44
+ message = "stuff"
45
+ expect(SadPanda.sad_emoticon(message)).to be_false
46
+ end
47
+ end
48
+
49
+ end
50
+
51
+ describe "#words_from_message_text" do
52
+
53
+ it "removes urls and other gross stuff from tweet" do
54
+ message = "lobster hickory http://www.boston.com/business #Rails"
55
+
56
+ words = SadPanda.words_from_message_text(message)
57
+
58
+ expect(words).to eql(["lobster", "hickory", "rails"])
59
+ end
60
+
61
+ end
62
+
63
+ describe "#set_emotions" do
64
+
65
+ it "modifies the emotions_score array" do
66
+
67
+ term_frequencies.each do |key, value|
68
+ SadPanda.set_emotions(emotions, emotion_score, key, value)
69
+ end
70
+ expect((emotion_score["joy"])).to eql(1)
71
+ end
72
+
73
+ end
74
+
75
+ describe "#set_polarities" do
76
+
77
+ it "modifies the polarity_scores array" do
78
+ term_frequencies = {'sad' => 1}
79
+ term_frequencies.each do |key, value|
80
+ SadPanda.set_polarities(key, value, polarity_hash, polarity_scores)
81
+ end
82
+ expect(polarity_scores).to eql([0.0])
83
+ end
84
+
85
+ end
86
+
87
+ describe "#store_emotions" do
88
+ it "stores emotions in emotion_score hash" do
89
+
90
+ emotions = {"joy" => "zorg" }
91
+ key,value = "zorg", 1
92
+
93
+ emotions.keys.each do |k|
94
+ SadPanda.store_emotions(emotions, emotion_score, k, key, value)
95
+ end
96
+ expect(emotion_score["joy"]).to eql(1)
97
+ end
98
+
99
+ end
100
+
101
+ describe "#store_polarities" do
102
+
103
+ context "word in polarity_hash" do
104
+
105
+ it "adds a polarity to polarity_scores" do
106
+ term = "sad"
107
+ word = "sad"
108
+ SadPanda.store_polarities(term, word, polarity_hash, polarity_scores)
109
+ expect(polarity_scores).to eql([0.0])
110
+ end
111
+
112
+ end
113
+
114
+ context "word not in polarity_hash" do
115
+
116
+ it "does not add a polarity to polarity_scores" do
117
+ term = "sad"
118
+ word = "cactus"
119
+ SadPanda.store_polarities(term, word, polarity_hash, polarity_scores)
120
+ expect(polarity_scores).to eql([])
121
+ end
122
+
123
+ end
124
+
125
+ end
126
+
127
+ describe "#create_term_frequencies" do
128
+
129
+ it "populates a word-stem frequency hash" do
130
+ words = ["yo", "stuff"]
131
+ term_frequencies = {}
132
+ word_stems = SadPanda.get_word_stems(words)
133
+ term_frequencies = SadPanda.create_term_frequencies(word_stems, term_frequencies)
134
+
135
+ expect(term_frequencies).to eql({"yo"=>1, "stuff"=>1})
136
+ end
137
+
138
+ end
139
+
140
+ describe "#check_emoticon_for_emotion" do
141
+ context "contains happy emoticon" do
142
+
143
+ it "returns 'joy'" do
144
+ message = ":)"
145
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
146
+ expect(output).to eql("joy")
147
+ end
148
+
149
+ end
150
+
151
+ context "contains sad emoticon" do
152
+
153
+ it "returns 'sadness'" do
154
+ message = ":("
155
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
156
+ expect(output).to eql("sadness")
157
+ end
158
+
159
+ end
160
+
161
+ context "contains both a happy and a sad emoticon" do
162
+
163
+ it "returns 'ambiguous'" do
164
+ message = ":( :)"
165
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
166
+ expect(output).to eql("ambiguous")
167
+ end
168
+
169
+ end
170
+
171
+ context "contains no emoticons and emotion_score is not empty" do
172
+
173
+ it "returns joy" do
174
+ message = "no emoticons in hur"
175
+ emotion_score = {"joy" => 1}
176
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
177
+ expect(output).to eql("joy")
178
+ end
179
+
180
+ end
181
+
182
+ context "contains no emoticons and emotion_score is empty" do
183
+
184
+ it "returns joy" do
185
+ message = "no emoticons in hur"
186
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
187
+ expect(output).to eql("ambiguous")
188
+ end
189
+
190
+ end
191
+ end
192
+
193
+ describe "#check_emoticon_for_polarity" do
194
+ context "contains happy emoticon" do
195
+
196
+ it "returns 8" do
197
+ message = ":)"
198
+ polarity_scores = [2.0,3.0]
199
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
200
+ expect(output).to eql(8)
201
+ end
202
+
203
+ end
204
+
205
+ context "contains sad emoticon" do
206
+
207
+ it "returns 2" do
208
+ message = ":("
209
+ polarity_scores = [2.0,3.0]
210
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
211
+ expect(output).to eql(2)
212
+ end
213
+
214
+ end
215
+
216
+ context "contains both a happy and a sad emoticon" do
217
+
218
+ it "returns 5" do
219
+ message = ":( :)"
220
+ polarity_scores = [2.0,3.0]
221
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
222
+ expect(output).to eql(5)
223
+ end
224
+
225
+ end
226
+
227
+
228
+ context "contains no emoticons and polarity_scores is empty" do
229
+
230
+ it "returns joy" do
231
+ message = "no emoticons in hur"
232
+ polarity_scores = []
233
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
234
+ expect(output).to eql(5)
235
+ end
236
+
237
+ end
238
+
239
+ context "contains no emoticons and emotion_score is not empty" do
240
+
241
+ it "returns joy" do
242
+ message = "no emoticons in hur"
243
+ polarity_scores = [8.0]
244
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
245
+ expect(output).to eql(8.0)
246
+ end
247
+
248
+ end
249
+ end
250
+
251
+ end
9
252
 
10
253
  describe "when 'build_term_frequencies' method is called" do
11
254
 
@@ -34,14 +277,16 @@ describe SadPanda do
34
277
 
35
278
  describe "when 'get_emotion_score' method is called" do
36
279
  it 'returns a string' do
37
- output = SadPanda.get_emotion_score emotions,term_frequencies
280
+ message = "this is a message!"
281
+ output = SadPanda.get_emotion_score(message, emotions,term_frequencies)
38
282
  expect(output.class).to eql(String)
39
283
  end
40
284
  end
41
285
 
42
286
  describe "when 'get_polarity_score' method is called" do
43
287
  it 'returns a string' do
44
- output = SadPanda.get_polarity_score polarities,term_frequencies
288
+ message = "this is another message!"
289
+ output = SadPanda.get_polarity_score(message, polarities, term_frequencies)
45
290
  expect(output.class).to eql(Fixnum)
46
291
  end
47
292
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sad_panda
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-30 00:00:00.000000000 Z
12
+ date: 2013-07-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -59,6 +59,22 @@ dependencies:
59
59
  - - ! '>='
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: pry
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
62
78
  - !ruby/object:Gem::Dependency
63
79
  name: rspec
64
80
  requirement: !ruby/object:Gem::Requirement
@@ -116,7 +132,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
116
132
  version: '0'
117
133
  segments:
118
134
  - 0
119
- hash: -2958994952268376603
135
+ hash: 2873136892834471107
120
136
  required_rubygems_version: !ruby/object:Gem::Requirement
121
137
  none: false
122
138
  requirements:
@@ -125,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
141
  version: '0'
126
142
  segments:
127
143
  - 0
128
- hash: -2958994952268376603
144
+ hash: 2873136892834471107
129
145
  requirements: []
130
146
  rubyforge_project:
131
147
  rubygems_version: 1.8.25