sad_panda 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,155 +6,170 @@ require 'lingua/stemmer'
6
6
 
7
7
  module SadPanda
8
8
 
9
- # this method reads the text of the status message
10
- # inputed by the user, removes common english words,
11
- # strips punctuation and capitalized letters, isolates
12
- # the stem of the word, and ultimately produces a hash
13
- # where the keys are the stems of the remaining words,
14
- # and the values are their respective frequencies within
15
- # the status message
16
- def self.build_term_frequencies message
17
-
18
- @message = message
19
-
20
- # create empty term_frequencies
21
- term_frequencies = {}
22
-
23
- # clean the text of the status message
24
- if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]"))
25
- @happy_que = true
26
- end
27
- if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-["))
28
- @sad_que = true
29
- end
30
-
31
- message_text = @message.gsub(/[^a-z ]/i, '').downcase
32
- message_text.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
33
- message_text.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
34
- message_text.gsub!(/\s\s+/,' ')
35
- words = message_text.split(" ")
36
-
37
- #filter for english stopwords
38
- stopwords = Stopwords.stopwords
39
- words = words - stopwords
40
-
41
- #get word stems
42
- word_stems = SadPanda.get_word_stems words
43
-
44
- #create term_frequencies
45
- word_stems.each do |stem|
46
- term_frequencies[stem] = word_stems.count(stem)
47
- end
48
-
49
- #return term frequency matrix
50
- term_frequencies
51
- end
52
-
53
- # this method takes an array of words an returns an array of word stems
54
- def self.get_word_stems words
55
- @stemmer = Lingua::Stemmer.new(:language => "en")
56
- output = []
57
- words.each do |word|
58
- output << @stemmer.stem(word)
59
- end
60
- output
61
- end
62
-
63
- # this method takes an emotion-words hash and a hash containing word
64
- # frequencies for the status message, calculates a numerical score
65
- # for each possble emotion, and returns the emotion with the highest
66
- # "score"
67
- def self.get_emotion_score(emotions, term_frequencies, verbose = false)
68
- emotion_score = {}
69
- term_frequencies.each do |key,value|
70
- emotions.keys.each do |k|
71
- if emotions[k].include?(key)
72
- emotion_score[k] ||= 0
73
- emotion_score[k] += value
74
- end
75
- end
76
- end
77
- if @verbose
78
- emotion_score.keys.each do |key|
79
- puts "EMOTION: "+key
80
- puts "SCORE: "+emotion_score[key].to_s
81
- end
82
- end
83
- # return an emotion_score_hash to be processed by emotion
84
- # get clue from any emoticons present
85
- if (@happy_que && @sad_que)
86
- return "ambiguous"
87
- elsif @happy_que
88
- return "joy"
89
- elsif @sad_que
90
- return "sadness"
91
- else
92
- ## 0 if unable to detect emotion
93
- if emotion_score == {}
94
- return "ambiguous"
9
+ # this method returns the best-fit emotion for the status message
10
+ def self.emotion(message)
11
+ # get the emotion for which the emotion score value is highest
12
+ SadPanda.get_emotion_score(message, EmotionBank.get_term_emotions, build_term_frequencies(message))
13
+ end
14
+
15
+ # this method returns the polarity value for the status message
16
+ # (normalized by the number of 'polar' words that the status
17
+ # message contains)
18
+ def self.polarity(message)
19
+ # get the polarity for which the polarity score value is highest
20
+ SadPanda.get_polarity_score(message, TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
21
+ end
22
+
23
+
24
+ private
25
+
26
+ # this method reads the text of the status message
27
+ # entered by the user, removes common English words,
28
+ # strips punctuation and capitalized letters, isolates
29
+ # the stem of the word, and ultimately produces a hash
30
+ # where the keys are the stems of the remaining words,
31
+ # and the values are their respective frequencies within
32
+ # the status message
33
+ def self.build_term_frequencies(message, term_frequencies = {})
34
+ # clean the text of the status message
35
+ happy_emoticon = happy_emoticon(message)
36
+ sad_emoticon = sad_emoticon(message)
37
+ words = words_from_message_text(message)
38
+ #filter for english stopwords
39
+ stopwords = Stopwords.stopwords
40
+ words = words - stopwords
41
+ #get word stems
42
+ word_stems = SadPanda.get_word_stems words
43
+ #create term_frequencies
44
+ #return term frequency hash
45
+ create_term_frequencies(word_stems, term_frequencies)
46
+ end
47
+
48
+ # this method takes an array of words and returns an array of word stems
49
+ def self.get_word_stems(words, output=[])
50
+ stemmer = Lingua::Stemmer.new(:language => "en")
51
+ words.each do |word|
52
+ output << stemmer.stem(word)
53
+ end
54
+ output
55
+ end
56
+
57
+ # this method takes an emotion-words hash and a hash containing word
58
+ # frequencies for the status message, calculates a numerical score
59
+ # for each possible emotion, and returns the emotion with the highest
60
+ # "score"
61
+ def self.get_emotion_score(message, emotions, term_frequencies, emotion_score = {})
62
+ term_frequencies.each do |key,value|
63
+ set_emotions(emotions, emotion_score, key, value)
64
+ end
65
+ # return an emotion_score_hash to be processed by emotion
66
+ # get clue from any emoticons present
67
+ check_emoticon_for_emotion(emotion_score, message)
68
+ end
69
+
70
+ # this method gives the status message a normalized polarity
71
+ # value based on the words it contains
72
+ def self.get_polarity_score (message, polarity_hash, term_frequencies, polarity_scores = [])
73
+ term_frequencies.each do |key, value|
74
+ set_polarities(key, value, polarity_hash, polarity_scores)
75
+ end
76
+
77
+ # return a polarity_score_hash to be processed by polarity method
78
+ # return an emotion_score_hash to be processed by emotion
79
+ # get clue from any emoticons present
80
+ check_emoticon_for_polarity(polarity_scores, message)
81
+ end
82
+
83
+ def self.happy_emoticon(message)
84
+ (message.include?(":)") || message.include?(":-)") || message.include?(":]") || message.include?(":-]"))
85
+ end
86
+
87
+ def self.sad_emoticon(message)
88
+ (message.include?(":(") || message.include?(":-(") || message.include?(":[") || message.include?(":-["))
89
+ end
90
+
91
+ def self.words_from_message_text(message)
92
+ message.gsub!(/[^a-z ]/i, '')
93
+ message.downcase!
94
+ message.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
95
+ message.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
96
+ message.gsub!(/\s\s+/,' ')
97
+ message.split(" ")
98
+ end
99
+
100
+ def self.set_emotions(emotions, emotion_score, term, frequency)
101
+ emotions.keys.each do |k|
102
+ store_emotions(emotions, emotion_score, k, term, frequency)
103
+ end
104
+ end
105
+
106
+ def self.set_polarities(term, frequency, polarity_hash, polarity_scores)
107
+ polarity_hash.keys.each do |k|
108
+ store_polarities(term, k, polarity_hash, polarity_scores)
109
+ end
110
+ end
111
+
112
+ def self.store_emotions(emotions, emotion_score, emotion, term, frequency)
113
+ if emotions[emotion].include?(term)
114
+ emotion_score[emotion] ||= 0
115
+ emotion_score[emotion] += frequency
116
+ end
117
+ end
118
+
119
+ def self.store_polarities(term, word, polarity_hash, polarity_scores)
120
+ if term == word
121
+ polarity_scores << (polarity_hash[word].to_f)
122
+ end
123
+ end
124
+
125
+ def self.check_emoticon_for_emotion(emotion_score, message)
126
+ if (happy_emoticon(message) && sad_emoticon(message))
127
+ "ambiguous"
128
+ elsif happy_emoticon(message)
129
+ "joy"
130
+ elsif sad_emoticon(message)
131
+ "sadness"
95
132
  else
96
- score = emotion_score.max_by{|k, v| v}[0]
133
+ return_emotion_score(emotion_score)
97
134
  end
98
- score
99
135
  end
100
- end
101
136
 
102
- # this method returns the best-fit emotion for the status message
103
- def self.emotion message
104
- # get the emotion for which the emotion score value is highest
105
- if @emotions
106
- SadPanda.get_emotion_score(@emotions, SadPanda.build_term_frequencies(message))
107
- else
108
- SadPanda.get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies(message))
137
+ def self.return_emotion_score(emotion_score)
138
+ ## "ambiguous" if unable to detect emotion
139
+ if emotion_score == {}
140
+ "ambiguous"
141
+ else
142
+ emotion_score.max_by{|k, v| v}[0]
143
+ end
109
144
  end
110
- end
111
-
112
- # this method gives the status method a normalized polarity
113
- # value based on the words it contains
114
- def self.get_polarity_score (polarity_hash, term_frequencies, verbose = false)
115
- polarity_scores = []
116
- term_frequencies.each do |key, value|
117
- polarity_hash.keys.each do |k|
118
- if key == k
119
- polarity_scores << (polarity_hash[k].to_f)
120
- end
121
- end
122
- end
123
-
124
- # return an polarity_score_hash to be processed by polarity method
125
- # return an emotion_score_hash to be processed by emotion
126
- # get clue from any emoticons present
127
- if (@happy_que && @sad_que)
145
+
146
+ def self.check_emoticon_for_polarity(polarity_scores, message)
147
+ if (happy_emoticon(message) && sad_emoticon(message))
128
148
  score = 5
129
- elsif @happy_que
149
+ elsif happy_emoticon(message)
130
150
  score = 8
131
- elsif @sad_que
151
+ elsif sad_emoticon(message)
132
152
  score = 2
133
- else
134
- if polarity_scores == []
135
- # polarity unreadable; return a neutral score of zero
136
- score = 5
137
- else
138
- score = polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
139
- polarity_scores = []
140
- end
141
- if @verbose
142
- puts "POLARITY: " + score.to_s
143
- end
144
- score
145
- end
146
- end
147
-
148
- # this method returns the polarity value for the status message
149
- # (normalized by the number of 'polar' words that the status
150
- # message contains)
151
- def self.polarity message
152
- # get the polarity for which the polarity score value is highest
153
- if @polarities
154
- SadPanda.get_polarity_score(@polarities, SadPanda.build_term_frequencies(message))
155
- else
156
- SadPanda.get_polarity_score(TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
157
- end
158
- end
153
+ else
154
+ return_polarity_scores(polarity_scores)
155
+ end
156
+ end
157
+
158
+ def self.return_polarity_scores(polarity_scores)
159
+ if polarity_scores == []
160
+ # polarity unreadable; return a neutral score of 5
161
+ 5
162
+ else
163
+ polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
164
+ end
165
+ end
166
+
167
+ def self.create_term_frequencies(word_stems, term_frequencies)
168
+ word_stems.each do |stem|
169
+ term_frequencies[stem] = word_stems.count(stem)
170
+ end
171
+ term_frequencies
172
+ end
173
+
159
174
 
160
175
  end
@@ -1,3 +1,3 @@
1
1
  module SadPanda
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
@@ -21,5 +21,6 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
23
  spec.add_runtime_dependency "ruby-stemmer"
24
+ spec.add_development_dependency "pry"
24
25
  spec.add_development_dependency "rspec"
25
26
  end
@@ -6,6 +6,249 @@ describe SadPanda do
6
6
  let(:emotions) {EmotionBank.get_term_emotions}
7
7
  let(:polarities) {TermPolarities.get_term_polarities}
8
8
  let(:term_frequencies) {SadPanda.build_term_frequencies("My cactus collection makes me happy.")}
9
+ let(:emotion_score) { {} }
10
+ let(:polarity_scores) { [] }
11
+ let(:polarity_hash) { TermPolarities.get_term_polarities }
12
+
13
+ context "methods" do
14
+ describe "#happy_emoticon" do
15
+
16
+ context "when true" do
17
+ it "returns true" do
18
+ message = ":)"
19
+ expect(SadPanda.happy_emoticon(message)).to be_true
20
+ end
21
+ end
22
+
23
+ context "when false" do
24
+ it "returns true" do
25
+ message = "stuff"
26
+ expect(SadPanda.happy_emoticon(message)).to be_false
27
+ end
28
+ end
29
+
30
+
31
+ end
32
+
33
+ describe "#sad_emoticon" do
34
+
35
+ context "when true" do
36
+ it "returns true" do
37
+ message = ":("
38
+ expect(SadPanda.sad_emoticon(message)).to be_true
39
+ end
40
+ end
41
+
42
+ context "when false" do
43
+ it "returns true" do
44
+ message = "stuff"
45
+ expect(SadPanda.sad_emoticon(message)).to be_false
46
+ end
47
+ end
48
+
49
+ end
50
+
51
+ describe "#words_from_message_text" do
52
+
53
+ it "removes urls and other gross stuff from tweet" do
54
+ message = "lobster hickory http://www.boston.com/business #Rails"
55
+
56
+ words = SadPanda.words_from_message_text(message)
57
+
58
+ expect(words).to eql(["lobster", "hickory", "rails"])
59
+ end
60
+
61
+ end
62
+
63
+ describe "#set_emotions" do
64
+
65
+ it "modifies the emotions_score array" do
66
+
67
+ term_frequencies.each do |key, value|
68
+ SadPanda.set_emotions(emotions, emotion_score, key, value)
69
+ end
70
+ expect((emotion_score["joy"])).to eql(1)
71
+ end
72
+
73
+ end
74
+
75
+ describe "#set_polarities" do
76
+
77
+ it "modifies the polarity_scores array" do
78
+ term_frequencies = {'sad' => 1}
79
+ term_frequencies.each do |key, value|
80
+ SadPanda.set_polarities(key, value, polarity_hash, polarity_scores)
81
+ end
82
+ expect(polarity_scores).to eql([0.0])
83
+ end
84
+
85
+ end
86
+
87
+ describe "#store_emotions" do
88
+ it "stores emotions in emotion_score hash" do
89
+
90
+ emotions = {"joy" => "zorg" }
91
+ key,value = "zorg", 1
92
+
93
+ emotions.keys.each do |k|
94
+ SadPanda.store_emotions(emotions, emotion_score, k, key, value)
95
+ end
96
+ expect(emotion_score["joy"]).to eql(1)
97
+ end
98
+
99
+ end
100
+
101
+ describe "#store_polarities" do
102
+
103
+ context "word in polarity_hash" do
104
+
105
+ it "adds a polarity to polarity_scores" do
106
+ term = "sad"
107
+ word = "sad"
108
+ SadPanda.store_polarities(term, word, polarity_hash, polarity_scores)
109
+ expect(polarity_scores).to eql([0.0])
110
+ end
111
+
112
+ end
113
+
114
+ context "word not in polarity_hash" do
115
+
116
+ it "does not add a polarity to polarity_scores" do
117
+ term = "sad"
118
+ word = "cactus"
119
+ SadPanda.store_polarities(term, word, polarity_hash, polarity_scores)
120
+ expect(polarity_scores).to eql([])
121
+ end
122
+
123
+ end
124
+
125
+ end
126
+
127
+ describe "#create_term_frequencies" do
128
+
129
+ it "populates a word-stem frequency hash" do
130
+ words = ["yo", "stuff"]
131
+ term_frequencies = {}
132
+ word_stems = SadPanda.get_word_stems(words)
133
+ term_frequencies = SadPanda.create_term_frequencies(word_stems, term_frequencies)
134
+
135
+ expect(term_frequencies).to eql({"yo"=>1, "stuff"=>1})
136
+ end
137
+
138
+ end
139
+
140
+ describe "#check_emoticon_for_emotion" do
141
+ context "contains happy emoticon" do
142
+
143
+ it "returns 'joy'" do
144
+ message = ":)"
145
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
146
+ expect(output).to eql("joy")
147
+ end
148
+
149
+ end
150
+
151
+ context "contains sad emoticon" do
152
+
153
+ it "returns 'sadness'" do
154
+ message = ":("
155
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
156
+ expect(output).to eql("sadness")
157
+ end
158
+
159
+ end
160
+
161
+ context "contains both a happy and a sad emoticon" do
162
+
163
+ it "returns 'ambiguous'" do
164
+ message = ":( :)"
165
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
166
+ expect(output).to eql("ambiguous")
167
+ end
168
+
169
+ end
170
+
171
+ context "contains no emoticons and emotion_score is not empty" do
172
+
173
+ it "returns joy" do
174
+ message = "no emoticons in hur"
175
+ emotion_score = {"joy" => 1}
176
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
177
+ expect(output).to eql("joy")
178
+ end
179
+
180
+ end
181
+
182
+ context "contains no emoticons and emotion_score is empty" do
183
+
184
+ it "returns joy" do
185
+ message = "no emoticons in hur"
186
+ output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
187
+ expect(output).to eql("ambiguous")
188
+ end
189
+
190
+ end
191
+ end
192
+
193
+ describe "#check_emoticon_for_polarity" do
194
+ context "contains happy emoticon" do
195
+
196
+ it "returns 8" do
197
+ message = ":)"
198
+ polarity_scores = [2.0,3.0]
199
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
200
+ expect(output).to eql(8)
201
+ end
202
+
203
+ end
204
+
205
+ context "contains sad emoticon" do
206
+
207
+ it "returns 2" do
208
+ message = ":("
209
+ polarity_scores = [2.0,3.0]
210
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
211
+ expect(output).to eql(2)
212
+ end
213
+
214
+ end
215
+
216
+ context "contains both a happy and a sad emoticon" do
217
+
218
+ it "returns 5" do
219
+ message = ":( :)"
220
+ polarity_scores = [2.0,3.0]
221
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
222
+ expect(output).to eql(5)
223
+ end
224
+
225
+ end
226
+
227
+
228
+ context "contains no emoticons and polarity_scores is empty" do
229
+
230
+ it "returns joy" do
231
+ message = "no emoticons in hur"
232
+ polarity_scores = []
233
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
234
+ expect(output).to eql(5)
235
+ end
236
+
237
+ end
238
+
239
+ context "contains no emoticons and emotion_score is not empty" do
240
+
241
+ it "returns joy" do
242
+ message = "no emoticons in hur"
243
+ polarity_scores = [8.0]
244
+ output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
245
+ expect(output).to eql(8.0)
246
+ end
247
+
248
+ end
249
+ end
250
+
251
+ end
9
252
 
10
253
  describe "when 'build_term_frequencies' method is called" do
11
254
 
@@ -34,14 +277,16 @@ describe SadPanda do
34
277
 
35
278
  describe "when 'get_emotion_score' method is called" do
36
279
  it 'returns a string' do
37
- output = SadPanda.get_emotion_score emotions,term_frequencies
280
+ message = "this is a message!"
281
+ output = SadPanda.get_emotion_score(message, emotions,term_frequencies)
38
282
  expect(output.class).to eql(String)
39
283
  end
40
284
  end
41
285
 
42
286
  describe "when 'get_polarity_score' method is called" do
43
287
  it 'returns a string' do
44
- output = SadPanda.get_polarity_score polarities,term_frequencies
288
+ message = "this is another message!"
289
+ output = SadPanda.get_polarity_score(message, polarities, term_frequencies)
45
290
  expect(output.class).to eql(Fixnum)
46
291
  end
47
292
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sad_panda
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-30 00:00:00.000000000 Z
12
+ date: 2013-07-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -59,6 +59,22 @@ dependencies:
59
59
  - - ! '>='
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: pry
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
62
78
  - !ruby/object:Gem::Dependency
63
79
  name: rspec
64
80
  requirement: !ruby/object:Gem::Requirement
@@ -116,7 +132,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
116
132
  version: '0'
117
133
  segments:
118
134
  - 0
119
- hash: -2958994952268376603
135
+ hash: 2873136892834471107
120
136
  required_rubygems_version: !ruby/object:Gem::Requirement
121
137
  none: false
122
138
  requirements:
@@ -125,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
141
  version: '0'
126
142
  segments:
127
143
  - 0
128
- hash: -2958994952268376603
144
+ hash: 2873136892834471107
129
145
  requirements: []
130
146
  rubyforge_project:
131
147
  rubygems_version: 1.8.25