sad_panda 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -24,13 +24,16 @@ Or install it yourself as:
24
24
 
25
25
  require 'sad_panda'
26
26
 
27
- my_message = SadPanda::StatusMessage.new "my lobster collection makes me happy!"
27
+ SadPanda.emotion("my lobster collection makes me happy!")
28
28
 
29
- my_message.emotion
30
29
  => "joy"
31
30
 
31
+
32
+
33
+ SadPanda.polarity("my lobster collection makes me happy!")
32
34
  my_message.polarity
33
- => 0.5
35
+
36
+ => 5
34
37
 
35
38
  ## Contributing
36
39
 
data/lib/sad_panda.rb CHANGED
@@ -3,7 +3,166 @@ require_relative "./sad_panda/status_message"
3
3
  require_relative './sad_panda/emotions/emotion_bank.rb'
4
4
  require_relative './sad_panda/emotions/term_polarities.rb'
5
5
  require_relative './sad_panda/emotions/stopwords.rb'
6
-
6
+ require 'lingua/stemmer'
7
7
 
8
8
  module SadPanda
9
+
10
+ attr_accessor :message, :verbose
11
+ attr_reader :stemmer
12
+
13
+ # this method reads the text of the status message
14
+ # entered by the user, removes common English words,
15
+ # strips punctuation and capitalized letters, isolates
16
+ # the stem of the word, and ultimately produces a hash
17
+ # where the keys are the stems of the remaining words,
18
+ # and the values are their respective frequencies within
19
+ # the status message
20
+ def self.build_term_frequencies message
21
+
22
+ @message = message
23
+
24
+ # create empty term_frequencies
25
+ term_frequencies = {}
26
+
27
+ # clean the text of the status message
28
+ if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]"))
29
+ @happy_que = true
30
+ end
31
+ if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-["))
32
+ @sad_que = true
33
+ end
34
+ if (@message.include?(":/") || @message.include?(":-/") || @message.include?(":\\") || @message.include?(":-\\"))
35
+ @uneasy_que = true
36
+ end
37
+ message_text = @message.gsub(/[^a-z ]/i, '').downcase
38
+ message_text = message_text.gsub(/\s\s+/,' ')
39
+ words = message_text.split(" ")
40
+
41
+ #filter for english stopwords
42
+ stopwords = Stopwords.stopwords
43
+ words = words - stopwords
44
+
45
+ #get word stems
46
+ word_stems = SadPanda.get_word_stems words
47
+
48
+ #create term_frequencies
49
+ word_stems.each do |stem|
50
+ term_frequencies[stem] = word_stems.count(stem)
51
+ end
52
+
53
+ #return term frequency matrix
54
+ term_frequencies
55
+ end
56
+
57
+ # this method takes an array of words and returns an array of word stems
58
+ def self.get_word_stems words
59
+ @stemmer = Lingua::Stemmer.new(:language => "en")
60
+ output = []
61
+ words.each do |word|
62
+ output << @stemmer.stem(word)
63
+ end
64
+ output
65
+ end
66
+
67
+ # this method takes an emotion-words hash and a hash containing word
68
+ # frequencies for the status message, calculates a numerical score
69
+ # for each possible emotion, and returns the emotion with the highest
70
+ # "score"
71
+ def self.get_emotion_score(emotions, term_frequencies, verbose = false)
72
+ emotion_score = {}
73
+ term_frequencies.each do |key,value|
74
+ emotions.keys.each do |k|
75
+ if emotions[k].include?(key)
76
+ emotion_score[k] ||= 0
77
+ emotion_score[k] += value
78
+ end
79
+ end
80
+ end
81
+ if @verbose
82
+ emotion_score.keys.each do |key|
83
+ puts "EMOTION: "+key
84
+ puts "SCORE: "+emotion_score[key].to_s
85
+ end
86
+ end
87
+ # return an emotion_score_hash to be processed by emotion
88
+ # get clue from any emoticons present
89
+ if (@happy_que && @sad_que)
90
+ return "uncertain"
91
+ elsif @uneasy_que
92
+ return "uneasiness"
93
+ elsif @happy_que
94
+ return "joy"
95
+ elsif @sad_que
96
+ return "sadness"
97
+ else
98
+ ## "uncertain" if unable to detect emotion
99
+ if emotion_score == {}
100
+ return "uncertain"
101
+ else
102
+ score = emotion_score.max_by{|k, v| v}[0]
103
+ end
104
+ score
105
+ end
106
+ end
107
+
108
+ # this method returns the best-fit emotion for the status message
109
+ def self.emotion message
110
+ # get the emotion for which the emotion score value is highest
111
+ if @emotions
112
+ SadPanda.get_emotion_score(@emotions, SadPanda.build_term_frequencies(message))
113
+ else
114
+ SadPanda.get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies(message))
115
+ end
116
+ end
117
+
118
+ # this method gives the status message a normalized polarity
119
+ # value based on the words it contains
120
+ def self.get_polarity_score (polarity_hash, term_frequencies, verbose = false)
121
+ polarity_scores = []
122
+ term_frequencies.each do |key, value|
123
+ polarity_hash.keys.each do |k|
124
+ if key == k
125
+ polarity_scores << (polarity_hash[k].to_f)
126
+ end
127
+ end
128
+ end
129
+
130
+ # return a polarity score to be processed by the polarity method
131
+ # return an emotion_score_hash to be processed by emotion
132
+ # get clue from any emoticons present
133
+ if (@happy_que && @sad_que)
134
+ score = 5
135
+ elsif @uneasy_que
136
+ score = 3
137
+ elsif @happy_que
138
+ score = 8
139
+ elsif @sad_que
140
+ score = 2
141
+ else
142
+ if polarity_scores == []
143
+ # polarity unreadable; return a neutral score of 5
144
+ score = 5
145
+ else
146
+ score = polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
147
+ polarity_scores = []
148
+ end
149
+ if @verbose
150
+ puts "POLARITY: " + score.to_s
151
+ end
152
+ score
153
+ end
154
+ end
155
+
156
+ # this method returns the polarity value for the status message
157
+ # (normalized by the number of 'polar' words that the status
158
+ # message contains)
159
+ def self.polarity message
160
+ # get the polarity for which the polarity score value is highest
161
+ if @polarities
162
+ SadPanda.get_polarity_score(@polarities, SadPanda.build_term_frequencies(message))
163
+ else
164
+ SadPanda.get_polarity_score(TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
165
+ end
166
+ end
167
+
9
168
  end
@@ -1,3 +1,3 @@
1
1
  module SadPanda
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sad_panda
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -96,7 +96,6 @@ files:
96
96
  - lib/sad_panda/emotions/stopwords.rb
97
97
  - lib/sad_panda/emotions/subjectivity.csv
98
98
  - lib/sad_panda/emotions/term_polarities.rb
99
- - lib/sad_panda/status_message.rb
100
99
  - lib/sad_panda/version.rb
101
100
  - sad_panda.gemspec
102
101
  - spec/emotion_bank_spec.rb
@@ -1,169 +0,0 @@
1
- require 'lingua/stemmer'
2
-
3
- module SadPanda
4
-
5
- class StatusMessage
6
-
7
- attr_accessor :message, :verbose
8
- attr_reader :stemmer
9
-
10
- def initialize(message, verbose = false)
11
- @message = message
12
- @stemmer = Lingua::Stemmer.new(:language => "en")
13
- @verbose = verbose
14
- end
15
-
16
- # this method reads the text of the status message
17
- # inputed by the user, removes common english words,
18
- # strips punctuation and capitalized letters, isolates
19
- # the stem of the word, and ultimately produces a hash
20
- # where the keys are the stems of the remaining words,
21
- # and the values are their respective frequencies within
22
- # the status message
23
- def build_term_frequencies
24
-
25
- # create empty term_frequencies
26
- term_frequencies = {}
27
-
28
- # clean the text of the status message
29
- if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]"))
30
- @happy_que = true
31
- end
32
- if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-["))
33
- @sad_que = true
34
- end
35
- if (@message.include?(":/") || @message.include?(":-/") || @message.include?(":\\") || @message.include?(":-\\"))
36
- @uneasy_que = true
37
- end
38
- message_text = @message.gsub(/[^a-z ]/i, '').downcase
39
- message_text = message_text.gsub(/\s\s+/,' ')
40
- words = message_text.split(" ")
41
-
42
- #filter for english stopwords
43
- stopwords = Stopwords.stopwords
44
- words = words - stopwords
45
-
46
- #get word stems
47
- word_stems = get_word_stems words
48
-
49
- #create term_frequencies
50
- word_stems.each do |stem|
51
- term_frequencies[stem] = word_stems.count(stem)
52
- end
53
-
54
- #return term frequency matrix
55
- term_frequencies
56
- end
57
-
58
- # this method takes an array of words an returns an array of word stems
59
- def get_word_stems words
60
- output = []
61
- words.each do |word|
62
- output << @stemmer.stem(word)
63
- end
64
- output
65
- end
66
-
67
- # this method takes an emotion-words hash and a hash containing word
68
- # frequencies for the status message, calculates a numerical score
69
- # for each possble emotion, and returns the emotion with the highest
70
- # "score"
71
- def get_emotion_score(emotions, term_frequencies)
72
- emotion_score = {}
73
- term_frequencies.each do |key,value|
74
- emotions.keys.each do |k|
75
- if emotions[k].include?(key)
76
- emotion_score[k] ||= 0
77
- emotion_score[k] += value
78
- end
79
- end
80
- end
81
- if @verbose
82
- emotion_score.keys.each do |key|
83
- puts "EMOTION: "+key
84
- puts "SCORE: "+emotion_score[key].to_s
85
- end
86
- end
87
- # return an emotion_score_hash to be processed by emotion
88
- # get clue from any emoticons present
89
- if (@happy_que && @sad_que)
90
- return "uncertain"
91
- elsif @uneasy_que
92
- return "uneasiness"
93
- elsif @happy_que
94
- return "joy"
95
- elsif @sad_que
96
- return "sadness"
97
- else
98
- ## 0 if unable to detect emotion
99
- if emotion_score == {}
100
- return "uncertain"
101
- else
102
- score = emotion_score.max_by{|k, v| v}[0]
103
- end
104
- score
105
- end
106
- end
107
-
108
- # this method returns the best-fit emotion for the status message
109
- def emotion
110
- # get the emotion for which the emotion score value is highest
111
- if @emotions
112
- get_emotion_score(@emotions, build_term_frequencies)
113
- else
114
- get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies)
115
- end
116
- end
117
-
118
- # this method gives the status method a normalized polarity
119
- # value based on the words it contains
120
- def get_polarity_score (polarity_hash, term_frequencies)
121
- polarity_scores = []
122
- term_frequencies.each do |key, value|
123
- polarity_hash.keys.each do |k|
124
- if key == k
125
- polarity_scores << (polarity_hash[k].to_f)
126
- end
127
- end
128
- end
129
-
130
- # return an polarity_score_hash to be processed by polarity method
131
- # return an emotion_score_hash to be processed by emotion
132
- # get clue from any emoticons present
133
- if (@happy_que && @sad_que)
134
- score = 5
135
- elsif @uneasy_que
136
- score = 3
137
- elsif @happy_que
138
- score = 8
139
- elsif @sad_que
140
- score = 2
141
- else
142
- if polarity_scores == []
143
- # polarity unreadable; return a neutral score of zero
144
- score = 5
145
- else
146
- score = polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
147
- polarity_scores = []
148
- end
149
- if @verbose
150
- puts "POLARITY: " + score.to_s
151
- end
152
- score
153
- end
154
- end
155
-
156
- # this method returns the polarity value for the status message
157
- # (normalized by the number of 'polar' words that the status
158
- # message contains)
159
- def polarity
160
- # get the polarity for which the polarity score value is highest
161
- if @polarities
162
- get_polarity_score(@polarities, build_term_frequencies)
163
- else
164
- get_polarity_score(TermPolarities.get_term_polarities, build_term_frequencies)
165
- end
166
- end
167
-
168
- end
169
- end