sad_panda 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -24,13 +24,16 @@ Or install it yourself as:
24
24
 
25
25
  require 'sad_panda'
26
26
 
27
- my_message = SadPanda::StatusMessage.new "my lobster collection makes me happy!"
27
+ SadPanda.emotion("my lobster collection makes me happy!")
28
28
 
29
- my_message.emotion
30
29
  => "joy"
31
30
 
31
+
32
+
33
+ SadPanda.polarity("my lobster collection makes me happy!")
32
34
  my_message.polarity
33
- => 0.5
35
+
36
+ => 5
34
37
 
35
38
  ## Contributing
36
39
 
data/lib/sad_panda.rb CHANGED
@@ -3,7 +3,166 @@ require_relative "./sad_panda/status_message"
3
3
  require_relative './sad_panda/emotions/emotion_bank.rb'
4
4
  require_relative './sad_panda/emotions/term_polarities.rb'
5
5
  require_relative './sad_panda/emotions/stopwords.rb'
6
-
6
+ require 'lingua/stemmer'
7
7
 
8
8
  module SadPanda
9
+
10
+ attr_accessor :message, :verbose
11
+ attr_reader :stemmer
12
+
13
+ # this method reads the text of the status message
14
+ # inputted by the user, removes common English words,
15
+ # strips punctuation and capitalized letters, isolates
16
+ # the stem of the word, and ultimately produces a hash
17
+ # where the keys are the stems of the remaining words,
18
+ # and the values are their respective frequencies within
19
+ # the status message
20
+ def self.build_term_frequencies message
21
+
22
+ @message = message
23
+
24
+ # create empty term_frequencies
25
+ term_frequencies = {}
26
+
27
+ # clean the text of the status message
28
+ if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]"))
29
+ @happy_que = true
30
+ end
31
+ if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-["))
32
+ @sad_que = true
33
+ end
34
+ if (@message.include?(":/") || @message.include?(":-/") || @message.include?(":\\") || @message.include?(":-\\"))
35
+ @uneasy_que = true
36
+ end
37
+ message_text = @message.gsub(/[^a-z ]/i, '').downcase
38
+ message_text = message_text.gsub(/\s\s+/,' ')
39
+ words = message_text.split(" ")
40
+
41
+ #filter for english stopwords
42
+ stopwords = Stopwords.stopwords
43
+ words = words - stopwords
44
+
45
+ #get word stems
46
+ word_stems = SadPanda.get_word_stems words
47
+
48
+ #create term_frequencies
49
+ word_stems.each do |stem|
50
+ term_frequencies[stem] = word_stems.count(stem)
51
+ end
52
+
53
+ #return term frequency matrix
54
+ term_frequencies
55
+ end
56
+
57
+ # this method takes an array of words and returns an array of word stems
58
+ def self.get_word_stems words
59
+ @stemmer = Lingua::Stemmer.new(:language => "en")
60
+ output = []
61
+ words.each do |word|
62
+ output << @stemmer.stem(word)
63
+ end
64
+ output
65
+ end
66
+
67
+ # this method takes an emotion-words hash and a hash containing word
68
+ # frequencies for the status message, calculates a numerical score
69
+ # for each possible emotion, and returns the emotion with the highest
70
+ # "score"
71
+ def self.get_emotion_score(emotions, term_frequencies, verbose = false)
72
+ emotion_score = {}
73
+ term_frequencies.each do |key,value|
74
+ emotions.keys.each do |k|
75
+ if emotions[k].include?(key)
76
+ emotion_score[k] ||= 0
77
+ emotion_score[k] += value
78
+ end
79
+ end
80
+ end
81
+ if @verbose
82
+ emotion_score.keys.each do |key|
83
+ puts "EMOTION: "+key
84
+ puts "SCORE: "+emotion_score[key].to_s
85
+ end
86
+ end
87
+ # return an emotion_score_hash to be processed by emotion
88
+ # get clue from any emoticons present
89
+ if (@happy_que && @sad_que)
90
+ return "uncertain"
91
+ elsif @uneasy_que
92
+ return "uneasiness"
93
+ elsif @happy_que
94
+ return "joy"
95
+ elsif @sad_que
96
+ return "sadness"
97
+ else
98
+ ## "uncertain" if unable to detect emotion
99
+ if emotion_score == {}
100
+ return "uncertain"
101
+ else
102
+ score = emotion_score.max_by{|k, v| v}[0]
103
+ end
104
+ score
105
+ end
106
+ end
107
+
108
+ # this method returns the best-fit emotion for the status message
109
+ def self.emotion message
110
+ # get the emotion for which the emotion score value is highest
111
+ if @emotions
112
+ SadPanda.get_emotion_score(@emotions, SadPanda.build_term_frequencies(message))
113
+ else
114
+ SadPanda.get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies(message))
115
+ end
116
+ end
117
+
118
+ # this method gives the status message a normalized polarity
119
+ # value based on the words it contains
120
+ def self.get_polarity_score (polarity_hash, term_frequencies, verbose = false)
121
+ polarity_scores = []
122
+ term_frequencies.each do |key, value|
123
+ polarity_hash.keys.each do |k|
124
+ if key == k
125
+ polarity_scores << (polarity_hash[k].to_f)
126
+ end
127
+ end
128
+ end
129
+
130
+ # return a polarity score to be processed by the polarity method
131
+ # return an emotion_score_hash to be processed by emotion
132
+ # get clue from any emoticons present
133
+ if (@happy_que && @sad_que)
134
+ score = 5
135
+ elsif @uneasy_que
136
+ score = 3
137
+ elsif @happy_que
138
+ score = 8
139
+ elsif @sad_que
140
+ score = 2
141
+ else
142
+ if polarity_scores == []
143
+ # polarity unreadable; return a neutral score of 5
144
+ score = 5
145
+ else
146
+ score = polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
147
+ polarity_scores = []
148
+ end
149
+ if @verbose
150
+ puts "POLARITY: " + score.to_s
151
+ end
152
+ score
153
+ end
154
+ end
155
+
156
+ # this method returns the polarity value for the status message
157
+ # (normalized by the number of 'polar' words that the status
158
+ # message contains)
159
+ def self.polarity message
160
+ # get the polarity for which the polarity score value is highest
161
+ if @polarities
162
+ SadPanda.get_polarity_score(@polarities, SadPanda.build_term_frequencies(message))
163
+ else
164
+ SadPanda.get_polarity_score(TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
165
+ end
166
+ end
167
+
9
168
  end
@@ -1,3 +1,3 @@
1
1
  module SadPanda
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sad_panda
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -96,7 +96,6 @@ files:
96
96
  - lib/sad_panda/emotions/stopwords.rb
97
97
  - lib/sad_panda/emotions/subjectivity.csv
98
98
  - lib/sad_panda/emotions/term_polarities.rb
99
- - lib/sad_panda/status_message.rb
100
99
  - lib/sad_panda/version.rb
101
100
  - sad_panda.gemspec
102
101
  - spec/emotion_bank_spec.rb
@@ -1,169 +0,0 @@
1
- require 'lingua/stemmer'
2
-
3
- module SadPanda
4
-
5
- class StatusMessage
6
-
7
- attr_accessor :message, :verbose
8
- attr_reader :stemmer
9
-
10
- def initialize(message, verbose = false)
11
- @message = message
12
- @stemmer = Lingua::Stemmer.new(:language => "en")
13
- @verbose = verbose
14
- end
15
-
16
- # this method reads the text of the status message
17
- # inputted by the user, removes common English words,
18
- # strips punctuation and capitalized letters, isolates
19
- # the stem of the word, and ultimately produces a hash
20
- # where the keys are the stems of the remaining words,
21
- # and the values are their respective frequencies within
22
- # the status message
23
- def build_term_frequencies
24
-
25
- # create empty term_frequencies
26
- term_frequencies = {}
27
-
28
- # clean the text of the status message
29
- if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]"))
30
- @happy_que = true
31
- end
32
- if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-["))
33
- @sad_que = true
34
- end
35
- if (@message.include?(":/") || @message.include?(":-/") || @message.include?(":\\") || @message.include?(":-\\"))
36
- @uneasy_que = true
37
- end
38
- message_text = @message.gsub(/[^a-z ]/i, '').downcase
39
- message_text = message_text.gsub(/\s\s+/,' ')
40
- words = message_text.split(" ")
41
-
42
- #filter for english stopwords
43
- stopwords = Stopwords.stopwords
44
- words = words - stopwords
45
-
46
- #get word stems
47
- word_stems = get_word_stems words
48
-
49
- #create term_frequencies
50
- word_stems.each do |stem|
51
- term_frequencies[stem] = word_stems.count(stem)
52
- end
53
-
54
- #return term frequency matrix
55
- term_frequencies
56
- end
57
-
58
- # this method takes an array of words and returns an array of word stems
59
- def get_word_stems words
60
- output = []
61
- words.each do |word|
62
- output << @stemmer.stem(word)
63
- end
64
- output
65
- end
66
-
67
- # this method takes an emotion-words hash and a hash containing word
68
- # frequencies for the status message, calculates a numerical score
69
- # for each possible emotion, and returns the emotion with the highest
70
- # "score"
71
- def get_emotion_score(emotions, term_frequencies)
72
- emotion_score = {}
73
- term_frequencies.each do |key,value|
74
- emotions.keys.each do |k|
75
- if emotions[k].include?(key)
76
- emotion_score[k] ||= 0
77
- emotion_score[k] += value
78
- end
79
- end
80
- end
81
- if @verbose
82
- emotion_score.keys.each do |key|
83
- puts "EMOTION: "+key
84
- puts "SCORE: "+emotion_score[key].to_s
85
- end
86
- end
87
- # return an emotion_score_hash to be processed by emotion
88
- # get clue from any emoticons present
89
- if (@happy_que && @sad_que)
90
- return "uncertain"
91
- elsif @uneasy_que
92
- return "uneasiness"
93
- elsif @happy_que
94
- return "joy"
95
- elsif @sad_que
96
- return "sadness"
97
- else
98
- ## "uncertain" if unable to detect emotion
99
- if emotion_score == {}
100
- return "uncertain"
101
- else
102
- score = emotion_score.max_by{|k, v| v}[0]
103
- end
104
- score
105
- end
106
- end
107
-
108
- # this method returns the best-fit emotion for the status message
109
- def emotion
110
- # get the emotion for which the emotion score value is highest
111
- if @emotions
112
- get_emotion_score(@emotions, build_term_frequencies)
113
- else
114
- get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies)
115
- end
116
- end
117
-
118
- # this method gives the status message a normalized polarity
119
- # value based on the words it contains
120
- def get_polarity_score (polarity_hash, term_frequencies)
121
- polarity_scores = []
122
- term_frequencies.each do |key, value|
123
- polarity_hash.keys.each do |k|
124
- if key == k
125
- polarity_scores << (polarity_hash[k].to_f)
126
- end
127
- end
128
- end
129
-
130
- # return an polarity_score_hash to be processed by polarity method
131
- # return an emotion_score_hash to be processed by emotion
132
- # get clue from any emoticons present
133
- if (@happy_que && @sad_que)
134
- score = 5
135
- elsif @uneasy_que
136
- score = 3
137
- elsif @happy_que
138
- score = 8
139
- elsif @sad_que
140
- score = 2
141
- else
142
- if polarity_scores == []
143
- # polarity unreadable; return a neutral score of 5
144
- score = 5
145
- else
146
- score = polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
147
- polarity_scores = []
148
- end
149
- if @verbose
150
- puts "POLARITY: " + score.to_s
151
- end
152
- score
153
- end
154
- end
155
-
156
- # this method returns the polarity value for the status message
157
- # (normalized by the number of 'polar' words that the status
158
- # message contains)
159
- def polarity
160
- # get the polarity for which the polarity score value is highest
161
- if @polarities
162
- get_polarity_score(@polarities, build_term_frequencies)
163
- else
164
- get_polarity_score(TermPolarities.get_term_polarities, build_term_frequencies)
165
- end
166
- end
167
-
168
- end
169
- end