sad_panda 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +6 -3
- data/lib/sad_panda.rb +160 -1
- data/lib/sad_panda/version.rb +1 -1
- metadata +1 -2
- data/lib/sad_panda/status_message.rb +0 -169
data/README.md
CHANGED
@@ -24,13 +24,16 @@ Or install it yourself as:
|
|
24
24
|
|
25
25
|
require 'sad_panda'
|
26
26
|
|
27
|
-
|
27
|
+
SadPanda.emotion("my lobster collection makes me happy!")
|
28
28
|
|
29
|
-
my_message.emotion
|
30
29
|
=> "joy"
|
31
30
|
|
31
|
+
|
32
|
+
|
33
|
+
SadPanda.polarity("my lobster collection makes me happy!")
|
32
34
|
my_message.polarity
|
33
|
-
|
35
|
+
|
36
|
+
=> 5
|
34
37
|
|
35
38
|
## Contributing
|
36
39
|
|
data/lib/sad_panda.rb
CHANGED
@@ -3,7 +3,166 @@ require_relative "./sad_panda/status_message"
|
|
3
3
|
require_relative './sad_panda/emotions/emotion_bank.rb'
|
4
4
|
require_relative './sad_panda/emotions/term_polarities.rb'
|
5
5
|
require_relative './sad_panda/emotions/stopwords.rb'
|
6
|
-
|
6
|
+
require 'lingua/stemmer'
|
7
7
|
|
8
8
|
# SadPanda scores a free-text status message against two word lexicons:
# an emotion lexicon (emotion name => list of word stems) and a polarity
# lexicon (word stem => numeric polarity). Emoticons found in the message
# take precedence over the word-based score.
module SadPanda

  # Instance-level accessors, kept for any code that mixes this module in.
  attr_accessor :message, :verbose
  attr_reader :stemmer

  class << self
    # Module-level accessors. `attr_*` in the module body only defines
    # instance methods, so without these `SadPanda.verbose = true` raises
    # NoMethodError and @verbose could never be switched on.
    attr_accessor :message, :verbose
    attr_reader :stemmer
  end

  # Emoticons that act as cues for the message's mood.
  HAPPY_EMOTICONS  = [":)", ":-)", ":]", ":-]"].freeze
  SAD_EMOTICONS    = [":(", ":-(", ":[", ":-["].freeze
  UNEASY_EMOTICONS = [":/", ":-/", ":\\", ":-\\"].freeze

  # Reads the text of the status message, records which emoticon cues it
  # contains, strips everything except letters and spaces, downcases it,
  # removes English stopwords and stems the remaining words.
  #
  # message - the status message (String)
  #
  # Returns a Hash mapping each word stem to the number of times it occurs.
  def self.build_term_frequencies(message)
    @message = message

    # Recompute the emoticon cues for every message. They live in
    # module-level state, so a cue that is only ever set to true would leak
    # from one message into every later call.
    @happy_que  = HAPPY_EMOTICONS.any?  { |emoticon| @message.include?(emoticon) }
    @sad_que    = SAD_EMOTICONS.any?    { |emoticon| @message.include?(emoticon) }
    @uneasy_que = UNEASY_EMOTICONS.any? { |emoticon| @message.include?(emoticon) }

    # clean the text of the status message; split(" ") already collapses
    # runs of whitespace, so no separate squeeze step is needed
    words = @message.gsub(/[^a-z ]/i, '').downcase.split(" ")

    # filter for english stopwords
    words -= Stopwords.stopwords

    # count each stem in a single pass
    get_word_stems(words).each_with_object({}) do |stem, term_frequencies|
      term_frequencies[stem] = (term_frequencies[stem] || 0) + 1
    end
  end

  # Takes an array of words and returns an array of their stems, in the
  # same order.
  def self.get_word_stems(words)
    # build the stemmer once and reuse it across calls
    @stemmer ||= Lingua::Stemmer.new(:language => "en")
    words.map { |word| @stemmer.stem(word) }
  end

  # Takes an emotion-words hash and a hash of word frequencies for the
  # status message, and calculates a numerical score for each emotion.
  #
  # emotions         - Hash of emotion name => collection of word stems
  # term_frequencies - Hash of word stem => count
  # verbose          - when true (or when SadPanda.verbose is true) the
  #                    per-emotion scores are printed to stdout
  #
  # Returns the emotion implied by any emoticons in the last message passed
  # to build_term_frequencies, otherwise the emotion with the highest score,
  # or "uncertain" when no word matched.
  def self.get_emotion_score(emotions, term_frequencies, verbose = false)
    emotion_score = {}
    term_frequencies.each do |term, count|
      emotions.each do |emotion, terms|
        next unless terms.include?(term)
        emotion_score[emotion] = (emotion_score[emotion] || 0) + count
      end
    end

    if verbose || @verbose
      emotion_score.each do |emotion, score|
        puts "EMOTION: #{emotion}"
        puts "SCORE: #{score}"
      end
    end

    # emoticon cues take precedence over the word-based score
    return "uncertain"  if @happy_que && @sad_que
    return "uneasiness" if @uneasy_que
    return "joy"        if @happy_que
    return "sadness"    if @sad_que

    # "uncertain" if unable to detect any emotion
    return "uncertain" if emotion_score.empty?

    emotion_score.max_by { |_emotion, score| score }.first
  end

  # Returns the best-fit emotion for the status message.
  def self.emotion(message)
    # @emotions is not assigned anywhere in this module, so the
    # EmotionBank lexicon is what gets used in practice.
    get_emotion_score(@emotions || EmotionBank.get_term_emotions,
                      build_term_frequencies(message))
  end

  # Gives the status message a polarity value based on the words it
  # contains: the mean polarity of the distinct stems found in the lexicon.
  #
  # polarity_hash    - Hash of word stem => polarity (converted with to_f)
  # term_frequencies - Hash of word stem => count (only the keys are used)
  # verbose          - when true (or when SadPanda.verbose is true) the
  #                    resulting score is printed to stdout
  #
  # Returns a fixed score when the last message passed to
  # build_term_frequencies contained emoticons, 5 when no word matched, and
  # the mean polarity (Float) otherwise.
  def self.get_polarity_score(polarity_hash, term_frequencies, verbose = false)
    # direct hash lookup instead of scanning every lexicon key per term
    polarity_scores = term_frequencies.keys
                                      .select { |term| polarity_hash.key?(term) }
                                      .map { |term| polarity_hash[term].to_f }

    # get clue from any emoticons present
    score =
      if @happy_que && @sad_que
        5
      elsif @uneasy_que
        3
      elsif @happy_que
        8
      elsif @sad_que
        2
      elsif polarity_scores.empty?
        # polarity unreadable; return the neutral score of 5
        5
      else
        polarity_scores.inject(0.0) { |sum, el| sum + el } / polarity_scores.length
      end

    puts "POLARITY: #{score}" if verbose || @verbose
    score
  end

  # Returns the polarity value for the status message (normalized by the
  # number of 'polar' words that the status message contains).
  def self.polarity(message)
    # @polarities is not assigned anywhere in this module, so the
    # TermPolarities lexicon is what gets used in practice.
    get_polarity_score(@polarities || TermPolarities.get_term_polarities,
                       build_term_frequencies(message))
  end

end
|
data/lib/sad_panda/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sad_panda
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -96,7 +96,6 @@ files:
|
|
96
96
|
- lib/sad_panda/emotions/stopwords.rb
|
97
97
|
- lib/sad_panda/emotions/subjectivity.csv
|
98
98
|
- lib/sad_panda/emotions/term_polarities.rb
|
99
|
-
- lib/sad_panda/status_message.rb
|
100
99
|
- lib/sad_panda/version.rb
|
101
100
|
- sad_panda.gemspec
|
102
101
|
- spec/emotion_bank_spec.rb
|
@@ -1,169 +0,0 @@
|
|
1
|
-
require 'lingua/stemmer'
|
2
|
-
|
3
|
-
module SadPanda

  # A single status message that can be scored against an emotion lexicon
  # (emotion name => list of word stems) and a polarity lexicon
  # (word stem => numeric polarity). Emoticons found in the message take
  # precedence over the word-based score.
  class StatusMessage

    attr_accessor :message, :verbose
    attr_reader :stemmer

    # Emoticons that act as cues for the message's mood.
    HAPPY_EMOTICONS  = [":)", ":-)", ":]", ":-]"].freeze
    SAD_EMOTICONS    = [":(", ":-(", ":[", ":-["].freeze
    UNEASY_EMOTICONS = [":/", ":-/", ":\\", ":-\\"].freeze

    # message - the status message text (String)
    # verbose - when true, intermediate scores are printed to stdout
    def initialize(message, verbose = false)
      @message = message
      @stemmer = Lingua::Stemmer.new(:language => "en")
      @verbose = verbose
    end

    # Reads the text of the status message, records which emoticon cues it
    # contains, strips everything except letters and spaces, downcases it,
    # removes English stopwords and stems the remaining words.
    #
    # Returns a Hash mapping each word stem to the number of times it occurs.
    def build_term_frequencies
      # Recompute the emoticon cues on every call: `message` is writable,
      # so cues that are only ever set to true would go stale after the
      # text is reassigned.
      @happy_que  = HAPPY_EMOTICONS.any?  { |emoticon| @message.include?(emoticon) }
      @sad_que    = SAD_EMOTICONS.any?    { |emoticon| @message.include?(emoticon) }
      @uneasy_que = UNEASY_EMOTICONS.any? { |emoticon| @message.include?(emoticon) }

      # clean the text of the status message; split(" ") already collapses
      # runs of whitespace, so no separate squeeze step is needed
      words = @message.gsub(/[^a-z ]/i, '').downcase.split(" ")

      # filter for english stopwords
      words -= Stopwords.stopwords

      # count each stem in a single pass
      get_word_stems(words).each_with_object({}) do |stem, term_frequencies|
        term_frequencies[stem] = (term_frequencies[stem] || 0) + 1
      end
    end

    # Takes an array of words and returns an array of their stems, in the
    # same order.
    def get_word_stems(words)
      words.map { |word| @stemmer.stem(word) }
    end

    # Takes an emotion-words hash and a hash of word frequencies for the
    # status message, and calculates a numerical score for each emotion.
    #
    # emotions         - Hash of emotion name => collection of word stems
    # term_frequencies - Hash of word stem => count
    #
    # Returns the emotion implied by any emoticons seen by the last
    # build_term_frequencies call, otherwise the emotion with the highest
    # score, or "uncertain" when no word matched.
    def get_emotion_score(emotions, term_frequencies)
      emotion_score = {}
      term_frequencies.each do |term, count|
        emotions.each do |emotion, terms|
          next unless terms.include?(term)
          emotion_score[emotion] = (emotion_score[emotion] || 0) + count
        end
      end

      if @verbose
        emotion_score.each do |emotion, score|
          puts "EMOTION: #{emotion}"
          puts "SCORE: #{score}"
        end
      end

      # emoticon cues take precedence over the word-based score
      return "uncertain"  if @happy_que && @sad_que
      return "uneasiness" if @uneasy_que
      return "joy"        if @happy_que
      return "sadness"    if @sad_que

      # "uncertain" if unable to detect any emotion
      return "uncertain" if emotion_score.empty?

      emotion_score.max_by { |_emotion, score| score }.first
    end

    # Returns the best-fit emotion for the status message.
    def emotion
      # @emotions is not assigned anywhere in this class, so the
      # EmotionBank lexicon is what gets used in practice.
      get_emotion_score(@emotions || EmotionBank.get_term_emotions,
                        build_term_frequencies)
    end

    # Gives the status message a polarity value based on the words it
    # contains: the mean polarity of the distinct stems found in the lexicon.
    #
    # polarity_hash    - Hash of word stem => polarity (converted with to_f)
    # term_frequencies - Hash of word stem => count (only the keys are used)
    #
    # Returns a fixed score when emoticons were seen by the last
    # build_term_frequencies call, 5 when no word matched, and the mean
    # polarity (Float) otherwise.
    def get_polarity_score(polarity_hash, term_frequencies)
      # direct hash lookup instead of scanning every lexicon key per term
      polarity_scores = term_frequencies.keys
                                        .select { |term| polarity_hash.key?(term) }
                                        .map { |term| polarity_hash[term].to_f }

      # get clue from any emoticons present
      score =
        if @happy_que && @sad_que
          5
        elsif @uneasy_que
          3
        elsif @happy_que
          8
        elsif @sad_que
          2
        elsif polarity_scores.empty?
          # polarity unreadable; return the neutral score of 5
          5
        else
          polarity_scores.inject(0.0) { |sum, el| sum + el } / polarity_scores.length
        end

      puts "POLARITY: #{score}" if @verbose
      score
    end

    # Returns the polarity value for the status message (normalized by the
    # number of 'polar' words that the status message contains).
    def polarity
      # @polarities is not assigned anywhere in this class, so the
      # TermPolarities lexicon is what gets used in practice.
      get_polarity_score(@polarities || TermPolarities.get_term_polarities,
                         build_term_frequencies)
    end

  end
end
|