chime 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +74 -0
- data/LICENSE.txt +20 -0
- data/README.md +4 -0
- data/README.rdoc +19 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/chime.gemspec +70 -0
- data/lib/chime/emotions/emotion_bank.rb +265 -0
- data/lib/chime/emotions/emotions.csv +1542 -0
- data/lib/chime/emotions/stopwords.rb +42 -0
- data/lib/chime/emotions/subjectivity.csv +1 -0
- data/lib/chime/emotions/term_polarities.rb +1467 -0
- data/lib/chime/version.rb +3 -0
- data/lib/chime.rb +175 -0
- data/test/helper.rb +18 -0
- data/test/test_chime.rb +7 -0
- metadata +134 -0
data/lib/chime.rb
ADDED
@@ -0,0 +1,175 @@
|
|
1
|
+
require_relative "./chime/version"
|
2
|
+
require_relative './chime/emotions/emotion_bank.rb'
|
3
|
+
require_relative './chime/emotions/term_polarities.rb'
|
4
|
+
require_relative './chime/emotions/stopwords.rb'
|
5
|
+
require 'lingua/stemmer'
|
6
|
+
|
7
|
+
module Chime
|
8
|
+
|
9
|
+
# this method returns the best-fit emotion for the status message
|
10
|
+
def self.emotion(message)
|
11
|
+
# get the emotion for which the emotion score value is highest
|
12
|
+
Chime.get_emotion_score(message, EmotionBank.get_term_emotions, build_term_frequencies(message))
|
13
|
+
end
|
14
|
+
|
15
|
+
# this method returns the polarity value for the status message
|
16
|
+
# (normalized by the number of 'polar' words that the status
|
17
|
+
# message contains)
|
18
|
+
def self.polarity(message)
|
19
|
+
# get the polarity for which the polarity score value is highest
|
20
|
+
Chime.get_polarity_score(message, TermPolarities.get_term_polarities, Chime.build_term_frequencies(message))
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
# this method reads the text of the status message
|
27
|
+
# inputed by the user, removes common english words,
|
28
|
+
# strips punctuation and capitalized letters, isolates
|
29
|
+
# the stem of the word, and ultimately produces a hash
|
30
|
+
# where the keys are the stems of the remaining words,
|
31
|
+
# and the values are their respective frequencies within
|
32
|
+
# the status message
|
33
|
+
def self.build_term_frequencies(message, term_frequencies = {})
|
34
|
+
# clean the text of the status message
|
35
|
+
happy_emoticon = happy_emoticon(message)
|
36
|
+
sad_emoticon = sad_emoticon(message)
|
37
|
+
words = words_from_message_text(message)
|
38
|
+
#filter for english stopwords
|
39
|
+
stopwords = Stopwords.stopwords
|
40
|
+
words = words - stopwords
|
41
|
+
#get word stems
|
42
|
+
word_stems = Chime.get_word_stems words
|
43
|
+
#create term_frequencies
|
44
|
+
#return term frequency hash
|
45
|
+
create_term_frequencies(word_stems, term_frequencies)
|
46
|
+
end
|
47
|
+
|
48
|
+
# this method takes an array of words an returns an array of word stems
|
49
|
+
def self.get_word_stems(words, output=[])
|
50
|
+
stemmer = Lingua::Stemmer.new(:language => "en")
|
51
|
+
words.each do |word|
|
52
|
+
output << stemmer.stem(word)
|
53
|
+
end
|
54
|
+
output
|
55
|
+
end
|
56
|
+
|
57
|
+
# this method takes an emotion-words hash and a hash containing word
|
58
|
+
# frequencies for the status message, calculates a numerical score
|
59
|
+
# for each possble emotion, and returns the emotion with the highest
|
60
|
+
# "score"
|
61
|
+
def self.get_emotion_score(message, emotions, term_frequencies, emotion_score = {})
|
62
|
+
term_frequencies.each do |key,value|
|
63
|
+
set_emotions(emotions, emotion_score, key, value)
|
64
|
+
end
|
65
|
+
# return an emotion_score_hash to be processed by emotion
|
66
|
+
# get clue from any emoticons present
|
67
|
+
check_emoticon_for_emotion(emotion_score, message)
|
68
|
+
end
|
69
|
+
|
70
|
+
# this method gives the status method a normalized polarity
|
71
|
+
# value based on the words it contains
|
72
|
+
def self.get_polarity_score (message, polarity_hash, term_frequencies, polarity_scores = [])
|
73
|
+
term_frequencies.each do |key, value|
|
74
|
+
set_polarities(key, value, polarity_hash, polarity_scores)
|
75
|
+
end
|
76
|
+
|
77
|
+
# return an polarity_score_hash to be processed by polarity method
|
78
|
+
# return an emotion_score_hash to be processed by emotion
|
79
|
+
# get clue from any emoticons present
|
80
|
+
check_emoticon_for_polarity(polarity_scores, message)
|
81
|
+
end
|
82
|
+
|
83
|
+
def self.happy_emoticon(message)
|
84
|
+
(message.include?(":)") || message.include?(":-)") || message.include?(":]") || message.include?(":-]"))
|
85
|
+
end
|
86
|
+
|
87
|
+
def self.sad_emoticon(message)
|
88
|
+
(message.include?(":(") || message.include?(":-(") || message.include?(":[") || message.include?(":-["))
|
89
|
+
end
|
90
|
+
|
91
|
+
def self.words_from_message_text(message)
|
92
|
+
message.gsub!(/[^a-z ]/i, '')
|
93
|
+
message.downcase!
|
94
|
+
message.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
|
95
|
+
message.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
|
96
|
+
message.gsub!(/\s\s+/,' ')
|
97
|
+
message.split(" ")
|
98
|
+
end
|
99
|
+
|
100
|
+
def self.set_emotions(emotions, emotion_score, term, frequency)
|
101
|
+
emotions.keys.each do |k|
|
102
|
+
store_emotions(emotions, emotion_score, k, term, frequency)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def self.set_polarities(term, frequency, polarity_hash, polarity_scores)
|
107
|
+
polarity_hash.keys.each do |k|
|
108
|
+
store_polarities(term, k, polarity_hash, polarity_scores)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def self.store_emotions(emotions, emotion_score, emotion, term, frequency)
|
113
|
+
if emotions[emotion].include?(term)
|
114
|
+
emotion_score[emotion] ||= 0
|
115
|
+
emotion_score[emotion] += frequency
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def self.store_polarities(term, word, polarity_hash, polarity_scores)
|
120
|
+
if term == word
|
121
|
+
polarity_scores << (polarity_hash[word].to_f)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
def self.check_emoticon_for_emotion(emotion_score, message)
|
126
|
+
if (happy_emoticon(message) && sad_emoticon(message))
|
127
|
+
"ambiguous"
|
128
|
+
elsif happy_emoticon(message)
|
129
|
+
"joy"
|
130
|
+
elsif sad_emoticon(message)
|
131
|
+
"sadness"
|
132
|
+
else
|
133
|
+
return_emotion_score(emotion_score)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
def self.return_emotion_score(emotion_score)
|
138
|
+
## 0 if unable to detect emotion
|
139
|
+
if emotion_score == {}
|
140
|
+
"ambiguous"
|
141
|
+
else
|
142
|
+
emotion_score.max_by{|k, v| v}[0]
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
def self.check_emoticon_for_polarity(polarity_scores, message)
|
147
|
+
if (happy_emoticon(message) && sad_emoticon(message))
|
148
|
+
score = 5
|
149
|
+
elsif happy_emoticon(message)
|
150
|
+
score = 8
|
151
|
+
elsif sad_emoticon(message)
|
152
|
+
score = 2
|
153
|
+
else
|
154
|
+
return_polarity_scores(polarity_scores)
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def self.return_polarity_scores(polarity_scores)
|
159
|
+
if polarity_scores == []
|
160
|
+
# polarity unreadable; return a neutral score of 5
|
161
|
+
5
|
162
|
+
else
|
163
|
+
polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def self.create_term_frequencies(word_stems, term_frequencies)
|
168
|
+
word_stems.each do |stem|
|
169
|
+
term_frequencies[stem] = word_stems.count(stem)
|
170
|
+
end
|
171
|
+
term_frequencies
|
172
|
+
end
|
173
|
+
|
174
|
+
|
175
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'chime'
|
16
|
+
|
17
|
+
class Test::Unit::TestCase
|
18
|
+
end
|
data/test/test_chime.rb
ADDED
metadata
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: chime
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- jonathanamccann
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: shoulda
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rdoc
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.12'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.12'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: jeweler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.8.7
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.8.7
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rcov
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.9.11
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.9.11
|
83
|
+
description: Text Analysis for Member Based Organizations
|
84
|
+
email: jonathanamccann@gmail.com
|
85
|
+
executables: []
|
86
|
+
extensions: []
|
87
|
+
extra_rdoc_files:
|
88
|
+
- LICENSE.txt
|
89
|
+
- README.md
|
90
|
+
- README.rdoc
|
91
|
+
files:
|
92
|
+
- .document
|
93
|
+
- Gemfile
|
94
|
+
- Gemfile.lock
|
95
|
+
- LICENSE.txt
|
96
|
+
- README.md
|
97
|
+
- README.rdoc
|
98
|
+
- Rakefile
|
99
|
+
- VERSION
|
100
|
+
- chime.gemspec
|
101
|
+
- lib/chime.rb
|
102
|
+
- lib/chime/emotions/emotion_bank.rb
|
103
|
+
- lib/chime/emotions/emotions.csv
|
104
|
+
- lib/chime/emotions/stopwords.rb
|
105
|
+
- lib/chime/emotions/subjectivity.csv
|
106
|
+
- lib/chime/emotions/term_polarities.rb
|
107
|
+
- lib/chime/version.rb
|
108
|
+
- test/helper.rb
|
109
|
+
- test/test_chime.rb
|
110
|
+
homepage: http://github.com/jonathanamccann/chime
|
111
|
+
licenses:
|
112
|
+
- MIT
|
113
|
+
metadata: {}
|
114
|
+
post_install_message:
|
115
|
+
rdoc_options: []
|
116
|
+
require_paths:
|
117
|
+
- lib
|
118
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ! '>='
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
|
+
requirements:
|
125
|
+
- - ! '>='
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: '0'
|
128
|
+
requirements: []
|
129
|
+
rubyforge_project:
|
130
|
+
rubygems_version: 2.0.5
|
131
|
+
signing_key:
|
132
|
+
specification_version: 4
|
133
|
+
summary: Text Analysis for Member Based Organizations
|
134
|
+
test_files: []
|