RubyGems - sad_panda - Versions diffs - 1.0.0 → 1.0.1 - Mend

sad_panda 1.0.0 → 1.0.1

Files changed (5) hide show

@@ -6,155 +6,170 @@ require 'lingua/stemmer'
 module SadPanda
-	# this method reads the text of the status message
-	# inputed by the user, removes common english words,
-	# strips punctuation and capitalized letters, isolates
-	# the stem of the word, and ultimately produces a hash
-	# where the keys are the stems of the remaining words,
-	# and the values are their respective frequencies within
-	# the status message
-	def self.build_term_frequencies message
-		@message = message
-		# create empty term_frequencies
-		term_frequencies = {}
-		# clean the text of the status message
-		if (@message.include?(":)") || @message.include?(":-)") || @message.include?(":]") || @message.include?(":-]"))
-			@happy_que = true
-		end
-		if (@message.include?(":(") || @message.include?(":-(") || @message.include?(":[") || @message.include?(":-["))
-			@sad_que = true
-		end
-		message_text = @message.gsub(/[^a-z ]/i, '').downcase
-    message_text.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
-    message_text.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
-    message_text.gsub!(/\s\s+/,' ')
-		words = message_text.split(" ")
-		#filter for english stopwords
-		stopwords = Stopwords.stopwords
-		words = words - stopwords
-		#get word stems
-		word_stems = SadPanda.get_word_stems words
-		#create term_frequencies
-		word_stems.each do |stem|
-			term_frequencies[stem] = word_stems.count(stem)
-		end
-		#return term frequency matrix
-		term_frequencies
-	end
-	# this method takes an array of words an returns an array of word stems
-	def self.get_word_stems words
-		@stemmer = Lingua::Stemmer.new(:language => "en")
-		output = []
-		words.each do |word|
-			output << @stemmer.stem(word)
-		end
-		output
-	end
-	# this method takes an emotion-words hash and a hash containing word
-	# frequencies for the status message, calculates a numerical score
-	# for each possble emotion, and returns the emotion with the highest
-	# "score"
-	def self.get_emotion_score(emotions, term_frequencies, verbose = false)
-		emotion_score = {}
-		term_frequencies.each do |key,value|
-			emotions.keys.each do |k|
-				if emotions[k].include?(key)
-					emotion_score[k] ||= 0
-					emotion_score[k] += value
-				end
-			end
-		end
-		if @verbose
-			emotion_score.keys.each do |key|
-				puts "EMOTION: "+key
-				puts "SCORE: "+emotion_score[key].to_s
-			end
-		end
-		# return an emotion_score_hash to be processed by emotion
-    # get clue from any emoticons present
-    if (@happy_que && @sad_que)
-        return "ambiguous"
-    elsif @happy_que
-        return "joy"
-    elsif @sad_que
-        return "sadness"
-    else
-		## 0 if unable to detect emotion
-      if emotion_score == {}
-          return "ambiguous"
+  # this method returns the best-fit emotion for the status message
+  def self.emotion(message)
+    # get the emotion for which the emotion score value is highest
+    SadPanda.get_emotion_score(message, EmotionBank.get_term_emotions, build_term_frequencies(message))
+  end
+  # this method returns the polarity value for the status message
+  # (normalized by the number of 'polar' words that the status
+  # message contains)
+  def self.polarity(message)
+    # average the polarity of the message's polar words (emoticons take precedence)
+    SadPanda.get_polarity_score(message, TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
+  end
+  private
+  	# this method reads the text of the status message
+  	# entered by the user, strips non-letter characters,
+  	# lowercases the text, removes common English words,
+  	# reduces each remaining word to its stem, and ultimately
+  	# produces a hash where the keys are the stems of the
+  	# remaining words and the values are their respective
+  	# frequencies within the status message
+  	def self.build_term_frequencies(message, term_frequencies = {})
+  		# clean the text of the status message
+      happy_emoticon = happy_emoticon(message)
+      sad_emoticon = sad_emoticon(message)
+  		words = words_from_message_text(message)
+  		#filter for english stopwords
+  		stopwords = Stopwords.stopwords
+  		words = words - stopwords
+  		#get word stems
+  		word_stems = SadPanda.get_word_stems words
+  		#create term_frequencies
+  		#return term frequency hash
+    	create_term_frequencies(word_stems, term_frequencies)
+    end
+  	# this method takes an array of words and returns an array of word stems
+  	def self.get_word_stems(words, output=[])
+  		stemmer = Lingua::Stemmer.new(:language => "en")
+  		words.each do |word|
+  			output << stemmer.stem(word)
+  		end
+  		output
+  	end
+  	# this method takes the status message, an emotion-words hash and a
+  	# hash containing word frequencies for the status message, calculates
+  	# a numerical score for each possible emotion, and returns the emotion
+  	# with the highest "score" (emoticons in the message take precedence)
+  	def self.get_emotion_score(message, emotions, term_frequencies, emotion_score = {})
+  		term_frequencies.each do |key,value|
+  			set_emotions(emotions, emotion_score, key, value)
+  		end
+  		# emoticons in the message take precedence; otherwise return the
+      # emotion with the highest score ("ambiguous" if none was scored)
+      check_emoticon_for_emotion(emotion_score, message)
+  	end
+  	# this method gives the status message a normalized polarity
+  	# value based on the words it contains
+  	def self.get_polarity_score (message, polarity_hash, term_frequencies, polarity_scores = [])
+  		term_frequencies.each do |key, value|
+        set_polarities(key, value, polarity_hash, polarity_scores)
+  		end
+  		# emoticons in the message take precedence (8 happy, 2 sad, 5 both);
+  		# otherwise return the mean of the collected polarity scores,
+      # or a neutral 5 when no polar words were found
+      check_emoticon_for_polarity(polarity_scores, message)
+  	end
+    def self.happy_emoticon(message)
+      (message.include?(":)") || message.include?(":-)") || message.include?(":]") || message.include?(":-]"))
+    end
+    def self.sad_emoticon(message)
+      (message.include?(":(") || message.include?(":-(") || message.include?(":[") || message.include?(":-["))
+    end
+    def self.words_from_message_text(message)
+      message.gsub!(/[^a-z ]/i, '')
+      message.downcase!
+      message.gsub!(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)/, '')
+      message.gsub!(/(?=\w*h)(?=\w*t)(?=\w*t)(?=\w*p)\w*/, '')
+      message.gsub!(/\s\s+/,' ')
+      message.split(" ")
+    end
+    def self.set_emotions(emotions, emotion_score, term, frequency)
+      emotions.keys.each do |k|
+        store_emotions(emotions, emotion_score, k, term, frequency)
+      end
+    end
+    def self.set_polarities(term, frequency, polarity_hash, polarity_scores)
+      polarity_hash.keys.each do |k|
+        store_polarities(term, k, polarity_hash, polarity_scores)
+      end
+    end
+    def self.store_emotions(emotions, emotion_score, emotion, term, frequency)
+      if emotions[emotion].include?(term)
+        emotion_score[emotion] ||= 0
+        emotion_score[emotion] += frequency
+      end
+    end
+    def self.store_polarities(term, word, polarity_hash, polarity_scores)
+      if term == word
+        polarity_scores << (polarity_hash[word].to_f)
+      end
+    end
+    def self.check_emoticon_for_emotion(emotion_score, message)
+      if (happy_emoticon(message) && sad_emoticon(message))
+         "ambiguous"
+      elsif happy_emoticon(message)
+         "joy"
+      elsif sad_emoticon(message)
+         "sadness"
       else
-          score = emotion_score.max_by{|k, v| v}[0]
+        return_emotion_score(emotion_score)
       end
-      score
     end
-	end
-	# this method returns the best-fit emotion for the status message
-	def self.emotion message
-    # get the emotion for which the emotion score value is highest
-    if @emotions
-        SadPanda.get_emotion_score(@emotions, SadPanda.build_term_frequencies(message))
-    else
-        SadPanda.get_emotion_score(EmotionBank.get_term_emotions, build_term_frequencies(message))
+    def self.return_emotion_score(emotion_score)
+      ## "ambiguous" if unable to detect emotion
+      if emotion_score == {}
+        "ambiguous"
+      else
+        emotion_score.max_by{|k, v| v}[0]
+      end
     end
-	end
-	# this method gives the status method a normalized polarity
-	# value based on the words it contains
-	def self.get_polarity_score (polarity_hash, term_frequencies, verbose = false)
-		polarity_scores = []
-		term_frequencies.each do |key, value|
-			polarity_hash.keys.each do |k|
-				if key == k
-					polarity_scores << (polarity_hash[k].to_f)
-				end
-			end
-		end
-		# return an polarity_score_hash to be processed by polarity method
-		# return an emotion_score_hash to be processed by emotion
-    # get clue from any emoticons present
-    if (@happy_que && @sad_que)
+    def self.check_emoticon_for_polarity(polarity_scores, message)
+      if (happy_emoticon(message) && sad_emoticon(message))
         score = 5
-    elsif @happy_que
+      elsif happy_emoticon(message)
         score = 8
-    elsif @sad_que
+      elsif sad_emoticon(message)
         score = 2
-    else
-			if polarity_scores == []
-				# polarity unreadable; return a neutral score of zero
-				score = 5
-			else
-				score = polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
-				polarity_scores = []
-			end
-			if @verbose
-				puts "POLARITY: " + score.to_s
-			end
-			score
-		end
-	end
-	# this method returns the polarity value for the status message
-	# (normalized by the number of 'polar' words that the status
-	# message contains)
-	def self.polarity message
-		# get the polarity for which the polarity score value is highest
-		if @polarities
-			SadPanda.get_polarity_score(@polarities, SadPanda.build_term_frequencies(message))
-		else
-			SadPanda.get_polarity_score(TermPolarities.get_term_polarities, SadPanda.build_term_frequencies(message))
-		end
-	end
+      else
+        return_polarity_scores(polarity_scores)
+      end
+    end
+    def self.return_polarity_scores(polarity_scores)
+      if polarity_scores == []
+        # polarity unreadable; return a neutral score of 5
+        5
+      else
+        polarity_scores.inject(0.0){ |sum, el| sum + el}/polarity_scores.length
+      end
+    end
+    def self.create_term_frequencies(word_stems, term_frequencies)
+      word_stems.each do |stem|
+        term_frequencies[stem] = word_stems.count(stem)
+      end
+      term_frequencies
+    end
 end

data/lib/sad_panda/version.rb CHANGED

@@ -1,3 +1,3 @@
 module SadPanda
-  VERSION = "1.0.0"
+  VERSION = "1.0.1"
 end

data/sad_panda.gemspec CHANGED

@@ -21,5 +21,6 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "bundler", "~> 1.3"
   spec.add_development_dependency "rake"
   spec.add_runtime_dependency "ruby-stemmer"
+  spec.add_development_dependency "pry"
   spec.add_development_dependency "rspec"
 end

data/spec/sad_panda_spec.rb CHANGED

@@ -6,6 +6,249 @@ describe SadPanda  do
   let(:emotions) {EmotionBank.get_term_emotions}
   let(:polarities) {TermPolarities.get_term_polarities}
   let(:term_frequencies) {SadPanda.build_term_frequencies("My cactus collection makes me happy.")}
+  let(:emotion_score) { {} }
+  let(:polarity_scores) { [] }
+  let(:polarity_hash) { TermPolarities.get_term_polarities }
+  context "methods" do
+    describe "#happy_emoticon" do
+      context "when true" do
+        it "returns true" do
+          message = ":)"
+          expect(SadPanda.happy_emoticon(message)).to be_true
+        end
+      end
+      context "when false" do
+        it "returns true" do
+          message = "stuff"
+          expect(SadPanda.happy_emoticon(message)).to be_false
+        end
+      end
+    end
+    describe "#sad_emoticon" do
+      context "when true" do
+        it "returns true" do
+          message = ":("
+          expect(SadPanda.sad_emoticon(message)).to be_true
+        end
+      end
+      context "when false" do
+        it "returns true" do
+          message = "stuff"
+          expect(SadPanda.sad_emoticon(message)).to be_false
+        end
+      end
+    end
+    describe "#words_from_message_text" do
+      it "removes urls and other gross stuff from tweet" do
+        message = "lobster hickory http://www.boston.com/business #Rails"
+        words = SadPanda.words_from_message_text(message)
+        expect(words).to eql(["lobster", "hickory", "rails"])
+      end
+    end
+    describe "#set_emotions" do
+      it "modifies the emotions_score array" do
+        term_frequencies.each do |key, value|
+          SadPanda.set_emotions(emotions, emotion_score, key, value)
+        end
+        expect((emotion_score["joy"])).to eql(1)
+      end
+    end
+    describe "#set_polarities" do
+      it "modifies the polarity_scores array" do
+        term_frequencies = {'sad' => 1}
+        term_frequencies.each do |key, value|
+          SadPanda.set_polarities(key, value, polarity_hash, polarity_scores)
+        end
+        expect(polarity_scores).to eql([0.0])
+      end
+    end
+    describe "#store_emotions" do
+      it "stores emotions in emotion_score hash" do
+        emotions = {"joy" => "zorg" }
+        key,value = "zorg", 1
+        emotions.keys.each do |k|
+          SadPanda.store_emotions(emotions, emotion_score, k, key, value)
+        end
+        expect(emotion_score["joy"]).to eql(1)
+      end
+    end
+    describe "#store_polarities" do
+      context "word in polarity_hash" do
+        it "adds a polarity to polarity_scores" do
+          term = "sad"
+          word = "sad"
+          SadPanda.store_polarities(term, word, polarity_hash, polarity_scores)
+          expect(polarity_scores).to eql([0.0])
+        end
+      end
+      context "word not in polarity_hash" do
+        it "does not add a polarity to polarity_scores" do
+          term = "sad"
+          word = "cactus"
+          SadPanda.store_polarities(term, word, polarity_hash, polarity_scores)
+          expect(polarity_scores).to eql([])
+        end
+      end
+    end
+    describe "#create_term_frequencies" do
+      it "populates a word-stem frequency hash" do
+        words = ["yo", "stuff"]
+        term_frequencies = {}
+        word_stems = SadPanda.get_word_stems(words)
+        term_frequencies = SadPanda.create_term_frequencies(word_stems, term_frequencies)
+        expect(term_frequencies).to eql({"yo"=>1, "stuff"=>1})
+      end
+    end
+    describe "#check_emoticon_for_emotion" do
+      context "contains happy emoticon" do
+        it "returns 'joy'" do
+          message = ":)"
+          output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
+          expect(output).to eql("joy")
+        end
+      end
+      context "contains sad emoticon" do
+        it "returns 'sadness'" do
+          message = ":("
+          output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
+          expect(output).to eql("sadness")
+        end
+      end
+      context "contains both a happy and a sad emoticon" do
+        it "returns 'ambiguous'" do
+          message = ":( :)"
+          output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
+          expect(output).to eql("ambiguous")
+        end
+      end
+      context "contains no emoticons and emotion_score is not empty" do
+        it "returns joy" do
+          message = "no emoticons in hur"
+          emotion_score = {"joy" => 1}
+          output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
+          expect(output).to eql("joy")
+        end
+      end
+      context "contains no emoticons and emotion_score is  empty" do
+        it "returns joy" do
+          message = "no emoticons in hur"
+          output = SadPanda.check_emoticon_for_emotion(emotion_score, message)
+          expect(output).to eql("ambiguous")
+        end
+      end
+    end
+    describe "#check_emoticon_for_polarity" do
+      context "contains happy emoticon" do
+        it "returns 8" do
+          message = ":)"
+          polarity_scores = [2.0,3.0]
+          output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
+          expect(output).to eql(8)
+        end
+      end
+      context "contains sad emoticon" do
+        it "returns 2" do
+          message = ":("
+          polarity_scores = [2.0,3.0]
+          output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
+          expect(output).to eql(2)
+        end
+      end
+      context "contains both a happy and a sad emoticon" do
+        it "returns 5" do
+          message = ":( :)"
+          polarity_scores = [2.0,3.0]
+          output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
+          expect(output).to eql(5)
+        end
+      end
+      context "contains no emoticons and polarity_scores is empty" do
+        it "returns joy" do
+          message = "no emoticons in hur"
+          polarity_scores = []
+          output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
+          expect(output).to eql(5)
+        end
+      end
+      context "contains no emoticons and emotion_score is not empty" do
+        it "returns joy" do
+          message = "no emoticons in hur"
+          polarity_scores = [8.0]
+          output = SadPanda.check_emoticon_for_polarity(polarity_scores, message)
+          expect(output).to eql(8.0)
+        end
+      end
+    end
+  end
   describe "when 'build_term_frequencies' method is called" do
@@ -34,14 +277,16 @@ describe SadPanda  do
   describe "when 'get_emotion_score' method is called" do
     it 'returns a string' do
-      output = SadPanda.get_emotion_score emotions,term_frequencies
+      message = "this is a message!"
+      output = SadPanda.get_emotion_score(message, emotions,term_frequencies)
       expect(output.class).to eql(String)
     end
   end
   describe "when 'get_polarity_score' method is called" do
     it 'returns a string' do
-      output = SadPanda.get_polarity_score polarities,term_frequencies
+      message = "this is another message!"
+      output = SadPanda.get_polarity_score(message, polarities, term_frequencies)
       expect(output.class).to eql(Fixnum)
     end
   end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sad_panda
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.0.1
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-06-30 00:00:00.000000000 Z
+date: 2013-07-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -59,6 +59,22 @@ dependencies:
     - - ! '>='
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: pry
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
@@ -116,7 +132,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -2958994952268376603
+      hash: 2873136892834471107
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -125,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -2958994952268376603
+      hash: 2873136892834471107
 requirements: []
 rubyforge_project:
 rubygems_version: 1.8.25