vader_sentiment_ruby 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (26) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +51 -0
  4. data/lib/vader_sentiment_ruby.rb +20 -0
  5. data/lib/vader_sentiment_ruby/checker.rb +13 -0
  6. data/lib/vader_sentiment_ruby/checker/but_word_negation_checker.rb +34 -0
  7. data/lib/vader_sentiment_ruby/checker/least_word_negation_checker.rb +38 -0
  8. data/lib/vader_sentiment_ruby/checker/negation_checker.rb +114 -0
  9. data/lib/vader_sentiment_ruby/checker/no_word_checker.rb +49 -0
  10. data/lib/vader_sentiment_ruby/checker/previous_words_influence_checker.rb +55 -0
  11. data/lib/vader_sentiment_ruby/checker/sentiment_laden_idioms_checker.rb +30 -0
  12. data/lib/vader_sentiment_ruby/checker/special_idioms_checker.rb +107 -0
  13. data/lib/vader_sentiment_ruby/constants.rb +135 -0
  14. data/lib/vader_sentiment_ruby/data/emoji_utf8_lexicon.txt +3570 -0
  15. data/lib/vader_sentiment_ruby/data/vader_lexicon.txt +7518 -0
  16. data/lib/vader_sentiment_ruby/emojis_describer.rb +39 -0
  17. data/lib/vader_sentiment_ruby/emojis_dictionary_creator.rb +21 -0
  18. data/lib/vader_sentiment_ruby/lexicon_dictionary_creator.rb +21 -0
  19. data/lib/vader_sentiment_ruby/punctuation_emphasis_amplifier.rb +36 -0
  20. data/lib/vader_sentiment_ruby/sentiment_intensity_analyzer.rb +105 -0
  21. data/lib/vader_sentiment_ruby/sentiment_properties_identifier.rb +48 -0
  22. data/lib/vader_sentiment_ruby/sentiment_scores_sifter.rb +27 -0
  23. data/lib/vader_sentiment_ruby/valence_score_calculator.rb +82 -0
  24. data/lib/vader_sentiment_ruby/version.rb +5 -0
  25. data/lib/vader_sentiment_ruby/word_helper.rb +93 -0
  26. metadata +156 -0
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Replaces each emoji character in a text with its textual description.
  #
  # Every character of the text is looked up in the emoji dictionary. A match
  # is swapped for the dictionary value, preceded by a single space unless the
  # previously emitted character was a space (or nothing has been emitted yet).
  # All other characters are copied through unchanged.
  class EmojiDescriber
    # @param [String] text Text that may contain emoji characters
    #   (anything else is converted with #to_s, so nil becomes an empty text)
    # @param [Hash] emojis Mapping of a single character to its description
    def initialize(text, emojis)
      @text = text.to_s
      @emojis = emojis
    end

    # Builds the described text. All working state is local to the call, so
    # invoking it repeatedly always yields the same result.
    # @return [String] Copy of the text with emojis replaced by descriptions
    def call
      described = +''
      prev_space = true

      @text.each_char do |chr|
        if @emojis.key?(chr)
          # Separate the description from the preceding word
          described << ' ' unless prev_space
          described << @emojis[chr]
          prev_space = false
        else
          described << chr
          prev_space = chr == ' '
        end
      end

      described
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Converts the emoji lexicon file to a dictionary
  class EmojisDictionaryCreator
    # Reads a tab-separated lexicon file whose first column is the emoji and
    # whose second column is its description. Further columns are ignored and
    # blank lines are skipped.
    #
    # The file is read as UTF-8 regardless of the process locale and the file
    # handle is closed before the method returns.
    # @param [String] path Lexicon file to load, defaults to the bundled one
    # @return [Hash] Emoji => description
    def call(path = "#{__dir__}/data/emoji_utf8_lexicon.txt")
      File.read(path, encoding: 'UTF-8').each_line.with_object({}) do |line, emoji_dict|
        entry = line.strip
        next if entry.empty?

        emoji, description = entry.split("\t")
        emoji_dict[emoji] = description
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Converts the lexicon file to a dictionary
  class LexiconDictionaryCreator
    # Reads a tab-separated lexicon file whose first column is the word and
    # whose second column is its numeric measure. Further columns are ignored
    # and blank lines are skipped.
    #
    # The file is read as UTF-8 regardless of the process locale and the file
    # handle is closed before the method returns.
    # @param [String] path Lexicon file to load, defaults to the bundled one
    # @return [Hash] Word => measure (Float)
    def call(path = "#{__dir__}/data/vader_lexicon.txt")
      File.read(path, encoding: 'UTF-8').each_line.with_object({}) do |line, lex_dict|
        entry = line.strip
        next if entry.empty?

        word, measure = entry.split("\t")
        lex_dict[word] = measure.to_f
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Adds emphasis from exclamation points and question marks
  class PunctuationEmphasisAmplifier
    # @param [String] text Text whose punctuation is inspected
    #   (anything else is converted with #to_s)
    def initialize(text)
      @text = text.to_s
    end

    # @return [Float] Sum of the exclamation point and question mark emphasis
    def call
      amplify_exclamation_points + amplify_question_marks
    end

    # Emphasis resulting from exclamation points; only the first 4 count.
    # @return [Float]
    def amplify_exclamation_points
      # empirically derived mean sentiment intensity rating increase for exclamation points
      [@text.count('!'), 4].min * 0.292
    end

    # Emphasis resulting from question marks: none for fewer than 2,
    # proportional for 2 or 3, and a fixed amount for more than 3.
    # @return [Float]
    def amplify_question_marks
      qm_count = @text.count('?')

      return 0.0 if qm_count < 2
      return 0.96 if qm_count > 3

      # empirically derived mean sentiment intensity rating increase for question marks
      qm_count * 0.18
    end
  end
end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Computes sentiment scores for sentences using the bundled word lexicon
  # and emoji dictionary.
  class SentimentIntensityAnalyzer
    # Loads the word lexicon and the emoji dictionary once per analyzer.
    def initialize
      @lexicon = LexiconDictionaryCreator.new.call
      @emojis = EmojisDictionaryCreator.new.call
    end

    # Scores the sentiment of the input text: emojis are replaced by their
    # descriptions, every token gets a valence, the valences are passed
    # through the "but" word check and finally handed to ValenceScoreCalculator.
    # @param [String] text Text to analyze
    # @return [Hash] Hash of sentiments for analyzed text
    def polarity_scores(text)
      text = EmojiDescriber.new(text, @emojis).call
      senti_text = SentimentPropertiesIdentifier.new(text)
      words_and_emoticons = senti_text.words_and_emoticons

      sentiments = words_and_emoticons.each_with_index.map do |item, index|
        prepare_valence(item, index, words_and_emoticons, senti_text)
      end

      sentiments = Checker::ButWordNegationChecker.new(words_and_emoticons, sentiments).call

      ValenceScoreCalculator.new(sentiments, text).call
    end

    private

    # Valence of a single token. Booster words and the "kind" of a "kind of"
    # pair are used as modifiers only, so they score 0 themselves.
    def prepare_valence(item, index, words_and_emoticons, senti_text)
      valence = 0
      item_lowercase = item.downcase

      # Check for vader_lexicon words that may be used as modifiers or negations
      return valence if Constants::BOOSTER_DICT.key?(item_lowercase)

      if index < words_and_emoticons.size - 1 &&
         item_lowercase == 'kind' &&
         words_and_emoticons[index + 1].downcase == 'of'
        return valence
      end

      sentiment_valence(valence, senti_text, item, index)
    end

    # Returns the given valence unchanged unless the token is in the lexicon.
    def sentiment_valence(valence, senti_text, item, index)
      item_lowercase = item.downcase
      return valence unless @lexicon.key?(item_lowercase)

      calculate_valence_for_word_in_lexicon(item, item_lowercase, index, senti_text)
    end

    # Starts from the lexicon value of the token and runs it through the
    # individual adjustments, in this order: "no" word check, ALL CAPS emphasis,
    # influence of up to three preceding tokens, "least" word check.
    def calculate_valence_for_word_in_lexicon(item, item_lowercase, index, senti_text)
      is_cap_diff = senti_text.is_cap_diff
      words_and_emoticons = senti_text.words_and_emoticons

      valence = @lexicon[item_lowercase] # get the sentiment valence
      valence = Checker::NoWordChecker.new(valence, item_lowercase, index, words_and_emoticons, @lexicon).call
      # Check if sentiment laden word is in ALL CAPS (while others aren't)
      valence = apply_intensity_rating(valence) if WordHelper.word_upcase?(item) && is_cap_diff
      valence = modify_valence_by_scalar(valence, index, words_and_emoticons, is_cap_diff)
      Checker::LeastWordNegationChecker.new(valence, words_and_emoticons, index, @lexicon).call
    end

    # Pushes the valence further away from zero by the capitalization increment.
    def apply_intensity_rating(valence)
      return valence + Constants::C_INCR if valence.positive?

      valence - Constants::C_INCR
    end

    # Dampen the scalar modifier of preceding words and emoticons
    # (excluding the ones that immediately precede the item) based
    # on their distance from the current item.
    def modify_valence_by_scalar(valence, index, words_and_emoticons, is_cap_diff)
      (0..2).each do |start_index|
        # Not enough tokens in front of the current one
        next unless index > start_index

        # Preceding tokens that carry their own sentiment are not modifiers
        previous_word = words_and_emoticons[index - (start_index + 1)]
        next if @lexicon.key?(previous_word.downcase)

        valence = apply_scalar(valence, words_and_emoticons, index, start_index, is_cap_diff)
        valence = Checker::NegationChecker.new(valence, words_and_emoticons, start_index, index).call
        valence = Checker::SpecialIdiomsChecker.new(valence, words_and_emoticons, index).call if start_index == 2
      end

      valence
    end

    # Adds the (distance-dampened) influence of one preceding token.
    def apply_scalar(valence, words_and_emoticons, index, start_index, is_cap_diff)
      previous_word = words_and_emoticons[index - (start_index + 1)]
      scalar = Checker::PreviousWordsInfluenceChecker.new(previous_word, valence, is_cap_diff).call
      valence + adjust_scalar(scalar, start_index)
    end

    # Full weight for the directly preceding token, 95% for the one before
    # it and 90% for the third one back.
    def adjust_scalar(scalar, start_index)
      return scalar if scalar.zero?

      scalar *= 0.95 if start_index == 1
      scalar *= 0.9 if start_index == 2
      scalar
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Identify sentiment-relevant string-level properties of input text.
  class SentimentPropertiesIdentifier
    attr_reader :is_cap_diff, :words_and_emoticons

    # @param [String] text Non-String input is converted to a UTF-8 string
    def initialize(text)
      @text = text.is_a?(String) ? text : text.to_s.encode('utf-8')
      @words_and_emoticons = prepare_words_and_emoticons
      @is_cap_diff = all_cap_differential?(@words_and_emoticons)
    end

    private

    # Splits the text on whitespace and passes every token through
    # WordHelper.strip_punctuation.
    # @return [Array]
    def prepare_words_and_emoticons
      tokens = @text.split
      tokens.map { |token| WordHelper.strip_punctuation(token) }
    end

    # Tells whether some, but not all, of the given words are ALL CAPS.
    # @param [Array] words
    # @return [Boolean]
    def all_cap_differential?(words)
      upcased = words.count { |word| WordHelper.word_upcase?(word) }

      upcased.positive? && upcased < words.size
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Separates positive versus negative sentiment scores
  class SentimentScoresSifter
    # @param [Array<Numeric>] sentiments Sentiment score of every token
    def initialize(sentiments)
      @sentiments = sentiments
    end

    # Sums positive and negative scores separately and counts neutral ones.
    # The totals are local to each invocation, so calling this repeatedly
    # always returns the same values.
    # @return [Array] Positive sum (Float), negative sum (Float), neutral count (Integer)
    def call
      pos_sum = 0.0
      neg_sum = 0.0
      neu_count = 0

      @sentiments.each do |sentiment_score|
        if sentiment_score.positive?
          # compensates for neutral words that are counted as 1
          pos_sum += sentiment_score.to_f + 1
        elsif sentiment_score.negative?
          # when used with .abs, compensates for neutrals
          neg_sum += sentiment_score.to_f - 1
        elsif sentiment_score.zero?
          neu_count += 1
        end
      end

      [pos_sum, neg_sum, neu_count]
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Prepares response with semantic score
  class ValenceScoreCalculator
    # Result for a text without any scored token
    DEFAULT_RESPONSE = {
      negative: 0.0,
      neutral: 0.0,
      positive: 0.0,
      compound: 0.0
    }.freeze

    # @param [Array<Numeric>, nil] sentiments Sentiment score of every token
    # @param [String] text Analyzed text, inspected for punctuation emphasis
    def initialize(sentiments, text)
      @sentiments = sentiments
      @text = text
    end

    # @return [Hash] :negative, :neutral and :positive proportions (rounded to
    #   3 digits) and the :compound score (rounded to 4 digits)
    def call
      # An empty list is truthy in Ruby, so it has to be checked explicitly:
      # without scores the proportions would be divided by a zero total.
      return DEFAULT_RESPONSE.dup if @sentiments.nil? || @sentiments.empty?

      sum_s = @sentiments.map(&:to_f).sum
      # compute and add emphasis from punctuation in text
      punct_emph_amplifier = PunctuationEmphasisAmplifier.new(@text).call
      compound = normalize(sum_s, punct_emph_amplifier)

      prepare_response(compound, punct_emph_amplifier)
    end

    private

    # Normalize the score to be between -1 and 1 using an alpha that
    # approximates the max expected value
    def normalize(score, punct_emph_amplifier, alpha = 15)
      score = add_punctuation_emphasis(score, punct_emph_amplifier)
      norm_score = score / Math.sqrt((score * score) + alpha).to_f

      norm_score.clamp(-1.0, 1.0)
    end

    # Moves the score away from zero by the punctuation emphasis;
    # a score of exactly zero is left untouched.
    def add_punctuation_emphasis(sum_s, punct_emph_amplifier)
      if sum_s.positive?
        sum_s += punct_emph_amplifier
      elsif sum_s.negative?
        sum_s -= punct_emph_amplifier
      end

      sum_s
    end

    # Expresses the positive, negative and neutral parts as proportions of
    # their combined magnitude.
    # rubocop:disable Metrics/AbcSize
    def prepare_response(compound, punct_emph_amplifier)
      pos_sum, neg_sum, neu_count = scores(punct_emph_amplifier)
      total = (pos_sum + neg_sum.to_f.abs) + neu_count

      {
        negative: (neg_sum / total.to_f).abs.round(3),
        neutral: (neu_count / total.to_f).abs.round(3),
        positive: (pos_sum / total.to_f).abs.round(3),
        compound: compound.round(4)
      }
    end
    # rubocop:enable Metrics/AbcSize

    # Prepare scores sum for result calculation
    def scores(punct_emph_amplifier)
      # discriminate between positive, negative and neutral sentiment scores
      pos_sum, neg_sum, neu_count = SentimentScoresSifter.new(@sentiments).call

      # The punctuation emphasis strengthens whichever side dominates
      if pos_sum > neg_sum.to_f.abs
        pos_sum += punct_emph_amplifier
      elsif pos_sum < neg_sum.to_f.abs
        neg_sum -= punct_emph_amplifier
      end

      [pos_sum, neg_sum, neu_count]
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Version of the gem
  VERSION = '0.1.0'
end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
module VaderSentimentRuby
  # Helper module for word manipulations to simulate pythons methods behavior
  # word_upcase?(word) is similar to Python's word.isupper()
  # strip_punctuation(word) is similar to Python's word.strip(string.punctuation)
  module WordHelper
    # Characters treated as punctuation by strip_punctuation
    PUNCTUATIONS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

    class << self
      # Checks that string contains at least one letter and all letters are in upcase
      # @param [String] word
      # @return [Boolean]
      #
      # Example
      #   word_upcase?(':D') # => true
      #   word_upcase?(':)') # => false
      def word_upcase?(word)
        word == word.upcase && word.count('A-Za-z').positive?
      end

      # Removes all trailing and leading punctuation
      # If the resulting string has two or fewer characters,
      # then it was likely an emoticon, so return original string
      # (ie ':)' stripped would be '', so just return ':)'
      # @param [String] token
      # @return [String]
      #
      # Example
      #   strip_punctuation("'test'")    # => "test"
      #   strip_punctuation("'don't'")   # => "don't"
      #   strip_punctuation("((hello))") # => "hello"
      #   strip_punctuation(":)")        # => ":)"
      def strip_punctuation(token)
        characters = token.chars
        first_kept = characters.index { |chr| !PUNCTUATIONS.include?(chr) }
        # Nothing but punctuation (or an empty token): keep it as it is
        return token unless first_kept

        last_kept = characters.rindex { |chr| !PUNCTUATIONS.include?(chr) }
        # Punctuation between the outermost kept characters is preserved
        stripped = characters[first_kept..last_kept].join

        stripped.size <= 2 ? token : stripped
      end
    end
  end
end