vader_sentiment_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +51 -0
  4. data/lib/vader_sentiment_ruby.rb +20 -0
  5. data/lib/vader_sentiment_ruby/checker.rb +13 -0
  6. data/lib/vader_sentiment_ruby/checker/but_word_negation_checker.rb +34 -0
  7. data/lib/vader_sentiment_ruby/checker/least_word_negation_checker.rb +38 -0
  8. data/lib/vader_sentiment_ruby/checker/negation_checker.rb +114 -0
  9. data/lib/vader_sentiment_ruby/checker/no_word_checker.rb +49 -0
  10. data/lib/vader_sentiment_ruby/checker/previous_words_influence_checker.rb +55 -0
  11. data/lib/vader_sentiment_ruby/checker/sentiment_laden_idioms_checker.rb +30 -0
  12. data/lib/vader_sentiment_ruby/checker/special_idioms_checker.rb +107 -0
  13. data/lib/vader_sentiment_ruby/constants.rb +135 -0
  14. data/lib/vader_sentiment_ruby/data/emoji_utf8_lexicon.txt +3570 -0
  15. data/lib/vader_sentiment_ruby/data/vader_lexicon.txt +7518 -0
  16. data/lib/vader_sentiment_ruby/emojis_describer.rb +39 -0
  17. data/lib/vader_sentiment_ruby/emojis_dictionary_creator.rb +21 -0
  18. data/lib/vader_sentiment_ruby/lexicon_dictionary_creator.rb +21 -0
  19. data/lib/vader_sentiment_ruby/punctuation_emphasis_amplifier.rb +36 -0
  20. data/lib/vader_sentiment_ruby/sentiment_intensity_analyzer.rb +105 -0
  21. data/lib/vader_sentiment_ruby/sentiment_properties_identifier.rb +48 -0
  22. data/lib/vader_sentiment_ruby/sentiment_scores_sifter.rb +27 -0
  23. data/lib/vader_sentiment_ruby/valence_score_calculator.rb +82 -0
  24. data/lib/vader_sentiment_ruby/version.rb +5 -0
  25. data/lib/vader_sentiment_ruby/word_helper.rb +93 -0
  26. metadata +156 -0
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
# Replaces every emoji character of a text with its textual description.
class EmojiDescriber
  # @param [String] text Text that may contain emoji characters
  #   (anything else is converted with #to_s)
  # @param [Hash] emojis Dictionary mapping an emoji character to its description
  def initialize(text, emojis)
    @text = text.to_s
    @emojis = emojis
  end

  # Walks the text character by character and substitutes each character that
  # is a key of the emoji dictionary with its description. A single space is
  # inserted before a description unless the preceding character was a space
  # (or the description starts the text).
  #
  # The result is built in local variables, so calling this method several
  # times on the same instance always yields the same string.
  # @return [String] Text with emojis replaced by their descriptions
  def call
    described = +''    # unary plus: mutable buffer even under frozen_string_literal
    prev_space = true  # start of text counts as "after a space": no leading separator

    @text.each_char do |chr|
      if @emojis.key?(chr)
        described << ' ' unless prev_space
        described << @emojis[chr].to_s
        prev_space = false
      else
        described << chr
        prev_space = chr == ' '
      end
    end

    described
  end
end
39
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
# Converts the emoji lexicon file to a dictionary
class EmojisDictionaryCreator
  # Reads a tab-separated lexicon file and maps the first column (the emoji)
  # to the second column (its description). Further columns are ignored and
  # blank lines are skipped.
  #
  # The file is read line by line as UTF-8 regardless of the process locale
  # and is closed as soon as the iteration finishes.
  # @param [String] path Lexicon file to parse; defaults to the lexicon
  #   shipped next to this source file
  # @return [Hash] Emoji character => description
  def call(path = "#{__dir__}/data/emoji_utf8_lexicon.txt")
    File.foreach(path, encoding: 'UTF-8').each_with_object({}) do |line, emoji_dict|
      emoji, description = line.strip.split("\t")
      # A blank line yields no columns at all; do not store a nil key for it.
      next if emoji.nil? || emoji.empty?

      emoji_dict[emoji] = description
    end
  end
end
21
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
# Converts the lexicon file to a dictionary
class LexiconDictionaryCreator
  # Reads a tab-separated lexicon file and maps the first column (the word or
  # emoticon) to the second column converted with #to_f (its valence measure).
  # Further columns are ignored and blank lines are skipped.
  #
  # The file is read line by line as UTF-8 and is closed as soon as the
  # iteration finishes.
  # @param [String] path Lexicon file to parse; defaults to the lexicon
  #   shipped next to this source file
  # @return [Hash] Word => Float measure
  def call(path = "#{__dir__}/data/vader_lexicon.txt")
    File.foreach(path, encoding: 'UTF-8').each_with_object({}) do |line, lex_dict|
      word, measure = line.strip.split("\t")
      # A blank line yields no columns at all; do not store a nil key for it.
      next if word.nil? || word.empty?

      lex_dict[word] = measure.to_f
    end
  end
end
21
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
# Computes the extra emphasis contributed by exclamation points and question marks
class PunctuationEmphasisAmplifier
  # Empirically derived mean sentiment intensity rating increase per exclamation point
  EXCLAMATION_INCREMENT = 0.292
  # Exclamation points beyond this number add no further emphasis
  EXCLAMATION_LIMIT = 4
  # Empirically derived mean sentiment intensity rating increase per question mark
  QUESTION_INCREMENT = 0.18
  # Emphasis used once the text holds more than three question marks
  QUESTION_CEILING = 0.96

  # @param [String] text
  def initialize(text)
    @text = text
  end

  # @return [Float] Sum of the exclamation point and question mark emphasis
  def call
    amplify_exclamation_points + amplify_question_marks
  end

  # Emphasis from exclamation points; only the first four are taken into account.
  # @return [Float]
  def amplify_exclamation_points
    counted = [@text.count('!'), EXCLAMATION_LIMIT].min
    counted * EXCLAMATION_INCREMENT
  end

  # Emphasis from question marks: none for fewer than two, proportional for
  # two or three, a fixed ceiling beyond that.
  # @return [Float]
  def amplify_question_marks
    marks = @text.count('?')

    if marks <= 1
      0.0
    elsif marks <= 3
      marks * QUESTION_INCREMENT
    else
      QUESTION_CEILING
    end
  end
end
36
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
# Returns a sentiment intensity score for sentences.
class SentimentIntensityAnalyzer
  # Loads the word lexicon and the emoji dictionary once per analyzer instance.
  def initialize
    @lexicon = LexiconDictionaryCreator.new.call
    @emojis = EmojisDictionaryCreator.new.call
  end

  # Scores the sentiment of the input text: emojis are replaced by their
  # descriptions, every token gets a valence, the valences are adjusted by
  # the "but" checker and finally turned into the response by
  # ValenceScoreCalculator.
  # @param [String] text Text to analyze
  # @return [Hash] Hash of sentiments for analyzed text, as produced by
  #   ValenceScoreCalculator#call
  def polarity_scores(text)
    text = EmojiDescriber.new(text, @emojis).call
    senti_text = SentimentPropertiesIdentifier.new(text)
    words_and_emoticons = senti_text.words_and_emoticons

    sentiments = words_and_emoticons.each_with_index.map do |item, index|
      prepare_valence(item, index, words_and_emoticons, senti_text)
    end

    sentiments = Checker::ButWordNegationChecker.new(words_and_emoticons, sentiments).call

    ValenceScoreCalculator.new(sentiments, text).call
  end

  private

  # Valence of a single token; 0 for tokens that carry no sentiment on their own.
  def prepare_valence(item, index, words_and_emoticons, senti_text)
    valence = 0
    item_lowercase = item.downcase

    # Check for vader_lexicon words that may be used as modifiers or negations
    return valence if Constants::BOOSTER_DICT.key?(item_lowercase)

    # "kind of" gets no valence for "kind"; the safe navigation covers the
    # last token, which has no successor.
    return valence if item_lowercase == 'kind' && words_and_emoticons[index + 1]&.downcase == 'of'

    sentiment_valence(valence, senti_text, item, index)
  end

  # Returns the given valence unchanged unless the token is a lexicon word.
  def sentiment_valence(valence, senti_text, item, index)
    item_lowercase = item.downcase
    return valence unless @lexicon.key?(item_lowercase)

    calculate_valence_for_word_in_lexicon(item, item_lowercase, index, senti_text)
  end

  def calculate_valence_for_word_in_lexicon(item, item_lowercase, index, senti_text)
    is_cap_diff = senti_text.is_cap_diff
    words_and_emoticons = senti_text.words_and_emoticons

    valence = @lexicon[item_lowercase] # get the sentiment valence
    valence = Checker::NoWordChecker.new(valence, item_lowercase, index, words_and_emoticons, @lexicon).call
    # Check if sentiment laden word is in ALL CAPS (while others aren't)
    valence = apply_intensity_rating(valence) if WordHelper.word_upcase?(item) && is_cap_diff
    valence = modify_valence_by_scalar(valence, index, words_and_emoticons, is_cap_diff)
    Checker::LeastWordNegationChecker.new(valence, words_and_emoticons, index, @lexicon).call
  end

  # Pushes the valence further away from zero by Constants::C_INCR.
  def apply_intensity_rating(valence)
    return valence + Constants::C_INCR if valence.positive?

    valence - Constants::C_INCR
  end

  # Dampen the scalar modifier of preceding words and emoticons
  # (excluding the ones that immediately precede the item) based
  # on their distance from the current item.
  def modify_valence_by_scalar(valence, index, words_and_emoticons, is_cap_diff)
    (0..2).each do |start_index|
      # Not enough preceding tokens for this look-back distance.
      next unless index > start_index
      # Preceding tokens that are lexicon words themselves do not act as modifiers.
      next if @lexicon.key?(words_and_emoticons[index - (start_index + 1)].downcase)

      valence = apply_scalar(valence, words_and_emoticons, index, start_index, is_cap_diff)
      valence = Checker::NegationChecker.new(valence, words_and_emoticons, start_index, index).call
      valence = Checker::SpecialIdiomsChecker.new(valence, words_and_emoticons, index).call if start_index == 2
    end

    valence
  end

  def apply_scalar(valence, words_and_emoticons, index, start_index, is_cap_diff)
    previous_word = words_and_emoticons[index - (start_index + 1)]
    scalar = Checker::PreviousWordsInfluenceChecker.new(previous_word, valence, is_cap_diff).call
    valence + adjust_scalar(scalar, start_index)
  end

  # Scales a non-zero scalar down for look-back distances 1 (x0.95) and 2 (x0.9).
  def adjust_scalar(scalar, start_index)
    return scalar if scalar.zero?

    scalar *= 0.95 if start_index == 1
    scalar *= 0.9 if start_index == 2
    scalar
  end
end
105
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
# Identifies the string-level properties of the input text that matter for sentiment.
class SentimentPropertiesIdentifier
  attr_reader :is_cap_diff, :words_and_emoticons

  # @param [String] text Non-String input is converted with #to_s and
  #   encoded as UTF-8
  def initialize(text)
    @text = text.is_a?(String) ? text : text.to_s.encode('utf-8')
    @words_and_emoticons = prepare_words_and_emoticons
    @is_cap_diff = all_cap_differential?(@words_and_emoticons)
  end

  private

  # Splits the text on whitespace and passes every token through
  # WordHelper.strip_punctuation.
  # @return [Array]
  def prepare_words_and_emoticons
    tokens = @text.split
    tokens.map { |token| WordHelper.strip_punctuation(token) }
  end

  # Tells whether some, but not all, of the given words are ALL CAPS.
  # @param [Array] words
  # @return [Boolean]
  def all_cap_differential?(words)
    total = words.size
    not_all_caps = total - words.count { |word| WordHelper.word_upcase?(word) }

    not_all_caps.positive? && not_all_caps < total
  end
end
48
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
# Separates positive versus negative sentiment scores
class SentimentScoresSifter
  # @param [Array<Numeric>] sentiments Valence of every token
  def initialize(sentiments)
    @sentiments = sentiments
  end

  # Sums positive and negative scores separately and counts the neutral ones.
  # The totals are local to each invocation, so calling this method again on
  # the same instance returns the same result.
  # @return [Array] [positive sum (Float), negative sum (Float), neutral count (Integer)]
  def call
    pos_sum = 0.0
    neg_sum = 0.0
    neu_count = 0

    @sentiments.each do |sentiment_score|
      if sentiment_score.positive?
        # compensates for neutral words that are counted as 1
        pos_sum += sentiment_score.to_f + 1
      elsif sentiment_score.negative?
        # when used with .abs, compensates for neutrals
        neg_sum += sentiment_score.to_f - 1
      elsif sentiment_score.zero?
        neu_count += 1
      end
    end

    [pos_sum, neg_sum, neu_count]
  end
end
27
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
# Prepares response with semantic score
class ValenceScoreCalculator
  DEFAULT_RESPONSE = {
    negative: 0.0,
    neutral: 0.0,
    positive: 0.0,
    compound: 0.0
  }.freeze

  # @param [Array<Numeric>, nil] sentiments Valence of every token
  # @param [String] text Analyzed text, used for punctuation emphasis
  def initialize(sentiments, text)
    @sentiments = sentiments
    @text = text
  end

  # @return [Hash] :negative, :neutral and :positive ratios (rounded to 3
  #   digits) and the normalized :compound score (rounded to 4 digits)
  def call
    # Without any sentiment the ratios below would be 0.0 / 0.0 (NaN), so an
    # empty list is answered like a missing one. A copy is returned so that
    # callers always receive a hash they may modify.
    return DEFAULT_RESPONSE.dup if @sentiments.nil? || @sentiments.empty?

    sum_s = @sentiments.map(&:to_f).sum
    # compute and add emphasis from punctuation in text
    punct_emph_amplifier = PunctuationEmphasisAmplifier.new(@text).call
    compound = normalize(sum_s, punct_emph_amplifier)

    prepare_response(compound, punct_emph_amplifier)
  end

  private

  # Normalize the score to be between -1 and 1 using an alpha that
  # approximates the max expected value
  def normalize(score, punct_emph_amplifier, alpha = 15)
    score = add_punctuation_emphasis(score, punct_emph_amplifier)
    norm_score = score / Math.sqrt((score * score) + alpha).to_f

    return -1.0 if norm_score < -1.0
    return 1.0 if norm_score > 1.0

    norm_score
  end

  # Moves a non-zero sum further away from zero by the punctuation emphasis.
  def add_punctuation_emphasis(sum_s, punct_emph_amplifier)
    return sum_s + punct_emph_amplifier if sum_s.positive?
    return sum_s - punct_emph_amplifier if sum_s.negative?

    sum_s
  end

  def prepare_response(compound, punct_emph_amplifier)
    pos_sum, neg_sum, neu_count = scores(punct_emph_amplifier)
    total = ((pos_sum + neg_sum.to_f.abs) + neu_count).to_f

    {
      negative: ratio(neg_sum, total),
      neutral: ratio(neu_count, total),
      positive: ratio(pos_sum, total),
      compound: compound.round(4)
    }
  end

  # Absolute share of one part in the total, rounded to 3 digits.
  def ratio(part, total)
    (part / total).abs.round(3)
  end

  # Prepare scores sum for result calculation
  def scores(punct_emph_amplifier)
    # discriminate between positive, negative and neutral sentiment scores
    pos_sum, neg_sum, neu_count = SentimentScoresSifter.new(@sentiments).call

    # The punctuation emphasis strengthens whichever side already dominates.
    if pos_sum > neg_sum.to_f.abs
      pos_sum += punct_emph_amplifier
    elsif pos_sum < neg_sum.to_f.abs
      neg_sum -= punct_emph_amplifier
    end

    [pos_sum, neg_sum, neu_count]
  end
end
82
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
+ VERSION = '0.1.0' # version string of the vader_sentiment_ruby gem
5
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ module VaderSentimentRuby
4
# Helper module for word manipulations to simulate pythons methods behavior
# word_upcase?(word) is similar to Python's word.isupper()
# strip_punctuation(word) is similar to Python's word.strip(string.punctuation)
module WordHelper
  PUNCTUATIONS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

  class << self
    # Checks that string contains at least one letter and all letters are in upcase
    # @param [String] word
    # @return [Boolean]
    #
    # Example
    #   word_upcase?(':D') # => true
    #   word_upcase?(':)') # => false
    def word_upcase?(word)
      word == word.upcase && word.count('A-Za-z').positive?
    end

    # Removes all trailing and leading punctuation
    # If the resulting string has two or fewer characters,
    # then it was likely an emoticon, so return original string
    # (ie ':)' stripped would be '', so just return ':)'
    #
    # The edges are located by scanning for the first and the last character
    # that is not punctuation; punctuation inside the token is kept.
    # @param [String] token
    # @return [String]
    #
    # Example
    #   strip_punctuation("'test'")   # => "test"
    #   strip_punctuation("'don't'")  # => "don't"
    #   strip_punctuation("***wow!!") # => "wow"
    #   strip_punctuation(":)")       # => ":)"
    def strip_punctuation(token)
      chars = token.chars
      first = chars.index { |chr| !punctuation?(chr) }
      # Nothing but punctuation (or an empty token): keep it as it is.
      return token if first.nil?

      last = chars.rindex { |chr| !punctuation?(chr) }
      stripped = chars[first..last].join

      return token if stripped.size <= 2

      stripped
    end

    private

    # @param [String] chr Single character
    # @return [Boolean] Whether the character is listed in PUNCTUATIONS
    def punctuation?(chr)
      PUNCTUATIONS.include?(chr)
    end
  end
end
93
+ end