vader_sentiment_ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +51 -0
- data/lib/vader_sentiment_ruby.rb +20 -0
- data/lib/vader_sentiment_ruby/checker.rb +13 -0
- data/lib/vader_sentiment_ruby/checker/but_word_negation_checker.rb +34 -0
- data/lib/vader_sentiment_ruby/checker/least_word_negation_checker.rb +38 -0
- data/lib/vader_sentiment_ruby/checker/negation_checker.rb +114 -0
- data/lib/vader_sentiment_ruby/checker/no_word_checker.rb +49 -0
- data/lib/vader_sentiment_ruby/checker/previous_words_influence_checker.rb +55 -0
- data/lib/vader_sentiment_ruby/checker/sentiment_laden_idioms_checker.rb +30 -0
- data/lib/vader_sentiment_ruby/checker/special_idioms_checker.rb +107 -0
- data/lib/vader_sentiment_ruby/constants.rb +135 -0
- data/lib/vader_sentiment_ruby/data/emoji_utf8_lexicon.txt +3570 -0
- data/lib/vader_sentiment_ruby/data/vader_lexicon.txt +7518 -0
- data/lib/vader_sentiment_ruby/emojis_describer.rb +39 -0
- data/lib/vader_sentiment_ruby/emojis_dictionary_creator.rb +21 -0
- data/lib/vader_sentiment_ruby/lexicon_dictionary_creator.rb +21 -0
- data/lib/vader_sentiment_ruby/punctuation_emphasis_amplifier.rb +36 -0
- data/lib/vader_sentiment_ruby/sentiment_intensity_analyzer.rb +105 -0
- data/lib/vader_sentiment_ruby/sentiment_properties_identifier.rb +48 -0
- data/lib/vader_sentiment_ruby/sentiment_scores_sifter.rb +27 -0
- data/lib/vader_sentiment_ruby/valence_score_calculator.rb +82 -0
- data/lib/vader_sentiment_ruby/version.rb +5 -0
- data/lib/vader_sentiment_ruby/word_helper.rb +93 -0
- metadata +156 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VaderSentimentRuby
|
4
|
+
# Replaces emoji char with its description
|
5
|
+
class EmojiDescriber
  # @param [String] text Text that may contain emoji characters.
  #   Non-string input is converted with +to_s+ (so +nil+ becomes '').
  # @param [Hash] emojis Mapping of a single emoji character to its description
  def initialize(text, emojis)
    @text = text.to_s
    @emojis = emojis
  end

  # Returns a copy of the text where every character that is a key of the
  # emoji mapping is replaced by its description. A single space is inserted
  # before a description unless the previous character was a space (or the
  # description starts the text). Nothing is appended after a description.
  #
  # The result is built in local state, so calling this method repeatedly
  # on the same instance always returns the same value.
  # @return [String]
  def call
    described = +'' # unary plus: mutable buffer even under frozen_string_literal
    prev_space = true

    @text.each_char do |chr|
      if @emojis.key?(chr)
        described << ' ' unless prev_space
        described << @emojis[chr]
        prev_space = false
      else
        described << chr
        prev_space = chr == ' '
      end
    end

    described
  end
end
|
39
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VaderSentimentRuby
|
4
|
+
# Converts emoji lexicon file to a dictionary
|
5
|
+
class EmojisDictionaryCreator
  # Emoji lexicon file shipped next to this source file.
  LEXICON_PATH = "#{__dir__}/data/emoji_utf8_lexicon.txt"

  # Parses a tab-separated emoji lexicon into a Hash.
  # The first column of each line becomes the key and the second column the
  # value; further columns are ignored. Blank lines are skipped.
  #
  # The file is read line by line as UTF-8 (independent of the process
  # locale) and is closed as soon as reading finishes.
  # @param [String] path Lexicon file to read; defaults to the bundled file
  # @return [Hash] emoji character => description
  def call(path = LEXICON_PATH)
    emoji_dict = {}

    File.foreach(path, encoding: 'UTF-8') do |line|
      emoji, description = line.strip.split("\t")
      next if emoji.nil? # blank (or whitespace-only) line

      emoji_dict[emoji] = description
    end

    emoji_dict
  end
end
|
21
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VaderSentimentRuby
|
4
|
+
# Converts lexicon file to a dictionary
|
5
|
+
class LexiconDictionaryCreator
  # Lexicon file shipped next to this source file.
  LEXICON_PATH = "#{__dir__}/data/vader_lexicon.txt"

  # Parses a tab-separated lexicon into a Hash.
  # The first column of each line becomes the key and the second column,
  # converted with +to_f+, becomes the value; further columns are ignored.
  # Blank lines are skipped.
  #
  # The file is read line by line as UTF-8 (independent of the process
  # locale) and is closed as soon as reading finishes.
  # @param [String] path Lexicon file to read; defaults to the bundled file
  # @return [Hash] word => Float measure
  def call(path = LEXICON_PATH)
    lex_dict = {}

    File.foreach(path, encoding: 'UTF-8') do |line|
      word, measure = line.strip.split("\t")
      next if word.nil? # blank (or whitespace-only) line

      lex_dict[word] = measure.to_f
    end

    lex_dict
  end
end
|
21
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VaderSentimentRuby
|
4
|
+
# Adds emphasis from exclamation points and question marks
|
5
|
+
class PunctuationEmphasisAmplifier
  # @param [String] text
  def initialize(text)
    @text = text
  end

  # Total emphasis contributed by exclamation points and question marks.
  # @return [Float]
  def call
    amplify_exclamation_points + amplify_question_marks
  end

  # Emphasis from exclamation points; only the first 4 are taken into account.
  # @return [Float]
  def amplify_exclamation_points
    ep_count = [@text.count('!'), 4].min

    # empirically derived mean sentiment intensity rating increase for exclamation points
    ep_count * 0.292
  end

  # Emphasis from question marks: none for fewer than 2, proportional for
  # 2 or 3, and a fixed value for more than 3.
  # @return [Float]
  def amplify_question_marks
    qm_count = @text.count('?')
    return 0.0 if qm_count < 2

    # empirically derived mean sentiment intensity rating increase for question marks
    return qm_count * 0.18 if qm_count <= 3

    0.96
  end
end
|
36
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VaderSentimentRuby
|
4
|
+
# Returns a sentiment intensity score for sentences.
|
5
|
+
class SentimentIntensityAnalyzer
  # Loads the word lexicon and the emoji dictionary once per analyzer.
  def initialize
    @lexicon = LexiconDictionaryCreator.new.call
    @emojis = EmojisDictionaryCreator.new.call
  end

  # Computes sentiment scores for the input text.
  # Emoji are first replaced by their descriptions, each token gets a
  # valence, the "but" negation check is applied to the whole list, and the
  # final scores are produced by ValenceScoreCalculator.
  # @param [String] text Text to analyze
  # @return [Hash] Hash of sentiments for analyzed text
  def polarity_scores(text)
    text = EmojiDescriber.new(text, @emojis).call
    senti_text = SentimentPropertiesIdentifier.new(text)
    words_and_emoticons = senti_text.words_and_emoticons

    sentiments = words_and_emoticons.each_with_index.map do |item, index|
      prepare_valence(item, index, words_and_emoticons, senti_text)
    end

    sentiments = Checker::ButWordNegationChecker.new(words_and_emoticons, sentiments).call

    ValenceScoreCalculator.new(sentiments, text).call
  end

  private

  # Valence of a single token. Booster words and the "kind of" phrase are
  # scored 0 here; they may act as modifiers or negations of other words.
  def prepare_valence(item, index, words_and_emoticons, senti_text)
    valence = 0
    item_lowercase = item.downcase

    # Check for vader_lexicon words that may be used as modifiers or negations
    return valence if Constants::BOOSTER_DICT.key?(item_lowercase)

    if index < words_and_emoticons.size - 1 &&
       item_lowercase == 'kind' &&
       words_and_emoticons[index + 1].downcase == 'of'
      return valence
    end

    sentiment_valence(valence, senti_text, item, index)
  end

  # Returns the given valence unchanged when the token is not in the
  # lexicon, otherwise the fully adjusted lexicon valence.
  def sentiment_valence(valence, senti_text, item, index)
    item_lowercase = item.downcase
    return valence unless @lexicon.key?(item_lowercase)

    calculate_valence_for_word_in_lexicon(item, item_lowercase, index, senti_text)
  end

  def calculate_valence_for_word_in_lexicon(item, item_lowercase, index, senti_text)
    is_cap_diff = senti_text.is_cap_diff
    words_and_emoticons = senti_text.words_and_emoticons

    valence = @lexicon[item_lowercase] # get the sentiment valence
    valence = Checker::NoWordChecker.new(valence, item_lowercase, index, words_and_emoticons, @lexicon).call
    # Check if sentiment laden word is in ALL CAPS (while others aren't)
    valence = apply_intensity_rating(valence) if WordHelper.word_upcase?(item) && is_cap_diff
    valence = modify_valence_by_scalar(valence, index, words_and_emoticons, is_cap_diff)
    Checker::LeastWordNegationChecker.new(valence, words_and_emoticons, index, @lexicon).call
  end

  # Pushes the valence further away from zero by Constants::C_INCR
  # (non-positive values are decreased).
  def apply_intensity_rating(valence)
    return valence + Constants::C_INCR if valence.positive?

    valence - Constants::C_INCR
  end

  # Dampen the scalar modifier of preceding words and emoticons
  # (excluding the ones that immediately precede the item) based
  # on their distance from the current item.
  # Looks at up to three preceding tokens; a preceding token that is itself
  # in the lexicon is skipped.
  def modify_valence_by_scalar(valence, index, words_and_emoticons, is_cap_diff)
    (0..2).each do |start_index|
      next unless index > start_index
      next if @lexicon.key?(words_and_emoticons[index - (start_index + 1)].downcase)

      valence = apply_scalar(valence, words_and_emoticons, index, start_index, is_cap_diff)
      valence = Checker::NegationChecker.new(valence, words_and_emoticons, start_index, index).call
      valence = Checker::SpecialIdiomsChecker.new(valence, words_and_emoticons, index).call if start_index == 2
    end

    valence
  end

  def apply_scalar(valence, words_and_emoticons, index, start_index, is_cap_diff)
    previous_word = words_and_emoticons[index - (start_index + 1)]
    scalar = Checker::PreviousWordsInfluenceChecker.new(previous_word, valence, is_cap_diff).call
    valence + adjust_scalar(scalar, start_index)
  end

  # Scales a non-zero scalar by 0.95 for the second preceding token
  # (start_index 1) and by 0.9 for the third one (start_index 2).
  def adjust_scalar(scalar, start_index)
    return scalar if scalar.zero?

    scalar *= 0.95 if start_index == 1
    scalar *= 0.9 if start_index == 2
    scalar
  end
end
|
105
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VaderSentimentRuby
|
4
|
+
# Identify sentiment-relevant string-level properties of input text.
|
5
|
+
class SentimentPropertiesIdentifier
  attr_reader :is_cap_diff, :words_and_emoticons

  # @param [String] text Non-string input is converted with +to_s+
  def initialize(text)
    text = text.to_s.encode('utf-8') unless text.is_a? String
    @text = text
    @words_and_emoticons = prepare_words_and_emoticons
    @is_cap_diff = all_cap_differential?(@words_and_emoticons)
  end

  private

  # Splits the text on whitespace and strips punctuation from each token
  # via WordHelper.strip_punctuation.
  # Removes leading and trailing punctuation
  # Leaves contractions and most emoticons
  # Does not preserve punc-plus-letter emoticons (e.g. :D)
  # Doesn't separate words from adjacent punctuation (keeps emoticons & contractions)
  # @return [Array]
  def prepare_words_and_emoticons
    @text.split.map { |word| WordHelper.strip_punctuation(word) }
  end

  # Check whether just some words in the input are ALL CAPS.
  # Returns true if some but not all items in `words` are ALL CAPS
  # (so it is false for an empty list).
  # @param [Array] words
  # @return [Boolean]
  def all_cap_differential?(words)
    all_cap_words = words.count { |word| WordHelper.word_upcase?(word) }

    all_cap_words.positive? && all_cap_words < words.size
  end
end
|
48
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VaderSentimentRuby
|
4
|
+
# Separates positive versus negative sentiment scores
|
5
|
+
class SentimentScoresSifter
  # @param [Array<Numeric>] sentiments Per-token sentiment scores
  def initialize(sentiments)
    @sentiments = sentiments
  end

  # Sums positive and negative scores separately and counts zero scores.
  # The sums are kept in local variables, so calling this method repeatedly
  # on the same instance always returns the same result.
  # @return [Array] [positive sum (Float), negative sum (Float), neutral count (Integer)]
  def call
    pos_sum = 0.0
    neg_sum = 0.0
    neu_count = 0

    @sentiments.each do |sentiment_score|
      if sentiment_score.positive?
        # compensates for neutral words that are counted as 1
        pos_sum += sentiment_score.to_f + 1
      elsif sentiment_score.negative?
        # when used with .abs, compensates for neutrals
        neg_sum += sentiment_score.to_f - 1
      elsif sentiment_score.zero?
        neu_count += 1
      end
    end

    [pos_sum, neg_sum, neu_count]
  end
end
|
27
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VaderSentimentRuby
|
4
|
+
# Prepares response with semantic score
|
5
|
+
class ValenceScoreCalculator
  DEFAULT_RESPONSE = {
    negative: 0.0,
    neutral: 0.0,
    positive: 0.0,
    compound: 0.0
  }.freeze

  # @param [Array<Numeric>, nil] sentiments Per-token sentiment scores
  # @param [String] text Analyzed text, used for punctuation emphasis
  def initialize(sentiments, text)
    @sentiments = sentiments
    @text = text
  end

  # Builds the final score hash.
  # Returns all-zero scores when there are no sentiments (nil or empty
  # list). An empty Array is truthy in Ruby, so it has to be checked
  # explicitly; otherwise the proportions below are computed as 0 / 0.0
  # and come out as NaN.
  # @return [Hash] :negative, :neutral, :positive and :compound scores
  def call
    return DEFAULT_RESPONSE.dup if @sentiments.nil? || @sentiments.empty?

    sum_s = @sentiments.map(&:to_f).sum
    # compute and add emphasis from punctuation in text
    punct_emph_amplifier = PunctuationEmphasisAmplifier.new(@text).call
    compound = normalize(sum_s, punct_emph_amplifier)

    prepare_response(compound, punct_emph_amplifier)
  end

  private

  # Normalize the score to be between -1 and 1 using an alpha that
  # approximates the max expected value
  # Move to Sentiment analyzer
  def normalize(score, punct_emph_amplifier, alpha = 15)
    score = add_punctuation_emphasis(score, punct_emph_amplifier)
    norm_score = score / Math.sqrt((score * score) + alpha).to_f

    return -1.0 if norm_score < -1.0
    return 1.0 if norm_score > 1.0

    norm_score
  end

  # Moves a non-zero sum further away from zero by the punctuation emphasis.
  def add_punctuation_emphasis(sum_s, punct_emph_amplifier)
    if sum_s.positive?
      sum_s += punct_emph_amplifier
    elsif sum_s.negative?
      sum_s -= punct_emph_amplifier
    end

    sum_s
  end

  # rubocop:disable Metrics/AbcSize
  # Turns the sifted sums into proportions of their combined magnitude
  # (rounded to 3 digits) and adds the compound score (rounded to 4 digits).
  def prepare_response(compound, punct_emph_amplifier)
    pos_sum, neg_sum, neu_count = scores(punct_emph_amplifier)
    total = (pos_sum + neg_sum.to_f.abs) + neu_count

    {
      negative: (neg_sum / total.to_f).abs.round(3),
      neutral: (neu_count / total.to_f).abs.round(3),
      positive: (pos_sum / total.to_f).abs.round(3),
      compound: compound.round(4)
    }
  end
  # rubocop:enable Metrics/AbcSize

  # Prepare scores sum for result calculation.
  # The punctuation emphasis is added to whichever side (positive or
  # negative) has the larger magnitude; on a tie neither side is changed.
  def scores(punct_emph_amplifier)
    # discriminate between positive, negative and neutral sentiment scores
    pos_sum, neg_sum, neu_count = SentimentScoresSifter.new(@sentiments).call

    if pos_sum > neg_sum.to_f.abs
      pos_sum += punct_emph_amplifier
    elsif pos_sum < neg_sum.to_f.abs
      neg_sum -= punct_emph_amplifier
    end

    [pos_sum, neg_sum, neu_count]
  end
end
|
82
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VaderSentimentRuby
|
4
|
+
# Helper module for word manipulations that mimics the behavior of Python's string methods
|
5
|
+
# word_upcase?(word) is similar to Python's word.isupper()
|
6
|
+
# strip_punctuation(word) is similar to Python's word.strip(string.punctuation)
|
7
|
+
module WordHelper
  PUNCTUATIONS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

  class << self
    # Checks that string contains at least one letter and all letters are in upcase
    # @param [String] word
    # @return [Boolean]
    #
    # Example
    #   word_upcase?(':D') # => true
    #   word_upcase?(':)') # => false
    def word_upcase?(word)
      word == word.upcase && word.count('A-Za-z').positive?
    end

    # Removes all trailing and leading punctuation
    # If the resulting string has two or fewer characters,
    # then it was likely an emoticon, so return original string
    # (ie ':)' stripped would be '', so just return ':)'
    #
    # Only characters at the edges of the token are removed; punctuation
    # inside the token (such as the apostrophe of a contraction) is kept.
    # @param [String] token
    # @return [String]
    #
    # Example
    #   strip_punctuation("'test'") # => "test"
    #   strip_punctuation("'don't'") # => "don't"
    #   strip_punctuation(":)") # => ":)"
    def strip_punctuation(token)
      chars = token.chars
      first = chars.index { |chr| !punctuation?(chr) }
      return token if first.nil? # empty or punctuation only

      last = chars.rindex { |chr| !punctuation?(chr) }
      stripped = chars[first..last].join

      return token if stripped.size <= 2

      stripped
    end

    private

    # True when the single character is listed in PUNCTUATIONS.
    def punctuation?(chr)
      PUNCTUATIONS.include?(chr)
    end
  end
end
|
93
|
+
end
|