vader_sentiment_ruby 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/vader_sentiment_ruby/emojis_describer.rb +14 -12
- data/lib/vader_sentiment_ruby/punctuation_emphasis_amplifier.rb +21 -15
- data/lib/vader_sentiment_ruby/sentiment_properties_identifier.rb +5 -16
- data/lib/vader_sentiment_ruby/sentiment_scores_sifter.rb +4 -0
- data/lib/vader_sentiment_ruby/valence_score_calculator.rb +10 -12
- data/lib/vader_sentiment_ruby/version.rb +1 -1
- data/lib/vader_sentiment_ruby/word_helper.rb +14 -47
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b529805b4d4db7386700ff49cd122451590c19100e1c368925b3fa221a81c1a
|
4
|
+
data.tar.gz: 16dbb6928fa25b59b86acfc8a6272114bc751fff8ae2b02545d4af7539b6d0a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a6e0dd37f24f9c78ca95756c3a304b534e0c9a816c3a5b14b1f40142e59c868bc7965488904f8a041bfebbfab3f8451d32c59214c51abf3927e5df4676ddae9
|
7
|
+
data.tar.gz: 382dfe51190b6efbaa7b517ea454db85d41c0cbac784a887ce325815c8984d11604b13bb93e0b856ef41cbed865442257f4c85ee59ee2422e596c8167917e3ea
|
@@ -1,21 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module VaderSentimentRuby
|
4
|
-
# Replaces emoji
|
4
|
+
# Replaces emoji characters with their descriptions
|
5
5
|
class EmojiDescriber
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
# @param [String] text Original text
|
7
|
+
# @param [Hash] emoji_dictionary Emoji dictionary with emojis as keys
|
8
|
+
def initialize(text, emoji_dictionary)
|
9
|
+
@text_array = text.split('')
|
10
|
+
@emoji_dictionary = emoji_dictionary
|
9
11
|
@text_no_emoji = ''
|
10
12
|
@prev_space = true
|
11
13
|
end
|
12
14
|
|
15
|
+
# @return [String] Text with emojis replaced with descriptions
|
13
16
|
def call
|
14
|
-
@
|
15
|
-
if @
|
16
|
-
|
17
|
+
@text_array.each do |character|
|
18
|
+
if @emoji_dictionary.keys.include?(character)
|
19
|
+
replace_emoji_with_description(character)
|
17
20
|
else
|
18
|
-
|
21
|
+
handle_simple_character(character)
|
19
22
|
end
|
20
23
|
end
|
21
24
|
|
@@ -24,14 +27,13 @@ module VaderSentimentRuby
|
|
24
27
|
|
25
28
|
private
|
26
29
|
|
27
|
-
def
|
28
|
-
description = @emojis[emoji]
|
30
|
+
def replace_emoji_with_description(emoji)
|
29
31
|
@text_no_emoji += ' ' unless @prev_space
|
30
|
-
@text_no_emoji +=
|
32
|
+
@text_no_emoji += @emoji_dictionary[emoji]
|
31
33
|
@prev_space = false
|
32
34
|
end
|
33
35
|
|
34
|
-
def
|
36
|
+
def handle_simple_character(character)
|
35
37
|
@text_no_emoji += character
|
36
38
|
@prev_space = character == ' '
|
37
39
|
end
|
@@ -1,36 +1,42 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module VaderSentimentRuby
|
4
|
-
# Adds emphasis from exclamation points and question marks
|
4
|
+
# Adds emphasis factor from exclamation points and question marks
|
5
5
|
class PunctuationEmphasisAmplifier
|
6
|
+
EXCLAMATION_MARK = '!'
|
7
|
+
QUESTION_MARK = '?'
|
8
|
+
# Empirically derived mean sentiment intensity rating increases for exclamation points and question marks
|
9
|
+
EXCLAMATION_MARK_RATING_INCREASE = 0.292
|
10
|
+
QUESTION_MARK_RATING_INCREASE = 0.18
|
11
|
+
|
6
12
|
# @param [String] text
|
7
13
|
def initialize(text)
|
8
|
-
@
|
14
|
+
@text_array = text.split('')
|
9
15
|
end
|
10
16
|
|
11
|
-
# @return [Float]
|
17
|
+
# @return [Float, Integer] Emphasis factor
|
12
18
|
def call
|
13
|
-
amplify_exclamation_points + amplify_question_marks
|
19
|
+
(amplify_exclamation_points + amplify_question_marks).round(3)
|
14
20
|
end
|
15
21
|
|
22
|
+
private
|
23
|
+
|
16
24
|
def amplify_exclamation_points
|
17
|
-
#
|
18
|
-
ep_count = @
|
19
|
-
ep_count = 4
|
25
|
+
# Check for added emphasis resulting from exclamation points (up to 4 of them)
|
26
|
+
ep_count = @text_array.count(EXCLAMATION_MARK)
|
27
|
+
ep_count = 4 if ep_count > 4
|
20
28
|
|
21
|
-
|
22
|
-
ep_count * 0.292
|
29
|
+
ep_count * EXCLAMATION_MARK_RATING_INCREASE
|
23
30
|
end
|
24
31
|
|
25
32
|
def amplify_question_marks
|
26
|
-
#
|
27
|
-
qm_count = @
|
33
|
+
# Check for added emphasis resulting from question marks (2 or 3+)
|
34
|
+
qm_count = @text_array.count(QUESTION_MARK)
|
28
35
|
|
29
|
-
return 0
|
30
|
-
|
31
|
-
return qm_count * 0.18 if qm_count <= 3
|
36
|
+
return 0 unless qm_count > 1
|
37
|
+
return 0.96 if qm_count > 3
|
32
38
|
|
33
|
-
|
39
|
+
qm_count * QUESTION_MARK_RATING_INCREASE
|
34
40
|
end
|
35
41
|
end
|
36
42
|
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module VaderSentimentRuby
|
4
4
|
# Identify sentiment-relevant string-level properties of input text.
|
5
5
|
class SentimentPropertiesIdentifier
|
6
|
-
attr_reader :
|
6
|
+
attr_reader :words_and_emoticons, :is_cap_diff
|
7
7
|
|
8
8
|
# @param [String] text
|
9
9
|
def initialize(text)
|
@@ -11,14 +11,13 @@ module VaderSentimentRuby
|
|
11
11
|
@text = text
|
12
12
|
@words_and_emoticons = prepare_words_and_emoticons
|
13
13
|
# Doesn't separate words from adjacent punctuation (keeps emoticons & contractions)
|
14
|
-
@is_cap_diff =
|
14
|
+
@is_cap_diff = text_contains_mixed_cases?
|
15
15
|
end
|
16
16
|
|
17
17
|
private
|
18
18
|
|
19
19
|
# Removes leading and trailing punctuation
|
20
20
|
# Leaves contractions and most emoticons
|
21
|
-
# Does not preserve punc-plus-letter emoticons (e.g. :D)
|
22
21
|
# @return [Array]
|
23
22
|
def prepare_words_and_emoticons
|
24
23
|
@text
|
@@ -28,21 +27,11 @@ module VaderSentimentRuby
|
|
28
27
|
|
29
28
|
# Check whether just some words in the input are ALL CAPS.
|
30
29
|
# Returns `True` if some but not all items in `words` are ALL CAPS
|
31
|
-
# @param [Array] words
|
32
30
|
# @return [Boolean]
|
33
|
-
def
|
34
|
-
|
31
|
+
def text_contains_mixed_cases?
|
32
|
+
uppercase_words = @words_and_emoticons.count { |word| WordHelper.word_upcase?(word) }
|
35
33
|
|
36
|
-
|
37
|
-
all_cap_words += 1 if WordHelper.word_upcase?(word)
|
38
|
-
end
|
39
|
-
|
40
|
-
words_size = words.size
|
41
|
-
cap_differential = words_size - all_cap_words
|
42
|
-
|
43
|
-
return true if cap_differential.positive? && cap_differential < words_size
|
44
|
-
|
45
|
-
false
|
34
|
+
uppercase_words.positive? && uppercase_words < @words_and_emoticons.size
|
46
35
|
end
|
47
36
|
end
|
48
37
|
end
|
@@ -3,6 +3,7 @@
|
|
3
3
|
module VaderSentimentRuby
|
4
4
|
# Separates positive versus negative sentiment scores
|
5
5
|
class SentimentScoresSifter
|
6
|
+
# @param [Array<Float>] sentiments Array of sentiments generated from words
|
6
7
|
def initialize(sentiments)
|
7
8
|
@sentiments = sentiments
|
8
9
|
@pos_sum = 0.0
|
@@ -10,6 +11,9 @@ module VaderSentimentRuby
|
|
10
11
|
@neu_count = 0
|
11
12
|
end
|
12
13
|
|
14
|
+
# @return [Array<Float, Float, Integer>]
|
15
|
+
# @example
|
16
|
+
# [2.3, -3.2, 3]
|
13
17
|
def call
|
14
18
|
@sentiments.each do |sentiment_score|
|
15
19
|
# compensates for neutral words that are counted as 1
|
@@ -10,16 +10,19 @@ module VaderSentimentRuby
|
|
10
10
|
compound: 0.0
|
11
11
|
}.freeze
|
12
12
|
|
13
|
+
# @param [Array<Float, Integer>] sentiments Array of sentiments for text
|
14
|
+
# @param [String] text
|
13
15
|
def initialize(sentiments, text)
|
14
16
|
@sentiments = sentiments
|
15
17
|
@text = text
|
16
18
|
end
|
17
19
|
|
20
|
+
# @return [Hash<Float, Float, Float, Float>] Semantic score response hash
|
18
21
|
def call
|
19
|
-
return DEFAULT_RESPONSE
|
22
|
+
return DEFAULT_RESPONSE if @sentiments.empty?
|
20
23
|
|
21
24
|
sum_s = @sentiments.map(&:to_f).sum
|
22
|
-
#
|
25
|
+
# Compute and add emphasis from punctuation in text
|
23
26
|
punct_emph_amplifier = PunctuationEmphasisAmplifier.new(@text).call
|
24
27
|
compound = normalize(sum_s, punct_emph_amplifier)
|
25
28
|
|
@@ -28,9 +31,7 @@ module VaderSentimentRuby
|
|
28
31
|
|
29
32
|
private
|
30
33
|
|
31
|
-
#
|
32
|
-
# approximates the max expected value
|
33
|
-
# Move to Sentiment analyzer
|
34
|
+
# Normalizes the score to be between -1 and 1 using an alpha that approximates the max expected value
|
34
35
|
def normalize(score, punct_emph_amplifier, alpha = 15)
|
35
36
|
score = add_punctuation_emphasis(score, punct_emph_amplifier)
|
36
37
|
norm_score = score / Math.sqrt((score * score) + alpha).to_f
|
@@ -42,11 +43,8 @@ module VaderSentimentRuby
|
|
42
43
|
end
|
43
44
|
|
44
45
|
def add_punctuation_emphasis(sum_s, punct_emph_amplifier)
|
45
|
-
if sum_s.positive?
|
46
|
-
|
47
|
-
elsif sum_s.negative?
|
48
|
-
sum_s -= punct_emph_amplifier
|
49
|
-
end
|
46
|
+
return sum_s + punct_emph_amplifier if sum_s.positive?
|
47
|
+
return sum_s - punct_emph_amplifier if sum_s.negative?
|
50
48
|
|
51
49
|
sum_s
|
52
50
|
end
|
@@ -65,9 +63,9 @@ module VaderSentimentRuby
|
|
65
63
|
end
|
66
64
|
# rubocop:enable Metrics/AbcSize
|
67
65
|
|
68
|
-
#
|
66
|
+
# Prepares score sums for result calculation
|
69
67
|
def scores(punct_emph_amplifier)
|
70
|
-
#
|
68
|
+
# Discriminate between positive, negative and neutral sentiment scores
|
71
69
|
pos_sum, neg_sum, neu_count = SentimentScoresSifter.new(@sentiments).call
|
72
70
|
|
73
71
|
if pos_sum > neg_sum.to_f.abs
|
@@ -5,6 +5,7 @@ module VaderSentimentRuby
|
|
5
5
|
# word_upcase?(word) is similar to Python's word.isupper()
|
6
6
|
# strip_punctuation(word) is similar to Python's word.strip(string.punctuation)
|
7
7
|
module WordHelper
|
8
|
+
LETTERS_RANGE = 'A-Za-z'
|
8
9
|
PUNCTUATIONS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
|
9
10
|
|
10
11
|
class << self
|
@@ -16,7 +17,7 @@ module VaderSentimentRuby
|
|
16
17
|
# word_upcase?(':D') # => true
|
17
18
|
# word_upcase?(':)') # => false
|
18
19
|
def word_upcase?(word)
|
19
|
-
word == word.upcase && word.count(
|
20
|
+
word == word.upcase && word.count(LETTERS_RANGE).positive?
|
20
21
|
end
|
21
22
|
|
22
23
|
# Removes all trailing and leading punctuation
|
@@ -31,62 +32,28 @@ module VaderSentimentRuby
|
|
31
32
|
# strip_punctuation("'don't'") # => "don't"
|
32
33
|
# strip_punctuation(":)") # => ":)"
|
33
34
|
def strip_punctuation(token)
|
34
|
-
token_without_punctuation = replace_punctuations(token)
|
35
|
-
|
36
35
|
original_set = token.split('')
|
37
|
-
updated_set = token_without_punctuation.split('')
|
38
|
-
|
39
|
-
pair_array = prepare_match_array(original_set, updated_set)
|
40
|
-
pair_array = clean_leading_punctuations(pair_array)
|
41
|
-
pair_array = clean_trailing_punctuations(pair_array)
|
42
36
|
|
43
|
-
|
37
|
+
array = clean_leading_punctuations(original_set)
|
38
|
+
array = clean_trailing_punctuations(array)
|
39
|
+
stripped_token = array.join
|
44
40
|
|
45
|
-
return token if
|
41
|
+
return token if stripped_token.size <= 2
|
46
42
|
|
47
|
-
|
43
|
+
stripped_token
|
48
44
|
end
|
49
45
|
|
50
46
|
private
|
51
47
|
|
52
|
-
def
|
53
|
-
|
54
|
-
|
55
|
-
punctuation_array.each do |punctuation|
|
56
|
-
token = token.gsub(punctuation, ' ')
|
57
|
-
end
|
58
|
-
|
59
|
-
token
|
60
|
-
end
|
61
|
-
|
62
|
-
def prepare_match_array(original_set, updated_set)
|
63
|
-
pair_array = []
|
64
|
-
original_set.each_with_index do |item, index|
|
65
|
-
pair_array << { index: index, old_ch: item, new_ch: updated_set[index] }
|
66
|
-
end
|
67
|
-
|
68
|
-
pair_array
|
48
|
+
def clean_leading_punctuations(token_array)
|
49
|
+
token_array.drop_while { |letter| PUNCTUATIONS.include? letter }
|
69
50
|
end
|
70
51
|
|
71
|
-
def
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
pair_array
|
79
|
-
end
|
80
|
-
|
81
|
-
def clean_trailing_punctuations(pair_array)
|
82
|
-
reversed_array = pair_array.reverse
|
83
|
-
reversed_array.map do |pair|
|
84
|
-
break if pair[:new_ch] != ' '
|
85
|
-
|
86
|
-
pair_array.delete_at(pair[:index])
|
87
|
-
end
|
88
|
-
|
89
|
-
pair_array
|
52
|
+
def clean_trailing_punctuations(token_array)
|
53
|
+
token_array
|
54
|
+
.reverse
|
55
|
+
.drop_while { |letter| PUNCTUATIONS.include? letter }
|
56
|
+
.reverse
|
90
57
|
end
|
91
58
|
end
|
92
59
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vader_sentiment_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nickolay Bulavin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|