vader_sentiment_ruby 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/vader_sentiment_ruby/emojis_describer.rb +14 -12
- data/lib/vader_sentiment_ruby/punctuation_emphasis_amplifier.rb +21 -15
- data/lib/vader_sentiment_ruby/sentiment_properties_identifier.rb +5 -16
- data/lib/vader_sentiment_ruby/sentiment_scores_sifter.rb +4 -0
- data/lib/vader_sentiment_ruby/valence_score_calculator.rb +10 -12
- data/lib/vader_sentiment_ruby/version.rb +1 -1
- data/lib/vader_sentiment_ruby/word_helper.rb +14 -47
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b529805b4d4db7386700ff49cd122451590c19100e1c368925b3fa221a81c1a
|
4
|
+
data.tar.gz: 16dbb6928fa25b59b86acfc8a6272114bc751fff8ae2b02545d4af7539b6d0a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a6e0dd37f24f9c78ca95756c3a304b534e0c9a816c3a5b14b1f40142e59c868bc7965488904f8a041bfebbfab3f8451d32c59214c51abf3927e5df4676ddae9
|
7
|
+
data.tar.gz: 382dfe51190b6efbaa7b517ea454db85d41c0cbac784a887ce325815c8984d11604b13bb93e0b856ef41cbed865442257f4c85ee59ee2422e596c8167917e3ea
|
@@ -1,21 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module VaderSentimentRuby
|
4
|
-
# Replaces emoji
|
4
|
+
# Replaces emoji characters with their descriptions
|
5
5
|
class EmojiDescriber
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
# @param [String] text Original text
|
7
|
+
# @param [Hash] emoji_dictionary Emoji dictionary with emojis as keys
|
8
|
+
def initialize(text, emoji_dictionary)
|
9
|
+
@text_array = text.split('')
|
10
|
+
@emoji_dictionary = emoji_dictionary
|
9
11
|
@text_no_emoji = ''
|
10
12
|
@prev_space = true
|
11
13
|
end
|
12
14
|
|
15
|
+
# @return [String] Text with emojis replaced with descriptions
|
13
16
|
def call
|
14
|
-
@
|
15
|
-
if @
|
16
|
-
|
17
|
+
@text_array.each do |character|
|
18
|
+
if @emoji_dictionary.keys.include?(character)
|
19
|
+
replace_emoji_with_description(character)
|
17
20
|
else
|
18
|
-
|
21
|
+
handle_simple_character(character)
|
19
22
|
end
|
20
23
|
end
|
21
24
|
|
@@ -24,14 +27,13 @@ module VaderSentimentRuby
|
|
24
27
|
|
25
28
|
private
|
26
29
|
|
27
|
-
def
|
28
|
-
description = @emojis[emoji]
|
30
|
+
def replace_emoji_with_description(emoji)
|
29
31
|
@text_no_emoji += ' ' unless @prev_space
|
30
|
-
@text_no_emoji +=
|
32
|
+
@text_no_emoji += @emoji_dictionary[emoji]
|
31
33
|
@prev_space = false
|
32
34
|
end
|
33
35
|
|
34
|
-
def
|
36
|
+
def handle_simple_character(character)
|
35
37
|
@text_no_emoji += character
|
36
38
|
@prev_space = character == ' '
|
37
39
|
end
|
@@ -1,36 +1,42 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module VaderSentimentRuby
|
4
|
-
# Adds emphasis from exclamation points and question marks
|
4
|
+
# Adds emphasis factor from exclamation points and question marks
|
5
5
|
class PunctuationEmphasisAmplifier
|
6
|
+
EXCLAMATION_MARK = '!'
|
7
|
+
QUESTION_MARK = '?'
|
8
|
+
# Empirically derived mean sentiment intensity rating increases for exclamation points and question marks
|
9
|
+
EXCLAMATION_MARK_RATING_INCREASE = 0.292
|
10
|
+
QUESTION_MARK_RATING_INCREASE = 0.18
|
11
|
+
|
6
12
|
# @param [String] text
|
7
13
|
def initialize(text)
|
8
|
-
@
|
14
|
+
@text_array = text.split('')
|
9
15
|
end
|
10
16
|
|
11
|
-
# @return [Float]
|
17
|
+
# @return [Float, Integer] Emphasis factor
|
12
18
|
def call
|
13
|
-
amplify_exclamation_points + amplify_question_marks
|
19
|
+
(amplify_exclamation_points + amplify_question_marks).round(3)
|
14
20
|
end
|
15
21
|
|
22
|
+
private
|
23
|
+
|
16
24
|
def amplify_exclamation_points
|
17
|
-
#
|
18
|
-
ep_count = @
|
19
|
-
ep_count = 4
|
25
|
+
# Check for added emphasis resulting from exclamation points (up to 4 of them)
|
26
|
+
ep_count = @text_array.count(EXCLAMATION_MARK)
|
27
|
+
ep_count = 4 if ep_count > 4
|
20
28
|
|
21
|
-
|
22
|
-
ep_count * 0.292
|
29
|
+
ep_count * EXCLAMATION_MARK_RATING_INCREASE
|
23
30
|
end
|
24
31
|
|
25
32
|
def amplify_question_marks
|
26
|
-
#
|
27
|
-
qm_count = @
|
33
|
+
# Check for added emphasis resulting from question marks (2 or 3+)
|
34
|
+
qm_count = @text_array.count(QUESTION_MARK)
|
28
35
|
|
29
|
-
return 0
|
30
|
-
|
31
|
-
return qm_count * 0.18 if qm_count <= 3
|
36
|
+
return 0 unless qm_count > 1
|
37
|
+
return 0.96 if qm_count > 3
|
32
38
|
|
33
|
-
|
39
|
+
qm_count * QUESTION_MARK_RATING_INCREASE
|
34
40
|
end
|
35
41
|
end
|
36
42
|
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module VaderSentimentRuby
|
4
4
|
# Identify sentiment-relevant string-level properties of input text.
|
5
5
|
class SentimentPropertiesIdentifier
|
6
|
-
attr_reader :
|
6
|
+
attr_reader :words_and_emoticons, :is_cap_diff
|
7
7
|
|
8
8
|
# @param [String] text
|
9
9
|
def initialize(text)
|
@@ -11,14 +11,13 @@ module VaderSentimentRuby
|
|
11
11
|
@text = text
|
12
12
|
@words_and_emoticons = prepare_words_and_emoticons
|
13
13
|
# Doesn't separate words from adjacent punctuation (keeps emoticons & contractions)
|
14
|
-
@is_cap_diff =
|
14
|
+
@is_cap_diff = text_contains_mixed_cases?
|
15
15
|
end
|
16
16
|
|
17
17
|
private
|
18
18
|
|
19
19
|
# Removes leading and trailing punctuation
|
20
20
|
# Leaves contractions and most emoticons
|
21
|
-
# Does not preserve punc-plus-letter emoticons (e.g. :D)
|
22
21
|
# @return [Array]
|
23
22
|
def prepare_words_and_emoticons
|
24
23
|
@text
|
@@ -28,21 +27,11 @@ module VaderSentimentRuby
|
|
28
27
|
|
29
28
|
# Check whether just some words in the input are ALL CAPS.
|
30
29
|
# Returns `True` if some but not all items in `words` are ALL CAPS
|
31
|
-
# @param [Array] words
|
32
30
|
# @return [Boolean]
|
33
|
-
def
|
34
|
-
|
31
|
+
def text_contains_mixed_cases?
|
32
|
+
uppercase_words = @words_and_emoticons.count { |word| WordHelper.word_upcase?(word) }
|
35
33
|
|
36
|
-
|
37
|
-
all_cap_words += 1 if WordHelper.word_upcase?(word)
|
38
|
-
end
|
39
|
-
|
40
|
-
words_size = words.size
|
41
|
-
cap_differential = words_size - all_cap_words
|
42
|
-
|
43
|
-
return true if cap_differential.positive? && cap_differential < words_size
|
44
|
-
|
45
|
-
false
|
34
|
+
uppercase_words.positive? && uppercase_words < @words_and_emoticons.size
|
46
35
|
end
|
47
36
|
end
|
48
37
|
end
|
@@ -3,6 +3,7 @@
|
|
3
3
|
module VaderSentimentRuby
|
4
4
|
# Separates positive versus negative sentiment scores
|
5
5
|
class SentimentScoresSifter
|
6
|
+
# @param [Array<Float>] sentiments Array of sentiments generated from words
|
6
7
|
def initialize(sentiments)
|
7
8
|
@sentiments = sentiments
|
8
9
|
@pos_sum = 0.0
|
@@ -10,6 +11,9 @@ module VaderSentimentRuby
|
|
10
11
|
@neu_count = 0
|
11
12
|
end
|
12
13
|
|
14
|
+
# @return [Array<Float, Float, Integer>]
|
15
|
+
# @example
|
16
|
+
# [2.3, -3.2, 3]
|
13
17
|
def call
|
14
18
|
@sentiments.each do |sentiment_score|
|
15
19
|
# compensates for neutral words that are counted as 1
|
@@ -10,16 +10,19 @@ module VaderSentimentRuby
|
|
10
10
|
compound: 0.0
|
11
11
|
}.freeze
|
12
12
|
|
13
|
+
# @param [Array<Float, Integer>] sentiments Array of sentiments for text
|
14
|
+
# @param [String] text
|
13
15
|
def initialize(sentiments, text)
|
14
16
|
@sentiments = sentiments
|
15
17
|
@text = text
|
16
18
|
end
|
17
19
|
|
20
|
+
# @return [Hash<Float, Float, Float, Float>] Semantic score response hash
|
18
21
|
def call
|
19
|
-
return DEFAULT_RESPONSE
|
22
|
+
return DEFAULT_RESPONSE if @sentiments.empty?
|
20
23
|
|
21
24
|
sum_s = @sentiments.map(&:to_f).sum
|
22
|
-
#
|
25
|
+
# Compute and add emphasis from punctuation in text
|
23
26
|
punct_emph_amplifier = PunctuationEmphasisAmplifier.new(@text).call
|
24
27
|
compound = normalize(sum_s, punct_emph_amplifier)
|
25
28
|
|
@@ -28,9 +31,7 @@ module VaderSentimentRuby
|
|
28
31
|
|
29
32
|
private
|
30
33
|
|
31
|
-
#
|
32
|
-
# approximates the max expected value
|
33
|
-
# Move to Sentiment analyzer
|
34
|
+
# Normalizes the score to be between -1 and 1 using an alpha that approximates the max expected value
|
34
35
|
def normalize(score, punct_emph_amplifier, alpha = 15)
|
35
36
|
score = add_punctuation_emphasis(score, punct_emph_amplifier)
|
36
37
|
norm_score = score / Math.sqrt((score * score) + alpha).to_f
|
@@ -42,11 +43,8 @@ module VaderSentimentRuby
|
|
42
43
|
end
|
43
44
|
|
44
45
|
def add_punctuation_emphasis(sum_s, punct_emph_amplifier)
|
45
|
-
if sum_s.positive?
|
46
|
-
|
47
|
-
elsif sum_s.negative?
|
48
|
-
sum_s -= punct_emph_amplifier
|
49
|
-
end
|
46
|
+
return sum_s + punct_emph_amplifier if sum_s.positive?
|
47
|
+
return sum_s - punct_emph_amplifier if sum_s.negative?
|
50
48
|
|
51
49
|
sum_s
|
52
50
|
end
|
@@ -65,9 +63,9 @@ module VaderSentimentRuby
|
|
65
63
|
end
|
66
64
|
# rubocop:enable Metrics/AbcSize
|
67
65
|
|
68
|
-
#
|
66
|
+
# Prepares score sums for result calculation
|
69
67
|
def scores(punct_emph_amplifier)
|
70
|
-
#
|
68
|
+
# Discriminate between positive, negative and neutral sentiment scores
|
71
69
|
pos_sum, neg_sum, neu_count = SentimentScoresSifter.new(@sentiments).call
|
72
70
|
|
73
71
|
if pos_sum > neg_sum.to_f.abs
|
@@ -5,6 +5,7 @@ module VaderSentimentRuby
|
|
5
5
|
# word_upcase?(word) is similar to Python's word.isupper()
|
6
6
|
# strip_punctuation(word) is similar to Python's word.strip(string.punctuation)
|
7
7
|
module WordHelper
|
8
|
+
LETTERS_RANGE = 'A-Za-z'
|
8
9
|
PUNCTUATIONS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
|
9
10
|
|
10
11
|
class << self
|
@@ -16,7 +17,7 @@ module VaderSentimentRuby
|
|
16
17
|
# word_upcase?(':D') # => true
|
17
18
|
# word_upcase?(':)') # => false
|
18
19
|
def word_upcase?(word)
|
19
|
-
word == word.upcase && word.count(
|
20
|
+
word == word.upcase && word.count(LETTERS_RANGE).positive?
|
20
21
|
end
|
21
22
|
|
22
23
|
# Removes all trailing and leading punctuation
|
@@ -31,62 +32,28 @@ module VaderSentimentRuby
|
|
31
32
|
# strip_punctuation("'don't'") # => "don't"
|
32
33
|
# strip_punctuation(":)") # => ":)"
|
33
34
|
def strip_punctuation(token)
|
34
|
-
token_without_punctuation = replace_punctuations(token)
|
35
|
-
|
36
35
|
original_set = token.split('')
|
37
|
-
updated_set = token_without_punctuation.split('')
|
38
|
-
|
39
|
-
pair_array = prepare_match_array(original_set, updated_set)
|
40
|
-
pair_array = clean_leading_punctuations(pair_array)
|
41
|
-
pair_array = clean_trailing_punctuations(pair_array)
|
42
36
|
|
43
|
-
|
37
|
+
array = clean_leading_punctuations(original_set)
|
38
|
+
array = clean_trailing_punctuations(array)
|
39
|
+
stripped_token = array.join
|
44
40
|
|
45
|
-
return token if
|
41
|
+
return token if stripped_token.size <= 2
|
46
42
|
|
47
|
-
|
43
|
+
stripped_token
|
48
44
|
end
|
49
45
|
|
50
46
|
private
|
51
47
|
|
52
|
-
def
|
53
|
-
|
54
|
-
|
55
|
-
punctuation_array.each do |punctuation|
|
56
|
-
token = token.gsub(punctuation, ' ')
|
57
|
-
end
|
58
|
-
|
59
|
-
token
|
60
|
-
end
|
61
|
-
|
62
|
-
def prepare_match_array(original_set, updated_set)
|
63
|
-
pair_array = []
|
64
|
-
original_set.each_with_index do |item, index|
|
65
|
-
pair_array << { index: index, old_ch: item, new_ch: updated_set[index] }
|
66
|
-
end
|
67
|
-
|
68
|
-
pair_array
|
48
|
+
def clean_leading_punctuations(token_array)
|
49
|
+
token_array.drop_while { |letter| PUNCTUATIONS.include? letter }
|
69
50
|
end
|
70
51
|
|
71
|
-
def
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
pair_array
|
79
|
-
end
|
80
|
-
|
81
|
-
def clean_trailing_punctuations(pair_array)
|
82
|
-
reversed_array = pair_array.reverse
|
83
|
-
reversed_array.map do |pair|
|
84
|
-
break if pair[:new_ch] != ' '
|
85
|
-
|
86
|
-
pair_array.delete_at(pair[:index])
|
87
|
-
end
|
88
|
-
|
89
|
-
pair_array
|
52
|
+
def clean_trailing_punctuations(token_array)
|
53
|
+
token_array
|
54
|
+
.reverse
|
55
|
+
.drop_while { |letter| PUNCTUATIONS.include? letter }
|
56
|
+
.reverse
|
90
57
|
end
|
91
58
|
end
|
92
59
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vader_sentiment_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nickolay Bulavin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|