vader_sentiment_ruby 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57fb7693c238e33224556fb6a2d7d8c479ed88d76cd6576edef5c8befe4ad144
4
- data.tar.gz: 8fc4484045923da3ec6986b8dbbf39f41437c635a25dbfdd14c0973baebe271b
3
+ metadata.gz: 6b529805b4d4db7386700ff49cd122451590c19100e1c368925b3fa221a81c1a
4
+ data.tar.gz: 16dbb6928fa25b59b86acfc8a6272114bc751fff8ae2b02545d4af7539b6d0a6
5
5
  SHA512:
6
- metadata.gz: 127836bbe570da1cd60082da181d22e4a3619d32300d8e16dfc9aa9a3fcbbc0b3d51ad0e1b4c8e1143879f4b51f6e70a047590468af7c7de260468ddb7987ab9
7
- data.tar.gz: 94d770a25884484554c2dce05607c68bda7742482d883d18c2cf693c979a31085189fec042f78b7303e6b13e0b75f13c0c09ee7e39f5d756787cde27db0a6102
6
+ metadata.gz: 0a6e0dd37f24f9c78ca95756c3a304b534e0c9a816c3a5b14b1f40142e59c868bc7965488904f8a041bfebbfab3f8451d32c59214c51abf3927e5df4676ddae9
7
+ data.tar.gz: 382dfe51190b6efbaa7b517ea454db85d41c0cbac784a887ce325815c8984d11604b13bb93e0b856ef41cbed865442257f4c85ee59ee2422e596c8167917e3ea
@@ -1,21 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module VaderSentimentRuby
4
- # Replaces emoji char with its description
4
+ # Replaces emoji characters with their descriptions
5
5
  class EmojiDescriber
6
- def initialize(text, emojis)
7
- @text = text
8
- @emojis = emojis
6
+ # @param [String] text Original text
7
+ # @param [Hash] emoji_dictionary Emoji dictionary with emojis as keys
8
+ def initialize(text, emoji_dictionary)
9
+ @text_array = text.split('')
10
+ @emoji_dictionary = emoji_dictionary
9
11
  @text_no_emoji = ''
10
12
  @prev_space = true
11
13
  end
12
14
 
15
+ # @return [String] Text with emojis replaced with descriptions
13
16
  def call
14
- @text.split('').each do |chr|
15
- if @emojis.keys.include?(chr)
16
- handle_emoji_presence(chr)
17
+ @text_array.each do |character|
18
+ if @emoji_dictionary.keys.include?(character)
19
+ replace_emoji_with_description(character)
17
20
  else
18
- handle_emoji_absence(chr)
21
+ handle_simple_character(character)
19
22
  end
20
23
  end
21
24
 
@@ -24,14 +27,13 @@ module VaderSentimentRuby
24
27
 
25
28
  private
26
29
 
27
- def handle_emoji_presence(emoji)
28
- description = @emojis[emoji]
30
+ def replace_emoji_with_description(emoji)
29
31
  @text_no_emoji += ' ' unless @prev_space
30
- @text_no_emoji += description
32
+ @text_no_emoji += @emoji_dictionary[emoji]
31
33
  @prev_space = false
32
34
  end
33
35
 
34
- def handle_emoji_absence(character)
36
+ def handle_simple_character(character)
35
37
  @text_no_emoji += character
36
38
  @prev_space = character == ' '
37
39
  end
@@ -1,36 +1,42 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module VaderSentimentRuby
4
- # Adds emphasis from exclamation points and question marks
4
+ # Adds emphasis factor from exclamation points and question marks
5
5
  class PunctuationEmphasisAmplifier
6
+ EXCLAMATION_MARK = '!'
7
+ QUESTION_MARK = '?'
8
+ # Empirically derived mean sentiment intensity rating increases for exclamation points and question marks
9
+ EXCLAMATION_MARK_RATING_INCREASE = 0.292
10
+ QUESTION_MARK_RATING_INCREASE = 0.18
11
+
6
12
  # @param [String] text
7
13
  def initialize(text)
8
- @text = text
14
+ @text_array = text.split('')
9
15
  end
10
16
 
11
- # @return [Float]
17
+ # @return [Float, Integer] Emphasis factor
12
18
  def call
13
- amplify_exclamation_points + amplify_question_marks
19
+ (amplify_exclamation_points + amplify_question_marks).round(3)
14
20
  end
15
21
 
22
+ private
23
+
16
24
  def amplify_exclamation_points
17
- # check for added emphasis resulting from exclamation points (up to 4 of them)
18
- ep_count = @text.split('').count('!')
19
- ep_count = 4.0 if ep_count > 4
25
+ # Check for added emphasis resulting from exclamation points (up to 4 of them)
26
+ ep_count = @text_array.count(EXCLAMATION_MARK)
27
+ ep_count = 4 if ep_count > 4
20
28
 
21
- # empirically derived mean sentiment intensity rating increase for exclamation points
22
- ep_count * 0.292
29
+ ep_count * EXCLAMATION_MARK_RATING_INCREASE
23
30
  end
24
31
 
25
32
  def amplify_question_marks
26
- # check for added emphasis resulting from question marks (2 or 3+)
27
- qm_count = @text.split('').count('?')
33
+ # Check for added emphasis resulting from question marks (2 or 3+)
34
+ qm_count = @text_array.count(QUESTION_MARK)
28
35
 
29
- return 0.0 unless qm_count > 1
30
- # empirically derived mean sentiment intensity rating increase for question marks
31
- return qm_count * 0.18 if qm_count <= 3
36
+ return 0 unless qm_count > 1
37
+ return 0.96 if qm_count > 3
32
38
 
33
- 0.96
39
+ qm_count * QUESTION_MARK_RATING_INCREASE
34
40
  end
35
41
  end
36
42
  end
@@ -3,7 +3,7 @@
3
3
  module VaderSentimentRuby
4
4
  # Identify sentiment-relevant string-level properties of input text.
5
5
  class SentimentPropertiesIdentifier
6
- attr_reader :is_cap_diff, :words_and_emoticons
6
+ attr_reader :words_and_emoticons, :is_cap_diff
7
7
 
8
8
  # @param [String] text
9
9
  def initialize(text)
@@ -11,14 +11,13 @@ module VaderSentimentRuby
11
11
  @text = text
12
12
  @words_and_emoticons = prepare_words_and_emoticons
13
13
  # Doesn't separate words from adjacent punctuation (keeps emoticons & contractions)
14
- @is_cap_diff = all_cap_differential?(@words_and_emoticons)
14
+ @is_cap_diff = text_contains_mixed_cases?
15
15
  end
16
16
 
17
17
  private
18
18
 
19
19
  # Removes leading and trailing punctuation
20
20
  # Leaves contractions and most emoticons
21
- # Does not preserve punc-plus-letter emoticons (e.g. :D)
22
21
  # @return [Array]
23
22
  def prepare_words_and_emoticons
24
23
  @text
@@ -28,21 +27,11 @@ module VaderSentimentRuby
28
27
 
29
28
  # Check whether just some words in the input are ALL CAPS.
30
29
  # Returns `True` if some but not all items in `words` are ALL CAPS
31
- # @param [Array] words
32
30
  # @return [Boolean]
33
- def all_cap_differential?(words)
34
- all_cap_words = 0
31
+ def text_contains_mixed_cases?
32
+ uppercase_words = @words_and_emoticons.count { |word| WordHelper.word_upcase?(word) }
35
33
 
36
- words.each do |word|
37
- all_cap_words += 1 if WordHelper.word_upcase?(word)
38
- end
39
-
40
- words_size = words.size
41
- cap_differential = words_size - all_cap_words
42
-
43
- return true if cap_differential.positive? && cap_differential < words_size
44
-
45
- false
34
+ uppercase_words.positive? && uppercase_words < @words_and_emoticons.size
46
35
  end
47
36
  end
48
37
  end
@@ -3,6 +3,7 @@
3
3
  module VaderSentimentRuby
4
4
  # Separates positive versus negative sentiment scores
5
5
  class SentimentScoresSifter
6
+ # @param [Array<Float>] sentiments Array of sentiments generated from words
6
7
  def initialize(sentiments)
7
8
  @sentiments = sentiments
8
9
  @pos_sum = 0.0
@@ -10,6 +11,9 @@ module VaderSentimentRuby
10
11
  @neu_count = 0
11
12
  end
12
13
 
14
+ # @return [Array<Float, Float, Integer>]
15
+ # @example
16
+ # [2.3, -3.2, 3]
13
17
  def call
14
18
  @sentiments.each do |sentiment_score|
15
19
  # compensates for neutral words that are counted as 1
@@ -10,16 +10,19 @@ module VaderSentimentRuby
10
10
  compound: 0.0
11
11
  }.freeze
12
12
 
13
+ # @param [Array<Float, Integer>] sentiments Array of sentiments for text
14
+ # @param [String] text
13
15
  def initialize(sentiments, text)
14
16
  @sentiments = sentiments
15
17
  @text = text
16
18
  end
17
19
 
20
+ # @return [Hash<Float, Float, Float, Float>] Semantic score response hash
18
21
  def call
19
- return DEFAULT_RESPONSE unless @sentiments
22
+ return DEFAULT_RESPONSE if @sentiments.empty?
20
23
 
21
24
  sum_s = @sentiments.map(&:to_f).sum
22
- # compute and add emphasis from punctuation in text
25
+ # Compute and add emphasis from punctuation in text
23
26
  punct_emph_amplifier = PunctuationEmphasisAmplifier.new(@text).call
24
27
  compound = normalize(sum_s, punct_emph_amplifier)
25
28
 
@@ -28,9 +31,7 @@ module VaderSentimentRuby
28
31
 
29
32
  private
30
33
 
31
- # Normalize the score to be between -1 and 1 using an alpha that
32
- # approximates the max expected value
33
- # Move to Sentiment analyzer
34
+ # Normalizes the score to be between -1 and 1 using an alpha that approximates the max expected value
34
35
  def normalize(score, punct_emph_amplifier, alpha = 15)
35
36
  score = add_punctuation_emphasis(score, punct_emph_amplifier)
36
37
  norm_score = score / Math.sqrt((score * score) + alpha).to_f
@@ -42,11 +43,8 @@ module VaderSentimentRuby
42
43
  end
43
44
 
44
45
  def add_punctuation_emphasis(sum_s, punct_emph_amplifier)
45
- if sum_s.positive?
46
- sum_s += punct_emph_amplifier
47
- elsif sum_s.negative?
48
- sum_s -= punct_emph_amplifier
49
- end
46
+ return sum_s + punct_emph_amplifier if sum_s.positive?
47
+ return sum_s - punct_emph_amplifier if sum_s.negative?
50
48
 
51
49
  sum_s
52
50
  end
@@ -65,9 +63,9 @@ module VaderSentimentRuby
65
63
  end
66
64
  # rubocop:enable Metrics/AbcSize
67
65
 
68
- # Prepare scores sum for result calculation
66
+ # Prepares score sums for result calculation
69
67
  def scores(punct_emph_amplifier)
70
- # discriminate between positive, negative and neutral sentiment scores
68
+ # Discriminate between positive, negative and neutral sentiment scores
71
69
  pos_sum, neg_sum, neu_count = SentimentScoresSifter.new(@sentiments).call
72
70
 
73
71
  if pos_sum > neg_sum.to_f.abs
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module VaderSentimentRuby
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.1'
5
5
  end
@@ -5,6 +5,7 @@ module VaderSentimentRuby
5
5
  # word_upcase?(word) is similar to Python's word.isupper()
6
6
  # strip_punctuation(word) is similar to Python's word.strip(string.punctuation)
7
7
  module WordHelper
8
+ LETTERS_RANGE = 'A-Za-z'
8
9
  PUNCTUATIONS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
9
10
 
10
11
  class << self
@@ -16,7 +17,7 @@ module VaderSentimentRuby
16
17
  # word_upcase?(':D') # => true
17
18
  # word_upcase?(':)') # => false
18
19
  def word_upcase?(word)
19
- word == word.upcase && word.count('A-Za-z').positive?
20
+ word == word.upcase && word.count(LETTERS_RANGE).positive?
20
21
  end
21
22
 
22
23
  # Removes all trailing and leading punctuation
@@ -31,62 +32,28 @@ module VaderSentimentRuby
31
32
  # strip_punctuation("'don't'") # => "don't"
32
33
  # strip_punctuation(":)") # => ":)"
33
34
  def strip_punctuation(token)
34
- token_without_punctuation = replace_punctuations(token)
35
-
36
35
  original_set = token.split('')
37
- updated_set = token_without_punctuation.split('')
38
-
39
- pair_array = prepare_match_array(original_set, updated_set)
40
- pair_array = clean_leading_punctuations(pair_array)
41
- pair_array = clean_trailing_punctuations(pair_array)
42
36
 
43
- stripped = pair_array.map { |item| item[:old_ch] }.join
37
+ array = clean_leading_punctuations(original_set)
38
+ array = clean_trailing_punctuations(array)
39
+ stripped_token = array.join
44
40
 
45
- return token if stripped.size <= 2
41
+ return token if stripped_token.size <= 2
46
42
 
47
- stripped
43
+ stripped_token
48
44
  end
49
45
 
50
46
  private
51
47
 
52
- def replace_punctuations(token)
53
- punctuation_array = PUNCTUATIONS.split('')
54
-
55
- punctuation_array.each do |punctuation|
56
- token = token.gsub(punctuation, ' ')
57
- end
58
-
59
- token
60
- end
61
-
62
- def prepare_match_array(original_set, updated_set)
63
- pair_array = []
64
- original_set.each_with_index do |item, index|
65
- pair_array << { index: index, old_ch: item, new_ch: updated_set[index] }
66
- end
67
-
68
- pair_array
48
+ def clean_leading_punctuations(token_array)
49
+ token_array.drop_while { |letter| PUNCTUATIONS.include? letter }
69
50
  end
70
51
 
71
- def clean_leading_punctuations(pair_array)
72
- pair_array.map do |pair|
73
- break if pair[:new_ch] != ' '
74
-
75
- pair_array.delete_at(pair[:index])
76
- end
77
-
78
- pair_array
79
- end
80
-
81
- def clean_trailing_punctuations(pair_array)
82
- reversed_array = pair_array.reverse
83
- reversed_array.map do |pair|
84
- break if pair[:new_ch] != ' '
85
-
86
- pair_array.delete_at(pair[:index])
87
- end
88
-
89
- pair_array
52
+ def clean_trailing_punctuations(token_array)
53
+ token_array
54
+ .reverse
55
+ .drop_while { |letter| PUNCTUATIONS.include? letter }
56
+ .reverse
90
57
  end
91
58
  end
92
59
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vader_sentiment_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nickolay Bulavin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-18 00:00:00.000000000 Z
11
+ date: 2021-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug