vader_sentiment_ruby 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57fb7693c238e33224556fb6a2d7d8c479ed88d76cd6576edef5c8befe4ad144
4
- data.tar.gz: 8fc4484045923da3ec6986b8dbbf39f41437c635a25dbfdd14c0973baebe271b
3
+ metadata.gz: 6b529805b4d4db7386700ff49cd122451590c19100e1c368925b3fa221a81c1a
4
+ data.tar.gz: 16dbb6928fa25b59b86acfc8a6272114bc751fff8ae2b02545d4af7539b6d0a6
5
5
  SHA512:
6
- metadata.gz: 127836bbe570da1cd60082da181d22e4a3619d32300d8e16dfc9aa9a3fcbbc0b3d51ad0e1b4c8e1143879f4b51f6e70a047590468af7c7de260468ddb7987ab9
7
- data.tar.gz: 94d770a25884484554c2dce05607c68bda7742482d883d18c2cf693c979a31085189fec042f78b7303e6b13e0b75f13c0c09ee7e39f5d756787cde27db0a6102
6
+ metadata.gz: 0a6e0dd37f24f9c78ca95756c3a304b534e0c9a816c3a5b14b1f40142e59c868bc7965488904f8a041bfebbfab3f8451d32c59214c51abf3927e5df4676ddae9
7
+ data.tar.gz: 382dfe51190b6efbaa7b517ea454db85d41c0cbac784a887ce325815c8984d11604b13bb93e0b856ef41cbed865442257f4c85ee59ee2422e596c8167917e3ea
@@ -1,21 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module VaderSentimentRuby
4
- # Replaces emoji char with its description
4
+ # Replaces emoji characters with their descriptions
5
5
  class EmojiDescriber
6
- def initialize(text, emojis)
7
- @text = text
8
- @emojis = emojis
6
+ # @param [String] text Original text
7
+ # @param [Hash] emoji_dictionary Emoji dictionary with emojis as keys
8
+ def initialize(text, emoji_dictionary)
9
+ @text_array = text.split('')
10
+ @emoji_dictionary = emoji_dictionary
9
11
  @text_no_emoji = ''
10
12
  @prev_space = true
11
13
  end
12
14
 
15
+ # @return [String] Text with emojis replaced with descriptions
13
16
  def call
14
- @text.split('').each do |chr|
15
- if @emojis.keys.include?(chr)
16
- handle_emoji_presence(chr)
17
+ @text_array.each do |character|
18
+ if @emoji_dictionary.keys.include?(character)
19
+ replace_emoji_with_description(character)
17
20
  else
18
- handle_emoji_absence(chr)
21
+ handle_simple_character(character)
19
22
  end
20
23
  end
21
24
 
@@ -24,14 +27,13 @@ module VaderSentimentRuby
24
27
 
25
28
  private
26
29
 
27
- def handle_emoji_presence(emoji)
28
- description = @emojis[emoji]
30
+ def replace_emoji_with_description(emoji)
29
31
  @text_no_emoji += ' ' unless @prev_space
30
- @text_no_emoji += description
32
+ @text_no_emoji += @emoji_dictionary[emoji]
31
33
  @prev_space = false
32
34
  end
33
35
 
34
- def handle_emoji_absence(character)
36
+ def handle_simple_character(character)
35
37
  @text_no_emoji += character
36
38
  @prev_space = character == ' '
37
39
  end
@@ -1,36 +1,42 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module VaderSentimentRuby
4
- # Adds emphasis from exclamation points and question marks
4
+ # Adds emphasis factor from exclamation points and question marks
5
5
  class PunctuationEmphasisAmplifier
6
+ EXCLAMATION_MARK = '!'
7
+ QUESTION_MARK = '?'
8
+ # Empirically derived mean sentiment intensity rating increases for exclamation points and question marks
9
+ EXCLAMATION_MARK_RATING_INCREASE = 0.292
10
+ QUESTION_MARK_RATING_INCREASE = 0.18
11
+
6
12
  # @param [String] text
7
13
  def initialize(text)
8
- @text = text
14
+ @text_array = text.split('')
9
15
  end
10
16
 
11
- # @return [Float]
17
+ # @return [Float, Integer] Emphasis factor
12
18
  def call
13
- amplify_exclamation_points + amplify_question_marks
19
+ (amplify_exclamation_points + amplify_question_marks).round(3)
14
20
  end
15
21
 
22
+ private
23
+
16
24
  def amplify_exclamation_points
17
- # check for added emphasis resulting from exclamation points (up to 4 of them)
18
- ep_count = @text.split('').count('!')
19
- ep_count = 4.0 if ep_count > 4
25
+ # Check for added emphasis resulting from exclamation points (up to 4 of them)
26
+ ep_count = @text_array.count(EXCLAMATION_MARK)
27
+ ep_count = 4 if ep_count > 4
20
28
 
21
- # empirically derived mean sentiment intensity rating increase for exclamation points
22
- ep_count * 0.292
29
+ ep_count * EXCLAMATION_MARK_RATING_INCREASE
23
30
  end
24
31
 
25
32
  def amplify_question_marks
26
- # check for added emphasis resulting from question marks (2 or 3+)
27
- qm_count = @text.split('').count('?')
33
+ # Check for added emphasis resulting from question marks (2 or 3+)
34
+ qm_count = @text_array.count(QUESTION_MARK)
28
35
 
29
- return 0.0 unless qm_count > 1
30
- # empirically derived mean sentiment intensity rating increase for question marks
31
- return qm_count * 0.18 if qm_count <= 3
36
+ return 0 unless qm_count > 1
37
+ return 0.96 if qm_count > 3
32
38
 
33
- 0.96
39
+ qm_count * QUESTION_MARK_RATING_INCREASE
34
40
  end
35
41
  end
36
42
  end
@@ -3,7 +3,7 @@
3
3
  module VaderSentimentRuby
4
4
  # Identify sentiment-relevant string-level properties of input text.
5
5
  class SentimentPropertiesIdentifier
6
- attr_reader :is_cap_diff, :words_and_emoticons
6
+ attr_reader :words_and_emoticons, :is_cap_diff
7
7
 
8
8
  # @param [String] text
9
9
  def initialize(text)
@@ -11,14 +11,13 @@ module VaderSentimentRuby
11
11
  @text = text
12
12
  @words_and_emoticons = prepare_words_and_emoticons
13
13
  # Doesn't separate words from adjacent punctuation (keeps emoticons & contractions)
14
- @is_cap_diff = all_cap_differential?(@words_and_emoticons)
14
+ @is_cap_diff = text_contains_mixed_cases?
15
15
  end
16
16
 
17
17
  private
18
18
 
19
19
  # Removes leading and trailing punctuation
20
20
  # Leaves contractions and most emoticons
21
- # Does not preserve punc-plus-letter emoticons (e.g. :D)
22
21
  # @return [Array]
23
22
  def prepare_words_and_emoticons
24
23
  @text
@@ -28,21 +27,11 @@ module VaderSentimentRuby
28
27
 
29
28
  # Check whether just some words in the input are ALL CAPS.
30
29
  # Returns `True` if some but not all items in `words` are ALL CAPS
31
- # @param [Array] words
32
30
  # @return [Boolean]
33
- def all_cap_differential?(words)
34
- all_cap_words = 0
31
+ def text_contains_mixed_cases?
32
+ uppercase_words = @words_and_emoticons.count { |word| WordHelper.word_upcase?(word) }
35
33
 
36
- words.each do |word|
37
- all_cap_words += 1 if WordHelper.word_upcase?(word)
38
- end
39
-
40
- words_size = words.size
41
- cap_differential = words_size - all_cap_words
42
-
43
- return true if cap_differential.positive? && cap_differential < words_size
44
-
45
- false
34
+ uppercase_words.positive? && uppercase_words < @words_and_emoticons.size
46
35
  end
47
36
  end
48
37
  end
@@ -3,6 +3,7 @@
3
3
  module VaderSentimentRuby
4
4
  # Separates positive versus negative sentiment scores
5
5
  class SentimentScoresSifter
6
+ # @param [Array<Float>] sentiments Array of sentiments generated from words
6
7
  def initialize(sentiments)
7
8
  @sentiments = sentiments
8
9
  @pos_sum = 0.0
@@ -10,6 +11,9 @@ module VaderSentimentRuby
10
11
  @neu_count = 0
11
12
  end
12
13
 
14
+ # @return [Array<Float, Float, Integer>]
15
+ # @example
16
+ # [2.3, -3.2, 3]
13
17
  def call
14
18
  @sentiments.each do |sentiment_score|
15
19
  # compensates for neutral words that are counted as 1
@@ -10,16 +10,19 @@ module VaderSentimentRuby
10
10
  compound: 0.0
11
11
  }.freeze
12
12
 
13
+ # @param [Array<Float, Integer>] sentiments Array of sentiments for text
14
+ # @param [String] text
13
15
  def initialize(sentiments, text)
14
16
  @sentiments = sentiments
15
17
  @text = text
16
18
  end
17
19
 
20
+ # @return [Hash<Float, Float, Float, Float>] Semantic score response hash
18
21
  def call
19
- return DEFAULT_RESPONSE unless @sentiments
22
+ return DEFAULT_RESPONSE if @sentiments.empty?
20
23
 
21
24
  sum_s = @sentiments.map(&:to_f).sum
22
- # compute and add emphasis from punctuation in text
25
+ # Compute and add emphasis from punctuation in text
23
26
  punct_emph_amplifier = PunctuationEmphasisAmplifier.new(@text).call
24
27
  compound = normalize(sum_s, punct_emph_amplifier)
25
28
 
@@ -28,9 +31,7 @@ module VaderSentimentRuby
28
31
 
29
32
  private
30
33
 
31
- # Normalize the score to be between -1 and 1 using an alpha that
32
- # approximates the max expected value
33
- # Move to Sentiment analyzer
34
+ # Normalizes the score to be between -1 and 1 using an alpha that approximates the max expected value
34
35
  def normalize(score, punct_emph_amplifier, alpha = 15)
35
36
  score = add_punctuation_emphasis(score, punct_emph_amplifier)
36
37
  norm_score = score / Math.sqrt((score * score) + alpha).to_f
@@ -42,11 +43,8 @@ module VaderSentimentRuby
42
43
  end
43
44
 
44
45
  def add_punctuation_emphasis(sum_s, punct_emph_amplifier)
45
- if sum_s.positive?
46
- sum_s += punct_emph_amplifier
47
- elsif sum_s.negative?
48
- sum_s -= punct_emph_amplifier
49
- end
46
+ return sum_s + punct_emph_amplifier if sum_s.positive?
47
+ return sum_s - punct_emph_amplifier if sum_s.negative?
50
48
 
51
49
  sum_s
52
50
  end
@@ -65,9 +63,9 @@ module VaderSentimentRuby
65
63
  end
66
64
  # rubocop:enable Metrics/AbcSize
67
65
 
68
- # Prepare scores sum for result calculation
66
+ # Prepares score sums for result calculation
69
67
  def scores(punct_emph_amplifier)
70
- # discriminate between positive, negative and neutral sentiment scores
68
+ # Discriminate between positive, negative and neutral sentiment scores
71
69
  pos_sum, neg_sum, neu_count = SentimentScoresSifter.new(@sentiments).call
72
70
 
73
71
  if pos_sum > neg_sum.to_f.abs
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module VaderSentimentRuby
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.1'
5
5
  end
@@ -5,6 +5,7 @@ module VaderSentimentRuby
5
5
  # word_upcase?(word) is similar to Python's word.isupper()
6
6
  # strip_punctuation(word) is similar to Python's word.strip(string.punctuation)
7
7
  module WordHelper
8
+ LETTERS_RANGE = 'A-Za-z'
8
9
  PUNCTUATIONS = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
9
10
 
10
11
  class << self
@@ -16,7 +17,7 @@ module VaderSentimentRuby
16
17
  # word_upcase?(':D') # => true
17
18
  # word_upcase?(':)') # => false
18
19
  def word_upcase?(word)
19
- word == word.upcase && word.count('A-Za-z').positive?
20
+ word == word.upcase && word.count(LETTERS_RANGE).positive?
20
21
  end
21
22
 
22
23
  # Removes all trailing and leading punctuation
@@ -31,62 +32,28 @@ module VaderSentimentRuby
31
32
  # strip_punctuation("'don't'") # => "don't"
32
33
  # strip_punctuation(":)") # => ":)"
33
34
  def strip_punctuation(token)
34
- token_without_punctuation = replace_punctuations(token)
35
-
36
35
  original_set = token.split('')
37
- updated_set = token_without_punctuation.split('')
38
-
39
- pair_array = prepare_match_array(original_set, updated_set)
40
- pair_array = clean_leading_punctuations(pair_array)
41
- pair_array = clean_trailing_punctuations(pair_array)
42
36
 
43
- stripped = pair_array.map { |item| item[:old_ch] }.join
37
+ array = clean_leading_punctuations(original_set)
38
+ array = clean_trailing_punctuations(array)
39
+ stripped_token = array.join
44
40
 
45
- return token if stripped.size <= 2
41
+ return token if stripped_token.size <= 2
46
42
 
47
- stripped
43
+ stripped_token
48
44
  end
49
45
 
50
46
  private
51
47
 
52
- def replace_punctuations(token)
53
- punctuation_array = PUNCTUATIONS.split('')
54
-
55
- punctuation_array.each do |punctuation|
56
- token = token.gsub(punctuation, ' ')
57
- end
58
-
59
- token
60
- end
61
-
62
- def prepare_match_array(original_set, updated_set)
63
- pair_array = []
64
- original_set.each_with_index do |item, index|
65
- pair_array << { index: index, old_ch: item, new_ch: updated_set[index] }
66
- end
67
-
68
- pair_array
48
+ def clean_leading_punctuations(token_array)
49
+ token_array.drop_while { |letter| PUNCTUATIONS.include? letter }
69
50
  end
70
51
 
71
- def clean_leading_punctuations(pair_array)
72
- pair_array.map do |pair|
73
- break if pair[:new_ch] != ' '
74
-
75
- pair_array.delete_at(pair[:index])
76
- end
77
-
78
- pair_array
79
- end
80
-
81
- def clean_trailing_punctuations(pair_array)
82
- reversed_array = pair_array.reverse
83
- reversed_array.map do |pair|
84
- break if pair[:new_ch] != ' '
85
-
86
- pair_array.delete_at(pair[:index])
87
- end
88
-
89
- pair_array
52
+ def clean_trailing_punctuations(token_array)
53
+ token_array
54
+ .reverse
55
+ .drop_while { |letter| PUNCTUATIONS.include? letter }
56
+ .reverse
90
57
  end
91
58
  end
92
59
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vader_sentiment_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nickolay Bulavin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-18 00:00:00.000000000 Z
11
+ date: 2021-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug