sentimenticon 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Public:
4
+ #
5
+ # The sentiment module provides methods for returning word-level and
6
+ # average word sentiment scores, currently for English only.
7
+ #
8
+ # The sentiment data is from the article Temporal Patterns of Happiness and Information in a Global Social Network:
9
+ # Hedonometrics and Twitter
10
+ # Peter Sheridan Dodds, Kameron Decker Harris, Isabel M. Kloumann, Catherine A. Bliss, and Christopher M. Danforth
11
+ #
12
+ # Sentiments range from -1.0 to 1.0, where -1.0 is the most unfavorable, and 1.0 is the most favorable.
13
+ # Words must be downcased; sentences must be downcased and tokenized.
14
+ #
15
+ # In addition, a Sentiment object can be inspected for the original values from the Hedonometrics paper.
16
+ #
17
+ # Examples:
18
+ #
19
+ # >> require 'sentimenticon'
20
+ # >> analyzer = Sentimenticon::Analyzer.new; true
21
+ # => true
22
+ # >> analyzer.word_sentiment("love")
23
+ # => 0.855
24
+ # >> analyzer.word_sentiment("terrorist")
25
+ # => -0.925
26
+ # >> analyzer.average_word_sentiment("I love my happy friend.".downcase.scan(/\w+/))
27
+ # => 0.5730000000000001
28
+ # >> analyzer.average_word_sentiment("I hate my terrorist enemy.".downcase.scan(/\w+/))
29
+ # => -0.332
30
+ # >> analyzer.average_word_sentiment("I fear our terrorist enemies.".downcase.scan(/\w+/))
31
+ # => -0.34700000000000003
32
+ #
33
module Sentimenticon; end
35
+
36
+ require_relative "sentimenticon/version"
37
+ require_relative "sentimenticon/models/analyzer"
38
+ require_relative "sentimenticon/models/sentiment"
39
+
40
+
41
+
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
module Sentimenticon
  # Public: A word-level sentiment analyzer.
  #
  # On construction the analyzer reads the tab-separated English word list
  # from the gem's data directory and keeps one Sentiment per word, keyed by
  # the word exactly as it appears in the file. Lookups do no normalization,
  # so callers must pass words in the same form as the data file.
  class Analyzer
    attr_accessor :language, :minimum_frequency
    attr_reader :sentiments

    # Public: Build an analyzer and load its word list immediately.
    #
    # language          - language code of the word list; only 'en' can be loaded.
    # minimum_frequency - stored on the instance; no method in this class reads it.
    #
    # Raises ArgumentError if language is not 'en'.
    def initialize(language: 'en', minimum_frequency: 1.0e-08)
      @language = language
      @minimum_frequency = minimum_frequency
      @sentiments = {}
      load_data
    end

    # Internal: Parse a float without raising.
    #
    # str - the value to parse, normally a String.
    #
    # Returns the Float, or nil when str is not a valid float (including nil).
    def self.safe_float(str)
      Float(str)
    rescue ArgumentError, TypeError
      # TypeError covers nil and other unconvertible objects; a "safe" parser
      # should report those as nil too rather than raising.
      nil
    end

    # Internal: Parse an integer without raising.
    #
    # str - the value to parse, normally a String.
    #
    # Returns the Integer, or nil when str is not a valid integer (including nil).
    def self.safe_int(str)
      Integer(str)
    rescue ArgumentError, TypeError
      nil
    end

    # Internal: Normalize a Likert-scale score of 1-9 to the range -1.0 to 1.0.
    #
    # ave - a Numeric between 1 and 9 inclusive.
    #
    # Returns a Float: 1 maps to -1.0, 5 to 0.0 and 9 to 1.0.
    # Raises ArgumentError if ave is outside 1.0..9.0.
    def self.normalize(ave)
      raise ArgumentError, "#{ave} not in range (1.0, 9.0)" if ave > 9.0 || ave < 1.0

      # Divide by a Float so Integer input is not truncated by integer division.
      (((ave - 1) / 8.0) * 2) - 1.0
    end

    # Internal: Load the word list for the current language into @sentiments.
    # Called from initialize.
    #
    # Each usable row has eight tab-separated columns: word, rank, average,
    # std, twitter, google, nyt, lyrics. Rows with a different column count,
    # or whose rank, average or std cannot be parsed, are skipped.
    #
    # Returns self.
    # Raises ArgumentError if language is not 'en', or if a row's average is
    # outside 1.0..9.0.
    def load_data
      raise ArgumentError, "Don't know how to load #{language}" unless language == 'en'

      path = File.join(Analyzer.data_directory, 'en', 'journal.pone.0026752.s001.txt')
      File.open(path) do |io|
        io.each_line do |line|
          fields = line.strip.split("\t")
          next unless fields.size == 8

          word, rank, average, std, twitter, google, nyt, lyrics = fields
          rank = Analyzer.safe_int(rank)
          average = Analyzer.safe_float(average)
          std = Analyzer.safe_float(std)
          next unless rank && average && std

          # The four per-corpus columns may fail to parse; they are stored as nil then.
          @sentiments[word] = Sentiment.new(
            word: word,
            rank: rank,
            normed_average: Analyzer.normalize(average),
            average: average,
            std: std,
            twitter: Analyzer.safe_int(twitter),
            google: Analyzer.safe_int(google),
            nyt: Analyzer.safe_int(nyt),
            lyrics: Analyzer.safe_int(lyrics)
          )
        end
      end
      self
    end

    # Public: Has at least one word been loaded?
    #
    # Returns true or false.
    def loaded?
      !@sentiments.empty?
    end

    # Internal: Locate the data directory relative to this source file.
    #
    # Returns the path as a String.
    def self.data_directory
      File.join(__dir__, "../../../data")
    end

    # Public: Look up the normalized sentiment of a single word.
    #
    # word    - the word to look up; must match the form used in the data file.
    # default - value returned when the word is unknown (default: 0.0).
    #
    # Returns the word's normed average, or default if the word is not found.
    def word_sentiment(word, default=0.0)
      sentiment = @sentiments[word]
      sentiment ? sentiment.normed_average : default
    end

    # Public: Average the word sentiment over a collection of words.
    # Unknown words contribute 0.0 to the average.
    #
    # words             - a collection of words responding to size and map.
    # default_sentiment - value returned when words is empty (default: 0.0).
    #
    # Returns the mean sentiment, or default_sentiment for an empty collection.
    def average_word_sentiment(words, default_sentiment=0.0)
      return default_sentiment unless words.size > 0

      words.map { |word| word_sentiment(word) }.sum / words.size
    end

    # Public: Fetch the full Sentiment record for a word.
    #
    # word - the word to look up.
    #
    # Returns the Sentiment, or nil if the word is not found.
    def sentiment_object(word)
      @sentiments[word]
    end
  end
end
@@ -0,0 +1,41 @@
1
# frozen_string_literal: true

module Sentimenticon
  # Public: Sentiment record for a single word. See the paper or the data
  # file for details on each value. The normed average is the normalized
  # form of the average.
  class Sentiment
    attr_accessor :word, :rank, :normed_average, :average, :std,
                  :twitter, :google, :nyt, :lyrics

    # Public: Build a Sentiment. Every keyword is required and is stored
    # unchanged on the attribute of the same name.
    #
    # Raises ArgumentError if any keyword is missing.
    def initialize(word:, rank:, normed_average:, average:, std:, twitter:, google:, nyt:, lyrics:)
      @word = word
      @rank = rank
      @normed_average = normed_average
      @average = average
      @std = std
      @twitter = twitter
      @google = google
      @nyt = nyt
      @lyrics = lyrics
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Sentimenticon
  # Public: Version string of the sentimenticon gem; the gemspec reads it
  # to set the gem version.
  VERSION = "0.1.0"
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
# Only the version constant is needed to build the spec, so load just that
# file instead of the whole library.
require_relative "lib/sentimenticon/version"

Gem::Specification.new do |spec|
  spec.name          = "sentimenticon"
  spec.version       = Sentimenticon::VERSION
  spec.authors       = ["Will Fitzgerald"]
  spec.email         = ["will.fitzgerald@pobox.com"]

  spec.summary       = "Word level sentiment analyzer."
  spec.description   = "Word level sentiment analyzer, currently English only"
  spec.homepage      = "https://github.com/willf/sentimenticon_rb"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  # A file in a GitHub repository is addressed through /blob/<ref>/; HEAD
  # follows the repository's default branch.
  spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/HEAD/CHANGELOG.md"

  # Specify which files should be added to the gem when it is released.
  # `git ls-files -z` lists the files tracked by git, NUL-separated; test
  # directories are excluded.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(?:test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  # NOTE(review): "data" is on the load path although the analyzer locates
  # its data file relative to its own source directory -- confirm whether
  # this entry is still needed.
  spec.require_paths = ["lib", "data"]

  spec.add_development_dependency "rspec", "~> 3.2"

  # For more information and examples about making a new gem, check out the
  # guide at: https://bundler.io/guides/creating_gem.html
end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sentimenticon
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Will Fitzgerald
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-04-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.2'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.2'
27
+ description: Word level sentiment analyzer, currently English only
28
+ email:
29
+ - will.fitzgerald@pobox.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - ".gitignore"
35
+ - CHANGELOG.md
36
+ - Gemfile
37
+ - Gemfile.lock
38
+ - README.md
39
+ - Rakefile
40
+ - bin/console
41
+ - bin/setup
42
+ - data/en/journal.pone.0026752.s001.txt
43
+ - lib/sentimenticon.rb
44
+ - lib/sentimenticon/models/analyzer.rb
45
+ - lib/sentimenticon/models/sentiment.rb
46
+ - lib/sentimenticon/version.rb
47
+ - sentimenticon.gemspec
48
+ homepage: https://github.com/willf/sentimenticon_rb
49
+ licenses: []
50
+ metadata:
51
+ homepage_uri: https://github.com/willf/sentimenticon_rb
52
+ source_code_uri: https://github.com/willf/sentimenticon_rb
53
+ changelog_uri: https://github.com/willf/sentimenticon_rb/CHANGELOG.md
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ - data
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 2.4.0
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubygems_version: 3.0.3
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Word level sentiment analyzer.
74
+ test_files: []