sentimenticon 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,41 @@
# frozen_string_literal: true

# Public:
#
# Sentimenticon scores sentiment at the word level and as an average over a
# list of words. Only English is currently supported.
#
# The sentiment data comes from the article "Temporal Patterns of Happiness
# and Information in a Global Social Network: Hedonometrics and Twitter" by
# Peter Sheridan Dodds, Kameron Decker Harris, Isabel M. Kloumann,
# Catherine A. Bliss, and Christopher M. Danforth.
#
# Scores range from -1.0 (most unfavorable) to 1.0 (most favorable).
# Callers must downcase single words, and must downcase and tokenize
# sentences, before passing them in.
#
# A Sentiment object can also be inspected for the original values from the
# Hedonometrics paper.
#
# Examples:
#
#   >> require 'sentimenticon'
#   >> analyzer = Sentimenticon::Analyzer.new; true
#   => true
#   >> analyzer.word_sentiment("love")
#   => 0.855
#   >> analyzer.word_sentiment("terrorist")
#   => -0.925
#   >> analyzer.average_word_sentiment("I love my happy friend.".downcase.scan(/\w+/))
#   => 0.5730000000000001
#   >> analyzer.average_word_sentiment("I hate my terrorist enemy.".downcase.scan(/\w+/))
#   => -0.332
#   >> analyzer.average_word_sentiment("I fear our terrorist enemies.".downcase.scan(/\w+/))
#   => -0.34700000000000003
#
module Sentimenticon
end

require_relative "sentimenticon/version"
require_relative "sentimenticon/models/analyzer"
require_relative "sentimenticon/models/sentiment"
@@ -0,0 +1,97 @@
# frozen_string_literal: true

module Sentimenticon
  # Public: A sentiment analyzer.
  #
  # The word list is read from disk once, when the analyzer is constructed.
  # Assigning a new `language` afterwards does not reload anything unless
  # `load_data` is called again. `minimum_frequency` is stored on the
  # instance but is not consulted by any method of this class.
  class Analyzer
    attr_accessor :language, :minimum_frequency
    attr_reader :sentiments

    # Public: Build an analyzer and immediately load its word list.
    #
    # language          - language code of the word list; only 'en' can be loaded.
    # minimum_frequency - stored as-is (currently unused by this class).
    #
    # Raises ArgumentError if the language is not supported.
    def initialize(language: 'en', minimum_frequency: 1.0e-08)
      @language = language
      @minimum_frequency = minimum_frequency
      @sentiments = {}
      load_data
    end

    # Private: safely parse a float.
    #
    # Returns the Float, or nil when str is nil or not a valid float literal.
    # (Explicit rescue rather than `exception: false`, which needs Ruby 2.6.)
    def self.safe_float(str)
      Float(str)
    rescue ArgumentError, TypeError
      nil
    end

    # Private: safely parse an int.
    #
    # Returns the Integer, or nil when str is nil or not a valid integer literal.
    def self.safe_int(str)
      Integer(str)
    rescue ArgumentError, TypeError
      nil
    end

    # Private: normalize a Likert-scale of 1-9 to -1.0 to 1.0
    #
    # ave - a number between 1.0 and 9.0 inclusive.
    #
    # Returns a Float between -1.0 and 1.0.
    # Raises ArgumentError if ave is outside the 1.0..9.0 range.
    def self.normalize(ave)
      raise ArgumentError, "#{ave} not in range (1.0, 9.0)" unless (1.0..9.0).cover?(ave)

      # Divide by a Float so that Integer input is not truncated.
      ((ave - 1) / 8.0) * 2 - 1.0
    end

    # Private: Find the data directory
    def self.data_directory
      File.join(__dir__, "../../../data")
    end

    # Private: Load data (happens on init)
    #
    # Reads the tab-separated word list and stores one Sentiment per usable
    # row in `sentiments`, keyed by word. Rows that cannot be parsed (for
    # example header rows) are skipped.
    #
    # Returns self.
    # Raises ArgumentError if the language is not 'en'.
    def load_data
      raise ArgumentError, "Don't know how to load #{language}" unless language == 'en'

      path = File.join(Analyzer.data_directory, 'en', 'journal.pone.0026752.s001.txt')
      File.foreach(path) do |line|
        sentiment = parse_line(line)
        @sentiments[sentiment.word] = sentiment if sentiment
      end
      self
    end

    # Public: Is the file loaded?
    def loaded?
      !@sentiments.empty?
    end

    # Public:
    # return the normed sentiment of a word, returning default value if not found. word must be lowercased.
    def word_sentiment(word, default=0.0)
      sentiment = @sentiments[word]
      sentiment ? sentiment.normed_average : default
    end

    # Public:
    # return the average word sentiment of an iterable of words, using 0.0 for unknown words.
    # returns default_sentiment (0.0 unless given) for empty lists.
    def average_word_sentiment(words, default_sentiment=0.0)
      return default_sentiment if words.size.zero?

      words.sum { |word| word_sentiment(word) } / words.size
    end

    # Public: return the full Sentiment object for a word, or nil if the word is unknown.
    def sentiment_object(word)
      @sentiments[word]
    end

    private

    # Private: turn one line of the data file into a Sentiment.
    #
    # A usable row has exactly 8 tab-separated fields with an integer rank and
    # float average / standard deviation. The four trailing columns become nil
    # when they are not integers.
    #
    # Returns a Sentiment, or nil when the row should be skipped.
    def parse_line(line)
      parts = line.strip.split("\t")
      return unless parts.size == 8

      word, rank, average, std, twitter, google, nyt, lyrics = parts
      rank = Analyzer.safe_int(rank)
      average = Analyzer.safe_float(average)
      std = Analyzer.safe_float(std)
      return unless rank && average && std

      Sentiment.new(
        word: word,
        rank: rank,
        normed_average: Analyzer.normalize(average),
        average: average,
        std: std,
        twitter: Analyzer.safe_int(twitter),
        google: Analyzer.safe_int(google),
        nyt: Analyzer.safe_int(nyt),
        lyrics: Analyzer.safe_int(lyrics)
      )
    end
  end
end
@@ -0,0 +1,41 @@
# frozen_string_literal: true

module Sentimenticon
  # Public:
  # Sentiment object. See paper or data for details. The normed average is the
  # normalized average.
  #
  # A plain value holder: every attribute is required at construction (as a
  # keyword argument), stored unchanged, and exposed through a reader and a
  # writer.
  class Sentiment
    attr_accessor :word, :rank, :normed_average, :average, :std,
                  :twitter, :google, :nyt, :lyrics

    # Public: Build a Sentiment.
    #
    # All nine keywords are required; omitting one raises ArgumentError.
    def initialize(word:, rank:, normed_average:, average:, std:, twitter:, google:, nyt:, lyrics:)
      @word = word
      @rank = rank
      @normed_average = normed_average
      @average = average
      @std = std
      @twitter = twitter
      @google = google
      @nyt = nyt
      @lyrics = lyrics
    end
  end
end
@@ -0,0 +1,5 @@
# frozen_string_literal: true

module Sentimenticon
  # Public: version string of the gem; read by the gemspec.
  VERSION = "0.1.0"
end
@@ -0,0 +1,38 @@
# frozen_string_literal: true

# Only the version constant is needed to describe the gem, so load just that
# file instead of the whole library.
require_relative "lib/sentimenticon/version"

Gem::Specification.new do |spec|
  spec.name          = "sentimenticon"
  spec.version       = Sentimenticon::VERSION
  spec.authors       = ["Will Fitzgerald"]
  spec.email         = ["will.fitzgerald@pobox.com"]

  spec.summary       = "Word level sentiment analyzer."
  spec.description   = "Word level sentiment analyzer, currently English only"
  spec.homepage      = "https://github.com/willf/sentimenticon_rb"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/willf/sentimenticon_rb"
  # NOTE(review): GitHub file links normally contain /blob/<branch>/ — confirm
  # this URL resolves before the next release.
  spec.metadata["changelog_uri"] = "https://github.com/willf/sentimenticon_rb/CHANGELOG.md"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(?:test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  # Only "lib" holds Ruby code. The data file ships via spec.files and is
  # located relative to the analyzer source, so "data" does not belong on
  # the load path.
  spec.require_paths = ["lib"]

  spec.add_development_dependency "rspec", "~> 3.2"
end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sentimenticon
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Will Fitzgerald
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-04-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.2'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.2'
27
+ description: Word level sentiment analyzer, currently English only
28
+ email:
29
+ - will.fitzgerald@pobox.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - ".gitignore"
35
+ - CHANGELOG.md
36
+ - Gemfile
37
+ - Gemfile.lock
38
+ - README.md
39
+ - Rakefile
40
+ - bin/console
41
+ - bin/setup
42
+ - data/en/journal.pone.0026752.s001.txt
43
+ - lib/sentimenticon.rb
44
+ - lib/sentimenticon/models/analyzer.rb
45
+ - lib/sentimenticon/models/sentiment.rb
46
+ - lib/sentimenticon/version.rb
47
+ - sentimenticon.gemspec
48
+ homepage: https://github.com/willf/sentimenticon_rb
49
+ licenses: []
50
+ metadata:
51
+ homepage_uri: https://github.com/willf/sentimenticon_rb
52
+ source_code_uri: https://github.com/willf/sentimenticon_rb
53
+ changelog_uri: https://github.com/willf/sentimenticon_rb/CHANGELOG.md
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ - data
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 2.4.0
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubygems_version: 3.0.3
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Word level sentiment analyzer.
74
+ test_files: []