sentimenticon 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +93 -0
- data/README.md +61 -0
- data/Rakefile +12 -0
- data/bin/console +13 -0
- data/bin/setup +8 -0
- data/data/en/journal.pone.0026752.s001.txt +10226 -0
- data/lib/sentimenticon.rb +41 -0
- data/lib/sentimenticon/models/analyzer.rb +97 -0
- data/lib/sentimenticon/models/sentiment.rb +41 -0
- data/lib/sentimenticon/version.rb +5 -0
- data/sentimenticon.gemspec +38 -0
- metadata +74 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Public:
|
4
|
+
#
|
5
|
+
# The sentiment module provides methods for returning word-level and
|
6
|
+
# average word sentiment scores, currently for English only.
|
7
|
+
#
|
8
|
+
# The sentiment data is from the article Temporal Patterns of Happiness and Information in a Global Social Network:
|
9
|
+
# Hedonometrics and Twitter
|
10
|
+
# Peter Sheridan Dodds, Kameron Decker Harris, Isabel M. Kloumann, Catherine A. Bliss, and Christopher M. Danforth
|
11
|
+
#
|
12
|
+
# Sentiments range from -1.0 to 1.0, where -1.0 is the most unfavorable, and 1.0 is the most favorable.
|
13
|
+
# Words must be downcased; sentences must be downcased and tokenized.
|
14
|
+
#
|
15
|
+
# In addition, a Sentiment object can be inspected for the original values from the Hedonometrics paper.
|
16
|
+
#
|
17
|
+
# Examples:
|
18
|
+
#
|
19
|
+
# >> require 'sentimenticon'
|
20
|
+
# >> analyzer = Sentimenticon::Analyzer.new; true
|
21
|
+
# => true
|
22
|
+
# >> analyzer.word_sentiment("love")
|
23
|
+
# => 0.855
|
24
|
+
# >> analyzer.word_sentiment("terrorist")
|
25
|
+
# => -0.925
|
26
|
+
# >> analyzer.average_word_sentiment("I love my happy friend.".downcase.scan(/\w+/))
|
27
|
+
# => 0.5730000000000001
|
28
|
+
# >> analyzer.average_word_sentiment("I hate my terrorist enemy.".downcase.scan(/\w+/))
|
29
|
+
# => -0.332
|
30
|
+
# >> analyzer.average_word_sentiment("I fear our terrorist enemies.".downcase.scan(/\w+/))
|
31
|
+
# => -0.34700000000000003
|
32
|
+
#
|
33
|
+
module Sentimenticon
|
34
|
+
end
|
35
|
+
|
36
|
+
require_relative "sentimenticon/version"
|
37
|
+
require_relative "sentimenticon/models/analyzer"
|
38
|
+
require_relative "sentimenticon/models/sentiment"
|
39
|
+
|
40
|
+
|
41
|
+
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sentimenticon
|
4
|
+
|
5
|
+
# Public: A word-level sentiment analyzer.
#
# On construction the analyzer reads the English word list found under
# Analyzer.data_directory and keeps one Sentiment per word in a Hash keyed
# by the word. Lookups are exact-match on that key, so callers must
# downcase words before querying.
class Analyzer
  # Language code given at construction; only 'en' can be loaded.
  attr_accessor :language
  # Stored as given; no method in this class consults it.
  attr_accessor :minimum_frequency
  # Hash of word (String) => Sentiment, filled by load_data.
  attr_reader :sentiments

  # Public: Build an analyzer and immediately load its word list.
  #
  # language          - language code of the word list (default 'en').
  # minimum_frequency - stored on the instance (default 1.0e-08).
  #
  # Raises ArgumentError if language is anything other than 'en'.
  def initialize(language: 'en', minimum_frequency: 1.0e-08)
    @language = language
    @minimum_frequency = minimum_frequency
    @sentiments = {}
    load_data
  end

  # Private: safely parse a float.
  #
  # Returns the Float, or nil when str is not a valid float literal
  # (ArgumentError) or is not convertible at all, e.g. nil (TypeError).
  def self.safe_float(str)
    Float(str)
  rescue ArgumentError, TypeError
    nil
  end

  # Private: safely parse an int.
  #
  # Returns the Integer, or nil when str is not a valid integer literal
  # (ArgumentError) or is not convertible at all, e.g. nil (TypeError).
  def self.safe_int(str)
    Integer(str)
  rescue ArgumentError, TypeError
    nil
  end

  # Private: normalize a Likert-scale of 1-9 to -1.0 to 1.0.
  #
  # ave - Numeric score between 1.0 and 9.0 inclusive.
  #
  # Returns a Float: 1 maps to -1.0, 5 to 0.0 and 9 to 1.0.
  # Raises ArgumentError if ave lies outside 1.0..9.0.
  def self.normalize(ave)
    raise ArgumentError, "#{ave} not in range (1.0, 9.0)" if ave > 9.0 || ave < 1.0

    # Divide by a Float so that Integer input is not truncated.
    (((ave - 1) / 8.0) * 2) - 1.0
  end

  # Private: Load data (happens on init).
  #
  # Reads the tab-separated word list line by line. A line is used only
  # when it has exactly eight fields and its rank, average and standard
  # deviation all parse as numbers; every other line is skipped. The four
  # trailing rank columns become nil when they are not integers.
  #
  # Returns self.
  # Raises ArgumentError if language is not 'en'.
  def load_data
    raise ArgumentError, "Don't know how to load #{language}" unless language == 'en'

    path = File.join(Analyzer.data_directory, 'en', 'journal.pone.0026752.s001.txt')
    File.open(path) do |io|
      io.each_line do |line|
        parts = line.strip.split("\t")
        next unless parts.size == 8

        word, rank, average, std, twitter, google, nyt, lyrics = parts
        parsed_rank = Analyzer.safe_int(rank)
        parsed_average = Analyzer.safe_float(average)
        parsed_std = Analyzer.safe_float(std)
        next unless parsed_rank && parsed_average && parsed_std

        @sentiments[word] = Sentiment.new(
          word: word,
          rank: parsed_rank,
          normed_average: Analyzer.normalize(parsed_average),
          average: parsed_average,
          std: parsed_std,
          twitter: Analyzer.safe_int(twitter),
          google: Analyzer.safe_int(google),
          nyt: Analyzer.safe_int(nyt),
          lyrics: Analyzer.safe_int(lyrics)
        )
      end
    end
    self
  end

  # Public: Is the file loaded? True once at least one word is stored.
  def loaded?
    @sentiments.size > 0
  end

  # Private: Find the data directory, three levels above this file.
  def self.data_directory
    File.join(__dir__, "../../../data")
  end

  # Public:
  # return the normed sentiment of a word, returning default value if not
  # found. word must be lowercased.
  def word_sentiment(word, default=0.0)
    entry = @sentiments[word]
    entry ? entry.normed_average : default
  end

  # Public:
  # return the average word sentiment of an iterable of words, using 0.0
  # for unknown words. Returns default_sentiment (0.0 unless given) for
  # empty lists.
  def average_word_sentiment(words, default_sentiment=0.0)
    return default_sentiment unless words.size > 0

    words.map { |word| word_sentiment(word) }.sum / words.size
  end

  # Public: return the stored Sentiment for a word, or nil if unknown.
  def sentiment_object(word)
    @sentiments[word]
  end
end
|
97
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Sentimenticon
|
2
|
+
|
3
|
+
# Public:
|
4
|
+
# Sentiment object. See paper or data for details. The normed average is the average rescaled to the range -1.0 to 1.0.
|
5
|
+
class Sentiment
  # One record per word. Every field is a plain read/write attribute and
  # is stored exactly as passed to the constructor.
  attr_accessor :word, :rank, :normed_average, :average, :std,
                :twitter, :google, :nyt, :lyrics

  # Build a record; all nine keyword arguments are required.
  def initialize(word:, rank:, normed_average:, average:, std:, twitter:, google:, nyt:, lyrics:)
    @word, @rank = word, rank
    @normed_average, @average, @std = normed_average, average, std
    @twitter, @google, @nyt, @lyrics = twitter, google, nyt, lyrics
  end
end
|
41
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/sentimenticon"
|
4
|
+
|
5
|
+
# Gem specification for sentimenticon: identity, links, packaged files and
# dependencies.
Gem::Specification.new do |spec|
  spec.name = "sentimenticon"
  spec.version = Sentimenticon::VERSION
  spec.authors = ["Will Fitzgerald"]
  spec.email = ["will.fitzgerald@pobox.com"]

  spec.summary = "Word level sentiment analyzer."
  spec.description = "Word level sentiment analyzer, currently English only"
  spec.homepage = "https://github.com/willf/sentimenticon_rb"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/willf/sentimenticon_rb"
  # Files on GitHub are served under /blob/<ref>/; HEAD resolves to the
  # repository's default branch, so the link does not depend on its name.
  spec.metadata["changelog_uri"] = "https://github.com/willf/sentimenticon_rb/blob/HEAD/CHANGELOG.md"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  # NOTE(review): "data" is on the require path, yet the analyzer locates
  # its word list relative to its own file — confirm this entry is needed.
  spec.require_paths = ["lib", "data"]

  # Uncomment to register a new dependency of your gem
  # spec.add_dependency "example-gem", "~> 1.0"

  spec.add_development_dependency "rspec", "~> 3.2"

  # For more information and examples about making a new gem, checkout our
  # guide at: https://bundler.io/guides/creating_gem.html
end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sentimenticon
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Will Fitzgerald
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-04-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.2'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.2'
|
27
|
+
description: Word level sentiment analyzer, currently English only
|
28
|
+
email:
|
29
|
+
- will.fitzgerald@pobox.com
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- ".gitignore"
|
35
|
+
- CHANGELOG.md
|
36
|
+
- Gemfile
|
37
|
+
- Gemfile.lock
|
38
|
+
- README.md
|
39
|
+
- Rakefile
|
40
|
+
- bin/console
|
41
|
+
- bin/setup
|
42
|
+
- data/en/journal.pone.0026752.s001.txt
|
43
|
+
- lib/sentimenticon.rb
|
44
|
+
- lib/sentimenticon/models/analyzer.rb
|
45
|
+
- lib/sentimenticon/models/sentiment.rb
|
46
|
+
- lib/sentimenticon/version.rb
|
47
|
+
- sentimenticon.gemspec
|
48
|
+
homepage: https://github.com/willf/sentimenticon_rb
|
49
|
+
licenses: []
|
50
|
+
metadata:
|
51
|
+
homepage_uri: https://github.com/willf/sentimenticon_rb
|
52
|
+
source_code_uri: https://github.com/willf/sentimenticon_rb
|
53
|
+
changelog_uri: https://github.com/willf/sentimenticon_rb/CHANGELOG.md
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
- data
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 2.4.0
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubygems_version: 3.0.3
|
71
|
+
signing_key:
|
72
|
+
specification_version: 4
|
73
|
+
summary: Word level sentiment analyzer.
|
74
|
+
test_files: []
|