sentimetnal 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/Guardfile +11 -0
- data/LICENSE.txt +22 -0
- data/README.md +53 -0
- data/Rakefile +33 -0
- data/data/AFINN-111.txt +2477 -0
- data/data/AFINN-96.txt +1480 -0
- data/data/AFINN-README.txt +43 -0
- data/examples/rapgenius.rb +13 -0
- data/examples/tupalo.rb +15 -0
- data/lib/sentimetnal.rb +31 -0
- data/lib/sentimetnal/core_ext/numeric.rb +23 -0
- data/lib/sentimetnal/core_ext/string.rb +5 -0
- data/lib/sentimetnal/version.rb +3 -0
- data/sentimetnal.gemspec +26 -0
- data/spec/sentimetnal_spec.rb +58 -0
- metadata +134 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
AFINN is a list of English words rated for valence with an integer
|
2
|
+
between minus five (negative) and plus five (positive). The words have
|
3
|
+
been manually labeled by Finn Årup Nielsen in 2009-2011. The file
|
4
|
+
is tab-separated. There are two versions:
|
5
|
+
|
6
|
+
AFINN-111: Newest version with 2477 words and phrases.
|
7
|
+
|
8
|
+
AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there
|
9
|
+
are 1480 lines, as some words are listed twice. The word list is not
|
10
|
+
entirely in alphabetic ordering.
|
11
|
+
|
12
|
+
An evaluation of the word list is available in:
|
13
|
+
|
14
|
+
Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for
|
15
|
+
sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903
|
16
|
+
|
17
|
+
The list was used in:
|
18
|
+
|
19
|
+
Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni,
|
20
|
+
Michael Etter, "Good Friends, Bad News - Affect and Virality in
|
21
|
+
Twitter", The 2011 International Workshop on Social Computing,
|
22
|
+
Network, and Services (SocialComNet 2011).
|
23
|
+
|
24
|
+
|
25
|
+
This database of words is copyright protected and distributed under
|
26
|
+
"Open Database License (ODbL) v1.0"
|
27
|
+
http://www.opendatacommons.org/licenses/odbl/1.0/ or a similar
|
28
|
+
copyleft license.
|
29
|
+
|
30
|
+
See comments on the word list here:
|
31
|
+
http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis
|
32
|
+
|
33
|
+
|
34
|
+
In Python the file may be read into a dictionary with:
|
35
|
+
|
36
|
+
>>> afinn = dict(map(lambda (k,v): (k,int(v)),
|
37
|
+
[ line.split('\t') for line in open("AFINN-111.txt") ]))
|
38
|
+
>>> afinn["Good".lower()]
|
39
|
+
3
|
40
|
+
>>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split()))
|
41
|
+
2
|
42
|
+
|
43
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'sentimetnal'
|
2
|
+
require 'rapgenius'
|
3
|
+
|
4
|
+
# Example: compare the sentiment score of two songs fetched from RapGenius.
happy_song = RapGenius::Song.find(139968) # Get Lucky
sad_song = RapGenius::Song.find(111264) # Run to the Hills

analyzer = Sentimetnal::Analyzer.new

# Concatenates a song's lyric lines into one string and removes any
# bracketed text (everything from a "[" to the last "]" on the same line).
plain_lyrics = lambda do |song|
  lyrics = song.lines.map(&:lyric).join("")
  lyrics.gsub(/\[.*\]/,"")
end

happy_song_sentiment = analyzer.sentiment(plain_lyrics.call(happy_song))
sad_song_sentiment = analyzer.sentiment(plain_lyrics.call(sad_song))

puts "Get Lucky: #{happy_song_sentiment}"
puts "Run to the Hills: #{sad_song_sentiment}"
|
data/examples/tupalo.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'sentimetnal'
|
2
|
+
require 'json'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
# Example: score Tupalo reviews and print the score next to the rating
# that came with each review.
url = "http://tupalo.com/en/api/v1/reviews.json?public_id=2n5s5m"

# URI#read is added by open-uri. Unlike Kernel#open with a URL string
# (which stopped working in Ruby 3.0), it behaves the same on old and new Rubies.
reviews = JSON.parse(URI.parse(url).read)

analyzer = Sentimetnal::Analyzer.new

# `each` rather than `map`: the block is run only for its output.
reviews.each do |review|
  sentiment = analyzer.sentiment(review["review"])
  puts "#{review["review"][0..100]}..."
  puts "sentiment: #{sentiment.round(2)} | tupalo_rating: #{review["rating"]} | sentiment_rating: #{sentiment.to_rating} | emoji: #{sentiment.to_emoji}"
  puts ""
end
|
data/lib/sentimetnal.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require "sentimetnal/core_ext/string"
|
2
|
+
require "sentimetnal/core_ext/numeric"
|
3
|
+
require "sentimetnal/version"
|
4
|
+
|
5
|
+
# Word-list based sentiment scoring.
module Sentimetnal
  # Default word list: data/AFINN-111.txt, resolved relative to the parent
  # of the directory that contains this file.
  WORD_LIST = File.join(File.dirname(__dir__), "data", "AFINN-111.txt")

  # Scores a text by summing the valence of every token found in the
  # loaded word list.
  class Analyzer
    # Hash of lower-cased word (String) => valence (Float).
    attr_reader :word_list

    # @param word_list_file [String] path to a tab-separated "word<TAB>score" file
    def initialize(word_list_file=WORD_LIST)
      @word_list = load_word_list(word_list_file)
    end

    # Computes the sentiment strength of +string+.
    #
    # The text is split with String#tokenize (defined outside this block);
    # tokens missing from the word list count as 0. The sum of the scores is
    # divided by the square root of the token count.
    #
    # @param string [String] text to analyze
    # @return [Float] sentiment score; 0.0 when the text yields no tokens
    def sentiment(string)
      scores = string.tokenize.map { |word| word_list[word] || 0 }
      # Without this guard an empty token list makes inject return nil,
      # and the division below raises NoMethodError.
      return 0.0 if scores.empty?

      scores.inject(:+) / Math.sqrt(scores.size)
    end

    private

    # Reads the word list into a Hash.
    #
    # Lines without both a word and a score (e.g. blank lines) are skipped
    # instead of raising. File.readlines is used rather than IO.readlines so
    # a path starting with "|" is never interpreted as a command.
    #
    # @param word_list_file [String] path to the word list
    # @return [Hash{String => Float}]
    def load_word_list(word_list_file)
      lines = File.readlines(word_list_file, :encoding => "UTF-8")
      lines.each_with_object({}) do |line, list|
        word, score = line.chomp.split("\t")
        next if word.nil? || score.nil?

        list[word.downcase] = score.to_f
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Core extension: converts a sentiment score into an emoji code or a
# 1-5 star rating.
#
# All ranges are inclusive and `case` takes the first match, so a value on
# a shared boundary belongs to the lower bucket. The outermost buckets are
# open-ended: scores beyond -5/+5 still map to the lowest/highest bucket
# instead of falling through and returning the raw number.
class Numeric
  # @return [String] ":scream:" (<= -1), ":expressionless:" (up to 2) or
  #   ":smile:" (above 2); self when the value cannot be ordered (e.g. NaN)
  def to_emoji
    case self
    when -Float::INFINITY..-1.0 then ":scream:"
    when -1.0..2.0 then ":expressionless:"
    when 2.0..Float::INFINITY then ":smile:"
    else
      self
    end
  end

  # @return [Integer] rating from 1 (most negative) to 5 (most positive);
  #   self when the value cannot be ordered (e.g. NaN)
  def to_rating
    case self
    when -Float::INFINITY..-3.0 then 1
    when -3.0..-1.0 then 2
    when -1.0..1.0 then 3
    when 1.0..3.0 then 4
    when 3.0..Float::INFINITY then 5
    else
      self
    end
  end
end
|
data/sentimetnal.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# Put the gem's lib directory on the load path so the version file can be
# required while the specification is being evaluated.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) if !$LOAD_PATH.include?(lib_dir)
require 'sentimetnal/version'

Gem::Specification.new do |gem|
  gem.name        = "sentimetnal"
  gem.version     = Sentimetnal::VERSION
  gem.authors     = ["johdax"]
  gem.email       = ["johdax@gmail.com"]
  gem.summary     = "Sentiment analyzer"
  gem.description = "Simple Ruby sentiment analyzer based on the AFINN word list. Returns a float value for the sentiment strength of an input text"
  gem.homepage    = "http://github.com/johdax/sentimetnal"
  gem.license     = "MIT"

  # Package every file tracked by git; files under spec/ are the test files.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files
  gem.test_files    = gem.files.grep(%r{^(spec)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies, declared in the original order.
  {
    "bundler"     => "~> 1.6",
    "rake"        => "~> 10.0",
    "rspec"       => "~> 3",
    "guard"       => "~> 2.6",
    "guard-rspec" => "~> 4.3"
  }.each do |dependency, requirement|
    gem.add_development_dependency dependency, requirement
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'sentimetnal'
|
2
|
+
|
3
|
+
# Checks String#tokenize, the core extension the analyzer uses to split text.
# RSpec.describe is used so the spec also runs when RSpec's global
# monkey patching is disabled.
RSpec.describe "String tokenizer" do
  let(:text) { "This is a sentence to tokenize. Another sentence; to split into words!" }
  let(:tokenized_text) { text.tokenize }

  it "returns an array of correct size" do
    expect(tokenized_text).to be_a(Array)
    # The sample text contains 12 words (repeated words are counted).
    expect(tokenized_text.size).to eq(12)
  end

  it "the array contains only words" do
    expect(tokenized_text).not_to include(".", ";", "!")
    expect(tokenized_text).to include("this", "sentence", "tokenize", "words")
  end
end
|
17
|
+
|
18
|
+
# Checks word-list loading, scoring and the score converters.
# RSpec.describe is used so the spec also runs when RSpec's global
# monkey patching is disabled.
RSpec.describe "Sentiment Analyzer" do
  let(:analyzer) { Sentimetnal::Analyzer.new }

  context "AFINN sentiment word list" do
    let(:word_list) { analyzer.word_list }

    it "loads the word list into a hash" do
      expect(word_list).to be_a(Hash)
    end

    it "returns a numeric value for a specific word" do
      # Check every entry instead of one random sample so the example is
      # deterministic and a single bad entry cannot slip through.
      expect(word_list.values).to all(be_a(Float))
    end
  end

  context "sentiment analysis" do
    it "returns a float for sentiment strength" do
      expect(analyzer.sentiment("Not sure if this is good or bad?")).to be_a(Float)
    end

    it "returns a positive value for positive valence" do
      expect(analyzer.sentiment("The best & most delicious beef and friendly staff!")).to be > 0
    end

    it "returns a negative value for negative valence" do
      expect(analyzer.sentiment("WTF? Those horrible dishes gave me sleepless nights and stomach ache!")).to be < 0
    end
  end

  context "sentiment strength converter" do
    it "convert into a 5 star rating" do
      expect(analyzer.sentiment("The best & most delicious beef and friendly staff!").to_rating).to eq(4)
    end

    it "convert into a emoji" do
      expect(analyzer.sentiment("WTF? Those horrible dishes gave me sleepless nights and stomach ache!").to_emoji).to eq(":scream:")
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sentimetnal
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- johdax
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-08-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: guard
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.6'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2.6'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: guard-rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '4.3'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '4.3'
|
83
|
+
description: Simple Ruby sentiment analyzer based on the AFINN word list. Returns
|
84
|
+
a float value for the sentiment strength of an input text
|
85
|
+
email:
|
86
|
+
- johdax@gmail.com
|
87
|
+
executables: []
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- ".gitignore"
|
92
|
+
- ".travis.yml"
|
93
|
+
- Gemfile
|
94
|
+
- Guardfile
|
95
|
+
- LICENSE.txt
|
96
|
+
- README.md
|
97
|
+
- Rakefile
|
98
|
+
- data/AFINN-111.txt
|
99
|
+
- data/AFINN-96.txt
|
100
|
+
- data/AFINN-README.txt
|
101
|
+
- examples/rapgenius.rb
|
102
|
+
- examples/tupalo.rb
|
103
|
+
- lib/sentimetnal.rb
|
104
|
+
- lib/sentimetnal/core_ext/numeric.rb
|
105
|
+
- lib/sentimetnal/core_ext/string.rb
|
106
|
+
- lib/sentimetnal/version.rb
|
107
|
+
- sentimetnal.gemspec
|
108
|
+
- spec/sentimetnal_spec.rb
|
109
|
+
homepage: http://github.com/johdax/sentimetnal
|
110
|
+
licenses:
|
111
|
+
- MIT
|
112
|
+
metadata: {}
|
113
|
+
post_install_message:
|
114
|
+
rdoc_options: []
|
115
|
+
require_paths:
|
116
|
+
- lib
|
117
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
requirements: []
|
128
|
+
rubyforge_project:
|
129
|
+
rubygems_version: 2.4.1
|
130
|
+
signing_key:
|
131
|
+
specification_version: 4
|
132
|
+
summary: Sentiment analyzer
|
133
|
+
test_files:
|
134
|
+
- spec/sentimetnal_spec.rb
|