sentimetnal 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,43 @@
1
+ AFINN is a list of English words rated for valence with an integer
2
+ between minus five (negative) and plus five (positive). The words have
3
+ been manually labeled by Finn Årup Nielsen in 2009-2011. The file
4
+ is tab-separated. There are two versions:
5
+
6
+ AFINN-111: Newest version with 2477 words and phrases.
7
+
8
+ AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there
9
+ are 1480 lines, as some words are listed twice. The word list is not
10
+ entirely in alphabetic ordering.
11
+
12
+ An evaluation of the word list is available in:
13
+
14
+ Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for
15
+ sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903
16
+
17
+ The list was used in:
18
+
19
+ Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni,
20
+ Michael Etter, "Good Friends, Bad News - Affect and Virality in
21
+ Twitter", The 2011 International Workshop on Social Computing,
22
+ Network, and Services (SocialComNet 2011).
23
+
24
+
25
+ This database of words is copyright protected and distributed under
26
+ "Open Database License (ODbL) v1.0"
27
+ http://www.opendatacommons.org/licenses/odbl/1.0/ or a similar
28
+ copyleft license.
29
+
30
+ See comments on the word list here:
31
+ http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis
32
+
33
+
34
+ In Python 2 the file may be read into a dictionary with:
35
+
36
+ >>> afinn = dict(map(lambda (k,v): (k,int(v)),
37
+ [ line.split('\t') for line in open("AFINN-111.txt") ]))
38
+ >>> afinn["Good".lower()]
39
+ 3
40
+ >>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split()))
41
+ 2
42
+
43
+
@@ -0,0 +1,13 @@
1
# Example: score the lyrics of two songs fetched through the rapgenius gem
# and print both sentiment values.
require 'sentimetnal'
require 'rapgenius'

# Concatenates every lyric line of a song and removes bracketed spans.
# `.` does not match newlines, so each match stays within one line and runs
# from the first "[" to the last "]" on that line.
# NOTE(review): presumably this strips markers such as "[Chorus]" - confirm.
lyrics_of = lambda do |song|
  raw_text = song.lines.map(&:lyric).join("")
  raw_text.gsub(/\[.*\]/, "")
end

get_lucky = RapGenius::Song.find(139968)
run_to_the_hills = RapGenius::Song.find(111264)

scorer = Sentimetnal::Analyzer.new

get_lucky_score = scorer.sentiment(lyrics_of.call(get_lucky))
run_to_the_hills_score = scorer.sentiment(lyrics_of.call(run_to_the_hills))

puts "Get Lucky: #{get_lucky_score}"
puts "Run to the Hills: #{run_to_the_hills_score}"
@@ -0,0 +1,15 @@
1
# Example: download reviews from the Tupalo API and print, for each review,
# the computed sentiment next to the rating stored with the review.
require 'sentimetnal'
require 'json'
require 'open-uri'

url = "http://tupalo.com/en/api/v1/reviews.json?public_id=2n5s5m"

# Read through the URI object instead of Kernel#open: open-uri's Kernel#open
# override for URLs was deprecated in Ruby 2.7 and removed in Ruby 3.0, while
# URI#read (OpenURI::OpenRead) works on old and new Rubies alike.
reviews = JSON.parse(URI.parse(url).read)

analyzer = Sentimetnal::Analyzer.new

# `each`, not `map`: the block only prints, its return value is never used.
reviews.each do |review|
  text = review["review"]
  sentiment = analyzer.sentiment(text)

  # Print only the first 101 characters of the review as a preview.
  puts "#{text[0..100]}..."
  puts "sentiment: #{sentiment.round(2)} | tupalo_rating: #{review["rating"]} | sentiment_rating: #{sentiment.to_rating} | emoji: #{sentiment.to_emoji}"
  puts ""
end
@@ -0,0 +1,31 @@
1
+ require "sentimetnal/core_ext/string"
2
+ require "sentimetnal/core_ext/numeric"
3
+ require "sentimetnal/version"
4
+
5
module Sentimetnal
  # Default word list: data/AFINN-111.txt under the parent of this file's
  # directory.
  WORD_LIST = File.join(File.dirname(__dir__), "data", "AFINN-111.txt")

  # Scores text against a tab-separated "term<TAB>valence" word list.
  class Analyzer
    # Hash mapping each downcased term to its valence as a Float.
    attr_reader :word_list

    # @param word_list_file [String] path to a tab-separated word list;
    #   defaults to the bundled AFINN-111 file.
    def initialize(word_list_file=WORD_LIST)
      @word_list = load_word_list(word_list_file)
    end

    # Computes the sentiment strength of +string+: the sum of the valences
    # of its tokens divided by the square root of the token count. Tokens
    # missing from the word list count as 0.
    #
    # Lookup is per token, so multi-word entries in the word list can only
    # match if String#tokenize yields them as a single token.
    #
    # @param string [String] text to analyse (must respond to #tokenize)
    # @return [Float] sentiment strength; 0.0 when the text has no tokens
    def sentiment(string)
      # Drop empty tokens (e.g. produced by leading punctuation) so they do
      # not inflate the normalisation factor.
      scores = string.tokenize.reject(&:empty?).map { |token| word_list[token] || 0 }

      # Without this guard an empty token list would make inject return nil
      # and the division below would raise NoMethodError.
      return 0.0 if scores.empty?

      scores.inject(:+) / Math.sqrt(scores.size)
    end

    private

    # Reads the word list file into a Hash of downcased term => Float.
    # Lines that lack a term or a score column (e.g. blank lines) are skipped
    # instead of raising.
    def load_word_list(word_list_file)
      lines = IO.readlines(word_list_file, :encoding => "UTF-8")
      lines.each_with_object({}) do |line, entries|
        term, score = line.chomp.split("\t")
        next if term.nil? || score.nil?

        entries[term.downcase] = score.to_f
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
# Core extension: converts a numeric sentiment score into an emoji code or a
# 1-5 rating.
class Numeric
  # Maps the number to an emoji short code.
  #
  #   value <= -1.0       => ":scream:"
  #   -1.0 < value <= 2.0 => ":expressionless:"
  #   value > 2.0         => ":smile:"
  #
  # The outer ranges are open-ended so that scores beyond +/-5 still map to
  # an emoji instead of falling through and returning the raw number.
  #
  # @return [String] the emoji code; self only for values that no range can
  #   compare against (e.g. Float::NAN).
  def to_emoji
    case self
    when (-Float::INFINITY..-1.0) then ":scream:"
    when (-1.0..2.0) then ":expressionless:"
    when (2.0..Float::INFINITY) then ":smile:"
    else
      self
    end
  end

  # Maps the number to a rating from 1 to 5 using bands that are two units
  # wide. A boundary value belongs to the lower band because the first
  # matching range wins (e.g. 1.0 => 3, 3.0 => 4).
  #
  # The outer ranges are open-ended so that scores beyond +/-5 saturate at
  # 1 or 5 instead of returning the raw number.
  #
  # @return [Integer] rating 1..5; self only for values that no range can
  #   compare against (e.g. Float::NAN).
  def to_rating
    case self
    when (-Float::INFINITY..-3.0) then 1
    when (-3.0..-1.0) then 2
    when (-1.0..1.0) then 3
    when (1.0..3.0) then 4
    when (3.0..Float::INFINITY) then 5
    else
      self
    end
  end
end
@@ -0,0 +1,5 @@
1
# Core extension used by the sentiment analyzer to split text into words.
class String
  # Splits the string into lowercase word tokens.
  #
  # Tokens are collected with scan rather than split on non-word characters,
  # so text that starts with punctuation no longer produces a leading empty
  # token. The [[:word:]] class is Unicode-aware, which keeps words with
  # non-ASCII letters in one piece instead of cutting them at every
  # non-ASCII character.
  #
  # @return [Array<String>] downcased tokens; empty when there are no words
  def tokenize
    downcase.scan(/[[:word:]]+/)
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the gem; this file only contributes the version constant.
module Sentimetnal
  # Current gem version string.
  VERSION = '0.0.1'
end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+
3
# Put the gem's lib/ directory on the load path so the version file can be
# required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'sentimetnal/version'

Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name          = "sentimetnal"
  gem.version       = Sentimetnal::VERSION
  gem.authors       = ["johdax"]
  gem.email         = ["johdax@gmail.com"]
  gem.summary       = %q{Sentiment analyzer}
  gem.description   = %q{Simple Ruby sentiment analyzer based on the AFINN word list. Returns a float value for the sentiment strength of an input text}
  gem.homepage      = "http://github.com/johdax/sentimetnal"
  gem.license       = "MIT"

  # Package every file tracked by git; anything under spec/ is a test file.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.test_files    = gem.files.grep(%r{^(spec)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies, registered in the listed order.
  {
    "bundler"     => "~> 1.6",
    "rake"        => "~> 10.0",
    "rspec"       => "~> 3",
    "guard"       => "~> 2.6",
    "guard-rspec" => "~> 4.3"
  }.each do |dependency, requirement|
    gem.add_development_dependency dependency, requirement
  end
end
@@ -0,0 +1,58 @@
1
# Specs for the String tokenizer, the AFINN-backed analyzer and the numeric
# converters (rating / emoji).
require 'sentimetnal'

describe "String tokenizer" do
  let(:sample) { "This is a sentence to tokenize. Another sentence; to split into words!" }
  let(:tokens) { sample.tokenize }

  it "returns an array of correct size" do
    expect(tokens).to be_an(Array)
    expect(tokens.size).to eq(12)
  end

  it "the array contains only words" do
    # Punctuation must be dropped, words must come back downcased.
    expect(tokens).not_to include(".", ";", "!")
    expect(tokens).to include("this", "sentence", "tokenize", "words")
  end
end

describe "Sentiment Analyzer" do
  let(:analyzer) { Sentimetnal::Analyzer.new }
  let(:positive_review) { "The best & most delicious beef and friendly staff!" }
  let(:negative_review) { "WTF? Those horrible dishes gave me sleepless nights and stomach ache!" }

  describe "Sentiment Analyzer" do
    context "AFINN sentiment word list" do
      let(:word_list) { analyzer.word_list }

      it "loads the word list into a hash" do
        expect(word_list).to be_a_kind_of(Hash)
      end

      it "returns a numeric value for a specific word" do
        # Any entry will do: every stored valence is expected to be a Float.
        random_word = word_list.keys.sample
        expect(word_list[random_word]).to be_a(Float)
      end
    end
  end

  context "sentiment analysis" do
    it "returns a float for sentiment strength" do
      score = analyzer.sentiment("Not sure if this is good or bad?")
      expect(score).to be_a(Float)
    end

    it "returns a positive value for positive valence" do
      expect(analyzer.sentiment(positive_review)).to be > 0
    end

    it "returns a negative value for negative valence" do
      expect(analyzer.sentiment(negative_review)).to be < 0
    end
  end

  context "sentiment strength converter" do
    it "convert into a 5 star rating" do
      rating = analyzer.sentiment(positive_review).to_rating
      expect(rating).to eq(4)
    end

    it "convert into a emoji" do
      emoji = analyzer.sentiment(negative_review).to_emoji
      expect(emoji).to eq(":scream:")
    end
  end
end
metadata ADDED
@@ -0,0 +1,134 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sentimetnal
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - johdax
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.6'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.6'
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '4.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '4.3'
83
+ description: Simple Ruby sentiment analyzer based on the AFINN word list. Returns
84
+ a float value for the sentiment strength of an input text
85
+ email:
86
+ - johdax@gmail.com
87
+ executables: []
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - ".travis.yml"
93
+ - Gemfile
94
+ - Guardfile
95
+ - LICENSE.txt
96
+ - README.md
97
+ - Rakefile
98
+ - data/AFINN-111.txt
99
+ - data/AFINN-96.txt
100
+ - data/AFINN-README.txt
101
+ - examples/rapgenius.rb
102
+ - examples/tupalo.rb
103
+ - lib/sentimetnal.rb
104
+ - lib/sentimetnal/core_ext/numeric.rb
105
+ - lib/sentimetnal/core_ext/string.rb
106
+ - lib/sentimetnal/version.rb
107
+ - sentimetnal.gemspec
108
+ - spec/sentimetnal_spec.rb
109
+ homepage: http://github.com/johdax/sentimetnal
110
+ licenses:
111
+ - MIT
112
+ metadata: {}
113
+ post_install_message:
114
+ rdoc_options: []
115
+ require_paths:
116
+ - lib
117
+ required_ruby_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ requirements: []
128
+ rubyforge_project:
129
+ rubygems_version: 2.4.1
130
+ signing_key:
131
+ specification_version: 4
132
+ summary: Sentiment analyzer
133
+ test_files:
134
+ - spec/sentimetnal_spec.rb