sentimetnal 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ AFINN is a list of English words rated for valence with an integer
2
+ between minus five (negative) and plus five (positive). The words have
3
+ been manually labeled by Finn Årup Nielsen in 2009-2011. The file
4
+ is tab-separated. There are two versions:
5
+
6
+ AFINN-111: Newest version with 2477 words and phrases.
7
+
8
+ AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there
9
+ are 1480 lines, as some words are listed twice. The word list is not
10
+ entirely in alphabetic ordering.
11
+
12
+ An evaluation of the word list is available in:
13
+
14
+ Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for
15
+ sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903
16
+
17
+ The list was used in:
18
+
19
+ Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni,
20
+ Michael Etter, "Good Friends, Bad News - Affect and Virality in
21
+ Twitter", The 2011 International Workshop on Social Computing,
22
+ Network, and Services (SocialComNet 2011).
23
+
24
+
25
+ This database of words is copyright protected and distributed under
26
+ "Open Database License (ODbL) v1.0"
27
+ http://www.opendatacommons.org/licenses/odbl/1.0/ or a similar
28
+ copyleft license.
29
+
30
+ See comments on the word list here:
31
+ http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis
32
+
33
+
34
+ In Python the file may be read into a dictionary with:
35
+
36
+ >>> afinn = dict(map(lambda (k,v): (k,int(v)),
37
+ [ line.split('\t') for line in open("AFINN-111.txt") ]))
38
+ >>> afinn["Good".lower()]
39
+ 3
40
+ >>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split()))
41
+ 2
42
+
43
+
@@ -0,0 +1,13 @@
1
require 'sentimetnal'
require 'rapgenius'

# Example script: fetches two songs from Rap Genius and prints the sentiment
# score the analyzer assigns to each song's lyrics.

# Builds the text to analyse for one song.
#
# Concatenates the `lyric` of every entry in `song.lines` and removes any
# bracketed text (presumably section markers such as "[Hook]" -- confirm
# against the rapgenius gem's output).
#
# The match is non-greedy on purpose: with a greedy `.*`, two bracketed
# markers on the same line would also delete all the lyrics between them.
def lyrics_text(song)
  song.lines.map(&:lyric).join("").gsub(/\[.*?\]/, "")
end

happy_song = RapGenius::Song.find(139968) # Get Lucky
sad_song   = RapGenius::Song.find(111264) # Run to the Hills

analyzer = Sentimetnal::Analyzer.new

happy_song_sentiment = analyzer.sentiment(lyrics_text(happy_song))
sad_song_sentiment   = analyzer.sentiment(lyrics_text(sad_song))

puts "Get Lucky: #{happy_song_sentiment}"
puts "Run to the Hills: #{sad_song_sentiment}"
@@ -0,0 +1,15 @@
1
require 'sentimetnal'
require 'json'
require 'open-uri'

# Example script: downloads a JSON list of reviews and, for each one, prints
# an excerpt together with its sentiment score, the rating stored in the
# review, and the rating/emoji derived from the sentiment score.

url = "http://tupalo.com/en/api/v1/reviews.json?public_id=2n5s5m"

# URI#read is provided by open-uri. It is used instead of Kernel#open because
# Kernel#open no longer fetches URLs on Ruby 3.0+, where it would try to open
# a local file named after the URL.
reviews = JSON.parse(URI.parse(url).read)

analyzer = Sentimetnal::Analyzer.new

# `each` rather than `map`: the loop is run only for its output.
reviews.each do |review|
  text = review["review"]
  sentiment = analyzer.sentiment(text)
  puts "#{text[0..100]}..."
  puts "sentiment: #{sentiment.round(2)} | tupalo_rating: #{review["rating"]} | sentiment_rating: #{sentiment.to_rating} | emoji: #{sentiment.to_emoji}"
  puts ""
end
@@ -0,0 +1,31 @@
1
+ require "sentimetnal/core_ext/string"
2
+ require "sentimetnal/core_ext/numeric"
3
+ require "sentimetnal/version"
4
+
5
module Sentimetnal
  # Path of the default word list: data/AFINN-111.txt, resolved relative to
  # the parent of the directory that contains this file.
  WORD_LIST = File.join(File.dirname(__dir__), "data", "AFINN-111.txt")

  # Scores text against a word list that maps lowercase words to a numeric
  # valence.
  class Analyzer
    # Hash of lowercase word (String) => valence (Float).
    attr_reader :word_list

    # @param word_list_file [String] path to a tab-separated file with one
    #   "word<TAB>score" entry per line; defaults to the bundled AFINN-111 list.
    def initialize(word_list_file=WORD_LIST)
      @word_list = load_word_list(word_list_file)
    end

    # Computes the sentiment strength of a text.
    #
    # The text is split with String#tokenize, every token is looked up in the
    # word list (unknown tokens count as 0), and the sum of the scores is
    # divided by the square root of the number of tokens.
    #
    # @param string [String] text to score
    # @return [Float] the normalised score; 0.0 when the text has no tokens
    def sentiment(string)
      # Ignore empty tokens so they cannot inflate the normalisation length.
      words = string.tokenize.reject(&:empty?)
      # Without tokens the sum would be nil and the division would raise.
      return 0.0 if words.empty?

      sentiments = words.map { |w| word_list[w] || 0 }
      sentiments.inject(:+) / Math.sqrt(sentiments.size)
    end

    private

    # Reads the word list file into a Hash.
    #
    # Lines that do not contain both a word and a score (blank lines, lines
    # without a tab) are skipped instead of raising. When a word occurs more
    # than once, the last entry wins.
    #
    # @param word_list_file [String] path to the tab-separated word list
    # @return [Hash{String => Float}]
    def load_word_list(word_list_file)
      # File.readlines always treats its argument as a path; IO.readlines
      # would run a command for a name starting with "|".
      lines = File.readlines(word_list_file, :encoding => "UTF-8")
      lines.each_with_object({}) do |line, list|
        word, score = line.chomp.split("\t")
        next if word.nil? || score.nil?

        list[word.downcase] = score.to_f
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
# Core extension that converts a numeric sentiment score into a coarse label.
#
# The outer ranges are open-ended because a score computed as
# sum / sqrt(token count) is not confined to -5..5; with closed outer ranges
# a strongly positive or negative score fell through to the `else` branch and
# was returned as a plain number.
class Numeric
  # Maps the number to an emoji shortcode.
  #
  # @return [String] ":scream:" for values up to and including -1.0,
  #   ":expressionless:" for values above -1.0 up to and including 2.0,
  #   ":smile:" for values above 2.0
  # @return [Numeric] self when the value matches no range (for example NaN)
  def to_emoji
    # Ranges share their boundary values; the first matching branch wins.
    case self
    when (-Float::INFINITY..-1.0) then ":scream:"
    when (-1.0..2.0) then ":expressionless:"
    when (2.0..Float::INFINITY) then ":smile:"
    else
      self
    end
  end

  # Maps the number to a rating from 1 to 5.
  #
  # @return [Integer] 1 up to -3.0, 2 up to -1.0, 3 up to 1.0, 4 up to 3.0
  #   (upper bounds inclusive), 5 above 3.0
  # @return [Numeric] self when the value matches no range (for example NaN)
  def to_rating
    # Ranges share their boundary values; the first matching branch wins.
    case self
    when (-Float::INFINITY..-3.0) then 1
    when (-3.0..-1.0) then 2
    when (-1.0..1.0) then 3
    when (1.0..3.0) then 4
    when (3.0..Float::INFINITY) then 5
    else
      self
    end
  end
end
@@ -0,0 +1,5 @@
1
# Core extension used to split text into words.
class String
  # Splits the string into lowercase word tokens.
  #
  # Each token is a maximal run of word characters (`\w`); everything else
  # acts as a separator. Scanning for words instead of splitting on
  # separators avoids the empty leading token that `split` produces when the
  # string starts with a non-word character.
  #
  # @return [Array<String>] the tokens in order of appearance; empty when the
  #   string contains no word characters
  def tokenize
    downcase.scan(/\w+/)
  end
end
@@ -0,0 +1,3 @@
1
module Sentimetnal
  # Version string of the gem. Frozen so the constant cannot be mutated in
  # place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'sentimetnal/version'
6
+
7
# Gem specification: package metadata, the files to ship and the
# development-time dependencies.
Gem::Specification.new do |gem|
  gem.name     = "sentimetnal"
  gem.version  = Sentimetnal::VERSION
  gem.authors  = ["johdax"]
  gem.email    = ["johdax@gmail.com"]
  gem.homepage = "http://github.com/johdax/sentimetnal"
  gem.license  = "MIT"

  gem.summary     = "Sentiment analyzer"
  gem.description = "Simple Ruby sentiment analyzer based on the AFINN word list. " \
                    "Returns a float value for the sentiment strength of an input text"

  # Ship every file tracked by git; files under spec/ are also the test files.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.test_files    = gem.files.grep(%r{^(spec)/})
  gem.require_paths = ["lib"]

  # Development dependencies, declared in this order.
  {
    "bundler"     => "~> 1.6",
    "rake"        => "~> 10.0",
    "rspec"       => "~> 3",
    "guard"       => "~> 2.6",
    "guard-rspec" => "~> 4.3"
  }.each do |dependency, requirement|
    gem.add_development_dependency dependency, requirement
  end
end
@@ -0,0 +1,58 @@
1
+ require 'sentimetnal'
2
+
3
# Specs for the String#tokenize core extension.
describe "String tokenizer" do
  subject(:tokens) { sentence.tokenize }

  let(:sentence) { "This is a sentence to tokenize. Another sentence; to split into words!" }

  it "returns an array of correct size" do
    expect(tokens).to be_kind_of(Array)
    expect(tokens.size).to eq(12)
  end

  it "the array contains only words" do
    # Punctuation must not survive tokenization ...
    %w[. ; !].each { |mark| expect(tokens).not_to include(mark) }
    # ... while the words come back lowercased.
    %w[this sentence tokenize words].each { |word| expect(tokens).to include(word) }
  end
end
17
+
18
# Specs for Sentimetnal::Analyzer and the Numeric conversions applied to its
# scores.
describe "Sentiment Analyzer" do
  let(:analyzer) { Sentimetnal::Analyzer.new }

  # The redundant inner `describe "Sentiment Analyzer"` wrapper was removed;
  # it only repeated the outer group's name.
  context "AFINN sentiment word list" do
    let(:word_list) { analyzer.word_list }

    it "loads the word list into a hash" do
      expect(word_list).to be_a(Hash)
    end

    # Checks every entry instead of one randomly sampled key, so the example
    # is deterministic and cannot miss a malformed entry.
    it "maps every word to a float value" do
      expect(word_list).not_to be_empty
      expect(word_list.values).to all(be_a(Float))
    end
  end

  context "sentiment analysis" do
    it "returns a float for sentiment strength" do
      expect(analyzer.sentiment("Not sure if this is good or bad?")).to be_a(Float)
    end

    it "returns a positive value for positive valence" do
      expect(analyzer.sentiment("The best & most delicious beef and friendly staff!")).to be > 0
    end

    it "returns a negative value for negative valence" do
      expect(analyzer.sentiment("WTF? Those horrible dishes gave me sleepless nights and stomach ache!")).to be < 0
    end
  end

  context "sentiment strength converter" do
    it "convert into a 5 star rating" do
      expect(analyzer.sentiment("The best & most delicious beef and friendly staff!").to_rating).to eq(4)
    end

    it "convert into a emoji" do
      expect(analyzer.sentiment("WTF? Those horrible dishes gave me sleepless nights and stomach ache!").to_emoji).to eq(":scream:")
    end
  end
end
metadata ADDED
@@ -0,0 +1,134 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sentimetnal
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - johdax
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.6'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.6'
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '4.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '4.3'
83
+ description: Simple Ruby sentiment analyzer based on the AFINN word list. Returns
84
+ a float value for the sentiment strength of an input text
85
+ email:
86
+ - johdax@gmail.com
87
+ executables: []
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - ".travis.yml"
93
+ - Gemfile
94
+ - Guardfile
95
+ - LICENSE.txt
96
+ - README.md
97
+ - Rakefile
98
+ - data/AFINN-111.txt
99
+ - data/AFINN-96.txt
100
+ - data/AFINN-README.txt
101
+ - examples/rapgenius.rb
102
+ - examples/tupalo.rb
103
+ - lib/sentimetnal.rb
104
+ - lib/sentimetnal/core_ext/numeric.rb
105
+ - lib/sentimetnal/core_ext/string.rb
106
+ - lib/sentimetnal/version.rb
107
+ - sentimetnal.gemspec
108
+ - spec/sentimetnal_spec.rb
109
+ homepage: http://github.com/johdax/sentimetnal
110
+ licenses:
111
+ - MIT
112
+ metadata: {}
113
+ post_install_message:
114
+ rdoc_options: []
115
+ require_paths:
116
+ - lib
117
+ required_ruby_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ requirements: []
128
+ rubyforge_project:
129
+ rubygems_version: 2.4.1
130
+ signing_key:
131
+ specification_version: 4
132
+ summary: Sentiment analyzer
133
+ test_files:
134
+ - spec/sentimetnal_spec.rb