sentiment_lib 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in sentiment_lib.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Nicholas Zaillian
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,90 @@
1
+ # SentimentLib
2
+
3
+ A simple, extensible sentiment analysis gem.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'sentiment_lib'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install sentiment_lib
18
+
19
+ ## Usage
20
+
21
+ SentimentLib exposes simple sentiment analysis via SentimentLib::Analyzer.
22
+
23
+ example usage --
24
+
25
+ analyzer = SentimentLib::Analyzer.new
26
+
27
+ analyzer.analyze("I'm feeling confident and excited this morning!")
28
+ => 2.0
29
+
30
+
31
+ The library ships with two analysis strategies (you will likely want to add your own domain-specific strategies):
32
+
33
+ SentimentLib::Analysis::Strategies::BasicDictStrategy
34
+
35
+ and
36
+
37
+ SentimentLib::Analysis::Strategies::FinancialDictStrategy
38
+
39
+ The first (BasicDictStrategy) uses a dictionary file (at lib/sentiment_lib/data/analysis/basic_dict/words.txt) mapping words to decimal weights between -1.0 and 1.0. Its analysis is the average of word weights for the supplied string.
40
+
41
+ The second (FinancialDictStrategy) uses the Loughran and McDonald Financial Sentiment Dictionary with +1 and -1 weights mapped to the dictionary's positive and negative words, respectively. Its analysis is the sum of word weights for the supplied string.
42
+
43
+ The API for adding your own analysis strategies is straightforward:
44
+
45
+ create a subclass of SentimentLib::Analysis::Strategy
46
+
47
+ implement the following public instance methods (all but first optional):
48
+
49
+ required:
50
+
51
+ mappings
52
+ returns a hash mapping token strings to weights
53
+
54
+ ex:
55
+
56
+ {
57
+ "happy" => 1.0,
57
+ "sad" => -1.0
59
+ }
60
+
61
+ The supplied strategies both load text files with
62
+ word-weight mappings and parse them into a hash.
63
+
64
+ optional:
65
+
66
+ normalize(str)
67
+ Normalize the string to be analyzed (applied before tokenization - see below).
68
+ Default is to downcase and replace every non-alphanumeric character with a space.
69
+
70
+ tokenize(str)
71
+ Defines custom method for converting the string being analyzed (str) into
72
+ tokens (default splits on white space).
73
+
74
+ weigh(tokens)
75
+ Define custom method for weighing the collected tokens of the string
76
+ (default is to average token weights).
77
+
78
+
79
+ You can then pass your strategy to new instances of SentimentLib::Analyzer:
80
+
81
+ analyzer = SentimentLib::Analyzer.new(:strategy => MyCustomStrategy.new)
82
+
83
+
84
+ ## Contributing
85
+
86
+ 1. Fork it
87
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
88
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
89
+ 4. Push to the branch (`git push origin my-new-feature`)
90
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,11 @@
module SentimentLib
  # Scores the sentiment of a string by delegating every step to a
  # strategy object.
  class Analyzer
    # Runs +str+ through the strategy's normalize -> tokenize -> weigh
    # pipeline and returns the result of +weigh+.
    #
    # @param str [String] text to score
    # @param opts [Hash] optional settings
    # @option opts [Object] :strategy object responding to +normalize+,
    #   +tokenize+ and +weigh+; when absent a new FinancialDictStrategy
    #   is instantiated for this call
    # @return [Object] whatever the strategy's +weigh+ returns
    def analyze(str, opts={})
      # The strategy is chosen on every call and stored in @strategy, so the
      # instance variable always reflects the most recent call.
      @strategy = opts[:strategy] || ::SentimentLib::Analysis::Strategies::FinancialDictStrategy.new

      tokens = @strategy.tokenize(@strategy.normalize(str))

      @strategy.weigh(tokens)
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require File.expand_path('../../strategy', __FILE__)
2
+
module SentimentLib
  module Analysis
    module Strategies
      # Strategy backed by a tab-separated word list
      # (analysis/basic_dict/words.txt under the gem's data directory).
      # Each line holds a weight followed by the word it applies to.
      # The parsed table is stored once per process in a class variable.
      class BasicDictStrategy < ::SentimentLib::Analysis::Strategy
        @@mappings = nil

        # Loads the word list the first time an instance is created.
        def initialize
          self.class._load_mappings unless @@mappings
        end

        # @return [Hash, nil] word => Float weight table (nil before the
        #   first successful load)
        def mappings
          @@mappings
        end

        # Combines the weights of the tokens found in +mappings+.
        # The result is a plain sum unless averaging is requested.
        #
        # @param tokens [Array<String>] tokens to score; left unmodified
        # @param opts [Hash] optional settings
        # @option opts [Boolean] :average pass +true+ to divide the sum by
        #   the number of recognised tokens
        # @return [Numeric] 0 when no token is recognised, otherwise the
        #   sum (or average) of the recognised tokens' weights
        def weigh(tokens, opts={})
          # Look weights up without deleting from the caller's array.
          weights = tokens.map { |token| mappings[token] }.compact

          return 0 if weights.empty?

          total = weights.inject(0) { |acc, weight| acc + weight }

          if opts[:average] == true
            total / weights.length
          else
            total
          end
        end

        # Parses the word list and publishes it in @@mappings.
        #
        # Kept public: +initialize+ calls it with an explicit receiver, and a
        # bare +private+ keyword has no effect on +def self.+ methods anyway.
        #
        # @return [Hash] the freshly loaded word => weight table
        # @raise [SystemCallError] if the word list cannot be read
        def self._load_mappings
          data_root = File.expand_path('../../../../sentiment_lib/data', __FILE__)

          # Build into a local first so a failed read never leaves a
          # half-filled (but truthy) table behind that would block a retry.
          loaded = {}

          # File.foreach closes the file even when the block raises.
          File.foreach("#{data_root}/analysis/basic_dict/words.txt") do |line|
            weight, word = line.chomp.split("\t")
            next if word.nil? # blank line or line without a tab separator

            loaded[word] = weight.to_f
          end

          @@mappings = loaded
        end
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ require File.expand_path('../../strategy', __FILE__)
2
+
module SentimentLib
  module Analysis
    module Strategies
      # Strategy backed by two comma-separated word lists
      # (analysis/financial_dict/positive.csv and negative.csv under the
      # gem's data directory). The first field of each line is the word;
      # positive words weigh 1.0 and negative words weigh -1.0.
      # The parsed table is stored once per process in a class variable.
      class FinancialDictStrategy < ::SentimentLib::Analysis::Strategy
        @@mappings = nil

        # Loads the word lists the first time an instance is created.
        def initialize
          self.class._load_mappings unless @@mappings
        end

        # @return [Hash, nil] downcased word => Float weight table (nil
        #   before the first successful load)
        def mappings
          @@mappings
        end

        # Combines the weights of the tokens found in +mappings+.
        # The result is a plain sum unless averaging is requested.
        #
        # @param tokens [Array<String>] tokens to score; left unmodified
        # @param opts [Hash] optional settings
        # @option opts [Boolean] :average pass +true+ to divide the sum by
        #   the number of recognised tokens
        # @return [Numeric] 0 when no token is recognised, otherwise the
        #   sum (or average) of the recognised tokens' weights
        def weigh(tokens, opts={})
          # Look weights up without deleting from the caller's array.
          weights = tokens.map { |token| mappings[token] }.compact

          return 0 if weights.empty?

          total = weights.inject(0) { |acc, weight| acc + weight }

          if opts[:average] == true
            total / weights.length
          else
            total
          end
        end

        # Parses both word lists and publishes the result in @@mappings.
        #
        # Kept public: +initialize+ calls it with an explicit receiver, and a
        # bare +private+ keyword has no effect on +def self.+ methods anyway.
        #
        # @return [Hash] the freshly loaded word => weight table
        # @raise [SystemCallError] if either word list cannot be read
        def self._load_mappings
          data_root = File.expand_path('../../../../sentiment_lib/data', __FILE__)

          # Positive list first, negative list second: a word present in
          # both files ends up with the negative weight.
          sources = [
            ["#{data_root}/analysis/financial_dict/positive.csv", 1.0],
            ["#{data_root}/analysis/financial_dict/negative.csv", -1.0]
          ]

          # Build into a local first so a failed read never leaves a
          # half-filled (but truthy) table behind that would block a retry.
          loaded = {}

          sources.each do |path, weight|
            # File.foreach closes the file even when the block raises.
            File.foreach(path) do |line|
              word = line.chomp.split(",").first
              next if word.nil? || word.empty? # blank line or empty first field

              loaded[word.downcase] = weight
            end
          end

          @@mappings = loaded
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
module SentimentLib
  module Analysis
    # Base class for analysis strategies.
    #
    # +weigh+ relies on a +mappings+ method that this class does not define;
    # subclasses are expected to provide it and return a Hash of
    # token => weight.
    class Strategy

      # Prepares a string for tokenization.
      #
      # @param str [String] raw text
      # @return [String] +str+ with every non-alphanumeric character
      #   replaced by a space, then downcased
      def normalize(str)
        str.gsub(/[^[:alnum:]]/, ' ').downcase
      end

      # Splits a normalized string into tokens.
      #
      # @param str [String] normalized text
      # @return [Array<String>] whitespace-separated tokens
      def tokenize(str)
        str.split
      end

      # Default weight: the average weight of the tokens found in
      # +mappings+. Optionally override in derived strategies.
      #
      # @param tokens [Array<String>] tokens to score; left unmodified
      # @return [Numeric] 0 when no token is recognised, otherwise the
      #   average weight of the recognised tokens
      def weigh(tokens)
        # Look weights up without deleting from the caller's array.
        weights = tokens.map { |token| mappings[token] }.compact

        return 0 if weights.empty?

        total = weights.inject(0) { |acc, weight| acc + weight }

        # All-integer weights would otherwise be averaged with truncating
        # integer division (e.g. (1 + 2) / 2 == 1).
        total = total.to_f if total.is_a?(Integer)

        total / weights.length
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ Dir[File.expand_path("../analysis/strategies/**/*.rb", __FILE__)].each { |f| require f }
2
+
module SentimentLib
  # Scores the sentiment of a string by delegating every step to a
  # strategy object.
  class Analyzer
    # The strategy used when +analyze+ is called without a :strategy option.
    attr_accessor :strategy

    # @param opts [Hash] optional settings
    # @option opts [Object] :strategy object responding to +normalize+,
    #   +tokenize+ and +weigh+; defaults to a new FinancialDictStrategy
    def initialize(opts={})
      @strategy = opts[:strategy] || ::SentimentLib::Analysis::Strategies::FinancialDictStrategy.new
    end

    # Runs +str+ through the strategy's normalize -> tokenize -> weigh
    # pipeline and returns the result of +weigh+.
    #
    # @param str [String] text to score
    # @param opts [Hash] optional settings
    # @option opts [Object] :strategy strategy to use for this call only;
    #   the analyzer's configured strategy is left untouched
    # @return [Object] whatever the selected strategy's +weigh+ returns
    def analyze(str, opts={})
      # Honour a per-call override instead of silently ignoring opts.
      active = opts[:strategy] || @strategy

      tokens = active.tokenize(active.normalize(str))

      active.weigh(tokens)
    end
  end
end
@@ -0,0 +1,55 @@
1
+ -1.0 %-(
2
+ -1.0 )-:
3
+ -1.0 ):
4
+ -1.0 )o:
5
+ -1.0 8-0
6
+ -1.0 8/
7
+ -1.0 8\
8
+ -1.0 8c
9
+ -1.0 :'(
10
+ -1.0 :'-(
11
+ -1.0 :(
12
+ -1.0 :*(
13
+ -1.0 :,(
14
+ -1.0 :-(
15
+ -1.0 :-/
16
+ -1.0 :-S
17
+ -1.0 :-\
18
+ -0.50 :-|
19
+ -0.50 :/
20
+ -0.25 :O
21
+ -0.25 :S
22
+ -0.25 :\
23
+ -0.25 :|
24
+ -1.0 =(
25
+ -1.0 >:(
26
+ -1.0 D:
27
+ -1.0 sux
28
+ 1.0 (o;
29
+ 1.00 8-)
30
+ 1.0 ;)
31
+ 1.0 ;o)
32
+ 1.0 %-)
33
+ 1.0 (-:
34
+ 1.0 (:
35
+ 1.0 (o:
36
+ 1.0 8)
37
+ 1.0 :)
38
+ 1.0 :-D
39
+ 1.0 :-P
40
+ 1.0 :D
41
+ 1.0 :P
42
+ 1.0 :P
43
+ 1.0 :]
44
+ 1.0 :o)
45
+ 1.0 :p
46
+ 1.0 ;^)
47
+ 1.0 <3
48
+ 1.0 &lt;3
49
+ 1.0 =)
50
+ 1.0 =]
51
+ 1.0 >:)
52
+ 1.0 >:D
53
+ 1.0 >=D
54
+ 1.0 ^_^
55
+ 1.0 }:)