textmood 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lang/symbols.txt ADDED
@@ -0,0 +1,54 @@
1
+ -1.0: %-(
2
+ -1.0: )-:
3
+ -1.0: ):
4
+ -1.0: )o:
5
+ -1.0: 8-0
6
+ -1.0: 8/
7
+ -1.0: 8\
8
+ -1.0: 8c
9
+ -1.0: :'(
10
+ -1.0: :'-(
11
+ -1.0: :(
12
+ -1.0: :*(
13
+ -1.0: :,(
14
+ -1.0: :-(
15
+ -1.0: :-/
16
+ -1.0: :-S
17
+ -1.0: :-\
18
+ -0.50: :-|
19
+ -0.50: :/
20
+ -0.25: :O
21
+ -0.25: :S
22
+ -0.25: :\
23
+ -0.25: :|
24
+ -1.0: =(
25
+ -1.0: >:(
26
+ -1.0: D:
27
+ 1.0: (o;
28
+ 1.00: 8-)
29
+ 1.0: ;)
30
+ 1.0: ;o)
31
+ 1.0: %-)
32
+ 1.0: (-:
33
+ 1.0: (:
34
+ 1.0: (o:
35
+ 1.0: 8)
36
+ 1.0: :)
37
+ 1.0: :-)
38
+ 1.0: :-D
39
+ 1.0: :-P
40
+ 1.0: :D
41
+ 1.0: :P
42
+ 1.0: :P
43
+ 1.0: :]
44
+ 1.0: :o)
45
+ 1.0: :p
46
+ 1.0: ;^)
47
+ 1.0: <3
48
+ 1.0: =)
49
+ 1.0: =]
50
+ 1.0: >:)
51
+ 1.0: >:D
52
+ 1.0: >=D
53
+ 1.0: ^_^
54
+ 1.0: }:)
data/lib/textmood.rb ADDED
@@ -0,0 +1,107 @@
1
+ #encoding: utf-8
2
+
3
# Make UTF-8 the process-wide text encoding. On Ruby 1.9 and newer both the
# default external and default internal encodings are set; older
# interpreters get the $KCODE switch instead.
if RUBY_VERSION >= '1.9'
  Encoding.default_external = Encoding::UTF_8
  Encoding.default_internal = Encoding::UTF_8
else
  $KCODE = 'u'
end
9
+
10
# Lexicon-based sentiment scorer.
#
# A lexicon is a text file with one "<score>: <text>" entry per line (for
# example "-1.0: :(" or "1.0: <3"). Text is scored by splitting it on
# whitespace, building word n-grams, looking each n-gram up in the lexicon
# and summing the scores that were found.
#
# Options (all optional unless noted):
#   :lang            - name of a bundled lexicon, resolved to ../lang/<lang>.txt
#                      relative to this file. Takes precedence over :files.
#   :include_symbols - pass false to skip the bundled symbols lexicon that is
#                      otherwise merged in whenever :lang is used.
#   :files           - path or list of paths of lexicon files to load; required
#                      when :lang is not given.
#   :start_ngram     - smallest n-gram size to score (default 1).
#   :end_ngram       - largest n-gram size to score (default 1).
#   :normalize       - when truthy, score_text returns 1, -1 or 0 instead of
#                      the raw sum.
#   :max_threshold   - raw sums above this normalize to 1 (default 0.5).
#   :min_threshold   - raw sums below this normalize to -1 (default -0.5).
#   :debug           - when truthy, prints every token and its score.
class TextMood

  # Defaults applied to every option the caller leaves unset (nil or false).
  DEFAULT_OPTIONS = {
    :max_threshold => 0.5,
    :min_threshold => -0.5,
    :start_ngram   => 1,
    :end_ngram     => 1
  }.freeze

  # One lexicon entry: capture 1 is the score, capture 2 the text.
  ENTRY_PATTERN = /\s*([\d.-]+):\s*([^\s].*)/

  # Runs of characters that are neither word characters nor whitespace.
  # On Ruby 1.9+ "\w" is ASCII-only and would delete letters such as "å",
  # so the Unicode-aware [[:word:]] class is used there. It is built from a
  # string so that older interpreters never have to parse it.
  SYMBOL_PATTERN = RUBY_VERSION < '1.9' ? /[^\w\s]+/ : Regexp.new('[^[:word:]\s]+')

  # Builds a scorer and loads its lexicon(s).
  #
  # options - Hash of the options described on the class. The hash itself
  #           is not modified.
  #
  # Raises ArgumentError when neither :lang nor any :files entry is given.
  def initialize(options = {})
    # Work on a copy so defaults never leak into the caller's hash.
    @options = options.dup
    DEFAULT_OPTIONS.each { |key, value| @options[key] ||= value }

    if @options[:lang]
      @sentiment_values = load_sentiment_file(lang_path(@options[:lang]))
      unless @options[:include_symbols] == false
        # load the symbols file (emoticons and other symbols)
        @sentiment_values.merge!(load_sentiment_file(lang_path("symbols")))
      end
    else
      # Array() turns a missing :files into [] so the intended ArgumentError
      # is raised instead of a NoMethodError on nil.
      files = Array(@options[:files])
      raise ArgumentError, "No language or files provided" if files.empty?

      @sentiment_values = {}
      files.each { |file| @sentiment_values.merge!(load_sentiment_file(file)) }
    end
  end

  # Analyzes the sentiment of the provided text.
  #
  # text - anything responding to to_s.
  #
  # Returns the summed Float score of all matched n-grams, or, when the
  # :normalize option is set, 1 / -1 / 0 depending on whether the sum is
  # above :max_threshold, below :min_threshold, or in between.
  def score_text(text)
    sentiment_total = 0.0

    (@options[:start_ngram]..@options[:end_ngram]).each do |size|
      ngrams(size, text.to_s).each do |token|
        sentiment_total += score_token(token)
      end
    end

    return sentiment_total unless @options[:normalize]

    if sentiment_total > @options[:max_threshold]
      1
    elsif sentiment_total < @options[:min_threshold]
      -1
    else
      0
    end
  end

  private

  # Path of a bundled lexicon, relative to this source file.
  def lang_path(name)
    File.join(File.dirname(__FILE__), "..", "lang", "#{name}.txt")
  end

  # Looks up a single n-gram. The downcased token is tried verbatim first
  # (so emoticons match); if that fails it is retried with symbol characters
  # removed (so "amazing!" matches "amazing").
  #
  # Returns the lexicon score, or 0.0 when the token is unknown.
  def score_token(token)
    used_token = token
    sentiment_value = @sentiment_values[token.downcase]

    unless sentiment_value
      token_without_symbols = token.gsub(SYMBOL_PATTERN, "")
      sentiment_value = @sentiment_values[token_without_symbols.downcase]
      used_token = token_without_symbols if sentiment_value
    end

    if sentiment_value
      puts "#{used_token}: #{sentiment_value}" if @options[:debug]
      sentiment_value
    else
      puts "#{used_token}: nil" if @options[:debug]
      0.0
    end
  end

  # Returns every run of n consecutive whitespace-separated words in string,
  # each joined with a single space.
  def ngrams(n, string)
    string.split.each_cons(n).map { |words| words.join(" ") }
  end

  # Loads the lexicon at path into a Hash of downcased text => Float score.
  # Lines that do not contain a "<score>: <text>" entry are skipped; later
  # entries for the same text overwrite earlier ones.
  def load_sentiment_file(path)
    sentiment_values = {}

    # Block form guarantees the handle is closed even if parsing raises.
    File.open(path, "r:UTF-8") do |sentiment_file|
      sentiment_file.each_line do |line|
        entry = ENTRY_PATTERN.match(line.chomp)
        next unless entry

        sentiment_values[entry[2].downcase] = entry[1].to_f
      end
    end

    sentiment_values
  end

end
data/test/test.rb ADDED
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+ #encoding: utf-8
3
+
4
# Make UTF-8 the process-wide text encoding for the test run. On Ruby 1.9
# and newer both the default external and default internal encodings are
# set; older interpreters get the $KCODE switch instead.
if RUBY_VERSION >= '1.9'
  Encoding.default_external = Encoding::UTF_8
  Encoding.default_internal = Encoding::UTF_8
else
  $KCODE = 'u'
end
10
+
11
+ require "test/unit"
12
+ # require "./#{File.dirname(__FILE__)}/../lib/textmood"
13
+ require "textmood"
14
+
15
+ include Test::Unit::Assertions
16
+
17
# Smoke tests for TextMood using the "en_US" lexicon: one clearly negative,
# one neutral and one clearly positive sentence, each checked against a
# score bound. Failure messages state the comparison that actually failed.
class TestScorer < Test::Unit::TestCase

  # A fresh scorer per test, with default options apart from the language.
  def setup
    @scorer = TextMood.new({:lang => "en_US"})
  end

  # Negative text must score strictly below the upper bound.
  def test_negative
    max = -0.01
    texts = ["This is just terrible"]
    texts.each do |text|
      actual_score = @scorer.score_text(text)
      assert(actual_score < max, "actual: #{actual_score} >= max: #{max} for '#{text}'")
    end
  end

  # Neutral text must score strictly between the two bounds.
  def test_neutral
    min = -0.5
    max = 0.5
    texts = ["This is neutral"]
    texts.each do |text|
      actual_score = @scorer.score_text(text)
      assert((actual_score > min && actual_score < max),
             "actual: #{actual_score} is not between min: #{min} and max: #{max} for '#{text}'")
    end
  end

  # Positive text must score at or above the lower bound.
  def test_positive
    min = 0.01
    texts = ["This is amazing!"]
    texts.each do |text|
      actual_score = @scorer.score_text(text)
      assert(actual_score >= min, "actual: #{actual_score} < min: #{min} for '#{text}'")
    end
  end

end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: textmood
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Stian Grytoyr
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-11-08 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Simple sentiment analyzer with CLI tool
15
+ email: stian@grytoyr.net
16
+ executables:
17
+ - textmood
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/textmood.rb
22
+ - lang/en_US.txt
23
+ - lang/no_NB.txt
24
+ - lang/symbols.txt
25
+ - bin/textmood
26
+ - test/test.rb
27
+ - README.md
28
+ - LICENSE
29
+ homepage: https://github.com/stiang/textmood
30
+ licenses:
31
+ - MIT
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ requirements: []
49
+ rubyforge_project:
50
+ rubygems_version: 1.8.23
51
+ signing_key:
52
+ specification_version: 3
53
+ summary: TextMood
54
+ test_files:
55
+ - test/test.rb