textmood 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.md +162 -0
- data/bin/textmood +108 -0
- data/lang/en_US.txt +18539 -0
- data/lang/no_NB.txt +9274 -0
- data/lang/symbols.txt +54 -0
- data/lib/textmood.rb +107 -0
- data/test/test.rb +51 -0
- metadata +55 -0
data/lang/symbols.txt
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
-1.0: %-(
|
2
|
+
-1.0: )-:
|
3
|
+
-1.0: ):
|
4
|
+
-1.0: )o:
|
5
|
+
-1.0: 8-0
|
6
|
+
-1.0: 8/
|
7
|
+
-1.0: 8\
|
8
|
+
-1.0: 8c
|
9
|
+
-1.0: :'(
|
10
|
+
-1.0: :'-(
|
11
|
+
-1.0: :(
|
12
|
+
-1.0: :*(
|
13
|
+
-1.0: :,(
|
14
|
+
-1.0: :-(
|
15
|
+
-1.0: :-/
|
16
|
+
-1.0: :-S
|
17
|
+
-1.0: :-\
|
18
|
+
-0.50: :-|
|
19
|
+
-0.50: :/
|
20
|
+
-0.25: :O
|
21
|
+
-0.25: :S
|
22
|
+
-0.25: :\
|
23
|
+
-0.25: :|
|
24
|
+
-1.0: =(
|
25
|
+
-1.0: >:(
|
26
|
+
-1.0: D:
|
27
|
+
1.0: (o;
|
28
|
+
1.00: 8-)
|
29
|
+
1.0: ;)
|
30
|
+
1.0: ;o)
|
31
|
+
1.0: %-)
|
32
|
+
1.0: (-:
|
33
|
+
1.0: (:
|
34
|
+
1.0: (o:
|
35
|
+
1.0: 8)
|
36
|
+
1.0: :)
|
37
|
+
1.0: :-)
|
38
|
+
1.0: :-D
|
39
|
+
1.0: :-P
|
40
|
+
1.0: :D
|
41
|
+
1.0: :P
|
42
|
+
1.0: :P
|
43
|
+
1.0: :]
|
44
|
+
1.0: :o)
|
45
|
+
1.0: :p
|
46
|
+
1.0: ;^)
|
47
|
+
1.0: <3
|
48
|
+
1.0: =)
|
49
|
+
1.0: =]
|
50
|
+
1.0: >:)
|
51
|
+
1.0: >:D
|
52
|
+
1.0: >=D
|
53
|
+
1.0: ^_^
|
54
|
+
1.0: }:)
|
data/lib/textmood.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
# Make UTF-8 the process-wide text encoding. Interpreters from 1.9 on are
# configured through the Encoding API; older ones only understand $KCODE.
if RUBY_VERSION >= '1.9'
  Encoding.default_internal = Encoding::UTF_8
  Encoding.default_external = Encoding::UTF_8
else
  $KCODE = 'u'
end
|
9
|
+
|
10
|
+
# Lexicon-based sentiment scorer.
#
# A lexicon is a plain-text file with one "<score>: <text>" entry per line
# (for example "-1.0: :-("). Text is scored by splitting it on whitespace,
# building word n-grams and summing the lexicon score of every n-gram found.
class TextMood

  # Builds a scorer from a bundled language lexicon or from explicit files.
  #
  # options - Hash of settings (the caller's hash is never modified):
  #   :lang            - name of a lexicon in ../lang relative to this file,
  #                      e.g. "en_US"; takes precedence over :files.
  #   :include_symbols - only an explicit false skips merging symbols.txt
  #                      on top of the :lang lexicon.
  #   :files           - lexicon path or Array of paths, used when :lang is
  #                      not given; later files override earlier ones.
  #   :start_ngram     - smallest n-gram size to score (default 1).
  #   :end_ngram       - largest n-gram size to score (default 1).
  #   :normalize       - when truthy, score_text returns 1, 0 or -1.
  #   :max_threshold   - totals above this normalize to 1 (default 0.5).
  #   :min_threshold   - totals below this normalize to -1 (default -0.5).
  #   :debug           - when truthy, prints every token and its score.
  #
  # Raises ArgumentError when neither :lang nor any file is provided.
  def initialize(options = {})
    # Copy first so the defaults below do not leak into the caller's hash.
    options = options.dup
    options[:max_threshold] ||= 0.5
    options[:min_threshold] ||= -0.5
    options[:start_ngram]   ||= 1
    options[:end_ngram]     ||= 1
    @options = options

    if options[:lang]
      lang_dir = File.join(File.dirname(__FILE__), "..", "lang")
      @sentiment_values = load_sentiment_file(File.join(lang_dir, "#{options[:lang]}.txt"))
      # nil means "not specified", so only an explicit false disables symbols.
      unless options[:include_symbols] == false
        # load the symbols file (emoticons and other symbols)
        @sentiment_values.merge!(load_sentiment_file(File.join(lang_dir, "symbols.txt")))
      end
    else
      # Array() turns a missing :files into [] (so the intended ArgumentError
      # is raised instead of NoMethodError on nil) and wraps a single path.
      files = Array(options[:files])
      raise ArgumentError, "No language or files provided" if files.empty?

      @sentiment_values = {}
      files.each do |file|
        @sentiment_values.merge!(load_sentiment_file(file))
      end
    end
  end

  # Analyzes the sentiment of the provided text.
  #
  # text - anything responding to to_s; nil is scored as empty text.
  #
  # Returns the summed score as a Float, or, when the :normalize option is
  # set, 1 / -1 if the sum is above :max_threshold / below :min_threshold
  # and 0 otherwise.
  def score_text(text)
    string = text.to_s
    sentiment_total = 0.0

    (@options[:start_ngram]..@options[:end_ngram]).each do |size|
      ngrams(size, string).each do |token|
        sentiment_total += score_token(token)
      end
    end

    return sentiment_total unless @options[:normalize]

    if sentiment_total > @options[:max_threshold]
      1
    elsif sentiment_total < @options[:min_threshold]
      -1
    else
      0
    end
  end

  private

  # Looks up the score of a single token (one n-gram).
  #
  # The downcased token is tried verbatim first so that emoticons match;
  # if that fails it is retried with punctuation and symbols removed.
  #
  # Returns the lexicon score, or 0.0 when the token is unknown.
  def score_token(token)
    used_token = token
    sentiment_value = @sentiment_values[token.downcase]

    unless sentiment_value
      # [[:word:]] is Unicode-aware, whereas \w only covers ASCII on
      # Ruby 1.9+ and would also strip letters such as "ø" from the token.
      token_without_symbols = token.gsub(/[^[:word:]\s]+/, "")
      sentiment_value = @sentiment_values[token_without_symbols.downcase]
      used_token = token_without_symbols if sentiment_value
    end

    if sentiment_value
      puts "#{used_token}: #{sentiment_value}" if @options[:debug]
      sentiment_value
    else
      puts "#{used_token}: nil" if @options[:debug]
      0.0
    end
  end

  # Returns every run of n consecutive whitespace-separated words in
  # string, each joined with a single space.
  def ngrams(n, string)
    string.split.each_cons(n).map { |words| words.join(" ") }
  end

  # Loads the specified sentiment file into a hash.
  #
  # path - path of a UTF-8 lexicon file.
  #
  # Returns a Hash mapping downcased entry text to its Float score. Lines
  # that do not look like "<score>: <text>" are ignored.
  def load_sentiment_file(path)
    sentiment_values = {}

    # The block form closes the file even if reading or parsing raises.
    File.open(path, "r:UTF-8") do |sentiment_file|
      sentiment_file.each_line do |line|
        entry = /\s*([\d.-]+):\s*([^\s].*)/.match(line.chomp)
        next unless entry

        # Trailing whitespace would become part of the key and make the
        # entry impossible to match against a token.
        sentiment_values[entry[2].rstrip.downcase] = entry[1].to_f
      end
    end

    sentiment_values
  end

end
|
data/test/test.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#encoding: utf-8
|
3
|
+
|
4
|
+
if RUBY_VERSION < '1.9'
|
5
|
+
$KCODE='u'
|
6
|
+
else
|
7
|
+
Encoding.default_external = Encoding::UTF_8
|
8
|
+
Encoding.default_internal = Encoding::UTF_8
|
9
|
+
end
|
10
|
+
|
11
|
+
require "test/unit"
|
12
|
+
# require "./#{File.dirname(__FILE__)}/../lib/textmood"
|
13
|
+
require "textmood"
|
14
|
+
|
15
|
+
include Test::Unit::Assertions
|
16
|
+
|
17
|
+
# Smoke tests for TextMood scoring with the en_US lexicon.
class TestScorer < Test::Unit::TestCase

  # Creates the scorer under test with default options for en_US.
  def setup
    @scorer = TextMood.new({:lang => "en_US"})
  end

  # Negative text must score strictly below -0.01.
  def test_negative
    max = -0.01
    texts = ["This is just terrible"]
    texts.each do |text|
      actual_score = @scorer.score_text(text)
      assert((actual_score < max), "expected actual: #{actual_score} < max: #{max} for '#{text}'")
    end
  end

  # Neutral text must score strictly between -0.5 and 0.5.
  def test_neutral
    min = -0.5
    max = 0.5
    texts = ["This is neutral"]
    texts.each do |text|
      actual_score = @scorer.score_text(text)
      assert((actual_score < max && actual_score > min), "expected min: #{min} < actual: #{actual_score} < max: #{max} for '#{text}'")
    end
  end

  # Positive text must score at least 0.01.
  def test_positive
    min = 0.01
    texts = ["This is amazing!"]
    texts.each do |text|
      actual_score = @scorer.score_text(text)
      assert((actual_score >= min), "expected actual: #{actual_score} >= min: #{min} for '#{text}'")
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: textmood
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Stian Grytoyr
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-11-08 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Simple sentiment analyzer with CLI tool
|
15
|
+
email: stian@grytoyr.net
|
16
|
+
executables:
|
17
|
+
- textmood
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/textmood.rb
|
22
|
+
- lang/en_US.txt
|
23
|
+
- lang/no_NB.txt
|
24
|
+
- lang/symbols.txt
|
25
|
+
- bin/textmood
|
26
|
+
- test/test.rb
|
27
|
+
- README.md
|
28
|
+
- LICENSE
|
29
|
+
homepage: https://github.com/stiang/textmood
|
30
|
+
licenses:
|
31
|
+
- MIT
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
require_paths:
|
35
|
+
- lib
|
36
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ! '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
requirements: []
|
49
|
+
rubyforge_project:
|
50
|
+
rubygems_version: 1.8.23
|
51
|
+
signing_key:
|
52
|
+
specification_version: 3
|
53
|
+
summary: TextMood
|
54
|
+
test_files:
|
55
|
+
- test/test.rb
|