textmood 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.md +162 -0
- data/bin/textmood +108 -0
- data/lang/en_US.txt +18539 -0
- data/lang/no_NB.txt +9274 -0
- data/lang/symbols.txt +54 -0
- data/lib/textmood.rb +107 -0
- data/test/test.rb +51 -0
- metadata +55 -0
data/lang/symbols.txt
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
-1.0: %-(
|
2
|
+
-1.0: )-:
|
3
|
+
-1.0: ):
|
4
|
+
-1.0: )o:
|
5
|
+
-1.0: 8-0
|
6
|
+
-1.0: 8/
|
7
|
+
-1.0: 8\
|
8
|
+
-1.0: 8c
|
9
|
+
-1.0: :'(
|
10
|
+
-1.0: :'-(
|
11
|
+
-1.0: :(
|
12
|
+
-1.0: :*(
|
13
|
+
-1.0: :,(
|
14
|
+
-1.0: :-(
|
15
|
+
-1.0: :-/
|
16
|
+
-1.0: :-S
|
17
|
+
-1.0: :-\
|
18
|
+
-0.50: :-|
|
19
|
+
-0.50: :/
|
20
|
+
-0.25: :O
|
21
|
+
-0.25: :S
|
22
|
+
-0.25: :\
|
23
|
+
-0.25: :|
|
24
|
+
-1.0: =(
|
25
|
+
-1.0: >:(
|
26
|
+
-1.0: D:
|
27
|
+
1.0: (o;
|
28
|
+
1.00: 8-)
|
29
|
+
1.0: ;)
|
30
|
+
1.0: ;o)
|
31
|
+
1.0: %-)
|
32
|
+
1.0: (-:
|
33
|
+
1.0: (:
|
34
|
+
1.0: (o:
|
35
|
+
1.0: 8)
|
36
|
+
1.0: :)
|
37
|
+
1.0: :-)
|
38
|
+
1.0: :-D
|
39
|
+
1.0: :-P
|
40
|
+
1.0: :D
|
41
|
+
1.0: :P
|
42
|
+
1.0: :P
|
43
|
+
1.0: :]
|
44
|
+
1.0: :o)
|
45
|
+
1.0: :p
|
46
|
+
1.0: ;^)
|
47
|
+
1.0: <3
|
48
|
+
1.0: =)
|
49
|
+
1.0: =]
|
50
|
+
1.0: >:)
|
51
|
+
1.0: >:D
|
52
|
+
1.0: >=D
|
53
|
+
1.0: ^_^
|
54
|
+
1.0: }:)
|
data/lib/textmood.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
if RUBY_VERSION < '1.9'
|
4
|
+
$KCODE='u'
|
5
|
+
else
|
6
|
+
Encoding.default_external = Encoding::UTF_8
|
7
|
+
Encoding.default_internal = Encoding::UTF_8
|
8
|
+
end
|
9
|
+
|
10
|
+
# Lexicon-based sentiment scorer.
#
# Sentiment values are read from plain-text files holding one
# "<score>: <token>" entry per line (for example "-1.0: :(" or "1.0: <3").
# Text is split on whitespace into n-grams, every n-gram is looked up in the
# lexicon, and the scores of the matching entries are summed.
class TextMood

  # Builds a scorer.
  #
  # options - Hash of settings. The caller's hash is copied, not modified.
  #   :lang            - name of a bundled lexicon; "<lang>.txt" is read from
  #                      the lang directory that sits next to lib.
  #   :files           - lexicon file path(s), used only when :lang is absent.
  #   :include_symbols - with :lang, pass false to skip lang/symbols.txt.
  #   :start_ngram     - smallest n-gram size to score (default 1).
  #   :end_ngram       - largest n-gram size to score (default 1).
  #   :normalize       - when truthy, score_text returns 1, 0 or -1.
  #   :max_threshold   - normalized result is 1 above this total (default 0.5).
  #   :min_threshold   - normalized result is -1 below this total (default -0.5).
  #   :debug           - when truthy, prints every token with its score.
  #
  # Raises ArgumentError when neither :lang nor any :files are provided.
  def initialize(options = {})
    # work on a copy so the defaults below never leak into (or fail on a
    # frozen) hash owned by the caller
    options = options.dup
    options[:max_threshold] ||= 0.5
    options[:min_threshold] ||= -0.5
    options[:start_ngram] ||= 1
    options[:end_ngram] ||= 1
    @options = options

    files = sentiment_files(options)
    raise ArgumentError, "No language or files provided" if files.empty?

    # later files win when the same token appears more than once
    @sentiment_values = {}
    files.each do |file|
      @sentiment_values.merge!(load_sentiment_file(file))
    end
  end

  # analyzes the sentiment of the provided text.
  #
  # text - anything responding to to_s; nil is scored as an empty string.
  #
  # Returns the summed score (Float), or 1 / 0 / -1 when :normalize is set.
  def score_text(text)
    sentiment_total = 0.0

    (@options[:start_ngram]..@options[:end_ngram]).each do |size|
      ngrams(size, text.to_s).each do |token|
        sentiment_total += score_token(token)
      end
    end

    return sentiment_total unless @options[:normalize]

    if sentiment_total > @options[:max_threshold]
      1
    elsif sentiment_total < @options[:min_threshold]
      -1
    else
      0
    end
  end

  private

  # Returns the list of lexicon files selected by the options: the bundled
  # language file (plus symbols.txt unless disabled) when :lang is given,
  # otherwise whatever :files holds. A missing :files yields an empty list.
  def sentiment_files(options)
    if options[:lang]
      files = [bundled_lexicon(options[:lang])]
      # load the symbols file (emoticons and other symbols)
      files << bundled_lexicon("symbols") unless options[:include_symbols] == false
      files
    else
      Array(options[:files])
    end
  end

  # Path of a lexicon shipped in the lang directory next to lib.
  def bundled_lexicon(name)
    File.dirname(__FILE__) + "/../lang/#{name}.txt"
  end

  # Looks up a single token (n-gram) and returns its score, or 0.0 when the
  # token is unknown.
  def score_token(token)
    # try the downcased token verbatim
    used_token = token
    sentiment_value = @sentiment_values[token.downcase]

    unless sentiment_value
      # try the token without symbols; [:alnum:] is used instead of \w
      # because \w only covers ASCII, which would also strip letters such as
      # "å" from the token
      token_without_symbols = token.gsub(/[^[:alnum:]_\s]+/, "")
      sentiment_value = @sentiment_values[token_without_symbols.downcase]
      used_token = token_without_symbols if sentiment_value
    end

    if sentiment_value
      puts "#{used_token}: #{sentiment_value}" if @options[:debug]
      sentiment_value
    else
      puts "#{used_token}: nil" if @options[:debug]
      0.0
    end
  end

  # Splits the string on whitespace and returns every run of n consecutive
  # words, each joined by a single space.
  def ngrams(n, string)
    string.split.each_cons(n).map { |words| words.join(" ") }
  end

  # load the specified sentiment file into a hash
  #
  # Lines that do not look like "<score>: <token>" are ignored. Keys are the
  # downcased tokens, values the scores as Floats.
  def load_sentiment_file(path)
    sentiment_values = {}

    # block form closes the file even when reading raises
    File.open(path, "r:UTF-8") do |sentiment_file|
      sentiment_file.each_line do |line|
        entry = line.chomp.match(/\s*([\d.-]+):\s*([^\s].*)/)
        next unless entry
        sentiment_values[entry[2].downcase] = entry[1].to_f
      end
    end

    sentiment_values
  end

end
|
data/test/test.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#encoding: utf-8
|
3
|
+
|
4
|
+
if RUBY_VERSION < '1.9'
|
5
|
+
$KCODE='u'
|
6
|
+
else
|
7
|
+
Encoding.default_external = Encoding::UTF_8
|
8
|
+
Encoding.default_internal = Encoding::UTF_8
|
9
|
+
end
|
10
|
+
|
11
|
+
require "test/unit"
|
12
|
+
# require "./#{File.dirname(__FILE__)}/../lib/textmood"
|
13
|
+
require "textmood"
|
14
|
+
|
15
|
+
include Test::Unit::Assertions
|
16
|
+
|
17
|
+
# Smoke tests for TextMood scoring with the en_US language file.
class TestScorer < Test::Unit::TestCase

  # A fresh scorer for every test, using the default options.
  def setup
    @scorer = TextMood.new({:lang => "en_US"})
  end

  # Negative text must score strictly below -0.01.
  def test_negative
    max = -0.01
    texts = ["This is just terrible"]
    texts.each do |text|
      actual_score = @scorer.score_text(text)
      assert((actual_score < max), "actual: #{actual_score} >= max: #{max} for '#{text}'")
    end
  end

  # Neutral text must score strictly between -0.5 and 0.5.
  def test_neutral
    min = -0.5
    max = 0.5
    texts = ["This is neutral"]
    texts.each do |text|
      actual_score = @scorer.score_text(text)
      assert((actual_score < max && actual_score > min), "actual: #{actual_score} not between min: #{min} and max: #{max} for '#{text}'")
    end
  end

  # Positive text must score at least 0.01.
  def test_positive
    min = 0.01
    texts = ["This is amazing!"]
    texts.each do |text|
      actual_score = @scorer.score_text(text)
      assert((actual_score >= min), "actual: #{actual_score} < min: #{min} for '#{text}'")
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: textmood
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Stian Grytoyr
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-11-08 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Simple sentiment analyzer with CLI tool
|
15
|
+
email: stian@grytoyr.net
|
16
|
+
executables:
|
17
|
+
- textmood
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/textmood.rb
|
22
|
+
- lang/en_US.txt
|
23
|
+
- lang/no_NB.txt
|
24
|
+
- lang/symbols.txt
|
25
|
+
- bin/textmood
|
26
|
+
- test/test.rb
|
27
|
+
- README.md
|
28
|
+
- LICENSE
|
29
|
+
homepage: https://github.com/stiang/textmood
|
30
|
+
licenses:
|
31
|
+
- MIT
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
require_paths:
|
35
|
+
- lib
|
36
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ! '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
requirements: []
|
49
|
+
rubyforge_project:
|
50
|
+
rubygems_version: 1.8.23
|
51
|
+
signing_key:
|
52
|
+
specification_version: 3
|
53
|
+
summary: TextMood
|
54
|
+
test_files:
|
55
|
+
- test/test.rb
|