textmood 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +21 -7
- data/bin/textmood +8 -3
- data/lang/no_NB.txt +5 -22
- data/lib/textmood.rb +56 -14
- metadata +1 -1
data/README.md
CHANGED
@@ -51,7 +51,7 @@ You can use it in a Ruby program like this:
|
|
51
51
|
```ruby
|
52
52
|
require "textmood"
|
53
53
|
|
54
|
-
# The :language parameter makes TextMood use one of the bundled language
|
54
|
+
# The :language parameter makes TextMood use one of the bundled language files
|
55
55
|
tm = TextMood.new(language: "en")
|
56
56
|
score = tm.analyze("some text")
|
57
57
|
#=> '1.121'
|
@@ -115,14 +115,25 @@ score = tm.analyze("some text")
|
|
115
115
|
#=> '0.1'
|
116
116
|
```
|
117
117
|
|
118
|
+
```ruby
# :verbose prints out statistics about the analysis
|
119
|
+
tm = TextMood.new(language: "en", verbose: true)
|
120
|
+
score = tm.analyze("some slightly longer text that contains a few more tokens")
|
121
|
+
#(stdout): Combined score: 1.0 (5 tokens, 0.2 avg.)
|
122
|
+
#(stdout): Negative score: -0.5 (1 tokens, -0.5 avg.)
|
123
|
+
#(stdout): Positive score: 1.5 (4 tokens, 0.375 avg.)
|
124
|
+
#(stdout): Neutral score: 0.0 (0 tokens)
|
125
|
+
#(stdout): Not found: 5 tokens
|
126
|
+
#=> '1.0'
|
127
|
+
```
|
128
|
+
|
118
129
|
#### CLI tool
|
119
|
-
You can also pass some UTF-8-encoded text to the CLI tool and get a score back, like so
|
130
|
+
You can also pass some UTF-8-encoded text to the CLI tool and get a score back, like so:
|
120
131
|
```bash
|
121
132
|
textmood -l en "<some text>"
|
122
133
|
-0.4375
|
123
134
|
```
|
124
135
|
|
125
|
-
Alternatively, you can pipe
|
136
|
+
Alternatively, you can pipe text to textmood on stdin:
|
126
137
|
```bash
|
127
138
|
echo "<some text>" | textmood -l en
|
128
139
|
-0.4375
|
@@ -181,18 +192,21 @@ OPTIONAL options:
|
|
181
192
|
-k, --skip-symbols Do not include symbols file (emoticons etc.). Only applies
|
182
193
|
when using -l/--language.
|
183
194
|
|
184
|
-
-c, --config PATH TO FILE Use the specified config file. If not specified, textmood will
|
185
|
-
for /etc/textmood.cfg and ~/.textmood. Settings in the user
|
186
|
-
will override settings from the global file.
|
195
|
+
-c, --config PATH TO FILE Use the specified config file. If not specified, textmood will
|
196
|
+
look for /etc/textmood.cfg and ~/.textmood. Settings in the user
|
197
|
+
config will override settings from the global file.
|
187
198
|
|
188
199
|
-d, --debug Prints out the score for each token in the provided text
|
189
200
|
or 'nil' if the token was not found in the sentiment file
|
190
201
|
|
202
|
+
-v, --verbose Prints out some useful statistics about the analysis
|
203
|
+
(counts, averages etc).
|
204
|
+
|
191
205
|
-h, --help Show this message
|
192
206
|
```
|
193
207
|
|
194
208
|
### Configuration files for the CLI tool
|
195
|
-
The CLI tool will look for
|
209
|
+
The CLI tool will look for */etc/textmood.cfg* and *~/.textmood* unless the -c/--config option
|
196
210
|
is used, in which case only that file is used. The configuration files are basic, flat
|
197
211
|
YAML files that use the same keys as the library understands:
|
198
212
|
```yaml
|
data/bin/textmood
CHANGED
@@ -116,9 +116,9 @@ opts_parser = OptionParser.new do |opts|
|
|
116
116
|
options[:include_symbols] = false
|
117
117
|
end
|
118
118
|
opts.separator ""
|
119
|
-
opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will
|
120
|
-
"for /etc/textmood.cfg and ~/.textmood. Settings in the user
|
121
|
-
"will override settings from the global file.") do |c|
|
119
|
+
opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will",
|
120
|
+
"look for /etc/textmood.cfg and ~/.textmood. Settings in the user",
|
121
|
+
"config will override settings from the global file.") do |c|
|
122
122
|
options[:config] = c.to_s
|
123
123
|
end
|
124
124
|
opts.separator ""
|
@@ -127,6 +127,11 @@ opts_parser = OptionParser.new do |opts|
|
|
127
127
|
options[:debug] = true
|
128
128
|
end
|
129
129
|
opts.separator ""
|
130
|
+
opts.on("-v", "--verbose", "Prints out some useful statistics about the analysis",
|
131
|
+
"(counts, averages etc).") do |v|
|
132
|
+
options[:verbose] = true
|
133
|
+
end
|
134
|
+
opts.separator ""
|
130
135
|
opts.on_tail("-h", "--help", "Show this message") do
|
131
136
|
puts opts
|
132
137
|
puts ""
|
data/lang/no_NB.txt
CHANGED
@@ -1,53 +1,40 @@
|
|
1
1
|
1.0: episke
|
2
2
|
1.0: god
|
3
|
+
1.0: gode
|
4
|
+
1.0: beste
|
3
5
|
1.0: oppreist
|
4
6
|
0.9: heldig
|
5
7
|
0.8: fantastisk
|
6
8
|
0.8: storøyd
|
7
9
|
0.8: velstående
|
8
|
-
0.8: godt fast
|
9
10
|
0.8: upretensiøs
|
10
|
-
0.8: undervurdert
|
11
11
|
0.8: terapeutisk
|
12
12
|
0.8: selvrespekt
|
13
13
|
0.8: helsebringende
|
14
14
|
0.8: rosenrød i kinnene
|
15
|
-
0.8: utstråle
|
16
|
-
0.8: utstråling
|
17
15
|
0.8: velstand
|
18
16
|
0.8: prinsipiell
|
19
|
-
0.8: moraliserende
|
20
17
|
0.8: mesterstykke
|
21
18
|
0.8: mesterlig
|
22
19
|
0.8: mesterlig
|
23
20
|
0.8: heldig
|
24
21
|
0.8: inspirert
|
25
|
-
0.8: idealisert
|
26
|
-
0.8: idealisert
|
27
|
-
0.8: hylende
|
28
22
|
0.8: sendt fra himmelen
|
29
|
-
0.8: fungicid
|
30
23
|
0.8: eleganse
|
31
|
-
0.8:
|
32
|
-
0.8: viderekoble
|
24
|
+
0.8: elegant
|
33
25
|
0.8: avvæpning
|
34
26
|
0.8: herlig
|
35
27
|
0.8: anstendighet
|
28
|
+
0.8: anstendig
|
36
29
|
0.8: blende
|
37
30
|
0.8: høflige
|
38
|
-
0.8:
|
39
|
-
0.8: skryte
|
40
|
-
0.8: bukking
|
41
|
-
0.8: bonheur
|
31
|
+
0.8: høflig
|
42
32
|
0.8: gunstig
|
43
|
-
0.8: erkeengelen
|
44
33
|
0.8: underholde
|
45
34
|
0.8: minnelig
|
46
35
|
0.8: rosenrød
|
47
|
-
0.8: gjennomgripende
|
48
36
|
0.8: rettferdige
|
49
37
|
0.8: presis
|
50
|
-
0.8: pervertert
|
51
38
|
0.8: integritet
|
52
39
|
0.8: ideell
|
53
40
|
0.8: godhet
|
@@ -59,11 +46,7 @@
|
|
59
46
|
0.7: helhet
|
60
47
|
0.7: velproporsjonert
|
61
48
|
0.7: godt bevart
|
62
|
-
0.7: godt favoriserte
|
63
|
-
0.7: godt favorisert
|
64
49
|
0.7: velvillige
|
65
|
-
0.7: fordreining
|
66
|
-
0.7: bestikkelig
|
67
50
|
0.7: urbanitet
|
68
51
|
0.7: øverste nivå
|
69
52
|
0.7: uaffisert
|
data/lib/textmood.rb
CHANGED
@@ -54,31 +54,71 @@ class TextMood
|
|
54
54
|
# analyzes the sentiment of the provided text.
|
55
55
|
def analyze(text)
|
56
56
|
sentiment_total = 0.0
|
57
|
+
negative_total = 0.0
|
58
|
+
positive_total = 0.0
|
59
|
+
neutral_total = 0.0
|
60
|
+
|
61
|
+
scores_added = 0
|
62
|
+
negative_added = 0
|
63
|
+
positive_added = 0
|
64
|
+
neutral_added = 0
|
65
|
+
not_found = 0
|
57
66
|
|
58
|
-
scores_added = 0
|
59
67
|
(@options[:start_ngram]..@options[:end_ngram]).each do |i|
|
60
68
|
ngrams(i, text.to_s).each do |token|
|
61
69
|
score = score_token(token)
|
62
|
-
|
70
|
+
if score.nil?
|
71
|
+
not_found += 1
|
72
|
+
else
|
63
73
|
sentiment_total += score
|
74
|
+
if score > 0
|
75
|
+
positive_total += score
|
76
|
+
positive_added += 1
|
77
|
+
elsif score < 0
|
78
|
+
negative_total += score
|
79
|
+
negative_added += 1
|
80
|
+
else
|
81
|
+
neutral_total += score
|
82
|
+
neutral_added += 1
|
83
|
+
end
|
64
84
|
scores_added += 1
|
65
85
|
end
|
66
86
|
end
|
67
87
|
end
|
68
88
|
|
69
89
|
if @options[:normalize_score]
|
70
|
-
|
90
|
+
actual_score = normalize_score(sentiment_total, scores_added)
|
91
|
+
else
|
92
|
+
actual_score = sentiment_total
|
71
93
|
end
|
94
|
+
|
95
|
+
if @options[:verbose]
|
96
|
+
puts "" if @options[:debug]
|
97
|
+
combined_avg = (scores_added > 0) ? ", #{(sentiment_total.to_f / scores_added.to_f)} avg." : ""
|
98
|
+
combined_text = "Combined score: #{sentiment_total} (#{scores_added} tokens#{combined_avg})"
|
99
|
+
puts combined_text
|
100
|
+
negative_avg = (negative_added > 0) ? ", #{(negative_total.to_f / negative_added.to_f)} avg." : ""
|
101
|
+
negative_text = "Negative score: #{negative_total} (#{negative_added} tokens#{negative_avg})"
|
102
|
+
puts negative_text
|
103
|
+
positive_avg = (positive_added > 0) ? ", #{(positive_total.to_f / positive_added.to_f)} avg." : ""
|
104
|
+
positive_text = "Positive score: #{positive_total} (#{positive_added} tokens#{positive_avg})"
|
105
|
+
puts positive_text
|
106
|
+
neutral_avg = (neutral_added > 0) ? ", #{(neutral_total.to_f / neutral_added.to_f)} avg." : ""
|
107
|
+
neutral_text = "Neutral score: #{neutral_total} (#{neutral_added} tokens#{neutral_avg})"
|
108
|
+
puts neutral_text
|
109
|
+
puts "Not found: #{not_found} tokens"
|
110
|
+
end
|
111
|
+
|
72
112
|
if @options[:ternary_output]
|
73
|
-
if
|
113
|
+
if actual_score > @options[:max_threshold]
|
74
114
|
1
|
75
|
-
elsif
|
115
|
+
elsif actual_score < @options[:min_threshold]
|
76
116
|
-1
|
77
117
|
else
|
78
118
|
0
|
79
119
|
end
|
80
120
|
else
|
81
|
-
|
121
|
+
actual_score
|
82
122
|
end
|
83
123
|
end
|
84
124
|
|
@@ -99,10 +139,10 @@ class TextMood
|
|
99
139
|
end
|
100
140
|
end
|
101
141
|
if sentiment_value
|
102
|
-
puts "#{used_token}: #{sentiment_value}" if @options[:debug]
|
142
|
+
puts "#{used_token}: #{sentiment_value}" if @options[:debug] and not @options[:skip_found_debug]
|
103
143
|
sentiment_value
|
104
144
|
else
|
105
|
-
puts "#{used_token}: nil" if @options[:debug]
|
145
|
+
puts "#{used_token}: nil" if @options[:debug] and not @options[:skip_not_found_debug]
|
106
146
|
nil
|
107
147
|
end
|
108
148
|
end
|
@@ -117,12 +157,14 @@ class TextMood
|
|
117
157
|
|
118
158
|
sentiment_file = File.new(path, "r:UTF-8")
|
119
159
|
while (line = sentiment_file.gets)
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
160
|
+
unless (line.match(/\s*#/))
|
161
|
+
parsed_line = line.chomp.split(/\s*([\d.-]+):\s*([^\s].*)/)
|
162
|
+
if parsed_line.size == 3
|
163
|
+
score = parsed_line[1]
|
164
|
+
text = parsed_line[2]
|
165
|
+
if score and text
|
166
|
+
sentiment_values[text.downcase] = score.to_f
|
167
|
+
end
|
126
168
|
end
|
127
169
|
end
|
128
170
|
end
|