textmood 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +21 -7
- data/bin/textmood +8 -3
- data/lang/no_NB.txt +5 -22
- data/lib/textmood.rb +56 -14
- metadata +1 -1
data/README.md
CHANGED
@@ -51,7 +51,7 @@ You can use it in a Ruby program like this:
|
|
51
51
|
```ruby
|
52
52
|
require "textmood"
|
53
53
|
|
54
|
-
# The :language parameter makes TextMood use one of the bundled language
|
54
|
+
# The :language parameter makes TextMood use one of the bundled language files
|
55
55
|
tm = TextMood.new(language: "en")
|
56
56
|
score = tm.analyze("some text")
|
57
57
|
#=> '1.121'
|
@@ -115,14 +115,25 @@ score = tm.analyze("some text")
|
|
115
115
|
#=> '0.1'
|
116
116
|
```
|
117
117
|
|
118
|
+
# :verbose prints out statistics about the analysis
|
119
|
+
tm = TextMood.new(language: "en", verbose: true)
|
120
|
+
score = tm.analyze("some slightly longer text that contains a few more tokens")
|
121
|
+
#(stdout): Combined score: 1.0 (5 tokens, 0.2 avg.)
|
122
|
+
#(stdout): Negative score: -0.5 (1 tokens, -0.5 avg.)
|
123
|
+
#(stdout): Positive score: 1.5 (4 tokens, 0.375 avg.)
|
124
|
+
#(stdout): Neutral score: 0.0 (0 tokens)
|
125
|
+
#(stdout): Not found: 5 tokens
|
126
|
+
#=> '1.0'
|
127
|
+
```
|
128
|
+
|
118
129
|
#### CLI tool
|
119
|
-
You can also pass some UTF-8-encoded text to the CLI tool and get a score back, like so
|
130
|
+
You can also pass some UTF-8-encoded text to the CLI tool and get a score back, like so:
|
120
131
|
```bash
|
121
132
|
textmood -l en "<some text>"
|
122
133
|
-0.4375
|
123
134
|
```
|
124
135
|
|
125
|
-
Alternatively, you can pipe
|
136
|
+
Alternatively, you can pipe text to textmood on stdin:
|
126
137
|
```bash
|
127
138
|
echo "<some text>" | textmood -l en
|
128
139
|
-0.4375
|
@@ -181,18 +192,21 @@ OPTIONAL options:
|
|
181
192
|
-k, --skip-symbols Do not include symbols file (emoticons etc.). Only applies
|
182
193
|
when using -l/--language.
|
183
194
|
|
184
|
-
-c, --config PATH TO FILE Use the specified config file. If not specified, textmood will
|
185
|
-
for /etc/textmood.cfg and ~/.textmood. Settings in the user
|
186
|
-
will override settings from the global file.
|
195
|
+
-c, --config PATH TO FILE Use the specified config file. If not specified, textmood will
|
196
|
+
look for /etc/textmood.cfg and ~/.textmood. Settings in the user
|
197
|
+
config will override settings from the global file.
|
187
198
|
|
188
199
|
-d, --debug Prints out the score for each token in the provided text
|
189
200
|
or 'nil' if the token was not found in the sentiment file
|
190
201
|
|
202
|
+
-v, --verbose Prints out some useful statistics about the analysis
|
203
|
+
(counts, averages etc).
|
204
|
+
|
191
205
|
-h, --help Show this message
|
192
206
|
```
|
193
207
|
|
194
208
|
### Configuration files for the CLI tool
|
195
|
-
The CLI tool will look for
|
209
|
+
The CLI tool will look for */etc/textmood.cfg* and *~/.textmood* unless the -c/--config option
|
196
210
|
is used, in which case only that file is used. The configuration files are basic, flat
|
197
211
|
YAML files that use the same keys as the library understands:
|
198
212
|
```yaml
|
data/bin/textmood
CHANGED
@@ -116,9 +116,9 @@ opts_parser = OptionParser.new do |opts|
|
|
116
116
|
options[:include_symbols] = false
|
117
117
|
end
|
118
118
|
opts.separator ""
|
119
|
-
opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will
|
120
|
-
"for /etc/textmood.cfg and ~/.textmood. Settings in the user
|
121
|
-
"will override settings from the global file.") do |c|
|
119
|
+
opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will",
|
120
|
+
"look for /etc/textmood.cfg and ~/.textmood. Settings in the user",
|
121
|
+
"config will override settings from the global file.") do |c|
|
122
122
|
options[:config] = c.to_s
|
123
123
|
end
|
124
124
|
opts.separator ""
|
@@ -127,6 +127,11 @@ opts_parser = OptionParser.new do |opts|
|
|
127
127
|
options[:debug] = true
|
128
128
|
end
|
129
129
|
opts.separator ""
|
130
|
+
opts.on("-v", "--verbose", "Prints out some useful statistics about the analysis",
|
131
|
+
"(counts, averages etc).") do |v|
|
132
|
+
options[:verbose] = true
|
133
|
+
end
|
134
|
+
opts.separator ""
|
130
135
|
opts.on_tail("-h", "--help", "Show this message") do
|
131
136
|
puts opts
|
132
137
|
puts ""
|
data/lang/no_NB.txt
CHANGED
@@ -1,53 +1,40 @@
|
|
1
1
|
1.0: episke
|
2
2
|
1.0: god
|
3
|
+
1.0: gode
|
4
|
+
1.0: beste
|
3
5
|
1.0: oppreist
|
4
6
|
0.9: heldig
|
5
7
|
0.8: fantastisk
|
6
8
|
0.8: storøyd
|
7
9
|
0.8: velstående
|
8
|
-
0.8: godt fast
|
9
10
|
0.8: upretensiøs
|
10
|
-
0.8: undervurdert
|
11
11
|
0.8: terapeutisk
|
12
12
|
0.8: selvrespekt
|
13
13
|
0.8: helsebringende
|
14
14
|
0.8: rosenrød i kinnene
|
15
|
-
0.8: utstråle
|
16
|
-
0.8: utstråling
|
17
15
|
0.8: velstand
|
18
16
|
0.8: prinsipiell
|
19
|
-
0.8: moraliserende
|
20
17
|
0.8: mesterstykke
|
21
18
|
0.8: mesterlig
|
22
19
|
0.8: mesterlig
|
23
20
|
0.8: heldig
|
24
21
|
0.8: inspirert
|
25
|
-
0.8: idealisert
|
26
|
-
0.8: idealisert
|
27
|
-
0.8: hylende
|
28
22
|
0.8: sendt fra himmelen
|
29
|
-
0.8: fungicid
|
30
23
|
0.8: eleganse
|
31
|
-
0.8:
|
32
|
-
0.8: viderekoble
|
24
|
+
0.8: elegant
|
33
25
|
0.8: avvæpning
|
34
26
|
0.8: herlig
|
35
27
|
0.8: anstendighet
|
28
|
+
0.8: anstendig
|
36
29
|
0.8: blende
|
37
30
|
0.8: høflige
|
38
|
-
0.8:
|
39
|
-
0.8: skryte
|
40
|
-
0.8: bukking
|
41
|
-
0.8: bonheur
|
31
|
+
0.8: høflig
|
42
32
|
0.8: gunstig
|
43
|
-
0.8: erkeengelen
|
44
33
|
0.8: underholde
|
45
34
|
0.8: minnelig
|
46
35
|
0.8: rosenrød
|
47
|
-
0.8: gjennomgripende
|
48
36
|
0.8: rettferdige
|
49
37
|
0.8: presis
|
50
|
-
0.8: pervertert
|
51
38
|
0.8: integritet
|
52
39
|
0.8: ideell
|
53
40
|
0.8: godhet
|
@@ -59,11 +46,7 @@
|
|
59
46
|
0.7: helhet
|
60
47
|
0.7: velproporsjonert
|
61
48
|
0.7: godt bevart
|
62
|
-
0.7: godt favoriserte
|
63
|
-
0.7: godt favorisert
|
64
49
|
0.7: velvillige
|
65
|
-
0.7: fordreining
|
66
|
-
0.7: bestikkelig
|
67
50
|
0.7: urbanitet
|
68
51
|
0.7: øverste nivå
|
69
52
|
0.7: uaffisert
|
data/lib/textmood.rb
CHANGED
@@ -54,31 +54,71 @@ class TextMood
|
|
54
54
|
# analyzes the sentiment of the provided text.
|
55
55
|
def analyze(text)
|
56
56
|
sentiment_total = 0.0
|
57
|
+
negative_total = 0.0
|
58
|
+
positive_total = 0.0
|
59
|
+
neutral_total = 0.0
|
60
|
+
|
61
|
+
scores_added = 0
|
62
|
+
negative_added = 0
|
63
|
+
positive_added = 0
|
64
|
+
neutral_added = 0
|
65
|
+
not_found = 0
|
57
66
|
|
58
|
-
scores_added = 0
|
59
67
|
(@options[:start_ngram]..@options[:end_ngram]).each do |i|
|
60
68
|
ngrams(i, text.to_s).each do |token|
|
61
69
|
score = score_token(token)
|
62
|
-
|
70
|
+
if score.nil?
|
71
|
+
not_found += 1
|
72
|
+
else
|
63
73
|
sentiment_total += score
|
74
|
+
if score > 0
|
75
|
+
positive_total += score
|
76
|
+
positive_added += 1
|
77
|
+
elsif score < 0
|
78
|
+
negative_total += score
|
79
|
+
negative_added += 1
|
80
|
+
else
|
81
|
+
neutral_total += score
|
82
|
+
neutral_added += 1
|
83
|
+
end
|
64
84
|
scores_added += 1
|
65
85
|
end
|
66
86
|
end
|
67
87
|
end
|
68
88
|
|
69
89
|
if @options[:normalize_score]
|
70
|
-
|
90
|
+
actual_score = normalize_score(sentiment_total, scores_added)
|
91
|
+
else
|
92
|
+
actual_score = sentiment_total
|
71
93
|
end
|
94
|
+
|
95
|
+
if @options[:verbose]
|
96
|
+
puts "" if @options[:debug]
|
97
|
+
combined_avg = (scores_added > 0) ? ", #{(sentiment_total.to_f / scores_added.to_f)} avg." : ""
|
98
|
+
combined_text = "Combined score: #{sentiment_total} (#{scores_added} tokens#{combined_avg})"
|
99
|
+
puts combined_text
|
100
|
+
negative_avg = (negative_added > 0) ? ", #{(negative_total.to_f / negative_added.to_f)} avg." : ""
|
101
|
+
negative_text = "Negative score: #{negative_total} (#{negative_added} tokens#{negative_avg})"
|
102
|
+
puts negative_text
|
103
|
+
positive_avg = (positive_added > 0) ? ", #{(positive_total.to_f / positive_added.to_f)} avg." : ""
|
104
|
+
positive_text = "Positive score: #{positive_total} (#{positive_added} tokens#{positive_avg})"
|
105
|
+
puts positive_text
|
106
|
+
neutral_avg = (neutral_added > 0) ? ", #{(neutral_total.to_f / neutral_added.to_f)} avg." : ""
|
107
|
+
neutral_text = "Neutral score: #{neutral_total} (#{neutral_added} tokens#{neutral_avg})"
|
108
|
+
puts neutral_text
|
109
|
+
puts "Not found: #{not_found} tokens"
|
110
|
+
end
|
111
|
+
|
72
112
|
if @options[:ternary_output]
|
73
|
-
if
|
113
|
+
if actual_score > @options[:max_threshold]
|
74
114
|
1
|
75
|
-
elsif
|
115
|
+
elsif actual_score < @options[:min_threshold]
|
76
116
|
-1
|
77
117
|
else
|
78
118
|
0
|
79
119
|
end
|
80
120
|
else
|
81
|
-
|
121
|
+
actual_score
|
82
122
|
end
|
83
123
|
end
|
84
124
|
|
@@ -99,10 +139,10 @@ class TextMood
|
|
99
139
|
end
|
100
140
|
end
|
101
141
|
if sentiment_value
|
102
|
-
puts "#{used_token}: #{sentiment_value}" if @options[:debug]
|
142
|
+
puts "#{used_token}: #{sentiment_value}" if @options[:debug] and not @options[:skip_found_debug]
|
103
143
|
sentiment_value
|
104
144
|
else
|
105
|
-
puts "#{used_token}: nil" if @options[:debug]
|
145
|
+
puts "#{used_token}: nil" if @options[:debug] and not @options[:skip_not_found_debug]
|
106
146
|
nil
|
107
147
|
end
|
108
148
|
end
|
@@ -117,12 +157,14 @@ class TextMood
|
|
117
157
|
|
118
158
|
sentiment_file = File.new(path, "r:UTF-8")
|
119
159
|
while (line = sentiment_file.gets)
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
160
|
+
unless (line.match(/\s*#/))
|
161
|
+
parsed_line = line.chomp.split(/\s*([\d.-]+):\s*([^\s].*)/)
|
162
|
+
if parsed_line.size == 3
|
163
|
+
score = parsed_line[1]
|
164
|
+
text = parsed_line[2]
|
165
|
+
if score and text
|
166
|
+
sentiment_values[text.downcase] = score.to_f
|
167
|
+
end
|
126
168
|
end
|
127
169
|
end
|
128
170
|
end
|