textmood 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5)
  1. data/README.md +21 -7
  2. data/bin/textmood +8 -3
  3. data/lang/no_NB.txt +5 -22
  4. data/lib/textmood.rb +56 -14
  5. metadata +1 -1
data/README.md CHANGED
@@ -51,7 +51,7 @@ You can use it in a Ruby program like this:
51
51
  ```ruby
52
52
  require "textmood"
53
53
 
54
- # The :language parameter makes TextMood use one of the bundled language sentiment files
54
+ # The :language parameter makes TextMood use one of the bundled language files
55
55
  tm = TextMood.new(language: "en")
56
56
  score = tm.analyze("some text")
57
57
  #=> '1.121'
@@ -115,14 +115,25 @@ score = tm.analyze("some text")
115
115
  #=> '0.1'
116
116
  ```
117
117
 
118
+ # :verbose prints out statistics about the analysis
119
+ tm = TextMood.new(language: "en", verbose: true)
120
+ score = tm.analyze("some slightly longer text that contains a few more tokens")
121
+ #(stdout): Combined score: 1.0 (5 tokens, 0.2 avg.)
122
+ #(stdout): Negative score: -0.5 (1 tokens, -0.5 avg.)
123
+ #(stdout): Positive score: 1.5 (4 tokens, 0.375 avg.)
124
+ #(stdout): Neutral score: 0.0 (0 tokens)
125
+ #(stdout): Not found: 5 tokens
126
+ #=> '1.0'
127
+ ```
128
+
118
129
  #### CLI tool
119
- You can also pass some UTF-8-encoded text to the CLI tool and get a score back, like so
130
+ You can also pass some UTF-8-encoded text to the CLI tool and get a score back, like so:
120
131
  ```bash
121
132
  textmood -l en "<some text>"
122
133
  -0.4375
123
134
  ```
124
135
 
125
- Alternatively, you can pipe some text to textmood on stdin:
136
+ Alternatively, you can pipe text to textmood on stdin:
126
137
  ```bash
127
138
  echo "<some text>" | textmood -l en
128
139
  -0.4375
@@ -181,18 +192,21 @@ OPTIONAL options:
181
192
  -k, --skip-symbols Do not include symbols file (emoticons etc.). Only applies
182
193
  when using -l/--language.
183
194
 
184
- -c, --config PATH TO FILE Use the specified config file. If not specified, textmood will look
185
- for /etc/textmood.cfg and ~/.textmood. Settings in the user config
186
- will override settings from the global file.
195
+ -c, --config PATH TO FILE Use the specified config file. If not specified, textmood will
196
+ look for /etc/textmood.cfg and ~/.textmood. Settings in the user
197
+ config will override settings from the global file.
187
198
 
188
199
  -d, --debug Prints out the score for each token in the provided text
189
200
  or 'nil' if the token was not found in the sentiment file
190
201
 
202
+ -v, --verbose Prints out some useful statistics about the analysis
203
+ (counts, averages etc).
204
+
191
205
  -h, --help Show this message
192
206
  ```
193
207
 
194
208
  ### Configuration files for the CLI tool
195
- The CLI tool will look for /etc/textmood and ~/.textmood unless the -c/--config option
209
+ The CLI tool will look for */etc/textmood.cfg* and *~/.textmood* unless the -c/--config option
196
210
  is used, in which case only that file is used. The configuration files are basic, flat
197
211
  YAML files that use the same keys as the library understands:
198
212
  ```yaml
data/bin/textmood CHANGED
@@ -116,9 +116,9 @@ opts_parser = OptionParser.new do |opts|
116
116
  options[:include_symbols] = false
117
117
  end
118
118
  opts.separator ""
119
- opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will look",
120
- "for /etc/textmood.cfg and ~/.textmood. Settings in the user config",
121
- "will override settings from the global file.") do |c|
119
+ opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will",
120
+ "look for /etc/textmood.cfg and ~/.textmood. Settings in the user",
121
+ "config will override settings from the global file.") do |c|
122
122
  options[:config] = c.to_s
123
123
  end
124
124
  opts.separator ""
@@ -127,6 +127,11 @@ opts_parser = OptionParser.new do |opts|
127
127
  options[:debug] = true
128
128
  end
129
129
  opts.separator ""
130
+ opts.on("-v", "--verbose", "Prints out some useful statistics about the analysis",
131
+ "(counts, averages etc).") do |v|
132
+ options[:verbose] = true
133
+ end
134
+ opts.separator ""
130
135
  opts.on_tail("-h", "--help", "Show this message") do
131
136
  puts opts
132
137
  puts ""
data/lang/no_NB.txt CHANGED
@@ -1,53 +1,40 @@
1
1
  1.0: episke
2
2
  1.0: god
3
+ 1.0: gode
4
+ 1.0: beste
3
5
  1.0: oppreist
4
6
  0.9: heldig
5
7
  0.8: fantastisk
6
8
  0.8: storøyd
7
9
  0.8: velstående
8
- 0.8: godt fast
9
10
  0.8: upretensiøs
10
- 0.8: undervurdert
11
11
  0.8: terapeutisk
12
12
  0.8: selvrespekt
13
13
  0.8: helsebringende
14
14
  0.8: rosenrød i kinnene
15
- 0.8: utstråle
16
- 0.8: utstråling
17
15
  0.8: velstand
18
16
  0.8: prinsipiell
19
- 0.8: moraliserende
20
17
  0.8: mesterstykke
21
18
  0.8: mesterlig
22
19
  0.8: mesterlig
23
20
  0.8: heldig
24
21
  0.8: inspirert
25
- 0.8: idealisert
26
- 0.8: idealisert
27
- 0.8: hylende
28
22
  0.8: sendt fra himmelen
29
- 0.8: fungicid
30
23
  0.8: eleganse
31
- 0.8: double-farget
32
- 0.8: viderekoble
24
+ 0.8: elegant
33
25
  0.8: avvæpning
34
26
  0.8: herlig
35
27
  0.8: anstendighet
28
+ 0.8: anstendig
36
29
  0.8: blende
37
30
  0.8: høflige
38
- 0.8: barnlig
39
- 0.8: skryte
40
- 0.8: bukking
41
- 0.8: bonheur
31
+ 0.8: høflig
42
32
  0.8: gunstig
43
- 0.8: erkeengelen
44
33
  0.8: underholde
45
34
  0.8: minnelig
46
35
  0.8: rosenrød
47
- 0.8: gjennomgripende
48
36
  0.8: rettferdige
49
37
  0.8: presis
50
- 0.8: pervertert
51
38
  0.8: integritet
52
39
  0.8: ideell
53
40
  0.8: godhet
@@ -59,11 +46,7 @@
59
46
  0.7: helhet
60
47
  0.7: velproporsjonert
61
48
  0.7: godt bevart
62
- 0.7: godt favoriserte
63
- 0.7: godt favorisert
64
49
  0.7: velvillige
65
- 0.7: fordreining
66
- 0.7: bestikkelig
67
50
  0.7: urbanitet
68
51
  0.7: øverste nivå
69
52
  0.7: uaffisert
data/lib/textmood.rb CHANGED
@@ -54,31 +54,71 @@ class TextMood
54
54
  # analyzes the sentiment of the provided text.
55
55
  def analyze(text)
56
56
  sentiment_total = 0.0
57
+ negative_total = 0.0
58
+ positive_total = 0.0
59
+ neutral_total = 0.0
60
+
61
+ scores_added = 0
62
+ negative_added = 0
63
+ positive_added = 0
64
+ neutral_added = 0
65
+ not_found = 0
57
66
 
58
- scores_added = 0
59
67
  (@options[:start_ngram]..@options[:end_ngram]).each do |i|
60
68
  ngrams(i, text.to_s).each do |token|
61
69
  score = score_token(token)
62
- unless score.nil?
70
+ if score.nil?
71
+ not_found += 1
72
+ else
63
73
  sentiment_total += score
74
+ if score > 0
75
+ positive_total += score
76
+ positive_added += 1
77
+ elsif score < 0
78
+ negative_total += score
79
+ negative_added += 1
80
+ else
81
+ neutral_total += score
82
+ neutral_added += 1
83
+ end
64
84
  scores_added += 1
65
85
  end
66
86
  end
67
87
  end
68
88
 
69
89
  if @options[:normalize_score]
70
- sentiment_total = normalize_score(sentiment_total, scores_added)
90
+ actual_score = normalize_score(sentiment_total, scores_added)
91
+ else
92
+ actual_score = sentiment_total
71
93
  end
94
+
95
+ if @options[:verbose]
96
+ puts "" if @options[:debug]
97
+ combined_avg = (scores_added > 0) ? ", #{(sentiment_total.to_f / scores_added.to_f)} avg." : ""
98
+ combined_text = "Combined score: #{sentiment_total} (#{scores_added} tokens#{combined_avg})"
99
+ puts combined_text
100
+ negative_avg = (negative_added > 0) ? ", #{(negative_total.to_f / negative_added.to_f)} avg." : ""
101
+ negative_text = "Negative score: #{negative_total} (#{negative_added} tokens#{negative_avg})"
102
+ puts negative_text
103
+ positive_avg = (positive_added > 0) ? ", #{(positive_total.to_f / positive_added.to_f)} avg." : ""
104
+ positive_text = "Positive score: #{positive_total} (#{positive_added} tokens#{positive_avg})"
105
+ puts positive_text
106
+ neutral_avg = (neutral_added > 0) ? ", #{(neutral_total.to_f / neutral_added.to_f)} avg." : ""
107
+ neutral_text = "Neutral score: #{neutral_total} (#{neutral_added} tokens#{neutral_avg})"
108
+ puts neutral_text
109
+ puts "Not found: #{not_found} tokens"
110
+ end
111
+
72
112
  if @options[:ternary_output]
73
- if sentiment_total > @options[:max_threshold]
113
+ if actual_score > @options[:max_threshold]
74
114
  1
75
- elsif sentiment_total < @options[:min_threshold]
115
+ elsif actual_score < @options[:min_threshold]
76
116
  -1
77
117
  else
78
118
  0
79
119
  end
80
120
  else
81
- sentiment_total
121
+ actual_score
82
122
  end
83
123
  end
84
124
 
@@ -99,10 +139,10 @@ class TextMood
99
139
  end
100
140
  end
101
141
  if sentiment_value
102
- puts "#{used_token}: #{sentiment_value}" if @options[:debug]
142
+ puts "#{used_token}: #{sentiment_value}" if @options[:debug] and not @options[:skip_found_debug]
103
143
  sentiment_value
104
144
  else
105
- puts "#{used_token}: nil" if @options[:debug]
145
+ puts "#{used_token}: nil" if @options[:debug] and not @options[:skip_not_found_debug]
106
146
  nil
107
147
  end
108
148
  end
@@ -117,12 +157,14 @@ class TextMood
117
157
 
118
158
  sentiment_file = File.new(path, "r:UTF-8")
119
159
  while (line = sentiment_file.gets)
120
- parsed_line = line.chomp.split(/\s*([\d.-]+):\s*([^\s].*)/)
121
- if parsed_line.size == 3
122
- score = parsed_line[1]
123
- text = parsed_line[2]
124
- if score and text
125
- sentiment_values[text.downcase] = score.to_f
160
+ unless (line.match(/\s*#/))
161
+ parsed_line = line.chomp.split(/\s*([\d.-]+):\s*([^\s].*)/)
162
+ if parsed_line.size == 3
163
+ score = parsed_line[1]
164
+ text = parsed_line[2]
165
+ if score and text
166
+ sentiment_values[text.downcase] = score.to_f
167
+ end
126
168
  end
127
169
  end
128
170
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textmood
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: