RubyGems - textmood - Versions diffs - 0.1.2 → 0.1.3 - Mend

textmood 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

data/README.md CHANGED Viewed

@@ -51,7 +51,7 @@ You can use it in a Ruby program like this:
 ```ruby
 require "textmood"
-# The :language parameter makes TextMood use one of the bundled language sentiment files
+# The :language parameter makes TextMood use one of the bundled language files
 tm = TextMood.new(language: "en")
 score = tm.analyze("some text")
 #=> '1.121'
@@ -115,14 +115,25 @@ score = tm.analyze("some text")
 #=> '0.1'
 ```
+```ruby
+# :verbose prints out statistics about the analysis
+tm = TextMood.new(language: "en", verbose: true)
+score = tm.analyze("some slightly longer text that contains a few more tokens")
+#(stdout): Combined score: 1.0 (5 tokens, 0.2 avg.)
+#(stdout): Negative score: -0.5 (1 tokens, -0.5 avg.)
+#(stdout): Positive score: 1.5 (4 tokens, 0.375 avg.)
+#(stdout): Neutral score: 0.0 (0 tokens)
+#(stdout): Not found: 5 tokens
+#=> '1.0'
+```
 #### CLI tool
-You can also pass some UTF-8-encoded text to the CLI tool and get a score back, like so
+You can also pass some UTF-8-encoded text to the CLI tool and get a score back, like so:
 ```bash
 textmood -l en "<some text>"
 -0.4375
 ```
-Alternatively, you can pipe some text to textmood on stdin:
+Alternatively, you can pipe text to textmood on stdin:
 ```bash
 echo "<some text>" | textmood -l en
 -0.4375
@@ -181,18 +192,21 @@ OPTIONAL options:
     -k, --skip-symbols               Do not include symbols file (emoticons etc.). Only applies
                                      when using -l/--language.
-    -c, --config PATH TO FILE        Use the specified config file. If not specified, textmood will look
-                                     for /etc/textmood.cfg and ~/.textmood. Settings in the user config
-                                     will override settings from the global file.
+    -c, --config PATH TO FILE        Use the specified config file. If not specified, textmood will
+                                     look for /etc/textmood.cfg and ~/.textmood. Settings in the user
+                                     config will override settings from the global file.
     -d, --debug                      Prints out the score for each token in the provided text
                                      or 'nil' if the token was not found in the sentiment file
+    -v, --verbose                    Prints out some useful statistics about the analysis
+                                     (counts, averages etc).
     -h, --help                       Show this message
 ```
 ### Configuration files for the CLI tool
-The CLI tool will look for /etc/textmood and ~/.textmood unless the -c/--config option
+The CLI tool will look for */etc/textmood.cfg* and *~/.textmood* unless the -c/--config option
 is used, in which case only that file is used. The configuration files are basic, flat
 YAML files that use the same keys as the library understands:
 ```yaml

data/bin/textmood CHANGED Viewed

@@ -116,9 +116,9 @@ opts_parser = OptionParser.new do |opts|
     options[:include_symbols] = false
   end
   opts.separator ""
-  opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will look",
-                                         "for /etc/textmood.cfg and ~/.textmood. Settings in the user config",
-                                         "will override settings from the global file.") do |c|
+  opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will",
+                                         "look for /etc/textmood.cfg and ~/.textmood. Settings in the user",
+                                         "config will override settings from the global file.") do |c|
     options[:config] = c.to_s
   end
   opts.separator ""
@@ -127,6 +127,11 @@ opts_parser = OptionParser.new do |opts|
     options[:debug] = true
   end
   opts.separator ""
+  opts.on("-v", "--verbose", "Prints out some useful statistics about the analysis",
+                             "(counts, averages etc).") do |v|
+    options[:verbose] = true
+  end
+  opts.separator ""
   opts.on_tail("-h", "--help", "Show this message") do
     puts opts
     puts ""

data/lang/no_NB.txt CHANGED Viewed

@@ -1,53 +1,40 @@
 1.0: episke
 1.0: god
+1.0: gode
+1.0: beste
 1.0: oppreist
 0.9: heldig
 0.8: fantastisk
 0.8: storøyd
 0.8: velstående
-0.8: godt fast
 0.8: upretensiøs
-0.8: undervurdert
 0.8: terapeutisk
 0.8: selvrespekt
 0.8: helsebringende
 0.8: rosenrød i kinnene
-0.8: utstråle
-0.8: utstråling
 0.8: velstand
 0.8: prinsipiell
-0.8: moraliserende
 0.8: mesterstykke
 0.8: mesterlig
 0.8: mesterlig
 0.8: heldig
 0.8: inspirert
-0.8: idealisert
-0.8: idealisert
-0.8: hylende
 0.8: sendt fra himmelen
-0.8: fungicid
 0.8: eleganse
-0.8: double-farget
-0.8: viderekoble
+0.8: elegant
 0.8: avvæpning
 0.8: herlig
 0.8: anstendighet
+0.8: anstendig
 0.8: blende
 0.8: høflige
-0.8: barnlig
-0.8: skryte
-0.8: bukking
-0.8: bonheur
+0.8: høflig
 0.8: gunstig
-0.8: erkeengelen
 0.8: underholde
 0.8: minnelig
 0.8: rosenrød
-0.8: gjennomgripende
 0.8: rettferdige
 0.8: presis
-0.8: pervertert
 0.8: integritet
 0.8: ideell
 0.8: godhet
@@ -59,11 +46,7 @@
 0.7: helhet
 0.7: velproporsjonert
 0.7: godt bevart
-0.7: godt favoriserte
-0.7: godt favorisert
 0.7: velvillige
-0.7: fordreining
-0.7: bestikkelig
 0.7: urbanitet
 0.7: øverste nivå
 0.7: uaffisert

data/lib/textmood.rb CHANGED Viewed

@@ -54,31 +54,71 @@ class TextMood
   # analyzes the sentiment of the provided text.
   def analyze(text)
     sentiment_total = 0.0
+    negative_total  = 0.0
+    positive_total  = 0.0
+    neutral_total   = 0.0
+    scores_added   = 0
+    negative_added = 0
+    positive_added = 0
+    neutral_added  = 0
+    not_found      = 0
-    scores_added = 0
     (@options[:start_ngram]..@options[:end_ngram]).each do |i|
       ngrams(i, text.to_s).each do |token|
         score = score_token(token)
-        unless score.nil?
+        if score.nil?
+          not_found += 1
+        else
           sentiment_total += score
+          if score > 0
+            positive_total += score
+            positive_added += 1
+          elsif score < 0
+            negative_total += score
+            negative_added += 1
+          else
+            neutral_total += score
+            neutral_added += 1
+          end
           scores_added += 1
         end
       end
     end
     if @options[:normalize_score]
-      sentiment_total = normalize_score(sentiment_total, scores_added)
+      actual_score = normalize_score(sentiment_total, scores_added)
+    else
+      actual_score = sentiment_total
     end
+    if @options[:verbose]
+      puts "" if @options[:debug]
+      combined_avg  = (scores_added > 0) ? ", #{(sentiment_total.to_f / scores_added.to_f)} avg." : ""
+      combined_text = "Combined score: #{sentiment_total} (#{scores_added} tokens#{combined_avg})"
+      puts combined_text
+      negative_avg  = (negative_added > 0) ? ", #{(negative_total.to_f / negative_added.to_f)} avg." : ""
+      negative_text = "Negative score: #{negative_total} (#{negative_added} tokens#{negative_avg})"
+      puts negative_text
+      positive_avg  = (positive_added > 0) ? ", #{(positive_total.to_f / positive_added.to_f)} avg." : ""
+      positive_text = "Positive score: #{positive_total} (#{positive_added} tokens#{positive_avg})"
+      puts positive_text
+      neutral_avg  = (neutral_added > 0) ? ", #{(neutral_total.to_f / neutral_added.to_f)} avg." : ""
+      neutral_text = "Neutral score: #{neutral_total} (#{neutral_added} tokens#{neutral_avg})"
+      puts neutral_text
+      puts "Not found: #{not_found} tokens"
+    end
     if @options[:ternary_output]
-      if sentiment_total > @options[:max_threshold]
+      if actual_score > @options[:max_threshold]
         1
-      elsif sentiment_total < @options[:min_threshold]
+      elsif actual_score < @options[:min_threshold]
         -1
       else
         0
       end
     else
-      sentiment_total
+      actual_score
     end
   end
@@ -99,10 +139,10 @@ class TextMood
       end
     end
     if sentiment_value
-      puts "#{used_token}: #{sentiment_value}" if @options[:debug]
+      puts "#{used_token}: #{sentiment_value}" if @options[:debug] and not @options[:skip_found_debug]
       sentiment_value
     else
-      puts "#{used_token}: nil" if @options[:debug]
+      puts "#{used_token}: nil" if @options[:debug] and not @options[:skip_not_found_debug]
       nil
     end
   end
@@ -117,12 +157,14 @@ class TextMood
     sentiment_file = File.new(path, "r:UTF-8")
     while (line = sentiment_file.gets)
-      parsed_line = line.chomp.split(/\s*([\d.-]+):\s*([^\s].*)/)
-      if parsed_line.size == 3
-        score = parsed_line[1]
-        text = parsed_line[2]
-        if score and text
-          sentiment_values[text.downcase] = score.to_f
+      unless (line.match(/\s*#/))
+        parsed_line = line.chomp.split(/\s*([\d.-]+):\s*([^\s].*)/)
+        if parsed_line.size == 3
+          score = parsed_line[1]
+          text = parsed_line[2]
+          if score and text
+            sentiment_values[text.downcase] = score.to_f
+          end
         end
       end
     end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: textmood
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
   prerelease:
 platform: ruby
 authors: