RubyGems - textmood - Versions diffs - 0.1.0 → 0.1.1 - Mend

textmood 0.1.0 → 0.1.1

Files changed (5) hide show

data/README.md CHANGED Viewed

@@ -52,7 +52,7 @@ You can use it in a Ruby program like this:
 require "textmood"
 # The :language parameter makes TextMood use one of the bundled language sentiment files
-tm = TextMood.new(lang: "en")
+tm = TextMood.new(language: "en")
 score = tm.analyze("some text")
 #=> '1.121'
@@ -62,18 +62,18 @@ tm = TextMood.new(files: ["en_US-mod1.txt", "emoticons.txt"])
 # Use :alias_file to make TextMood look up the file to use for the given language tag
 # in a JSON file containing a hash with {"language_tag": "path_to_file"} mappings
-tm = TextMood.new(lang: "zw", alias_file: "my-custom-languages.json")
+tm = TextMood.new(language: "zw", alias_file: "my-custom-languages.json")
 # :normalize_score will try to normalize the score to an integer between +/- 100,
 # based on how many tokens were scored, which can be useful when trying to compare
 # scores for texts of different length
-tm = TextMood.new(lang: "en", normalize_score: true)
+tm = TextMood.new(language: "en", normalize_score: true)
 score = tm.analyze("some text")
 #=> '14'
 # :ternary_output will make TextMood return one of three fixed values:
 # 1 for positive, 0 for neutral and -1 for negative
-tm = TextMood.new(lang: "en", ternary_output: true)
+tm = TextMood.new(language: "en", ternary_output: true)
 score = tm.analyze("some text")
 #=> '1'
@@ -81,7 +81,7 @@ score = tm.analyze("some text")
 # treats different values. The options below will make all scores below 10 negative,
 # 10-20 will be neutral, and above 20 will be positive. Note that these thresholds
 # are compared to the normalized score, if applicable.
-tm = TextMood.new(lang: "en",
+tm = TextMood.new(language: "en",
                   ternary_output: true,
                   normalize_score: true,
                   min_threshold: 10,
@@ -92,7 +92,7 @@ score = tm.analyze("some text")
 # TextMood will by default make one pass over the text, checking every word, but it
 # supports doing several passes for any range of word N-grams. Both the start and end
 # N-gram can be specified using the :start_ngram and :end_ngram options
-tm = TextMood.new(lang: "en", debug: true, start_ngram: 2, end_ngram: 3)
+tm = TextMood.new(language: "en", debug: true, start_ngram: 2, end_ngram: 3)
 score = tm.analyze("some long text with many words")
 #(stdout): some long: 0.1
 #(stdout): long text: 0.1
@@ -107,7 +107,7 @@ score = tm.analyze("some long text with many words")
 # :debug prints out all tokens to stdout, along with their values (or 'nil' when the
 # token was not found)
-tm = TextMood.new(lang: "en", debug: true)
+tm = TextMood.new(language: "en", debug: true)
 score = tm.analyze("some text")
 #(stdout): some: 0.1
 #(stdout): text: 0.1
@@ -140,7 +140,7 @@ Above 0 is considered positive, below is considered negative.
 MANDATORY options:
     -l, --language LANGUAGE          The IETF language tag for the provided text.
-                                     Examples: en, fr, no_NB, sv,
+                                     Examples: en_US, no_NB
               OR
@@ -149,6 +149,10 @@ MANDATORY options:
                                      files will be loaded if this option is used.
 OPTIONAL options:
+    -a, --alias-file PATH TO FILE    JSON file containing a hash that maps language codes to
+                                     sentiment score files. This lets you use the convenience of
+                                     language codes with custom sentiment score files.
     -n, --normalize-score            Tries to normalize the score to an integer between +/- 100
                                      according to the number of tokens that were scored, making
                                      it more feasible to compare scores for texts of different
@@ -159,7 +163,7 @@ OPTIONAL options:
                                      and --max-threshold.
     -i, --min-threshold FLOAT        Scores lower than this are considered negative when
-                                     using --ternary-output (default -0.5). Note that the
+                                     using --ternary-output (default 0.5). Note that the
                                      threshold is compared to the normalized score, if applicable
     -x, --max-threshold FLOAT        Scores higher than this are considered positive when
@@ -183,6 +187,43 @@ OPTIONAL options:
     -h, --help                       Show this message
 ```
+### Configuration files for the CLI tool
+The CLI tool will look for /etc/textmood.cfg and ~/.textmood unless the -c/--config option
+is used, in which case only that file is used. The configuration files are basic, flat
+YAML files that use the same keys as the library understands:
+```yaml
+# Assume that text is in this language, unless overridden on the command line.
+# Do not use this in conjunction with the files setting.
+language: en
+# Load these sentiment score files instead of using any of the bundled ones
+# Do not use this in conjunction with the language setting
+files: [/path/to/file1, /path/to/file2]
+# Use a global alias file to resolve language codes
+alias_file: /home/john/textmood/aliases.json
+# Always normalize the score
+normalize_score: true
+# Use ternary output
+ternary_output: true
+# Use these thresholds when using ternary output
+max_threshold: 10
+min_threshold: 5
+# Do three passes, scoring unigrams, bigrams and trigrams
+start_ngram: 1
+end_ngram: 3
+# Do not load the symbols file when using a bundled language
+skip_symbols: true
+# Always print debug info
+debug: true
+```
 ## Sentiment files
 The included sentiment files reside in the *lang* directory. I hope to add many
 more baseline sentiment files in the future.

data/bin/textmood CHANGED Viewed

@@ -12,8 +12,13 @@ $:.unshift File.join(File.dirname(__FILE__), *%w{ .. lib })
 require "optparse"
 require "textmood"
+require "yaml"
-usage = "Usage: #{File.basename($0)} [options] \"<text>\"\n            OR\n       echo \"<text>\" | #{File.basename($0)} [options]"
+usage = <<-eos
+Usage: #{File.basename($0)} [options] "<text>"
+            OR
+       echo "<text>" | #{File.basename($0)} [options]"
+eos
 def mini_usage(usage, notext = false)
   puts usage
@@ -22,7 +27,8 @@ def mini_usage(usage, notext = false)
     puts "ERROR: Quoted text must be provided after the last option."
   else
     puts "ERROR: An IETF language tag must be provided using the -l/--language option,"
-    puts "       or sentiment files must be provided with the -f/--file option."
+    puts "       or sentiment files must be provided with the -f/--file option. These"
+    puts "       values can also be set in /etc/textmood.cfg or ~/.textmood."
   end
   puts ""
   puts "Use \"#{File.basename($0)} -h\" for full usage info."
@@ -30,21 +36,27 @@ def mini_usage(usage, notext = false)
   exit 20
 end
-if ARGV[0] != "-h" and ARGV[0] != "--help" and not (ARGV[0] and ARGV[1])
-  mini_usage(usage)
+def parse_config_file(file, debug = false)
+  if File.file?(file)
+    puts "Using config: #{file}" if debug
+    YAML.load(File.read(file))
+  else
+    {}
+  end
 end
 options = {:files => []}
 opts_parser = OptionParser.new do |opts|
   opts.banner = usage
   opts.separator ""
-  opts.separator "Returns a floating-point sentiment score of the provided text."
-  opts.separator "Above 0 is considered positive, below is considered negative."
+  opts.separator "Returns a sentiment score of the provided text. Above 0 is usually"
+  opts.separator "considered positive, below is considered negative."
   opts.separator ""
   opts.separator "MANDATORY options:"
   opts.on("-l", "--language LANGUAGE", "The IETF language tag for the provided text.",
                                        "Examples: en_US, no_NB") do |l|
-    options[:lang] = l
+    options[:language] = l
   end
   opts.separator ""
   opts.separator "              OR "
@@ -104,6 +116,12 @@ opts_parser = OptionParser.new do |opts|
     options[:include_symbols] = false
   end
   opts.separator ""
+  opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will look",
+                                         "for /etc/textmood.cfg and ~/.textmood. Settings in the user config",
+                                         "will override settings from the global file.") do |c|
+    options[:config] = c.to_s
+  end
+  opts.separator ""
   opts.on("-d", "--debug", "Prints out the score for each token in the provided text",
                            "or 'nil' if the token was not found in the sentiment file") do |d|
     options[:debug] = true
@@ -117,6 +135,21 @@ opts_parser = OptionParser.new do |opts|
 end
 opts_parser.parse!
+if options[:config]
+  options.merge!(parse_config_file(options[:config], options[:debug]))
+else
+  ["/etc/textmood.cfg", File.expand_path("~/.textmood")].each do |file|
+    options.merge!(parse_config_file(file, options[:debug]))
+  end
+end
+options = Hash[options.map{ |k, v| [k.to_sym, v] }]
+unless ((options[:language] or not options[:files].empty?) or (ARGV[0] and ARGV[1]))
+  mini_usage(usage)
+  exit 2
+end
 def do_main(text, options)
   tm = TextMood.new(options)
   puts tm.analyze(text)

data/lib/textmood.rb CHANGED Viewed

@@ -19,11 +19,11 @@ class TextMood
     options[:start_ngram]   ||=  1
     options[:end_ngram]     ||=  1
     @options = options
-    if options[:lang]
+    if options[:language]
       if options[:alias_file]
         aliases = load_alias_file(options[:alias_file])
         if aliases
-          file = aliases[options[:lang]]
+          file = aliases[options[:language]]
           unless file
             raise ArgumentError, "Language tag not found in alias file"
           end
@@ -31,7 +31,7 @@ class TextMood
           raise ArgumentError, "Alias file not found"
         end
       else
-        file = File.dirname(__FILE__) + "/../lang/#{options[:lang]}.txt"
+        file = File.dirname(__FILE__) + "/../lang/#{options[:language]}.txt"
       end
       @sentiment_values = load_sentiment_file(file)
       unless options[:include_symbols] == false

data/test/test.rb CHANGED Viewed

@@ -17,7 +17,7 @@ include Test::Unit::Assertions
 class TestScorer < Test::Unit::TestCase
   def setup
-    @tm = TextMood.new({:lang => "en_US"})
+    @tm = TextMood.new({:language => "en"})
   end
   def test_negative

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: textmood
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
   prerelease:
 platform: ruby
 authors: