textmood 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/README.md +50 -9
  2. data/bin/textmood +40 -7
  3. data/lib/textmood.rb +3 -3
  4. data/test/test.rb +1 -1
  5. metadata +1 -1
data/README.md CHANGED
@@ -52,7 +52,7 @@ You can use it in a Ruby program like this:
52
52
  require "textmood"
53
53
 
54
54
  # The :language parameter makes TextMood use one of the bundled language sentiment files
55
- tm = TextMood.new(lang: "en")
55
+ tm = TextMood.new(language: "en")
56
56
  score = tm.analyze("some text")
57
57
  #=> '1.121'
58
58
 
@@ -62,18 +62,18 @@ tm = TextMood.new(files: ["en_US-mod1.txt", "emoticons.txt"])
62
62
 
63
63
  # Use :alias_file to make TextMood look up the file to use for the given language tag
64
64
  # in a JSON file containing a hash with {"language_tag": "path_to_file"} mappings
65
- tm = TextMood.new(lang: "zw", alias_file: "my-custom-languages.json")
65
+ tm = TextMood.new(language: "zw", alias_file: "my-custom-languages.json")
66
66
 
67
67
  # :normalize_score will try to normalize the score to an integer between +/- 100,
68
68
  # based on how many tokens were scored, which can be useful when trying to compare
69
69
  # scores for texts of different length
70
- tm = TextMood.new(lang: "en", normalize_score: true)
70
+ tm = TextMood.new(language: "en", normalize_score: true)
71
71
  score = tm.analyze("some text")
72
72
  #=> '14'
73
73
 
74
74
  # :ternary_output will make TextMood return one of three fixed values:
75
75
  # 1 for positive, 0 for neutral and -1 for negative
76
- tm = TextMood.new(lang: "en", ternary_output: true)
76
+ tm = TextMood.new(language: "en", ternary_output: true)
77
77
  score = tm.analyze("some text")
78
78
  #=> '1'
79
79
 
@@ -81,7 +81,7 @@ score = tm.analyze("some text")
81
81
  # treats different values. The options below will make all scores below 10 negative,
82
82
  # 10-20 will be neutral, and above 20 will be positive. Note that these thresholds
83
83
  # are compared to the normalized score, if applicable.
84
- tm = TextMood.new(lang: "en",
84
+ tm = TextMood.new(language: "en",
85
85
  ternary_output: true,
86
86
  normalize_score: true,
87
87
  min_threshold: 10,
@@ -92,7 +92,7 @@ score = tm.analyze("some text")
92
92
  # TextMood will by default make one pass over the text, checking every word, but it
93
93
  # supports doing several passes for any range of word N-grams. Both the start and end
94
94
  # N-gram can be specified using the :start_ngram and :end_ngram options
95
- tm = TextMood.new(lang: "en", debug: true, start_ngram: 2, end_ngram: 3)
95
+ tm = TextMood.new(language: "en", debug: true, start_ngram: 2, end_ngram: 3)
96
96
  score = tm.analyze("some long text with many words")
97
97
  #(stdout): some long: 0.1
98
98
  #(stdout): long text: 0.1
@@ -107,7 +107,7 @@ score = tm.analyze("some long text with many words")
107
107
 
108
108
  # :debug prints out all tokens to stdout, along with their values (or 'nil' when the
109
109
  # token was not found)
110
- tm = TextMood.new(lang: "en", debug: true)
110
+ tm = TextMood.new(language: "en", debug: true)
111
111
  score = tm.analyze("some text")
112
112
  #(stdout): some: 0.1
113
113
  #(stdout): text: 0.1
@@ -140,7 +140,7 @@ Above 0 is considered positive, below is considered negative.
140
140
 
141
141
  MANDATORY options:
142
142
  -l, --language LANGUAGE The IETF language tag for the provided text.
143
- Examples: en, fr, no_NB, sv,
143
+ Examples: en_US, no_NB
144
144
 
145
145
  OR
146
146
 
@@ -149,6 +149,10 @@ MANDATORY options:
149
149
  files will be loaded if this option is used.
150
150
 
151
151
  OPTIONAL options:
152
+ -a, --alias-file PATH TO FILE JSON file containing a hash that maps language codes to
153
+ sentiment score files. This lets you use the convenience of
154
+ language codes with custom sentiment score files.
155
+
152
156
  -n, --normalize-score Tries to normalize the score to an integer between +/- 100
153
157
  according to the number of tokens that were scored, making
154
158
  it more feasible to compare scores for texts of different
@@ -159,7 +163,7 @@ OPTIONAL options:
159
163
  and --max-threshold.
160
164
 
161
165
  -i, --min-threshold FLOAT Scores lower than this are considered negative when
162
- using --ternary-output (default -0.5). Note that the
166
+ using --ternary-output (default 0.5). Note that the
163
167
  threshold is compared to the normalized score, if applicable
164
168
 
165
169
  -x, --max-threshold FLOAT Scores higher than this are considered positive when
@@ -183,6 +187,43 @@ OPTIONAL options:
183
187
  -h, --help Show this message
184
188
  ```
185
189
 
190
+ ### Configuration files for the CLI tool
191
+ The CLI tool will look for /etc/textmood.cfg and ~/.textmood unless the -c/--config option
192
+ is used, in which case only that file is used. The configuration files are basic, flat
193
+ YAML files that use the same keys as the library understands:
194
+ ```yaml
195
+ # Assume that text is in this language, unless overridden on the command line.
196
+ # Do not use this in conjunction with the files setting.
197
+ language: en
198
+
199
+ # Load these sentiment score files instead of using any of the bundled ones
200
+ # Do not use this in conjunction with the language setting
201
+ files: [/path/to/file1, /path/to/file2]
202
+
203
+ # Use a global alias file to resolve language codes
204
+ alias_file: /home/john/textmood/aliases.json
205
+
206
+ # Always normalize the score
207
+ normalize_score: true
208
+
209
+ # Use ternary output
210
+ ternary_output: true
211
+
212
+ # Use these thresholds when using ternary output
213
+ max_threshold: 10
214
+ min_threshold: 5
215
+
216
+ # Do three passes, scoring unigrams, bigrams and trigrams
217
+ start_ngram: 1
218
+ end_ngram: 3
219
+
220
+ # Do not load the symbols file when using a bundled language
221
+ skip_symbols: true
222
+
223
+ # Always print debug info
224
+ debug: true
225
+ ```
226
+
186
227
  ## Sentiment files
187
228
  The included sentiment files reside in the *lang* directory. I hope to add many
188
229
  more baseline sentiment files in the future.
data/bin/textmood CHANGED
@@ -12,8 +12,13 @@ $:.unshift File.join(File.dirname(__FILE__), *%w{ .. lib })
12
12
 
13
13
  require "optparse"
14
14
  require "textmood"
15
+ require "yaml"
15
16
 
16
- usage = "Usage: #{File.basename($0)} [options] \"<text>\"\n OR\n echo \"<text>\" | #{File.basename($0)} [options]"
17
+ usage = <<-eos
18
+ Usage: #{File.basename($0)} [options] "<text>"
19
+ OR
20
+ echo "<text>" | #{File.basename($0)} [options]"
21
+ eos
17
22
 
18
23
  def mini_usage(usage, notext = false)
19
24
  puts usage
@@ -22,7 +27,8 @@ def mini_usage(usage, notext = false)
22
27
  puts "ERROR: Quoted text must be provided after the last option."
23
28
  else
24
29
  puts "ERROR: An IETF language tag must be provided using the -l/--language option,"
25
- puts " or sentiment files must be provided with the -f/--file option."
30
+ puts " or sentiment files must be provided with the -f/--file option. These"
31
+ puts " values can also be set in /etc/textmood.cfg or ~/.textmood."
26
32
  end
27
33
  puts ""
28
34
  puts "Use \"#{File.basename($0)} -h\" for full usage info."
@@ -30,21 +36,27 @@ def mini_usage(usage, notext = false)
30
36
  exit 20
31
37
  end
32
38
 
33
- if ARGV[0] != "-h" and ARGV[0] != "--help" and not (ARGV[0] and ARGV[1])
34
- mini_usage(usage)
39
+ def parse_config_file(file, debug = false)
40
+ if File.file?(file)
41
+ puts "Using config: #{file}" if debug
42
+ YAML.load(File.read(file))
43
+ else
44
+ {}
45
+ end
35
46
  end
36
47
 
37
48
  options = {:files => []}
49
+
38
50
  opts_parser = OptionParser.new do |opts|
39
51
  opts.banner = usage
40
52
  opts.separator ""
41
- opts.separator "Returns a floating-point sentiment score of the provided text."
42
- opts.separator "Above 0 is considered positive, below is considered negative."
53
+ opts.separator "Returns a sentiment score of the provided text. Above 0 is usually"
54
+ opts.separator "considered positive, below is considered negative."
43
55
  opts.separator ""
44
56
  opts.separator "MANDATORY options:"
45
57
  opts.on("-l", "--language LANGUAGE", "The IETF language tag for the provided text.",
46
58
  "Examples: en_US, no_NB") do |l|
47
- options[:lang] = l
59
+ options[:language] = l
48
60
  end
49
61
  opts.separator ""
50
62
  opts.separator " OR "
@@ -104,6 +116,12 @@ opts_parser = OptionParser.new do |opts|
104
116
  options[:include_symbols] = false
105
117
  end
106
118
  opts.separator ""
119
+ opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will look",
120
+ "for /etc/textmood.cfg and ~/.textmood. Settings in the user config",
121
+ "will override settings from the global file.") do |c|
122
+ options[:config] = c.to_s
123
+ end
124
+ opts.separator ""
107
125
  opts.on("-d", "--debug", "Prints out the score for each token in the provided text",
108
126
  "or 'nil' if the token was not found in the sentiment file") do |d|
109
127
  options[:debug] = true
@@ -117,6 +135,21 @@ opts_parser = OptionParser.new do |opts|
117
135
  end
118
136
  opts_parser.parse!
119
137
 
138
+ if options[:config]
139
+ options.merge!(parse_config_file(options[:config], options[:debug]))
140
+ else
141
+ ["/etc/textmood.cfg", File.expand_path("~/.textmood")].each do |file|
142
+ options.merge!(parse_config_file(file, options[:debug]))
143
+ end
144
+ end
145
+
146
+ options = Hash[options.map{ |k, v| [k.to_sym, v] }]
147
+
148
+ unless ((options[:language] or not options[:files].empty?) or (ARGV[0] and ARGV[1]))
149
+ mini_usage(usage)
150
+ exit 2
151
+ end
152
+
120
153
  def do_main(text, options)
121
154
  tm = TextMood.new(options)
122
155
  puts tm.analyze(text)
data/lib/textmood.rb CHANGED
@@ -19,11 +19,11 @@ class TextMood
19
19
  options[:start_ngram] ||= 1
20
20
  options[:end_ngram] ||= 1
21
21
  @options = options
22
- if options[:lang]
22
+ if options[:language]
23
23
  if options[:alias_file]
24
24
  aliases = load_alias_file(options[:alias_file])
25
25
  if aliases
26
- file = aliases[options[:lang]]
26
+ file = aliases[options[:language]]
27
27
  unless file
28
28
  raise ArgumentError, "Language tag not found in alias file"
29
29
  end
@@ -31,7 +31,7 @@ class TextMood
31
31
  raise ArgumentError, "Alias file not found"
32
32
  end
33
33
  else
34
- file = File.dirname(__FILE__) + "/../lang/#{options[:lang]}.txt"
34
+ file = File.dirname(__FILE__) + "/../lang/#{options[:language]}.txt"
35
35
  end
36
36
  @sentiment_values = load_sentiment_file(file)
37
37
  unless options[:include_symbols] == false
data/test/test.rb CHANGED
@@ -17,7 +17,7 @@ include Test::Unit::Assertions
17
17
  class TestScorer < Test::Unit::TestCase
18
18
 
19
19
  def setup
20
- @tm = TextMood.new({:lang => "en_US"})
20
+ @tm = TextMood.new({:language => "en"})
21
21
  end
22
22
 
23
23
  def test_negative
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textmood
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: