textmood 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. data/README.md +50 -9
  2. data/bin/textmood +40 -7
  3. data/lib/textmood.rb +3 -3
  4. data/test/test.rb +1 -1
  5. metadata +1 -1
data/README.md CHANGED
@@ -52,7 +52,7 @@ You can use it in a Ruby program like this:
52
52
  require "textmood"
53
53
 
54
54
  # The :language parameter makes TextMood use one of the bundled language sentiment files
55
- tm = TextMood.new(lang: "en")
55
+ tm = TextMood.new(language: "en")
56
56
  score = tm.analyze("some text")
57
57
  #=> '1.121'
58
58
 
@@ -62,18 +62,18 @@ tm = TextMood.new(files: ["en_US-mod1.txt", "emoticons.txt"])
62
62
 
63
63
  # Use :alias_file to make TextMood look up the file to use for the given language tag
64
64
  # in a JSON file containing a hash with {"language_tag": "path_to_file"} mappings
65
- tm = TextMood.new(lang: "zw", alias_file: "my-custom-languages.json")
65
+ tm = TextMood.new(language: "zw", alias_file: "my-custom-languages.json")
66
66
 
67
67
  # :normalize_score will try to normalize the score to an integer between +/- 100,
68
68
  # based on how many tokens were scored, which can be useful when trying to compare
69
69
  # scores for texts of different length
70
- tm = TextMood.new(lang: "en", normalize_score: true)
70
+ tm = TextMood.new(language: "en", normalize_score: true)
71
71
  score = tm.analyze("some text")
72
72
  #=> '14'
73
73
 
74
74
  # :ternary_output will make TextMood return one of three fixed values:
75
75
  # 1 for positive, 0 for neutral and -1 for negative
76
- tm = TextMood.new(lang: "en", ternary_output: true)
76
+ tm = TextMood.new(language: "en", ternary_output: true)
77
77
  score = tm.analyze("some text")
78
78
  #=> '1'
79
79
 
@@ -81,7 +81,7 @@ score = tm.analyze("some text")
81
81
  # treats different values. The options below will make all scores below 10 negative,
82
82
  # 10-20 will be neutral, and above 20 will be positive. Note that these thresholds
83
83
  # are compared to the normalized score, if applicable.
84
- tm = TextMood.new(lang: "en",
84
+ tm = TextMood.new(language: "en",
85
85
  ternary_output: true,
86
86
  normalize_score: true,
87
87
  min_threshold: 10,
@@ -92,7 +92,7 @@ score = tm.analyze("some text")
92
92
  # TextMood will by default make one pass over the text, checking every word, but it
93
93
  # supports doing several passes for any range of word N-grams. Both the start and end
94
94
  # N-gram can be specified using the :start_ngram and :end_ngram options
95
- tm = TextMood.new(lang: "en", debug: true, start_ngram: 2, end_ngram: 3)
95
+ tm = TextMood.new(language: "en", debug: true, start_ngram: 2, end_ngram: 3)
96
96
  score = tm.analyze("some long text with many words")
97
97
  #(stdout): some long: 0.1
98
98
  #(stdout): long text: 0.1
@@ -107,7 +107,7 @@ score = tm.analyze("some long text with many words")
107
107
 
108
108
  # :debug prints out all tokens to stdout, along with their values (or 'nil' when the
109
109
  # token was not found)
110
- tm = TextMood.new(lang: "en", debug: true)
110
+ tm = TextMood.new(language: "en", debug: true)
111
111
  score = tm.analyze("some text")
112
112
  #(stdout): some: 0.1
113
113
  #(stdout): text: 0.1
@@ -140,7 +140,7 @@ Above 0 is considered positive, below is considered negative.
140
140
 
141
141
  MANDATORY options:
142
142
  -l, --language LANGUAGE The IETF language tag for the provided text.
143
- Examples: en, fr, no_NB, sv,
143
+ Examples: en_US, no_NB
144
144
 
145
145
  OR
146
146
 
@@ -149,6 +149,10 @@ MANDATORY options:
149
149
  files will be loaded if this option is used.
150
150
 
151
151
  OPTIONAL options:
152
+ -a, --alias-file PATH TO FILE JSON file containing a hash that maps language codes to
153
+ sentiment score files. This lets you use the convenience of
154
+ language codes with custom sentiment score files.
155
+
152
156
  -n, --normalize-score Tries to normalize the score to an integer between +/- 100
153
157
  according to the number of tokens that were scored, making
154
158
  it more feasible to compare scores for texts of different
@@ -159,7 +163,7 @@ OPTIONAL options:
159
163
  and --max-threshold.
160
164
 
161
165
  -i, --min-threshold FLOAT Scores lower than this are considered negative when
162
- using --ternary-output (default -0.5). Note that the
166
+ using --ternary-output (default 0.5). Note that the
163
167
  threshold is compared to the normalized score, if applicable
164
168
 
165
169
  -x, --max-threshold FLOAT Scores higher than this are considered positive when
@@ -183,6 +187,43 @@ OPTIONAL options:
183
187
  -h, --help Show this message
184
188
  ```
185
189
 
190
+ ### Configuration files for the CLI tool
191
+ The CLI tool will look for /etc/textmood.cfg and ~/.textmood unless the -c/--config option
192
+ is used, in which case only that file is used. The configuration files are basic, flat
193
+ YAML files that use the same keys that the library understands:
194
+ ```yaml
195
+ # Assume that text is in this language, unless overridden on the command line.
196
+ # Do not use this in conjunction with the files setting.
197
+ language: en
198
+
199
+ # Load these sentiment score files instead of using any of the bundled ones
200
+ # Do not use this in conjunction with the language setting
201
+ files: [/path/to/file1, /path/to/file2]
202
+
203
+ # Use a global alias file to resolve language codes
204
+ alias_file: /home/john/textmood/aliases.json
205
+
206
+ # Always normalize the score
207
+ normalize_score: true
208
+
209
+ # Use ternary output
210
+ ternary_output: true
211
+
212
+ # Use these thresholds when using ternary output
213
+ max_threshold: 10
214
+ min_threshold: 5
215
+
216
+ # Do three passes, scoring unigrams, bigrams and trigrams
217
+ start_ngram: 1
218
+ end_ngram: 3
219
+
220
+ # Do not load the symbols file when using a bundled language
221
+ skip_symbols: true
222
+
223
+ # Always print debug info
224
+ debug: true
225
+ ```
226
+
186
227
  ## Sentiment files
187
228
  The included sentiment files reside in the *lang* directory. I hope to add many
188
229
  more baseline sentiment files in the future.
data/bin/textmood CHANGED
@@ -12,8 +12,13 @@ $:.unshift File.join(File.dirname(__FILE__), *%w{ .. lib })
12
12
 
13
13
  require "optparse"
14
14
  require "textmood"
15
+ require "yaml"
15
16
 
16
- usage = "Usage: #{File.basename($0)} [options] \"<text>\"\n OR\n echo \"<text>\" | #{File.basename($0)} [options]"
17
+ usage = <<-eos
18
+ Usage: #{File.basename($0)} [options] "<text>"
19
+ OR
20
+ echo "<text>" | #{File.basename($0)} [options]"
21
+ eos
17
22
 
18
23
  def mini_usage(usage, notext = false)
19
24
  puts usage
@@ -22,7 +27,8 @@ def mini_usage(usage, notext = false)
22
27
  puts "ERROR: Quoted text must be provided after the last option."
23
28
  else
24
29
  puts "ERROR: An IETF language tag must be provided using the -l/--language option,"
25
- puts " or sentiment files must be provided with the -f/--file option."
30
+ puts " or sentiment files must be provided with the -f/--file option. These"
31
+ puts " values can also be set in /etc/textmood.cfg or ~/.textmood."
26
32
  end
27
33
  puts ""
28
34
  puts "Use \"#{File.basename($0)} -h\" for full usage info."
@@ -30,21 +36,27 @@ def mini_usage(usage, notext = false)
30
36
  exit 20
31
37
  end
32
38
 
33
- if ARGV[0] != "-h" and ARGV[0] != "--help" and not (ARGV[0] and ARGV[1])
34
- mini_usage(usage)
39
+ def parse_config_file(file, debug = false)
40
+ if File.file?(file)
41
+ puts "Using config: #{file}" if debug
42
+ YAML.load(File.read(file))
43
+ else
44
+ {}
45
+ end
35
46
  end
36
47
 
37
48
  options = {:files => []}
49
+
38
50
  opts_parser = OptionParser.new do |opts|
39
51
  opts.banner = usage
40
52
  opts.separator ""
41
- opts.separator "Returns a floating-point sentiment score of the provided text."
42
- opts.separator "Above 0 is considered positive, below is considered negative."
53
+ opts.separator "Returns a sentiment score of the provided text. Above 0 is usually"
54
+ opts.separator "considered positive, below is considered negative."
43
55
  opts.separator ""
44
56
  opts.separator "MANDATORY options:"
45
57
  opts.on("-l", "--language LANGUAGE", "The IETF language tag for the provided text.",
46
58
  "Examples: en_US, no_NB") do |l|
47
- options[:lang] = l
59
+ options[:language] = l
48
60
  end
49
61
  opts.separator ""
50
62
  opts.separator " OR "
@@ -104,6 +116,12 @@ opts_parser = OptionParser.new do |opts|
104
116
  options[:include_symbols] = false
105
117
  end
106
118
  opts.separator ""
119
+ opts.on("-c", "--config PATH TO FILE", "Use the specified config file. If not specified, textmood will look",
120
+ "for /etc/textmood.cfg and ~/.textmood. Settings in the user config",
121
+ "will override settings from the global file.") do |c|
122
+ options[:config] = c.to_s
123
+ end
124
+ opts.separator ""
107
125
  opts.on("-d", "--debug", "Prints out the score for each token in the provided text",
108
126
  "or 'nil' if the token was not found in the sentiment file") do |d|
109
127
  options[:debug] = true
@@ -117,6 +135,21 @@ opts_parser = OptionParser.new do |opts|
117
135
  end
118
136
  opts_parser.parse!
119
137
 
138
+ if options[:config]
139
+ options.merge!(parse_config_file(options[:config], options[:debug]))
140
+ else
141
+ ["/etc/textmood.cfg", File.expand_path("~/.textmood")].each do |file|
142
+ options.merge!(parse_config_file(file, options[:debug]))
143
+ end
144
+ end
145
+
146
+ options = Hash[options.map{ |k, v| [k.to_sym, v] }]
147
+
148
+ unless ((options[:language] or not options[:files].empty?) or (ARGV[0] and ARGV[1]))
149
+ mini_usage(usage)
150
+ exit 2
151
+ end
152
+
120
153
  def do_main(text, options)
121
154
  tm = TextMood.new(options)
122
155
  puts tm.analyze(text)
data/lib/textmood.rb CHANGED
@@ -19,11 +19,11 @@ class TextMood
19
19
  options[:start_ngram] ||= 1
20
20
  options[:end_ngram] ||= 1
21
21
  @options = options
22
- if options[:lang]
22
+ if options[:language]
23
23
  if options[:alias_file]
24
24
  aliases = load_alias_file(options[:alias_file])
25
25
  if aliases
26
- file = aliases[options[:lang]]
26
+ file = aliases[options[:language]]
27
27
  unless file
28
28
  raise ArgumentError, "Language tag not found in alias file"
29
29
  end
@@ -31,7 +31,7 @@ class TextMood
31
31
  raise ArgumentError, "Alias file not found"
32
32
  end
33
33
  else
34
- file = File.dirname(__FILE__) + "/../lang/#{options[:lang]}.txt"
34
+ file = File.dirname(__FILE__) + "/../lang/#{options[:language]}.txt"
35
35
  end
36
36
  @sentiment_values = load_sentiment_file(file)
37
37
  unless options[:include_symbols] == false
data/test/test.rb CHANGED
@@ -17,7 +17,7 @@ include Test::Unit::Assertions
17
17
  class TestScorer < Test::Unit::TestCase
18
18
 
19
19
  def setup
20
- @tm = TextMood.new({:lang => "en_US"})
20
+ @tm = TextMood.new({:language => "en"})
21
21
  end
22
22
 
23
23
  def test_negative
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textmood
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: