textmood 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +22 -19
- data/bin/textmood +17 -16
- data/lib/textmood.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -38,12 +38,6 @@ score = tm.analyze("some text")
|
|
38
38
|
# specified files instead. You can specify as many files as you want.
|
39
39
|
tm = TextMood.new(files: ["en_US-mod1.txt", "emoticons.txt"])
|
40
40
|
|
41
|
-
# Using :normalize_output, you can make TextMood return a normalized value:
|
42
|
-
# 1 for positive, 0 for neutral and -1 for negative
|
43
|
-
tm = TextMood.new(lang: "en_US", normalize_output: true)
|
44
|
-
score = tm.analyze("some text")
|
45
|
-
#=> '1'
|
46
|
-
|
47
41
|
# :normalize_score will try to normalize the score to an integer between +/- 100,
|
48
42
|
# based on how many tokens were scored, which can be useful when trying to compare
|
49
43
|
# scores for texts of different length
|
@@ -51,13 +45,21 @@ tm = TextMood.new(lang: "en_US", normalize_score: true)
|
|
51
45
|
score = tm.analyze("some text")
|
52
46
|
#=> '14'
|
53
47
|
|
54
|
-
# :
|
55
|
-
#
|
56
|
-
|
48
|
+
# :ternary_output will make TextMood return one of three fixed values:
|
49
|
+
# 1 for positive, 0 for neutral and -1 for negative
|
50
|
+
tm = TextMood.new(lang: "en_US", ternary_output: true)
|
51
|
+
score = tm.analyze("some text")
|
52
|
+
#=> '1'
|
53
|
+
|
54
|
+
# :min_threshold and :max_threshold let you customize the way :ternary_output
|
55
|
+
# treats different values. The options below will make all scores below 10 negative,
|
56
|
+
# 10-20 will be neutral, and above 20 will be positive. Note that these thresholds
|
57
|
+
# are compared to the normalized score, if applicable.
|
57
58
|
tm = TextMood.new(lang: "en_US",
|
58
|
-
|
59
|
-
|
60
|
-
|
59
|
+
ternary_output: true,
|
60
|
+
normalize_score: true,
|
61
|
+
min_threshold: 10,
|
62
|
+
max_threshold: 20)
|
61
63
|
score = tm.analyze("some text")
|
62
64
|
#=> '0'
|
63
65
|
|
@@ -121,23 +123,24 @@ MANDATORY options:
|
|
121
123
|
files will be loaded if this option is used.
|
122
124
|
|
123
125
|
OPTIONAL options:
|
124
|
-
-
|
125
|
-
instead of the actual score. See also --min and --max.
|
126
|
-
|
127
|
-
-s, --normalize-score Tries to normalize the score to an integer between +/- 100
|
126
|
+
-n, --normalize-score Tries to normalize the score to an integer between +/- 100
|
128
127
|
according to the number of tokens that were scored, making
|
129
128
|
it more feasible to compare scores for texts of different
|
130
129
|
length
|
131
130
|
|
131
|
+
-t, --ternary-output Return 1 (positive), -1 (negative) or 0 (neutral)
|
132
|
+
instead of the actual score. See also --min-threshold
|
133
|
+
and --max-threshold.
|
134
|
+
|
132
135
|
-i, --min-threshold FLOAT Scores lower than this are considered negative when
|
133
|
-
using --
|
136
|
+
using --ternary-output (default 0.5). Note that the
|
134
137
|
threshold is compared to the normalized score, if applicable
|
135
138
|
|
136
139
|
-x, --max-threshold FLOAT Scores higher than this are considered positive when
|
137
|
-
using --
|
140
|
+
using --ternary-output (default 0.5). Note that the
|
138
141
|
threshold is compared to the normalized score, if applicable
|
139
142
|
|
140
|
-
-
|
143
|
+
-s, --start-ngram INTEGER The lowest word N-gram number to split the text into
|
141
144
|
(default 1). Note that this only makes sense if the
|
142
145
|
sentiment file has tokens of similar N-gram length
|
143
146
|
|
data/bin/textmood
CHANGED
@@ -56,44 +56,45 @@ opts_parser = OptionParser.new do |opts|
|
|
56
56
|
end
|
57
57
|
opts.separator ""
|
58
58
|
opts.separator "OPTIONAL options:"
|
59
|
-
opts.on("-
|
60
|
-
"instead of the actual score. See also --min and --max.") do |n|
|
61
|
-
options[:normalize_output] = true
|
62
|
-
end
|
63
|
-
opts.separator ""
|
64
|
-
opts.on("-s", "--normalize-score", "Tries to normalize the score to an integer between +/- 100",
|
59
|
+
opts.on("-n", "--normalize-score", "Tries to normalize the score to an integer between +/- 100",
|
65
60
|
"according to the number of tokens that were scored, making",
|
66
61
|
"it more feasible to compare scores for texts of different",
|
67
62
|
"length") do |ns|
|
68
63
|
options[:normalize_score] = true
|
69
64
|
end
|
70
65
|
opts.separator ""
|
66
|
+
opts.on("-t", "--ternary-output", "Return 1 (positive), -1 (negative) or 0 (neutral)",
|
67
|
+
"instead of the actual score. See also --min-threshold",
|
68
|
+
"and --max-threshold.") do |n|
|
69
|
+
options[:ternary_output] = true
|
70
|
+
end
|
71
|
+
opts.separator ""
|
71
72
|
opts.on("-i", "--min-threshold FLOAT", "Scores lower than this are considered negative when",
|
72
|
-
|
73
|
-
|
73
|
+
"using --ternary-output (default 0.5). Note that the",
|
74
|
+
"threshold is compared to the normalized score, if applicable") do |min|
|
74
75
|
options[:min_threshold] = min.to_f
|
75
76
|
end
|
76
77
|
opts.separator ""
|
77
78
|
opts.on("-x", "--max-threshold FLOAT", "Scores higher than this are considered positive when",
|
78
|
-
|
79
|
-
|
79
|
+
"using --ternary-output (default 0.5). Note that the",
|
80
|
+
"threshold is compared to the normalized score, if applicable") do |max|
|
80
81
|
options[:max_threshold] = max.to_f
|
81
82
|
end
|
82
83
|
opts.separator ""
|
83
|
-
opts.on("-
|
84
|
-
|
85
|
-
|
84
|
+
opts.on("-s", "--start-ngram INTEGER", "The lowest word N-gram number to split the text into",
|
85
|
+
"(default 1). Note that this only makes sense if the",
|
86
|
+
"sentiment file has tokens of similar N-gram length") do |start_ngram|
|
86
87
|
options[:start_ngram] = start_ngram.to_i
|
87
88
|
end
|
88
89
|
opts.separator ""
|
89
90
|
opts.on("-e", "--end-ngram INTEGER", "The highest word N-gram number to to split the text into",
|
90
|
-
|
91
|
-
|
91
|
+
"(default 1). Note that this only makes sense if the",
|
92
|
+
"sentiment file has tokens of similar N-gram length") do |end_ngram|
|
92
93
|
options[:end_ngram] = end_ngram.to_i
|
93
94
|
end
|
94
95
|
opts.separator ""
|
95
96
|
opts.on("-k", "--skip-symbols", "Do not include symbols file (emoticons etc.). Only applies",
|
96
|
-
|
97
|
+
"when using -l/--language.") do |s|
|
97
98
|
options[:include_symbols] = false
|
98
99
|
end
|
99
100
|
opts.separator ""
|
data/lib/textmood.rb
CHANGED
@@ -54,7 +54,7 @@ class TextMood
|
|
54
54
|
if @options[:normalize_score]
|
55
55
|
sentiment_total = normalize_score(sentiment_total, scores_added)
|
56
56
|
end
|
57
|
-
if @options[:
|
57
|
+
if @options[:ternary_output]
|
58
58
|
if sentiment_total > @options[:max_threshold]
|
59
59
|
1
|
60
60
|
elsif sentiment_total < @options[:min_threshold]
|