textmood 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +22 -19
- data/bin/textmood +17 -16
- data/lib/textmood.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -38,12 +38,6 @@ score = tm.analyze("some text")
|
|
38
38
|
# specified files instead. You can specify as many files as you want.
|
39
39
|
tm = TextMood.new(files: ["en_US-mod1.txt", "emoticons.txt"])
|
40
40
|
|
41
|
-
# Using :normalize_output, you can make TextMood return a normalized value:
|
42
|
-
# 1 for positive, 0 for neutral and -1 for negative
|
43
|
-
tm = TextMood.new(lang: "en_US", normalize_output: true)
|
44
|
-
score = tm.analyze("some text")
|
45
|
-
#=> '1'
|
46
|
-
|
47
41
|
# :normalize_score will try to normalize the score to an integer between +/- 100,
|
48
42
|
# based on how many tokens were scored, which can be useful when trying to compare
|
49
43
|
# scores for texts of different length
|
@@ -51,13 +45,21 @@ tm = TextMood.new(lang: "en_US", normalize_score: true)
|
|
51
45
|
score = tm.analyze("some text")
|
52
46
|
#=> '14'
|
53
47
|
|
54
|
-
# :
|
55
|
-
#
|
56
|
-
|
48
|
+
# :ternary_output will make TextMood return one of three fixed values:
|
49
|
+
# 1 for positive, 0 for neutral and -1 for negative
|
50
|
+
tm = TextMood.new(lang: "en_US", ternary_output: true)
|
51
|
+
score = tm.analyze("some text")
|
52
|
+
#=> '1'
|
53
|
+
|
54
|
+
# :min_threshold and :max_threshold let you customize the way :ternary_output
|
55
|
+
# treats different values. The options below will make all scores below 10 negative,
|
56
|
+
# 10-20 will be neutral, and above 20 will be positive. Note that these thresholds
|
57
|
+
# are compared to the normalized score, if applicable.
|
57
58
|
tm = TextMood.new(lang: "en_US",
|
58
|
-
|
59
|
-
|
60
|
-
|
59
|
+
ternary_output: true,
|
60
|
+
normalize_score: true,
|
61
|
+
min_threshold: 10,
|
62
|
+
max_threshold: 20)
|
61
63
|
score = tm.analyze("some text")
|
62
64
|
#=> '0'
|
63
65
|
|
@@ -121,23 +123,24 @@ MANDATORY options:
|
|
121
123
|
files will be loaded if this option is used.
|
122
124
|
|
123
125
|
OPTIONAL options:
|
124
|
-
-
|
125
|
-
instead of the actual score. See also --min and --max.
|
126
|
-
|
127
|
-
-s, --normalize-score Tries to normalize the score to an integer between +/- 100
|
126
|
+
-n, --normalize-score Tries to normalize the score to an integer between +/- 100
|
128
127
|
according to the number of tokens that were scored, making
|
129
128
|
it more feasible to compare scores for texts of different
|
130
129
|
length
|
131
130
|
|
131
|
+
-t, --ternary-output Return 1 (positive), -1 (negative) or 0 (neutral)
|
132
|
+
instead of the actual score. See also --min-threshold
|
133
|
+
and --max-threshold.
|
134
|
+
|
132
135
|
-i, --min-threshold FLOAT Scores lower than this are considered negative when
|
133
|
-
using --
|
136
|
+
using --ternary-output (default 0.5). Note that the
|
134
137
|
threshold is compared to the normalized score, if applicable
|
135
138
|
|
136
139
|
-x, --max-threshold FLOAT Scores higher than this are considered positive when
|
137
|
-
using --
|
140
|
+
using --ternary-output (default 0.5). Note that the
|
138
141
|
threshold is compared to the normalized score, if applicable
|
139
142
|
|
140
|
-
-
|
143
|
+
-s, --start-ngram INTEGER The lowest word N-gram number to split the text into
|
141
144
|
(default 1). Note that this only makes sense if the
|
142
145
|
sentiment file has tokens of similar N-gram length
|
143
146
|
|
data/bin/textmood
CHANGED
@@ -56,44 +56,45 @@ opts_parser = OptionParser.new do |opts|
|
|
56
56
|
end
|
57
57
|
opts.separator ""
|
58
58
|
opts.separator "OPTIONAL options:"
|
59
|
-
opts.on("-
|
60
|
-
"instead of the actual score. See also --min and --max.") do |n|
|
61
|
-
options[:normalize_output] = true
|
62
|
-
end
|
63
|
-
opts.separator ""
|
64
|
-
opts.on("-s", "--normalize-score", "Tries to normalize the score to an integer between +/- 100",
|
59
|
+
opts.on("-n", "--normalize-score", "Tries to normalize the score to an integer between +/- 100",
|
65
60
|
"according to the number of tokens that were scored, making",
|
66
61
|
"it more feasible to compare scores for texts of different",
|
67
62
|
"length") do |ns|
|
68
63
|
options[:normalize_score] = true
|
69
64
|
end
|
70
65
|
opts.separator ""
|
66
|
+
opts.on("-t", "--ternary-output", "Return 1 (positive), -1 (negative) or 0 (neutral)",
|
67
|
+
"instead of the actual score. See also --min-threshold",
|
68
|
+
"and --max-threshold.") do |n|
|
69
|
+
options[:ternary_output] = true
|
70
|
+
end
|
71
|
+
opts.separator ""
|
71
72
|
opts.on("-i", "--min-threshold FLOAT", "Scores lower than this are considered negative when",
|
72
|
-
|
73
|
-
|
73
|
+
"using --ternary-output (default 0.5). Note that the",
|
74
|
+
"threshold is compared to the normalized score, if applicable") do |min|
|
74
75
|
options[:min_threshold] = min.to_f
|
75
76
|
end
|
76
77
|
opts.separator ""
|
77
78
|
opts.on("-x", "--max-threshold FLOAT", "Scores higher than this are considered positive when",
|
78
|
-
|
79
|
-
|
79
|
+
"using --ternary-output (default 0.5). Note that the",
|
80
|
+
"threshold is compared to the normalized score, if applicable") do |max|
|
80
81
|
options[:max_threshold] = max.to_f
|
81
82
|
end
|
82
83
|
opts.separator ""
|
83
|
-
opts.on("-
|
84
|
-
|
85
|
-
|
84
|
+
opts.on("-s", "--start-ngram INTEGER", "The lowest word N-gram number to split the text into",
|
85
|
+
"(default 1). Note that this only makes sense if the",
|
86
|
+
"sentiment file has tokens of similar N-gram length") do |start_ngram|
|
86
87
|
options[:start_ngram] = start_ngram.to_i
|
87
88
|
end
|
88
89
|
opts.separator ""
|
89
90
|
opts.on("-e", "--end-ngram INTEGER", "The highest word N-gram number to to split the text into",
|
90
|
-
|
91
|
-
|
91
|
+
"(default 1). Note that this only makes sense if the",
|
92
|
+
"sentiment file has tokens of similar N-gram length") do |end_ngram|
|
92
93
|
options[:end_ngram] = end_ngram.to_i
|
93
94
|
end
|
94
95
|
opts.separator ""
|
95
96
|
opts.on("-k", "--skip-symbols", "Do not include symbols file (emoticons etc.). Only applies",
|
96
|
-
|
97
|
+
"when using -l/--language.") do |s|
|
97
98
|
options[:include_symbols] = false
|
98
99
|
end
|
99
100
|
opts.separator ""
|
data/lib/textmood.rb
CHANGED
@@ -54,7 +54,7 @@ class TextMood
|
|
54
54
|
if @options[:normalize_score]
|
55
55
|
sentiment_total = normalize_score(sentiment_total, scores_added)
|
56
56
|
end
|
57
|
-
if @options[:
|
57
|
+
if @options[:ternary_output]
|
58
58
|
if sentiment_total > @options[:max_threshold]
|
59
59
|
1
|
60
60
|
elsif sentiment_total < @options[:min_threshold]
|