sentimental 1.1.1 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +13 -0
- data/lib/sentimental.rb +37 -10
- data/sentimental.gemspec +1 -1
- data/spec/sentimental_spec.rb +87 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3589f3fdd19386fdd901d673bd905f2703102aa7
|
4
|
+
data.tar.gz: 355ec5e97a133254c82839b70db0e2a12fe0462a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec5d616c70d55299d58a520c5482e1dc0ff451b6405337d02e6ba0a9c5b77ca7553839a12dbb40c23f65e071e1022fc3b8dfeed0d022e96abde51d7532345935
|
7
|
+
data.tar.gz: d935a09a091dce7796717563bd9251cf4575ae50eae1bbfe7e0ed4a2b09398f4976a09d076bb4a3d445a1aec2e2f7aa01b4316b1079438862a74ec76c206df25
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -71,6 +71,19 @@ scores and tokens, e.g.:
|
|
71
71
|
0.0 Meh
|
72
72
|
-1.0 Horrible
|
73
73
|
|
74
|
+
## N-grams
|
75
|
+
|
76
|
+
You can parse n-grams of words by specifying their max size in the initializer:
|
77
|
+
```
|
78
|
+
Sentimental.new(ngrams: 4)
|
79
|
+
```
|
80
|
+
|
81
|
+
The dictionary must have this format:
|
82
|
+
|
83
|
+
1.0 very happy
|
84
|
+
-2.0 no
|
85
|
+
0.0 meh
|
86
|
+
|
74
87
|
## Installation
|
75
88
|
|
76
89
|
gem install sentimental
|
data/lib/sentimental.rb
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
class Sentimental
|
2
|
-
attr_accessor :threshold, :word_scores, :neutral_regexps
|
2
|
+
attr_accessor :threshold, :word_scores, :neutral_regexps, :ngrams
|
3
3
|
|
4
|
-
def initialize(threshold: 0, word_scores: nil, neutral_regexps: [])
|
5
|
-
|
4
|
+
def initialize(threshold: 0, word_scores: nil, neutral_regexps: [], ngrams: 1)
|
5
|
+
if ngrams >= 1
|
6
|
+
@ngrams = ngrams.to_i
|
7
|
+
else
|
8
|
+
@ngrams = 1
|
9
|
+
end
|
10
|
+
@word_scores = word_scores || {}
|
11
|
+
@word_scores.default = 0.0
|
6
12
|
@threshold = threshold
|
7
13
|
@neutral_regexps = neutral_regexps
|
8
14
|
end
|
@@ -10,7 +16,7 @@ class Sentimental
|
|
10
16
|
def score(string)
|
11
17
|
return 0 if neutral_regexps.any? {|regexp| string =~ regexp}
|
12
18
|
|
13
|
-
|
19
|
+
extract_words_with_n_grams(string).inject(0) do |score, token|
|
14
20
|
score += word_scores[token]
|
15
21
|
end
|
16
22
|
end
|
@@ -27,26 +33,47 @@ class Sentimental
|
|
27
33
|
end
|
28
34
|
end
|
29
35
|
|
36
|
+
def classify(string)
|
37
|
+
sentiment(string) == :positive
|
38
|
+
end
|
39
|
+
|
30
40
|
def load_defaults
|
31
41
|
['sentiwords', 'sentislang'].each do |filename|
|
32
|
-
|
42
|
+
load_from(File.dirname(__FILE__) + "/../data/#{filename}.txt")
|
33
43
|
end
|
34
44
|
end
|
35
45
|
|
36
|
-
def load_senti_file(filename)
|
46
|
+
def load_from(filename)
|
37
47
|
File.open(filename) do |file|
|
38
48
|
file.each_line do |line|
|
39
|
-
parsed_line = line.chomp.
|
40
|
-
|
41
|
-
|
42
|
-
|
49
|
+
if parsed_line = (line.chomp.scan(/^([^\s]+)\s+(.+)/).first)
|
50
|
+
sentiscore = parsed_line[0]
|
51
|
+
text = parsed_line[1]
|
52
|
+
word_scores[text] = sentiscore.to_f
|
53
|
+
end
|
43
54
|
end
|
44
55
|
end
|
45
56
|
end
|
57
|
+
|
58
|
+
alias_method :load_senti_file, :load_from
|
46
59
|
|
47
60
|
private
|
48
61
|
|
49
62
|
def extract_words(string)
|
50
63
|
string.to_s.downcase.scan(/([\w']+|\S{2,})/).flatten
|
51
64
|
end
|
65
|
+
|
66
|
+
def extract_words_with_n_grams(string)
|
67
|
+
words = extract_words(string)
|
68
|
+
(1..ngrams).to_a.map do |ngram_size|
|
69
|
+
ngramify(words, ngram_size)
|
70
|
+
end.flatten
|
71
|
+
end
|
72
|
+
|
73
|
+
def ngramify(words, max_size)
|
74
|
+
return [words.join(" ")] if words.size <= max_size
|
75
|
+
tail = words.last(words.size - 1)
|
76
|
+
|
77
|
+
[words.first(max_size).join(" ")] + ngramify(tail, max_size)
|
78
|
+
end
|
52
79
|
end
|
data/sentimental.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = 'sentimental'
|
3
|
-
spec.version = '1.1.1'
|
3
|
+
spec.version = '1.2.1'
|
4
4
|
spec.summary = 'Simple sentiment analysis'
|
5
5
|
spec.description = 'A simple sentiment analysis gem'
|
6
6
|
spec.authors = ['Jeff Emminger', 'Christopher MacLellan', 'Denis Pasin']
|
data/spec/sentimental_spec.rb
CHANGED
@@ -8,7 +8,93 @@ describe Sentimental do
|
|
8
8
|
|
9
9
|
let(:analyzer) { Sentimental.new(threshold: 0.1) }
|
10
10
|
|
11
|
-
describe "#
|
11
|
+
describe "#sentiment" do
|
12
|
+
it "returns :positive when the score > threshold" do
|
13
|
+
expect(analyzer.sentiment("I love ruby <3")).to be :positive
|
14
|
+
end
|
15
|
+
|
16
|
+
it "returns :negative when the score < -threshold" do
|
17
|
+
expect(analyzer.sentiment("I hate javascript")).to be :negative
|
18
|
+
end
|
19
|
+
|
20
|
+
it "returns :positive when -threshold < score < threshold" do
|
21
|
+
expect(analyzer.sentiment("je en sais pas")).to be :neutral
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe "#classify" do
|
26
|
+
it "is true when in the class" do
|
27
|
+
expect(analyzer.classify("I love ruby")).to be_truthy
|
28
|
+
end
|
29
|
+
|
30
|
+
it "is false otherwise" do
|
31
|
+
expect(analyzer.classify("je ne sais pas")).to be_falsy
|
32
|
+
expect(analyzer.classify("i hate java")).to be_falsy
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
describe "initialization" do
|
37
|
+
subject do
|
38
|
+
Sentimental.new(
|
39
|
+
threshold: 0.2,
|
40
|
+
word_scores: {"non" => -1.0},
|
41
|
+
neutral_regexps: [/.*/],
|
42
|
+
)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "takes multiple init params" do
|
46
|
+
expect(subject.threshold).to eq 0.2
|
47
|
+
expect(subject.word_scores["non"]).to eq -1.0
|
48
|
+
expect(subject.neutral_regexps).to include /.*/
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "neutral regexp" do
|
53
|
+
context "when there is some neutral regexp" do
|
54
|
+
let(:text_neutral) {"Do you love ruby?"}
|
55
|
+
let(:text) {"I love ruby"}
|
56
|
+
|
57
|
+
before do
|
58
|
+
analyzer.neutral_regexps << /\?\s*$/
|
59
|
+
end
|
60
|
+
|
61
|
+
it "scores it to 0" do
|
62
|
+
expect(analyzer.score(text_neutral)).to eq 0
|
63
|
+
expect(analyzer.score(text)).not_to eq 0
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
describe "n-grams" do
|
69
|
+
let(:word_scores) { nil }
|
70
|
+
subject do
|
71
|
+
Sentimental.new(word_scores: word_scores, ngrams: 3)
|
72
|
+
end
|
73
|
+
|
74
|
+
it "is initialized by ngrams param" do
|
75
|
+
expect(subject.ngrams).to eq 3
|
76
|
+
end
|
77
|
+
|
78
|
+
context "there is n-grams in the dictionary" do
|
79
|
+
let(:word_scores) {{"happy hour" => 1.0, "not happy hour" => -5.0}}
|
80
|
+
let(:text) { "why not happy hour, but happy so hour?" }
|
81
|
+
|
82
|
+
it "update scores regarding to n-grams" do
|
83
|
+
expect(subject.score(text)).to eq -4
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context "there's n-grams longer than specified in dictionary" do
|
88
|
+
let(:word_scores) {{"happy hour" => 1.0, "not so happy hour" => -5.0}}
|
89
|
+
let(:text) { "why not so happy hour ?" }
|
90
|
+
|
91
|
+
it "ignores the lines" do
|
92
|
+
expect(subject.score(text)).to eq 1
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "scoring in a normal context" do
|
12
98
|
subject do
|
13
99
|
analyzer.score(text)
|
14
100
|
end
|
@@ -55,18 +141,5 @@ describe Sentimental do
|
|
55
141
|
end
|
56
142
|
end
|
57
143
|
|
58
|
-
context "when there is some neutral regexp" do
|
59
|
-
let(:text_neutral) {"Do you love ruby?"}
|
60
|
-
let(:text) {"I love ruby"}
|
61
|
-
|
62
|
-
before do
|
63
|
-
analyzer.neutral_regexps << /\?\s*$/
|
64
|
-
end
|
65
|
-
|
66
|
-
it "scores it to 0" do
|
67
|
-
expect(analyzer.score(text_neutral)).to eq 0
|
68
|
-
expect(analyzer.score(text)).not_to eq 0
|
69
|
-
end
|
70
|
-
end
|
71
144
|
end
|
72
145
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentimental
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Emminger
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-
|
13
|
+
date: 2016-04-22 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
93
|
version: '0'
|
94
94
|
requirements: []
|
95
95
|
rubyforge_project:
|
96
|
-
rubygems_version: 2.4.
|
96
|
+
rubygems_version: 2.4.8
|
97
97
|
signing_key:
|
98
98
|
specification_version: 4
|
99
99
|
summary: Simple sentiment analysis
|