sentimental 1.1.1 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +13 -0
- data/lib/sentimental.rb +37 -10
- data/sentimental.gemspec +1 -1
- data/spec/sentimental_spec.rb +87 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3589f3fdd19386fdd901d673bd905f2703102aa7
|
4
|
+
data.tar.gz: 355ec5e97a133254c82839b70db0e2a12fe0462a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec5d616c70d55299d58a520c5482e1dc0ff451b6405337d02e6ba0a9c5b77ca7553839a12dbb40c23f65e071e1022fc3b8dfeed0d022e96abde51d7532345935
|
7
|
+
data.tar.gz: d935a09a091dce7796717563bd9251cf4575ae50eae1bbfe7e0ed4a2b09398f4976a09d076bb4a3d445a1aec2e2f7aa01b4316b1079438862a74ec76c206df25
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -71,6 +71,19 @@ scores and tokens, e.g.:
|
|
71
71
|
0.0 Meh
|
72
72
|
-1.0 Horrible
|
73
73
|
|
74
|
+
## N-grams
|
75
|
+
|
76
|
+
To score multi-word phrases (n-grams) as well as single words, pass the maximum n-gram size to the initializer:
|
77
|
+
```
|
78
|
+
Sentimental.new(ngrams: 4)
|
79
|
+
```
|
80
|
+
|
81
|
+
Each line of the dictionary must contain a score, followed by whitespace, followed by the word or phrase:
|
82
|
+
|
83
|
+
1.0 very happy
|
84
|
+
-2.0 no
|
85
|
+
0.0 meh
|
86
|
+
|
74
87
|
## Installation
|
75
88
|
|
76
89
|
gem install sentimental
|
data/lib/sentimental.rb
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
class Sentimental
|
2
|
-
attr_accessor :threshold, :word_scores, :neutral_regexps
|
2
|
+
attr_accessor :threshold, :word_scores, :neutral_regexps, :ngrams
|
3
3
|
|
4
|
-
def initialize(threshold: 0, word_scores: nil, neutral_regexps: [])
|
5
|
-
|
4
|
+
def initialize(threshold: 0, word_scores: nil, neutral_regexps: [], ngrams: 1)
|
5
|
+
if ngrams >= 1
|
6
|
+
@ngrams = ngrams.to_i
|
7
|
+
else
|
8
|
+
@ngrams = 1
|
9
|
+
end
|
10
|
+
@word_scores = word_scores || {}
|
11
|
+
@word_scores.default = 0.0
|
6
12
|
@threshold = threshold
|
7
13
|
@neutral_regexps = neutral_regexps
|
8
14
|
end
|
@@ -10,7 +16,7 @@ class Sentimental
|
|
10
16
|
def score(string)
|
11
17
|
return 0 if neutral_regexps.any? {|regexp| string =~ regexp}
|
12
18
|
|
13
|
-
|
19
|
+
extract_words_with_n_grams(string).inject(0) do |score, token|
|
14
20
|
score += word_scores[token]
|
15
21
|
end
|
16
22
|
end
|
@@ -27,26 +33,47 @@ class Sentimental
|
|
27
33
|
end
|
28
34
|
end
|
29
35
|
|
36
|
+
def classify(string)
|
37
|
+
sentiment(string) == :positive
|
38
|
+
end
|
39
|
+
|
30
40
|
def load_defaults
|
31
41
|
['sentiwords', 'sentislang'].each do |filename|
|
32
|
-
|
42
|
+
load_from(File.dirname(__FILE__) + "/../data/#{filename}.txt")
|
33
43
|
end
|
34
44
|
end
|
35
45
|
|
36
|
-
def
|
46
|
+
def load_from(filename)
|
37
47
|
File.open(filename) do |file|
|
38
48
|
file.each_line do |line|
|
39
|
-
parsed_line = line.chomp.
|
40
|
-
|
41
|
-
|
42
|
-
|
49
|
+
if parsed_line = (line.chomp.scan(/^([^\s]+)\s+(.+)/).first)
|
50
|
+
sentiscore = parsed_line[0]
|
51
|
+
text = parsed_line[1]
|
52
|
+
word_scores[text] = sentiscore.to_f
|
53
|
+
end
|
43
54
|
end
|
44
55
|
end
|
45
56
|
end
|
57
|
+
|
58
|
+
alias_method :load_senti_file, :load_from
|
46
59
|
|
47
60
|
private
|
48
61
|
|
49
62
|
def extract_words(string)
|
50
63
|
string.to_s.downcase.scan(/([\w']+|\S{2,})/).flatten
|
51
64
|
end
|
65
|
+
|
66
|
+
def extract_words_with_n_grams(string)
|
67
|
+
words = extract_words(string)
|
68
|
+
(1..ngrams).to_a.map do |ngram_size|
|
69
|
+
ngramify(words, ngram_size)
|
70
|
+
end.flatten
|
71
|
+
end
|
72
|
+
|
73
|
+
def ngramify(words, max_size)
|
74
|
+
return [words.join(" ")] if words.size <= max_size
|
75
|
+
tail = words.last(words.size - 1)
|
76
|
+
|
77
|
+
[words.first(max_size).join(" ")] + ngramify(tail, max_size)
|
78
|
+
end
|
52
79
|
end
|
data/sentimental.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = 'sentimental'
|
3
|
-
spec.version = '1.1.1'
|
3
|
+
spec.version = '1.2.1'
|
4
4
|
spec.summary = 'Simple sentiment analysis'
|
5
5
|
spec.description = 'A simple sentiment analysis gem'
|
6
6
|
spec.authors = ['Jeff Emminger', 'Christopher MacLellan', 'Denis Pasin']
|
data/spec/sentimental_spec.rb
CHANGED
@@ -8,7 +8,93 @@ describe Sentimental do
|
|
8
8
|
|
9
9
|
let(:analyzer) { Sentimental.new(threshold: 0.1) }
|
10
10
|
|
11
|
-
describe "#
|
11
|
+
describe "#sentiment" do
|
12
|
+
it "returns :positive when the score > threshold" do
|
13
|
+
expect(analyzer.sentiment("I love ruby <3")).to be :positive
|
14
|
+
end
|
15
|
+
|
16
|
+
it "returns :negative when the score < -threshold" do
|
17
|
+
expect(analyzer.sentiment("I hate javascript")).to be :negative
|
18
|
+
end
|
19
|
+
|
20
|
+
it "returns :positive when -threshold < score < threshold" do
|
21
|
+
expect(analyzer.sentiment("je en sais pas")).to be :neutral
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe "#classify" do
|
26
|
+
it "is true when in the class" do
|
27
|
+
expect(analyzer.classify("I love ruby")).to be_truthy
|
28
|
+
end
|
29
|
+
|
30
|
+
it "is false otherwise" do
|
31
|
+
expect(analyzer.classify("je ne sais pas")).to be_falsy
|
32
|
+
expect(analyzer.classify("i hate java")).to be_falsy
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
describe "initialization" do
|
37
|
+
subject do
|
38
|
+
Sentimental.new(
|
39
|
+
threshold: 0.2,
|
40
|
+
word_scores: {"non" => -1.0},
|
41
|
+
neutral_regexps: [/.*/],
|
42
|
+
)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "takes multiple init params" do
|
46
|
+
expect(subject.threshold).to eq 0.2
|
47
|
+
expect(subject.word_scores["non"]).to eq -1.0
|
48
|
+
expect(subject.neutral_regexps).to include /.*/
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "neutral regexp" do
|
53
|
+
context "when there is some neutral regexp" do
|
54
|
+
let(:text_neutral) {"Do you love ruby?"}
|
55
|
+
let(:text) {"I love ruby"}
|
56
|
+
|
57
|
+
before do
|
58
|
+
analyzer.neutral_regexps << /\?\s*$/
|
59
|
+
end
|
60
|
+
|
61
|
+
it "scores it to 0" do
|
62
|
+
expect(analyzer.score(text_neutral)).to eq 0
|
63
|
+
expect(analyzer.score(text)).not_to eq 0
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
describe "n-grams" do
|
69
|
+
let(:word_scores) { nil }
|
70
|
+
subject do
|
71
|
+
Sentimental.new(word_scores: word_scores, ngrams: 3)
|
72
|
+
end
|
73
|
+
|
74
|
+
it "is initialized by ngrams param" do
|
75
|
+
expect(subject.ngrams).to eq 3
|
76
|
+
end
|
77
|
+
|
78
|
+
context "there is n-grams in the dictionary" do
|
79
|
+
let(:word_scores) {{"happy hour" => 1.0, "not happy hour" => -5.0}}
|
80
|
+
let(:text) { "why not happy hour, but happy so hour?" }
|
81
|
+
|
82
|
+
it "update scores regarding to n-grams" do
|
83
|
+
expect(subject.score(text)).to eq -4
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context "there's n-grams longer than specified in dictionary" do
|
88
|
+
let(:word_scores) {{"happy hour" => 1.0, "not so happy hour" => -5.0}}
|
89
|
+
let(:text) { "why not so happy hour ?" }
|
90
|
+
|
91
|
+
it "ignores the lines" do
|
92
|
+
expect(subject.score(text)).to eq 1
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "scoring in a normal context" do
|
12
98
|
subject do
|
13
99
|
analyzer.score(text)
|
14
100
|
end
|
@@ -55,18 +141,5 @@ describe Sentimental do
|
|
55
141
|
end
|
56
142
|
end
|
57
143
|
|
58
|
-
context "when there is some neutral regexp" do
|
59
|
-
let(:text_neutral) {"Do you love ruby?"}
|
60
|
-
let(:text) {"I love ruby"}
|
61
|
-
|
62
|
-
before do
|
63
|
-
analyzer.neutral_regexps << /\?\s*$/
|
64
|
-
end
|
65
|
-
|
66
|
-
it "scores it to 0" do
|
67
|
-
expect(analyzer.score(text_neutral)).to eq 0
|
68
|
-
expect(analyzer.score(text)).not_to eq 0
|
69
|
-
end
|
70
|
-
end
|
71
144
|
end
|
72
145
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentimental
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Emminger
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-
|
13
|
+
date: 2016-04-22 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
93
|
version: '0'
|
94
94
|
requirements: []
|
95
95
|
rubyforge_project:
|
96
|
-
rubygems_version: 2.4.
|
96
|
+
rubygems_version: 2.4.8
|
97
97
|
signing_key:
|
98
98
|
specification_version: 4
|
99
99
|
summary: Simple sentiment analysis
|