sentimental 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +1 -0
- data/.rubocop.yml +12 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +20 -4
- data/Rakefile +2 -2
- data/data/JSON_builder.rb +21 -0
- data/data/en_words.json +18539 -0
- data/data/{sentiwords.txt → sentiwords_fr.txt} +2048 -2048
- data/data/slang.json +57 -0
- data/lib/file_reader.rb +19 -0
- data/lib/sentimental.rb +21 -24
- data/sentimental.gemspec +6 -3
- data/spec/file_reader_spec.rb +31 -0
- data/spec/sentimental_spec.rb +49 -51
- data/spec/spec_helper.rb +12 -0
- metadata +53 -4
- data/data/sentislang.txt +0 -56
data/data/slang.json
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
{
|
2
|
+
"%-(": -1.0,
|
3
|
+
")-:": -1.0,
|
4
|
+
"):": -1.0,
|
5
|
+
")o:": -1.0,
|
6
|
+
"8-0": -1.0,
|
7
|
+
"8/": -1.0,
|
8
|
+
"8\\": -1.0,
|
9
|
+
"8c": -1.0,
|
10
|
+
":'(": -1.0,
|
11
|
+
":'-(": -1.0,
|
12
|
+
":(": -1.0,
|
13
|
+
":*(": -1.0,
|
14
|
+
":,(": -1.0,
|
15
|
+
":-(": -1.0,
|
16
|
+
":-/": -1.0,
|
17
|
+
":-S": -1.0,
|
18
|
+
":-\\": -1.0,
|
19
|
+
":-|": -0.5,
|
20
|
+
":/": -0.5,
|
21
|
+
":O": -0.25,
|
22
|
+
":S": -0.25,
|
23
|
+
":\\": -0.25,
|
24
|
+
":|": -0.25,
|
25
|
+
"=(": -1.0,
|
26
|
+
">:(": -1.0,
|
27
|
+
"D:": -1.0,
|
28
|
+
"sux": -1.0,
|
29
|
+
"(o;": 1.0,
|
30
|
+
"8-)": 1.0,
|
31
|
+
";)": 1.0,
|
32
|
+
";o)": 1.0,
|
33
|
+
"%-)": 1.0,
|
34
|
+
"(-:": 1.0,
|
35
|
+
":-)": 1.0,
|
36
|
+
"(:": 1.0,
|
37
|
+
"(o:": 1.0,
|
38
|
+
"8)": 1.0,
|
39
|
+
":)": 1.0,
|
40
|
+
":-D": 1.0,
|
41
|
+
":-P": 1.0,
|
42
|
+
":D": 1.0,
|
43
|
+
":P": 1.0,
|
44
|
+
":]": 1.0,
|
45
|
+
":o)": 1.0,
|
46
|
+
":p": 1.0,
|
47
|
+
";^)": 1.0,
|
48
|
+
"<3": 1.0,
|
49
|
+
"<3": 1.0,
|
50
|
+
"=)": 1.0,
|
51
|
+
"=]": 1.0,
|
52
|
+
">:)": 1.0,
|
53
|
+
">:D": 1.0,
|
54
|
+
">=D": 1.0,
|
55
|
+
"^_^": 1.0,
|
56
|
+
"}:)": 1.0
|
57
|
+
}
|
data/lib/file_reader.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Helpers that load "text => sentiment score" dictionaries from disk.
#
# The methods can be used as a mixin (`include FileReader`) or called
# directly on the module (`FileReader.hash_from_json(path)`).
module FileReader
  # Make every method below callable on the module itself, so callers do not
  # have to mix FileReader into Object just to write FileReader.hash_from_txt.
  # The instance (mixin) methods stay public, so existing includers still work.
  extend self

  # Builds a dictionary from a plain-text file holding one
  # "<score><whitespace><text>" entry per line. The text part may itself
  # contain spaces. Lines that do not match that shape (blank lines, a lone
  # token) are skipped. Scores are converted with String#to_f.
  #
  # @param filename [String] path of the text file to read
  # @return [Hash{String => Float}] text mapped to its score
  # @raise [SystemCallError] if the file cannot be opened (e.g. Errno::ENOENT)
  def hash_from_txt(filename)
    # File.foreach streams the file line by line and closes it when done.
    File.foreach(filename).each_with_object({}) do |line, scores|
      score, text = line.chomp.scan(/^([^\s]+)\s+(.+)/).first
      next unless text

      scores[text] = score.to_f
    end
  end

  # Parses a JSON file and returns the resulting Ruby object.
  #
  # @param filename [String] path of the JSON file to read
  # @return [Object] whatever JSON.parse yields for the file content
  # @raise [SystemCallError] if the file cannot be read
  # @raise [JSON::ParserError] if the content is not valid JSON
  def hash_from_json(filename)
    JSON.parse(File.read(filename))
  end
end
|
data/lib/sentimental.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
+
require_relative 'file_reader'
|
2
|
+
|
1
3
|
class Sentimental
|
4
|
+
include FileReader
|
5
|
+
|
2
6
|
attr_accessor :threshold, :word_scores, :neutral_regexps, :ngrams
|
3
7
|
|
4
8
|
def initialize(threshold: 0, word_scores: nil, neutral_regexps: [], ngrams: 1)
|
5
|
-
if ngrams >= 1
|
6
|
-
@ngrams = ngrams.to_i
|
7
|
-
else
|
8
|
-
@ngrams = 1
|
9
|
-
end
|
9
|
+
@ngrams = ngrams.to_i.abs if ngrams.to_i >= 1
|
10
10
|
@word_scores = word_scores || {}
|
11
11
|
@word_scores.default = 0.0
|
12
12
|
@threshold = threshold
|
@@ -14,10 +14,10 @@ class Sentimental
|
|
14
14
|
end
|
15
15
|
|
16
16
|
# Computes the raw sentiment score of +string+.
#
# Returns 0 straight away when any of the neutral regexps matches the
# string. Otherwise adds up word_scores[token] for every token produced by
# extract_words_with_n_grams, starting from 0.
def score(string)
  neutral = neutral_regexps.any? { |pattern| string =~ pattern }
  return 0 if neutral

  tokens = extract_words_with_n_grams(string)
  tokens.reduce(0) { |total, token| total + word_scores[token] }
end
|
23
23
|
|
@@ -38,24 +38,21 @@ class Sentimental
|
|
38
38
|
end
|
39
39
|
|
40
40
|
# Loads the "slang" and "en_words" JSON dictionaries from the data directory
# that sits next to this file's directory, in that order, via load_from_json.
def load_defaults
  %w(slang en_words).each do |dictionary|
    load_from_json("#{File.dirname(__FILE__)}/../data/#{dictionary}.json")
  end
end
|
45
45
|
|
46
46
|
# Reads a "<score> <text>" dictionary file with hash_from_txt and merges its
# entries into word_scores in place.
def load_from(filename)
  parsed_scores = hash_from_txt(filename)
  word_scores.merge!(parsed_scores)
end
|
57
|
-
|
58
|
-
|
49
|
+
|
50
|
+
# Reads a JSON dictionary file with hash_from_json and merges its entries
# into word_scores in place.
def load_from_json(filename)
  parsed_scores = hash_from_json(filename)
  word_scores.merge!(parsed_scores)
end
|
53
|
+
|
54
|
+
alias load_senti_file load_from
|
55
|
+
alias load_senti_json load_from_json
|
59
56
|
|
60
57
|
private
|
61
58
|
|
@@ -71,9 +68,9 @@ class Sentimental
|
|
71
68
|
end
|
72
69
|
|
73
70
|
# Builds the n-grams of +words+: every run of +max_size+ consecutive words,
# each joined with a single space, in order of appearance.
#
# When +words+ holds no more than +max_size+ entries the whole list is
# joined into one entry, so an empty list yields [""].
#
# Implemented with each_cons instead of recursing once per word, so long
# inputs no longer exhaust the call stack.
#
# @param words [Array<String>] tokens, in text order
# @param max_size [Integer] words per n-gram; expected to be >= 1
# @return [Array<String>] the space-joined n-grams
def ngramify(words, max_size)
  return [words.join(' ')] if words.size <= max_size

  words.each_cons(max_size).map { |gram| gram.join(' ') }
end
|
79
76
|
end
|
data/sentimental.gemspec
CHANGED
@@ -1,19 +1,22 @@
|
|
1
1
|
# Packaging manifest for the sentimental gem.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'sentimental'
  spec.version     = '1.3.0'
  spec.summary     = 'Simple sentiment analysis'
  spec.description = 'A simple sentiment analysis gem'
  spec.authors     = ['Jeff Emminger', 'Christopher MacLellan', 'Denis Pasin']
  spec.email       = ['jeff@7compass.com', 'denis@hellojam.fr']
  spec.homepage    = 'https://github.com/7compass/sentimental'
  spec.license     = 'MIT'

  # Packaged files: everything tracked by git, one path per line of output.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Development-only tooling
  spec.add_development_dependency 'bundler', '~> 1.3'
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'rspec', '>= 3.0.0'
  spec.add_development_dependency 'rubocop', '~> 0.40', '>= 0.40.0'

  # Needed at runtime by lib/file_reader.rb (JSON.parse)
  spec.add_runtime_dependency 'json', '~> 1.8', '>= 1.8.3'
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require_relative 'spec_helper'
|
2
|
+
require_relative '../lib/file_reader'
|
3
|
+
include FileReader
|
4
|
+
|
5
|
+
# Exercises both FileReader loaders against the fixture files that
# spec_helper writes into spec/test_data.
describe FileReader do
  describe '#hash_from_txt' do
    subject(:answer) { FileReader.hash_from_txt('spec/test_data/test.txt') }

    it 'returns a hash from a txt file' do
      expect(answer).not_to be_empty
    end

    it 'contains the data' do
      expect(answer['TEST']).to eq(1.0)
      expect(answer[':(']).to eq(-1.0)
    end
  end

  describe '#hash_from_json' do
    subject(:answer) { FileReader.hash_from_json('spec/test_data/test.json') }

    it 'returns a hash from a json file' do
      expect(answer).not_to be_empty
    end

    it 'contains the data' do
      expect(answer['TEST']).to eq(1.0)
      expect(answer[':(']).to eq(-1.0)
    end
  end
end
|
data/spec/sentimental_spec.rb
CHANGED
@@ -1,145 +1,143 @@
|
|
1
|
-
require_relative
|
1
|
+
require_relative '../lib/sentimental'
|
2
2
|
|
3
3
|
describe Sentimental do
|
4
|
-
|
5
4
|
before :each do
|
6
5
|
analyzer.load_defaults
|
7
6
|
end
|
8
7
|
|
9
8
|
let(:analyzer) { Sentimental.new(threshold: 0.1) }
|
10
9
|
|
11
|
-
describe
|
12
|
-
it
|
13
|
-
expect(analyzer.sentiment(
|
10
|
+
describe '#sentiment' do
|
11
|
+
it 'returns :positive when the score > threshold' do
|
12
|
+
expect(analyzer.sentiment('I love ruby <3')).to be :positive
|
14
13
|
end
|
15
14
|
|
16
|
-
it
|
17
|
-
expect(analyzer.sentiment(
|
15
|
+
it 'returns :negative when the score < -threshold' do
|
16
|
+
expect(analyzer.sentiment('I hate javascript')).to be :negative
|
18
17
|
end
|
19
18
|
|
20
|
-
it
|
21
|
-
expect(analyzer.sentiment(
|
19
|
+
# The description previously said ":positive" although the example asserts
# :neutral; the wording now matches the assertion.
# NOTE(review): 'je en sais pas' looks like a typo for 'je ne sais pas' (the
# phrase used in the #classify examples). The input is left unchanged because
# the dictionary score of 'ne' is not visible here - confirm before editing.
it 'returns :neutral when -threshold < score < threshold' do
  expect(analyzer.sentiment('je en sais pas')).to be :neutral
end
|
23
22
|
end
|
24
23
|
|
25
|
-
describe
|
26
|
-
it
|
27
|
-
expect(analyzer.classify(
|
24
|
+
describe '#classify' do
|
25
|
+
it 'is true when in the class' do
|
26
|
+
expect(analyzer.classify('I love ruby')).to be_truthy
|
28
27
|
end
|
29
28
|
|
30
|
-
it
|
31
|
-
expect(analyzer.classify(
|
32
|
-
expect(analyzer.classify(
|
29
|
+
it 'is false otherwise' do
|
30
|
+
expect(analyzer.classify('je ne sais pas')).to be_falsy
|
31
|
+
expect(analyzer.classify('i hate java')).to be_falsy
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
36
|
-
describe
|
35
|
+
describe 'initialization' do
|
37
36
|
subject do
|
38
37
|
Sentimental.new(
|
39
38
|
threshold: 0.2,
|
40
|
-
word_scores: {
|
41
|
-
neutral_regexps: [/.*/]
|
39
|
+
word_scores: { 'non' => -1.0 },
|
40
|
+
neutral_regexps: [/.*/]
|
42
41
|
)
|
43
42
|
end
|
44
43
|
|
45
|
-
it
|
44
|
+
it 'takes multiple init params' do
|
46
45
|
expect(subject.threshold).to eq 0.2
|
47
|
-
expect(subject.word_scores[
|
48
|
-
expect(subject.neutral_regexps).to include
|
46
|
+
expect(subject.word_scores['non']).to eq(-1.0)
|
47
|
+
expect(subject.neutral_regexps).to include(/.*/)
|
49
48
|
end
|
50
49
|
end
|
51
50
|
|
52
|
-
describe
|
53
|
-
context
|
54
|
-
let(:text_neutral) {
|
55
|
-
let(:text) {
|
51
|
+
describe 'neutral regexp' do
|
52
|
+
context 'when there is some neutral regexp' do
|
53
|
+
let(:text_neutral) { 'Do you love ruby?' }
|
54
|
+
let(:text) { 'I love ruby' }
|
56
55
|
|
57
56
|
before do
|
58
57
|
analyzer.neutral_regexps << /\?\s*$/
|
59
58
|
end
|
60
59
|
|
61
|
-
it
|
60
|
+
it 'scores it to 0' do
|
62
61
|
expect(analyzer.score(text_neutral)).to eq 0
|
63
62
|
expect(analyzer.score(text)).not_to eq 0
|
64
63
|
end
|
65
64
|
end
|
66
65
|
end
|
67
66
|
|
68
|
-
describe
|
67
|
+
describe 'n-grams' do
|
69
68
|
let(:word_scores) { nil }
|
70
69
|
subject do
|
71
70
|
Sentimental.new(word_scores: word_scores, ngrams: 3)
|
72
71
|
end
|
73
72
|
|
74
|
-
it
|
73
|
+
it 'is initialized by ngrams param' do
|
75
74
|
expect(subject.ngrams).to eq 3
|
76
75
|
end
|
77
|
-
|
78
|
-
context "there is n-grams in the dictionary" do
|
79
|
-
let(:word_scores) {{"happy hour" => 1.0, "not happy hour" => -5.0}}
|
80
|
-
let(:text) { "why not happy hour, but happy so hour?" }
|
81
76
|
|
82
|
-
|
83
|
-
|
77
|
+
context 'there is n-grams in the dictionary' do
|
78
|
+
let(:word_scores) { { 'happy hour' => 1.0, 'not happy hour' => -5.0 } }
|
79
|
+
let(:text) { 'why not happy hour, but happy so hour?' }
|
80
|
+
|
81
|
+
it 'update scores regarding to n-grams' do
|
82
|
+
expect(subject.score(text)).to eq(-4)
|
84
83
|
end
|
85
84
|
end
|
86
85
|
|
87
86
|
context "there's n-grams longer than specified in dictionary" do
|
88
|
-
let(:word_scores) {{
|
89
|
-
let(:text) {
|
87
|
+
let(:word_scores) { { 'happy hour' => 1.0, 'not so happy hour' => -5.0 } }
|
88
|
+
let(:text) { 'why not so happy hour ?' }
|
90
89
|
|
91
|
-
it
|
90
|
+
it 'ignores the lines' do
|
92
91
|
expect(subject.score(text)).to eq 1
|
93
92
|
end
|
94
93
|
end
|
95
94
|
end
|
96
95
|
|
97
|
-
describe
|
96
|
+
describe 'scoring in a normal context' do
|
98
97
|
subject do
|
99
98
|
analyzer.score(text)
|
100
99
|
end
|
101
100
|
|
102
|
-
context
|
103
|
-
let(:text) {'I love ruby'}
|
101
|
+
# Scoring a sentence with a positive word must give a score above zero.
# (Context description corrected from "postive" to "positive".)
context 'when the text is positive' do
  let(:text) { 'I love ruby' }

  it 'returns a positive score' do
    expect(subject).to be > 0
  end
end
|
109
108
|
|
110
|
-
context
|
111
|
-
let(:text) {'I like ruby'}
|
109
|
+
context 'when the text is neutral' do
|
110
|
+
let(:text) { 'I like ruby' }
|
112
111
|
|
113
112
|
it 'returns a neutral score' do
|
114
113
|
expect(subject).to eq 0
|
115
114
|
end
|
116
115
|
end
|
117
116
|
|
118
|
-
context
|
119
|
-
let(:text) {'I hate ruby'}
|
117
|
+
context 'when the text is negative' do
|
118
|
+
let(:text) { 'I hate ruby' }
|
120
119
|
|
121
120
|
it 'returns a negative score' do
|
122
121
|
expect(subject).to be < 0
|
123
122
|
end
|
124
123
|
end
|
125
124
|
|
126
|
-
context
|
127
|
-
let(:text) {'I love ruby'}
|
128
|
-
let(:text_with_smiley) {'I love ruby :-)'}
|
125
|
+
context 'when the text has smiley' do
|
126
|
+
let(:text) { 'I love ruby' }
|
127
|
+
let(:text_with_smiley) { 'I love ruby :-)' }
|
129
128
|
|
130
129
|
it 'scores it' do
|
131
130
|
expect(analyzer.score(text_with_smiley)).to be > analyzer.score(text)
|
132
131
|
end
|
133
132
|
end
|
134
133
|
|
135
|
-
context
|
136
|
-
let(:text) {'I love ruby'}
|
137
|
-
let(:text_with_punctuation) {'I love, ruby'}
|
134
|
+
context 'when the text has punctuation' do
|
135
|
+
let(:text) { 'I love ruby' }
|
136
|
+
let(:text_with_punctuation) { 'I love, ruby' }
|
138
137
|
|
139
138
|
it 'removes it' do
|
140
139
|
expect(analyzer.score(text_with_punctuation)).to eq analyzer.score(text)
|
141
140
|
end
|
142
141
|
end
|
143
|
-
|
144
142
|
end
|
145
143
|
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Writes the fixture files read by the FileReader specs into spec/test_data,
# creating that directory first when it does not exist yet. Paths are
# relative to the current working directory. Existing fixture files are
# overwritten. Returns nil.
def create_test_files
  fixture_dir = 'spec/test_data'
  Dir.mkdir(fixture_dir) unless Dir.exist?(fixture_dir)

  fixtures = {
    'spec/test_data/test.txt'  => ['1.0 TEST', '-1.0 :('],
    'spec/test_data/test.json' => ['{"TEST":1.0,":(":-1.0}']
  }

  fixtures.each do |path, lines|
    File.open(path, 'w+') do |file|
      lines.each { |line| file.puts(line) }
    end
  end

  nil
end
|
11
|
+
|
12
|
+
create_test_files
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentimental
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Emminger
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-
|
13
|
+
date: 2016-05-11 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|
@@ -54,6 +54,46 @@ dependencies:
|
|
54
54
|
- - ">="
|
55
55
|
- !ruby/object:Gem::Version
|
56
56
|
version: 3.0.0
|
57
|
+
- !ruby/object:Gem::Dependency
|
58
|
+
name: rubocop
|
59
|
+
requirement: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - "~>"
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0.40'
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 0.40.0
|
67
|
+
type: :development
|
68
|
+
prerelease: false
|
69
|
+
version_requirements: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - "~>"
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0.40'
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 0.40.0
|
77
|
+
- !ruby/object:Gem::Dependency
|
78
|
+
name: json
|
79
|
+
requirement: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '1.8'
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 1.8.3
|
87
|
+
type: :runtime
|
88
|
+
prerelease: false
|
89
|
+
version_requirements: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - "~>"
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '1.8'
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 1.8.3
|
57
97
|
description: A simple sentiment analysis gem
|
58
98
|
email:
|
59
99
|
- jeff@7compass.com
|
@@ -63,16 +103,23 @@ extensions: []
|
|
63
103
|
extra_rdoc_files: []
|
64
104
|
files:
|
65
105
|
- ".gitignore"
|
106
|
+
- ".rspec"
|
107
|
+
- ".rubocop.yml"
|
66
108
|
- Gemfile
|
67
109
|
- Gemfile.lock
|
68
110
|
- LICENSE.txt
|
69
111
|
- README.md
|
70
112
|
- Rakefile
|
71
|
-
- data/
|
72
|
-
- data/
|
113
|
+
- data/JSON_builder.rb
|
114
|
+
- data/en_words.json
|
115
|
+
- data/sentiwords_fr.txt
|
116
|
+
- data/slang.json
|
117
|
+
- lib/file_reader.rb
|
73
118
|
- lib/sentimental.rb
|
74
119
|
- sentimental.gemspec
|
120
|
+
- spec/file_reader_spec.rb
|
75
121
|
- spec/sentimental_spec.rb
|
122
|
+
- spec/spec_helper.rb
|
76
123
|
homepage: https://github.com/7compass/sentimental
|
77
124
|
licenses:
|
78
125
|
- MIT
|
@@ -98,4 +145,6 @@ signing_key:
|
|
98
145
|
specification_version: 4
|
99
146
|
summary: Simple sentiment analysis
|
100
147
|
test_files:
|
148
|
+
- spec/file_reader_spec.rb
|
101
149
|
- spec/sentimental_spec.rb
|
150
|
+
- spec/spec_helper.rb
|