sentimental 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +1 -0
- data/.rubocop.yml +12 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +20 -4
- data/Rakefile +2 -2
- data/data/JSON_builder.rb +21 -0
- data/data/en_words.json +18539 -0
- data/data/{sentiwords.txt → sentiwords_fr.txt} +2048 -2048
- data/data/slang.json +57 -0
- data/lib/file_reader.rb +19 -0
- data/lib/sentimental.rb +21 -24
- data/sentimental.gemspec +6 -3
- data/spec/file_reader_spec.rb +31 -0
- data/spec/sentimental_spec.rb +49 -51
- data/spec/spec_helper.rb +12 -0
- metadata +53 -4
- data/data/sentislang.txt +0 -56
data/data/slang.json
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
{
|
2
|
+
"%-(": -1.0,
|
3
|
+
")-:": -1.0,
|
4
|
+
"):": -1.0,
|
5
|
+
")o:": -1.0,
|
6
|
+
"8-0": -1.0,
|
7
|
+
"8/": -1.0,
|
8
|
+
"8\\": -1.0,
|
9
|
+
"8c": -1.0,
|
10
|
+
":'(": -1.0,
|
11
|
+
":'-(": -1.0,
|
12
|
+
":(": -1.0,
|
13
|
+
":*(": -1.0,
|
14
|
+
":,(": -1.0,
|
15
|
+
":-(": -1.0,
|
16
|
+
":-/": -1.0,
|
17
|
+
":-S": -1.0,
|
18
|
+
":-\\": -1.0,
|
19
|
+
":-|": -0.5,
|
20
|
+
":/": -0.5,
|
21
|
+
":O": -0.25,
|
22
|
+
":S": -0.25,
|
23
|
+
":\\": -0.25,
|
24
|
+
":|": -0.25,
|
25
|
+
"=(": -1.0,
|
26
|
+
">:(": -1.0,
|
27
|
+
"D:": -1.0,
|
28
|
+
"sux": -1.0,
|
29
|
+
"(o;": 1.0,
|
30
|
+
"8-)": 1.0,
|
31
|
+
";)": 1.0,
|
32
|
+
";o)": 1.0,
|
33
|
+
"%-)": 1.0,
|
34
|
+
"(-:": 1.0,
|
35
|
+
":-)": 1.0,
|
36
|
+
"(:": 1.0,
|
37
|
+
"(o:": 1.0,
|
38
|
+
"8)": 1.0,
|
39
|
+
":)": 1.0,
|
40
|
+
":-D": 1.0,
|
41
|
+
":-P": 1.0,
|
42
|
+
":D": 1.0,
|
43
|
+
":P": 1.0,
|
44
|
+
":]": 1.0,
|
45
|
+
":o)": 1.0,
|
46
|
+
":p": 1.0,
|
47
|
+
";^)": 1.0,
|
48
|
+
"<3": 1.0,
|
49
|
+
"<3": 1.0,
|
50
|
+
"=)": 1.0,
|
51
|
+
"=]": 1.0,
|
52
|
+
">:)": 1.0,
|
53
|
+
">:D": 1.0,
|
54
|
+
">=D": 1.0,
|
55
|
+
"^_^": 1.0,
|
56
|
+
"}:)": 1.0
|
57
|
+
}
|
data/lib/file_reader.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Loads word => score dictionaries from files on disk.
#
# Every method is a public instance method, so the module can still be mixed
# into a class with `include FileReader`. `extend self` also makes the same
# methods callable on the module itself (`FileReader.hash_from_json(path)`)
# without first including the module into Object.
module FileReader
  extend self

  # Builds a dictionary from a text file holding one "<score> <text>" entry
  # per line: a run of non-whitespace (the score), whitespace, then the rest
  # of the line (the word or phrase).
  #
  # @param filename [String] path of the text file to read
  # @return [Hash{String => Float}] text mapped to its score; when a text is
  #   repeated, the last line wins
  def hash_from_txt(filename)
    File.foreach(filename).each_with_object({}) do |line, scores|
      entry = line.chomp.match(/^([^\s]+)\s+(.+)/)
      next unless entry # blank or malformed line: nothing to record

      # String#to_f never raises; a non-numeric score is stored as 0.0.
      scores[entry[2]] = entry[1].to_f
    end
  end

  # Parses a JSON file and returns the resulting Ruby object unchanged.
  #
  # @param filename [String] path of the JSON file to read
  # @return [Object] whatever JSON.parse produces for the file's contents
  # @raise [JSON::ParserError] if the file is not valid JSON
  def hash_from_json(filename)
    JSON.parse(File.read(filename))
  end
end
|
data/lib/sentimental.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
+
require_relative 'file_reader'
|
2
|
+
|
1
3
|
class Sentimental
|
4
|
+
include FileReader
|
5
|
+
|
2
6
|
attr_accessor :threshold, :word_scores, :neutral_regexps, :ngrams
|
3
7
|
|
4
8
|
def initialize(threshold: 0, word_scores: nil, neutral_regexps: [], ngrams: 1)
|
5
|
-
if ngrams >= 1
|
6
|
-
@ngrams = ngrams.to_i
|
7
|
-
else
|
8
|
-
@ngrams = 1
|
9
|
-
end
|
9
|
+
@ngrams = ngrams.to_i.abs if ngrams.to_i >= 1
|
10
10
|
@word_scores = word_scores || {}
|
11
11
|
@word_scores.default = 0.0
|
12
12
|
@threshold = threshold
|
@@ -14,10 +14,10 @@ class Sentimental
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def score(string)
|
17
|
-
return 0 if neutral_regexps.any? {|regexp| string =~ regexp}
|
17
|
+
return 0 if neutral_regexps.any? { |regexp| string =~ regexp }
|
18
18
|
|
19
|
-
extract_words_with_n_grams(string).inject(0) do |score, token|
|
20
|
-
score
|
19
|
+
extract_words_with_n_grams(string).inject(0) do |score, token|
|
20
|
+
score + word_scores[token]
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
@@ -38,24 +38,21 @@ class Sentimental
|
|
38
38
|
end
|
39
39
|
|
40
40
|
def load_defaults
|
41
|
-
|
42
|
-
|
41
|
+
%w(slang en_words).each do |filename|
|
42
|
+
load_from_json(File.dirname(__FILE__) + "/../data/#{filename}.json")
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
46
|
def load_from(filename)
|
47
|
-
|
48
|
-
file.each_line do |line|
|
49
|
-
if parsed_line = (line.chomp.scan(/^([^\s]+)\s+(.+)/).first)
|
50
|
-
sentiscore = parsed_line[0]
|
51
|
-
text = parsed_line[1]
|
52
|
-
word_scores[text] = sentiscore.to_f
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
47
|
+
word_scores.merge!(hash_from_txt(filename))
|
56
48
|
end
|
57
|
-
|
58
|
-
|
49
|
+
|
50
|
+
def load_from_json(filename)
|
51
|
+
word_scores.merge!(hash_from_json(filename))
|
52
|
+
end
|
53
|
+
|
54
|
+
alias load_senti_file load_from
|
55
|
+
alias load_senti_json load_from_json
|
59
56
|
|
60
57
|
private
|
61
58
|
|
@@ -71,9 +68,9 @@ class Sentimental
|
|
71
68
|
end
|
72
69
|
|
73
70
|
def ngramify(words, max_size)
|
74
|
-
return [words.join(
|
71
|
+
return [words.join(' ')] if words.size <= max_size
|
75
72
|
tail = words.last(words.size - 1)
|
76
|
-
|
77
|
-
[words.first(max_size).join(
|
73
|
+
|
74
|
+
[words.first(max_size).join(' ')] + ngramify(tail, max_size)
|
78
75
|
end
|
79
76
|
end
|
data/sentimental.gemspec
CHANGED
@@ -1,19 +1,22 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = 'sentimental'
|
3
|
-
spec.version = '1.
|
3
|
+
spec.version = '1.3.0'
|
4
4
|
spec.summary = 'Simple sentiment analysis'
|
5
5
|
spec.description = 'A simple sentiment analysis gem'
|
6
6
|
spec.authors = ['Jeff Emminger', 'Christopher MacLellan', 'Denis Pasin']
|
7
7
|
spec.email = ['jeff@7compass.com', 'denis@hellojam.fr']
|
8
8
|
spec.homepage = 'https://github.com/7compass/sentimental'
|
9
9
|
spec.license = 'MIT'
|
10
|
-
|
10
|
+
|
11
11
|
spec.files = `git ls-files`.split($/)
|
12
12
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
13
13
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
14
14
|
spec.require_paths = ['lib']
|
15
|
-
|
15
|
+
|
16
16
|
spec.add_development_dependency "bundler", "~> 1.3"
|
17
17
|
spec.add_development_dependency "rake"
|
18
18
|
spec.add_development_dependency "rspec", ">= 3.0.0"
|
19
|
+
spec.add_development_dependency "rubocop", "~> 0.40", ">= 0.40.0"
|
20
|
+
|
21
|
+
spec.add_runtime_dependency "json", "~> 1.8", ">= 1.8.3"
|
19
22
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require_relative 'spec_helper'
|
2
|
+
require_relative '../lib/file_reader'
|
3
|
+
include FileReader
|
4
|
+
|
5
|
+
# Exercises both FileReader loaders against the fixture files under
# spec/test_data.
describe FileReader do
  describe '#hash_from_txt' do
    subject(:answer) { described_class.hash_from_txt('spec/test_data/test.txt') }

    it 'returns a hash from a txt file' do
      expect(answer).not_to be_empty
    end

    it 'contains the data' do
      expect(answer).to include('TEST' => 1.0, ':(' => -1.0)
    end
  end

  describe '#hash_from_json' do
    subject(:answer) { described_class.hash_from_json('spec/test_data/test.json') }

    it 'returns a hash from a json file' do
      expect(answer).not_to be_empty
    end

    it 'contains the data' do
      expect(answer).to include('TEST' => 1.0, ':(' => -1.0)
    end
  end
end
|
data/spec/sentimental_spec.rb
CHANGED
@@ -1,145 +1,143 @@
|
|
1
|
-
require_relative
|
1
|
+
require_relative '../lib/sentimental'
|
2
2
|
|
3
3
|
describe Sentimental do
|
4
|
-
|
5
4
|
before :each do
|
6
5
|
analyzer.load_defaults
|
7
6
|
end
|
8
7
|
|
9
8
|
let(:analyzer) { Sentimental.new(threshold: 0.1) }
|
10
9
|
|
11
|
-
describe
|
12
|
-
it
|
13
|
-
expect(analyzer.sentiment(
|
10
|
+
describe '#sentiment' do
|
11
|
+
it 'returns :positive when the score > threshold' do
|
12
|
+
expect(analyzer.sentiment('I love ruby <3')).to be :positive
|
14
13
|
end
|
15
14
|
|
16
|
-
it
|
17
|
-
expect(analyzer.sentiment(
|
15
|
+
it 'returns :negative when the score < -threshold' do
|
16
|
+
expect(analyzer.sentiment('I hate javascript')).to be :negative
|
18
17
|
end
|
19
18
|
|
20
|
-
it
|
21
|
-
expect(analyzer.sentiment(
|
19
|
+
it 'returns :positive when -threshold < score < threshold' do
|
20
|
+
expect(analyzer.sentiment('je en sais pas')).to be :neutral
|
22
21
|
end
|
23
22
|
end
|
24
23
|
|
25
|
-
describe
|
26
|
-
it
|
27
|
-
expect(analyzer.classify(
|
24
|
+
describe '#classify' do
|
25
|
+
it 'is true when in the class' do
|
26
|
+
expect(analyzer.classify('I love ruby')).to be_truthy
|
28
27
|
end
|
29
28
|
|
30
|
-
it
|
31
|
-
expect(analyzer.classify(
|
32
|
-
expect(analyzer.classify(
|
29
|
+
it 'is false otherwise' do
|
30
|
+
expect(analyzer.classify('je ne sais pas')).to be_falsy
|
31
|
+
expect(analyzer.classify('i hate java')).to be_falsy
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
36
|
-
describe
|
35
|
+
describe 'initialization' do
|
37
36
|
subject do
|
38
37
|
Sentimental.new(
|
39
38
|
threshold: 0.2,
|
40
|
-
word_scores: {
|
41
|
-
neutral_regexps: [/.*/]
|
39
|
+
word_scores: { 'non' => -1.0 },
|
40
|
+
neutral_regexps: [/.*/]
|
42
41
|
)
|
43
42
|
end
|
44
43
|
|
45
|
-
it
|
44
|
+
it 'takes multiple init params' do
|
46
45
|
expect(subject.threshold).to eq 0.2
|
47
|
-
expect(subject.word_scores[
|
48
|
-
expect(subject.neutral_regexps).to include
|
46
|
+
expect(subject.word_scores['non']).to eq(-1.0)
|
47
|
+
expect(subject.neutral_regexps).to include(/.*/)
|
49
48
|
end
|
50
49
|
end
|
51
50
|
|
52
|
-
describe
|
53
|
-
context
|
54
|
-
let(:text_neutral) {
|
55
|
-
let(:text) {
|
51
|
+
describe 'neutral regexp' do
|
52
|
+
context 'when there is some neutral regexp' do
|
53
|
+
let(:text_neutral) { 'Do you love ruby?' }
|
54
|
+
let(:text) { 'I love ruby' }
|
56
55
|
|
57
56
|
before do
|
58
57
|
analyzer.neutral_regexps << /\?\s*$/
|
59
58
|
end
|
60
59
|
|
61
|
-
it
|
60
|
+
it 'scores it to 0' do
|
62
61
|
expect(analyzer.score(text_neutral)).to eq 0
|
63
62
|
expect(analyzer.score(text)).not_to eq 0
|
64
63
|
end
|
65
64
|
end
|
66
65
|
end
|
67
66
|
|
68
|
-
describe
|
67
|
+
describe 'n-grams' do
|
69
68
|
let(:word_scores) { nil }
|
70
69
|
subject do
|
71
70
|
Sentimental.new(word_scores: word_scores, ngrams: 3)
|
72
71
|
end
|
73
72
|
|
74
|
-
it
|
73
|
+
it 'is initialized by ngrams param' do
|
75
74
|
expect(subject.ngrams).to eq 3
|
76
75
|
end
|
77
|
-
|
78
|
-
context "there is n-grams in the dictionary" do
|
79
|
-
let(:word_scores) {{"happy hour" => 1.0, "not happy hour" => -5.0}}
|
80
|
-
let(:text) { "why not happy hour, but happy so hour?" }
|
81
76
|
|
82
|
-
|
83
|
-
|
77
|
+
context 'there is n-grams in the dictionary' do
|
78
|
+
let(:word_scores) { { 'happy hour' => 1.0, 'not happy hour' => -5.0 } }
|
79
|
+
let(:text) { 'why not happy hour, but happy so hour?' }
|
80
|
+
|
81
|
+
it 'update scores regarding to n-grams' do
|
82
|
+
expect(subject.score(text)).to eq(-4)
|
84
83
|
end
|
85
84
|
end
|
86
85
|
|
87
86
|
context "there's n-grams longer than specified in dictionary" do
|
88
|
-
let(:word_scores) {{
|
89
|
-
let(:text) {
|
87
|
+
let(:word_scores) { { 'happy hour' => 1.0, 'not so happy hour' => -5.0 } }
|
88
|
+
let(:text) { 'why not so happy hour ?' }
|
90
89
|
|
91
|
-
it
|
90
|
+
it 'ignores the lines' do
|
92
91
|
expect(subject.score(text)).to eq 1
|
93
92
|
end
|
94
93
|
end
|
95
94
|
end
|
96
95
|
|
97
|
-
describe
|
96
|
+
describe 'scoring in a normal context' do
|
98
97
|
subject do
|
99
98
|
analyzer.score(text)
|
100
99
|
end
|
101
100
|
|
102
|
-
context
|
103
|
-
let(:text) {'I love ruby'}
|
101
|
+
context 'when the text is postive' do
|
102
|
+
let(:text) { 'I love ruby' }
|
104
103
|
|
105
104
|
it 'returns a positive score' do
|
106
105
|
expect(subject).to be > 0
|
107
106
|
end
|
108
107
|
end
|
109
108
|
|
110
|
-
context
|
111
|
-
let(:text) {'I like ruby'}
|
109
|
+
context 'when the text is neutral' do
|
110
|
+
let(:text) { 'I like ruby' }
|
112
111
|
|
113
112
|
it 'returns a neutral score' do
|
114
113
|
expect(subject).to eq 0
|
115
114
|
end
|
116
115
|
end
|
117
116
|
|
118
|
-
context
|
119
|
-
let(:text) {'I hate ruby'}
|
117
|
+
context 'when the text is negative' do
|
118
|
+
let(:text) { 'I hate ruby' }
|
120
119
|
|
121
120
|
it 'returns a negative score' do
|
122
121
|
expect(subject).to be < 0
|
123
122
|
end
|
124
123
|
end
|
125
124
|
|
126
|
-
context
|
127
|
-
let(:text) {'I love ruby'}
|
128
|
-
let(:text_with_smiley) {'I love ruby :-)'}
|
125
|
+
context 'when the text has smiley' do
|
126
|
+
let(:text) { 'I love ruby' }
|
127
|
+
let(:text_with_smiley) { 'I love ruby :-)' }
|
129
128
|
|
130
129
|
it 'scores it' do
|
131
130
|
expect(analyzer.score(text_with_smiley)).to be > analyzer.score(text)
|
132
131
|
end
|
133
132
|
end
|
134
133
|
|
135
|
-
context
|
136
|
-
let(:text) {'I love ruby'}
|
137
|
-
let(:text_with_punctuation) {'I love, ruby'}
|
134
|
+
context 'when the text has punctuation' do
|
135
|
+
let(:text) { 'I love ruby' }
|
136
|
+
let(:text_with_punctuation) { 'I love, ruby' }
|
138
137
|
|
139
138
|
it 'removes it' do
|
140
139
|
expect(analyzer.score(text_with_punctuation)).to eq analyzer.score(text)
|
141
140
|
end
|
142
141
|
end
|
143
|
-
|
144
142
|
end
|
145
143
|
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Writes the fixture files used by the specs: `test.txt` (two
# "<score> <text>" lines) and `test.json` (the same two entries as a JSON
# object).
#
# Existing files are overwritten, so calling this repeatedly is safe.
#
# @param dir [String] directory to create the fixtures in; its parent must
#   already exist. Defaults to 'spec/test_data', resolved against the current
#   working directory.
# @return [void]
def create_test_files(dir = 'spec/test_data')
  Dir.mkdir(dir) unless Dir.exist?(dir)
  File.write(File.join(dir, 'test.txt'), "1.0 TEST\n-1.0 :(\n")
  File.write(File.join(dir, 'test.json'), "{\"TEST\":1.0,\":(\":-1.0}\n")
end
|
11
|
+
|
12
|
+
create_test_files
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentimental
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Emminger
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-
|
13
|
+
date: 2016-05-11 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|
@@ -54,6 +54,46 @@ dependencies:
|
|
54
54
|
- - ">="
|
55
55
|
- !ruby/object:Gem::Version
|
56
56
|
version: 3.0.0
|
57
|
+
- !ruby/object:Gem::Dependency
|
58
|
+
name: rubocop
|
59
|
+
requirement: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - "~>"
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0.40'
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 0.40.0
|
67
|
+
type: :development
|
68
|
+
prerelease: false
|
69
|
+
version_requirements: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - "~>"
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0.40'
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 0.40.0
|
77
|
+
- !ruby/object:Gem::Dependency
|
78
|
+
name: json
|
79
|
+
requirement: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '1.8'
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 1.8.3
|
87
|
+
type: :runtime
|
88
|
+
prerelease: false
|
89
|
+
version_requirements: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - "~>"
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '1.8'
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 1.8.3
|
57
97
|
description: A simple sentiment analysis gem
|
58
98
|
email:
|
59
99
|
- jeff@7compass.com
|
@@ -63,16 +103,23 @@ extensions: []
|
|
63
103
|
extra_rdoc_files: []
|
64
104
|
files:
|
65
105
|
- ".gitignore"
|
106
|
+
- ".rspec"
|
107
|
+
- ".rubocop.yml"
|
66
108
|
- Gemfile
|
67
109
|
- Gemfile.lock
|
68
110
|
- LICENSE.txt
|
69
111
|
- README.md
|
70
112
|
- Rakefile
|
71
|
-
- data/
|
72
|
-
- data/
|
113
|
+
- data/JSON_builder.rb
|
114
|
+
- data/en_words.json
|
115
|
+
- data/sentiwords_fr.txt
|
116
|
+
- data/slang.json
|
117
|
+
- lib/file_reader.rb
|
73
118
|
- lib/sentimental.rb
|
74
119
|
- sentimental.gemspec
|
120
|
+
- spec/file_reader_spec.rb
|
75
121
|
- spec/sentimental_spec.rb
|
122
|
+
- spec/spec_helper.rb
|
76
123
|
homepage: https://github.com/7compass/sentimental
|
77
124
|
licenses:
|
78
125
|
- MIT
|
@@ -98,4 +145,6 @@ signing_key:
|
|
98
145
|
specification_version: 4
|
99
146
|
summary: Simple sentiment analysis
|
100
147
|
test_files:
|
148
|
+
- spec/file_reader_spec.rb
|
101
149
|
- spec/sentimental_spec.rb
|
150
|
+
- spec/spec_helper.rb
|