sentimental 1.1.1 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 63ca1cc3a2e131c766fda7b6ffef711958b77af4
4
- data.tar.gz: da25b0c71603372b554e3624e4113d667155d6dc
3
+ metadata.gz: 3589f3fdd19386fdd901d673bd905f2703102aa7
4
+ data.tar.gz: 355ec5e97a133254c82839b70db0e2a12fe0462a
5
5
  SHA512:
6
- metadata.gz: 33f62d43418c9d02206ec0897572a9efb47ae5dac4189c8fa6c07e889608f23c92fa3478990611d2b697d630fba4652dba5d501483f547a6d278ebfde5930ee6
7
- data.tar.gz: 3c45a95fafdcf12949dd67ea1c0b075d0628951f07168de5c803935db456d07ba4cbc478acf5715fcd830b67808c04dcceaf97f130f7cc99277afd2b83251aaf
6
+ metadata.gz: ec5d616c70d55299d58a520c5482e1dc0ff451b6405337d02e6ba0a9c5b77ca7553839a12dbb40c23f65e071e1022fc3b8dfeed0d022e96abde51d7532345935
7
+ data.tar.gz: d935a09a091dce7796717563bd9251cf4575ae50eae1bbfe7e0ed4a2b09398f4976a09d076bb4a3d445a1aec2e2f7aa01b4316b1079438862a74ec76c206df25
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sentimental (1.0.4)
4
+ sentimental (1.2.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -71,6 +71,19 @@ scores and tokens, e.g.:
71
71
  0.0 Meh
72
72
  -1.0 Horrible
73
73
 
74
+ ## N-grams
75
+
76
+ You can match n-grams (multi-word phrases) by specifying their maximum size, in words, in the initializer:
77
+ ```
78
+ Sentimental.new(ngrams: 4)
79
+ ```
80
+
81
+ The dictionary must have one entry per line, in this format (a score, whitespace, then the word or phrase):
82
+
83
+ 1.0 very happy
84
+ -2.0 no
85
+ 0.0 meh
86
+
74
87
  ## Installation
75
88
 
76
89
  gem install sentimental
data/lib/sentimental.rb CHANGED
@@ -1,8 +1,14 @@
1
1
  class Sentimental
2
- attr_accessor :threshold, :word_scores, :neutral_regexps
2
+ attr_accessor :threshold, :word_scores, :neutral_regexps, :ngrams
3
3
 
4
- def initialize(threshold: 0, word_scores: nil, neutral_regexps: [])
5
- @word_scores = Hash.new(0.0) || word_scores
4
+ def initialize(threshold: 0, word_scores: nil, neutral_regexps: [], ngrams: 1)
5
+ if ngrams >= 1
6
+ @ngrams = ngrams.to_i
7
+ else
8
+ @ngrams = 1
9
+ end
10
+ @word_scores = word_scores || {}
11
+ @word_scores.default = 0.0
6
12
  @threshold = threshold
7
13
  @neutral_regexps = neutral_regexps
8
14
  end
@@ -10,7 +16,7 @@ class Sentimental
10
16
  def score(string)
11
17
  return 0 if neutral_regexps.any? {|regexp| string =~ regexp}
12
18
 
13
- extract_words(string).inject(0) do |score, token|
19
+ extract_words_with_n_grams(string).inject(0) do |score, token|
14
20
  score += word_scores[token]
15
21
  end
16
22
  end
@@ -27,26 +33,47 @@ class Sentimental
27
33
  end
28
34
  end
29
35
 
36
+ def classify(string)
37
+ sentiment(string) == :positive
38
+ end
39
+
30
40
  def load_defaults
31
41
  ['sentiwords', 'sentislang'].each do |filename|
32
- load_senti_file(File.dirname(__FILE__) + "/../data/#{filename}.txt")
42
+ load_from(File.dirname(__FILE__) + "/../data/#{filename}.txt")
33
43
  end
34
44
  end
35
45
 
36
- def load_senti_file(filename)
46
+ def load_from(filename)
37
47
  File.open(filename) do |file|
38
48
  file.each_line do |line|
39
- parsed_line = line.chomp.split(/\s+/)
40
- sentiscore = parsed_line[0]
41
- text = parsed_line[1]
42
- word_scores[text] = sentiscore.to_f
49
+ if parsed_line = (line.chomp.scan(/^([^\s]+)\s+(.+)/).first)
50
+ sentiscore = parsed_line[0]
51
+ text = parsed_line[1]
52
+ word_scores[text] = sentiscore.to_f
53
+ end
43
54
  end
44
55
  end
45
56
  end
57
+
58
+ alias_method :load_senti_file, :load_from
46
59
 
47
60
  private
48
61
 
49
62
  def extract_words(string)
50
63
  string.to_s.downcase.scan(/([\w']+|\S{2,})/).flatten
51
64
  end
65
+
66
+ def extract_words_with_n_grams(string)
67
+ words = extract_words(string)
68
+ (1..ngrams).to_a.map do |ngram_size|
69
+ ngramify(words, ngram_size)
70
+ end.flatten
71
+ end
72
+
73
+ def ngramify(words, max_size)
74
+ return [words.join(" ")] if words.size <= max_size
75
+ tail = words.last(words.size - 1)
76
+
77
+ [words.first(max_size).join(" ")] + ngramify(tail, max_size)
78
+ end
52
79
  end
data/sentimental.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = 'sentimental'
3
- spec.version = '1.1.1'
3
+ spec.version = '1.2.1'
4
4
  spec.summary = 'Simple sentiment analysis'
5
5
  spec.description = 'A simple sentiment analysis gem'
6
6
  spec.authors = ['Jeff Emminger', 'Christopher MacLellan', 'Denis Pasin']
@@ -8,7 +8,93 @@ describe Sentimental do
8
8
 
9
9
  let(:analyzer) { Sentimental.new(threshold: 0.1) }
10
10
 
11
- describe "#score" do
11
+ describe "#sentiment" do
12
+ it "returns :positive when the score > threshold" do
13
+ expect(analyzer.sentiment("I love ruby <3")).to be :positive
14
+ end
15
+
16
+ it "returns :negative when the score < -threshold" do
17
+ expect(analyzer.sentiment("I hate javascript")).to be :negative
18
+ end
19
+
20
+ it "returns :positive when -threshold < score < threshold" do
21
+ expect(analyzer.sentiment("je en sais pas")).to be :neutral
22
+ end
23
+ end
24
+
25
+ describe "#classify" do
26
+ it "is true when in the class" do
27
+ expect(analyzer.classify("I love ruby")).to be_truthy
28
+ end
29
+
30
+ it "is false otherwise" do
31
+ expect(analyzer.classify("je ne sais pas")).to be_falsy
32
+ expect(analyzer.classify("i hate java")).to be_falsy
33
+ end
34
+ end
35
+
36
+ describe "initialization" do
37
+ subject do
38
+ Sentimental.new(
39
+ threshold: 0.2,
40
+ word_scores: {"non" => -1.0},
41
+ neutral_regexps: [/.*/],
42
+ )
43
+ end
44
+
45
+ it "takes multiple init params" do
46
+ expect(subject.threshold).to eq 0.2
47
+ expect(subject.word_scores["non"]).to eq -1.0
48
+ expect(subject.neutral_regexps).to include /.*/
49
+ end
50
+ end
51
+
52
+ describe "neutral regexp" do
53
+ context "when there is some neutral regexp" do
54
+ let(:text_neutral) {"Do you love ruby?"}
55
+ let(:text) {"I love ruby"}
56
+
57
+ before do
58
+ analyzer.neutral_regexps << /\?\s*$/
59
+ end
60
+
61
+ it "scores it to 0" do
62
+ expect(analyzer.score(text_neutral)).to eq 0
63
+ expect(analyzer.score(text)).not_to eq 0
64
+ end
65
+ end
66
+ end
67
+
68
+ describe "n-grams" do
69
+ let(:word_scores) { nil }
70
+ subject do
71
+ Sentimental.new(word_scores: word_scores, ngrams: 3)
72
+ end
73
+
74
+ it "is initialized by ngrams param" do
75
+ expect(subject.ngrams).to eq 3
76
+ end
77
+
78
+ context "there is n-grams in the dictionary" do
79
+ let(:word_scores) {{"happy hour" => 1.0, "not happy hour" => -5.0}}
80
+ let(:text) { "why not happy hour, but happy so hour?" }
81
+
82
+ it "update scores regarding to n-grams" do
83
+ expect(subject.score(text)).to eq -4
84
+ end
85
+ end
86
+
87
+ context "there's n-grams longer than specified in dictionary" do
88
+ let(:word_scores) {{"happy hour" => 1.0, "not so happy hour" => -5.0}}
89
+ let(:text) { "why not so happy hour ?" }
90
+
91
+ it "ignores the lines" do
92
+ expect(subject.score(text)).to eq 1
93
+ end
94
+ end
95
+ end
96
+
97
+ describe "scoring in a normal context" do
12
98
  subject do
13
99
  analyzer.score(text)
14
100
  end
@@ -55,18 +141,5 @@ describe Sentimental do
55
141
  end
56
142
  end
57
143
 
58
- context "when there is some neutral regexp" do
59
- let(:text_neutral) {"Do you love ruby?"}
60
- let(:text) {"I love ruby"}
61
-
62
- before do
63
- analyzer.neutral_regexps << /\?\s*$/
64
- end
65
-
66
- it "scores it to 0" do
67
- expect(analyzer.score(text_neutral)).to eq 0
68
- expect(analyzer.score(text)).not_to eq 0
69
- end
70
- end
71
144
  end
72
145
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sentimental
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Emminger
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-03-17 00:00:00.000000000 Z
13
+ date: 2016-04-22 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
93
  version: '0'
94
94
  requirements: []
95
95
  rubyforge_project:
96
- rubygems_version: 2.4.6
96
+ rubygems_version: 2.4.8
97
97
  signing_key:
98
98
  specification_version: 4
99
99
  summary: Simple sentiment analysis