sentimental 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 63ca1cc3a2e131c766fda7b6ffef711958b77af4
4
- data.tar.gz: da25b0c71603372b554e3624e4113d667155d6dc
3
+ metadata.gz: 3589f3fdd19386fdd901d673bd905f2703102aa7
4
+ data.tar.gz: 355ec5e97a133254c82839b70db0e2a12fe0462a
5
5
  SHA512:
6
- metadata.gz: 33f62d43418c9d02206ec0897572a9efb47ae5dac4189c8fa6c07e889608f23c92fa3478990611d2b697d630fba4652dba5d501483f547a6d278ebfde5930ee6
7
- data.tar.gz: 3c45a95fafdcf12949dd67ea1c0b075d0628951f07168de5c803935db456d07ba4cbc478acf5715fcd830b67808c04dcceaf97f130f7cc99277afd2b83251aaf
6
+ metadata.gz: ec5d616c70d55299d58a520c5482e1dc0ff451b6405337d02e6ba0a9c5b77ca7553839a12dbb40c23f65e071e1022fc3b8dfeed0d022e96abde51d7532345935
7
+ data.tar.gz: d935a09a091dce7796717563bd9251cf4575ae50eae1bbfe7e0ed4a2b09398f4976a09d076bb4a3d445a1aec2e2f7aa01b4316b1079438862a74ec76c206df25
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sentimental (1.0.4)
4
+ sentimental (1.2.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -71,6 +71,19 @@ scores and tokens, e.g.:
71
71
  0.0 Meh
72
72
  -1.0 Horrible
73
73
 
74
+ ## N-grams
75
+
76
+ You can parse n-grams of words by specifying their max size in the initializer:
77
+ ```
78
+ Sentimental.new(ngrams: 4)
79
+ ```
80
+
81
+ The dictionary must have this format:
82
+
83
+ 1.0 very happy
84
+ -2.0 no
85
+ 0.0 meh
86
+
74
87
  ## Installation
75
88
 
76
89
  gem install sentimental
data/lib/sentimental.rb CHANGED
@@ -1,8 +1,14 @@
1
1
  class Sentimental
2
- attr_accessor :threshold, :word_scores, :neutral_regexps
2
+ attr_accessor :threshold, :word_scores, :neutral_regexps, :ngrams
3
3
 
4
- def initialize(threshold: 0, word_scores: nil, neutral_regexps: [])
5
- @word_scores = Hash.new(0.0) || word_scores
4
# Builds an analyzer.
#
# @param threshold [Numeric] stored as-is in @threshold.
# @param word_scores [Hash, nil] token/phrase => score dictionary; nil means
#   start with an empty dictionary.
# @param neutral_regexps [Array<Regexp>] stored as-is in @neutral_regexps.
# @param ngrams [#to_i, nil] maximum n-gram size; anything that coerces to
#   less than 1 (including nil) falls back to 1.
def initialize(threshold: 0, word_scores: nil, neutral_regexps: [], ngrams: 1)
  # Coerce before comparing so nil or a numeric string no longer raises on `>=`.
  @ngrams = [ngrams.to_i, 1].max
  # Copy into a fresh hash whose default is 0.0 instead of setting `default=`
  # on the caller's object: that mutated the argument and raised on frozen
  # hashes. Unknown tokens still score 0.0.
  @word_scores = Hash.new(0.0).merge!(word_scores || {})
  @threshold = threshold
  @neutral_regexps = neutral_regexps
end
@@ -10,7 +16,7 @@ class Sentimental
10
16
  def score(string)
11
17
  return 0 if neutral_regexps.any? {|regexp| string =~ regexp}
12
18
 
13
- extract_words(string).inject(0) do |score, token|
19
+ extract_words_with_n_grams(string).inject(0) do |score, token|
14
20
  score += word_scores[token]
15
21
  end
16
22
  end
@@ -27,26 +33,47 @@ class Sentimental
27
33
  end
28
34
  end
29
35
 
36
+ def classify(string)
37
+ sentiment(string) == :positive
38
+ end
39
+
30
40
  def load_defaults
31
41
  ['sentiwords', 'sentislang'].each do |filename|
32
- load_senti_file(File.dirname(__FILE__) + "/../data/#{filename}.txt")
42
+ load_from(File.dirname(__FILE__) + "/../data/#{filename}.txt")
33
43
  end
34
44
  end
35
45
 
36
- def load_senti_file(filename)
46
+ def load_from(filename)
37
47
  File.open(filename) do |file|
38
48
  file.each_line do |line|
39
- parsed_line = line.chomp.split(/\s+/)
40
- sentiscore = parsed_line[0]
41
- text = parsed_line[1]
42
- word_scores[text] = sentiscore.to_f
49
+ if parsed_line = (line.chomp.scan(/^([^\s]+)\s+(.+)/).first)
50
+ sentiscore = parsed_line[0]
51
+ text = parsed_line[1]
52
+ word_scores[text] = sentiscore.to_f
53
+ end
43
54
  end
44
55
  end
45
56
  end
57
+
58
+ alias_method :load_senti_file, :load_from
46
59
 
47
60
  private
48
61
 
49
62
  def extract_words(string)
50
63
  string.to_s.downcase.scan(/([\w']+|\S{2,})/).flatten
51
64
  end
65
+
66
# Tokenizes +string+ and returns every n-gram of size 1 up to +ngrams+,
# each joined with a single space, smaller sizes first.
#
# @param string [#to_s] text to tokenize (via extract_words).
# @return [Array<String>] flat list of n-gram strings.
def extract_words_with_n_grams(string)
  words = extract_words(string)
  # Never ask for n-grams longer than the text itself: for such sizes the
  # whole text came back once per size, so a short text was scored several
  # times. An empty text now yields no tokens at all.
  largest = [ngrams, words.size].min
  (1..largest).flat_map { |ngram_size| ngramify(words, ngram_size) }
end
72
+
73
# Returns every run of exactly +max_size+ consecutive words, in order,
# each joined with a single space.
#
# @param words [Array<String>] tokens in text order.
# @param max_size [Integer] number of words per n-gram.
# @return [Array<String>] n-gram strings; empty when +words+ holds fewer
#   than +max_size+ items (previously the shorter text was returned as if
#   it were an n-gram, so short texts were counted once per n-gram size).
def ngramify(words, max_size)
  # each_cons raises on a non-positive size; there is no such n-gram anyway.
  return [] if max_size < 1

  # Iterative sliding window: no per-word recursion or array copying, so long
  # texts neither overflow the stack nor degrade quadratically.
  words.each_cons(max_size).map { |group| group.join(" ") }
end
52
79
  end
data/sentimental.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = 'sentimental'
3
- spec.version = '1.1.1'
3
+ spec.version = '1.2.1'
4
4
  spec.summary = 'Simple sentiment analysis'
5
5
  spec.description = 'A simple sentiment analysis gem'
6
6
  spec.authors = ['Jeff Emminger', 'Christopher MacLellan', 'Denis Pasin']
@@ -8,7 +8,93 @@ describe Sentimental do
8
8
 
9
9
  let(:analyzer) { Sentimental.new(threshold: 0.1) }
10
10
 
11
- describe "#score" do
11
+ describe "#sentiment" do
12
+ it "returns :positive when the score > threshold" do
13
+ expect(analyzer.sentiment("I love ruby <3")).to be :positive
14
+ end
15
+
16
+ it "returns :negative when the score < -threshold" do
17
+ expect(analyzer.sentiment("I hate javascript")).to be :negative
18
+ end
19
+
20
+ it "returns :positive when -threshold < score < threshold" do
21
+ expect(analyzer.sentiment("je en sais pas")).to be :neutral
22
+ end
23
+ end
24
+
25
+ describe "#classify" do
26
+ it "is true when in the class" do
27
+ expect(analyzer.classify("I love ruby")).to be_truthy
28
+ end
29
+
30
+ it "is false otherwise" do
31
+ expect(analyzer.classify("je ne sais pas")).to be_falsy
32
+ expect(analyzer.classify("i hate java")).to be_falsy
33
+ end
34
+ end
35
+
36
+ describe "initialization" do
37
+ subject do
38
+ Sentimental.new(
39
+ threshold: 0.2,
40
+ word_scores: {"non" => -1.0},
41
+ neutral_regexps: [/.*/],
42
+ )
43
+ end
44
+
45
+ it "takes multiple init params" do
46
+ expect(subject.threshold).to eq 0.2
47
+ expect(subject.word_scores["non"]).to eq -1.0
48
+ expect(subject.neutral_regexps).to include /.*/
49
+ end
50
+ end
51
+
52
+ describe "neutral regexp" do
53
+ context "when there is some neutral regexp" do
54
+ let(:text_neutral) {"Do you love ruby?"}
55
+ let(:text) {"I love ruby"}
56
+
57
+ before do
58
+ analyzer.neutral_regexps << /\?\s*$/
59
+ end
60
+
61
+ it "scores it to 0" do
62
+ expect(analyzer.score(text_neutral)).to eq 0
63
+ expect(analyzer.score(text)).not_to eq 0
64
+ end
65
+ end
66
+ end
67
+
68
+ describe "n-grams" do
69
+ let(:word_scores) { nil }
70
+ subject do
71
+ Sentimental.new(word_scores: word_scores, ngrams: 3)
72
+ end
73
+
74
+ it "is initialized by ngrams param" do
75
+ expect(subject.ngrams).to eq 3
76
+ end
77
+
78
+ context "there is n-grams in the dictionary" do
79
+ let(:word_scores) {{"happy hour" => 1.0, "not happy hour" => -5.0}}
80
+ let(:text) { "why not happy hour, but happy so hour?" }
81
+
82
+ it "update scores regarding to n-grams" do
83
+ expect(subject.score(text)).to eq -4
84
+ end
85
+ end
86
+
87
+ context "there's n-grams longer than specified in dictionary" do
88
+ let(:word_scores) {{"happy hour" => 1.0, "not so happy hour" => -5.0}}
89
+ let(:text) { "why not so happy hour ?" }
90
+
91
+ it "ignores the lines" do
92
+ expect(subject.score(text)).to eq 1
93
+ end
94
+ end
95
+ end
96
+
97
+ describe "scoring in a normal context" do
12
98
  subject do
13
99
  analyzer.score(text)
14
100
  end
@@ -55,18 +141,5 @@ describe Sentimental do
55
141
  end
56
142
  end
57
143
 
58
- context "when there is some neutral regexp" do
59
- let(:text_neutral) {"Do you love ruby?"}
60
- let(:text) {"I love ruby"}
61
-
62
- before do
63
- analyzer.neutral_regexps << /\?\s*$/
64
- end
65
-
66
- it "scores it to 0" do
67
- expect(analyzer.score(text_neutral)).to eq 0
68
- expect(analyzer.score(text)).not_to eq 0
69
- end
70
- end
71
144
  end
72
145
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sentimental
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Emminger
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-03-17 00:00:00.000000000 Z
13
+ date: 2016-04-22 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
93
  version: '0'
94
94
  requirements: []
95
95
  rubyforge_project:
96
- rubygems_version: 2.4.6
96
+ rubygems_version: 2.4.8
97
97
  signing_key:
98
98
  specification_version: 4
99
99
  summary: Simple sentiment analysis