sentiments 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +46 -0
- data/Rakefile +1 -0
- data/lib/sentiments/data/ignore.json +580 -0
- data/lib/sentiments/data/neg.json +5139 -0
- data/lib/sentiments/data/neu.json +375 -0
- data/lib/sentiments/data/pos.json +3104 -0
- data/lib/sentiments/data/prefix.json +6 -0
- data/lib/sentiments/version.rb +3 -0
- data/lib/sentiments.rb +109 -0
- data/sentiments.gemspec +24 -0
- data/test/sentiments.rb +4 -0
- metadata +102 -0
data/lib/sentiments.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
require "sentiments/version"
|
2
|
+
require 'json'
|
3
|
+
module Sentiments
  # Scores a sentence against three word lists ('pos', 'neg', 'neu') that are
  # loaded from the JSON files under sentiments/data/ next to this file.
  #
  # For every category the score starts at 1 and is multiplied by
  # (count + 1) for each usable token, where count is the value stored for
  # that token/category pair in the dictionary (0 when there is none). The
  # product is scaled by PRIOR_SCORE and the three results are normalised so
  # that they add up to 1.
  class Classifier
    # A token is only scored when its length is strictly between these bounds.
    MIN_TOKEN_LENGTH = 1
    MAX_TOKEN_LENGTH = 15
    # Constant factor applied to every category before normalisation.
    PRIOR_SCORE = 0.33

    # Loads the prefix and ignore lists and builds the dictionary from the
    # three category word lists.
    #
    # @raise [RuntimeError] if a category dictionary cannot be set
    def initialize
      @prefix_words = load_json('prefix')
      @ignore_words = load_json('ignore')

      @categories = ['pos', 'neg', 'neu']

      @dictionary = {}
      # Bookkeeping counters filled in by set_dictionary; nothing in this
      # class reads them back.
      @doc_count = 0
      @token_count = 0
      @category_tok_count = {'pos' => 0, 'neg' => 0, 'neu' => 0}
      @category_doc_count = {'pos' => 0, 'neg' => 0, 'neu' => 0}

      @categories.each do |category|
        raise "Unable to Set Dictionaries" unless set_dictionary(category)
      end
    end

    # Computes the normalised score of +sentence+ for every category.
    #
    # The sentence passed in is never modified, so frozen strings are fine.
    #
    # @param sentence [String] text to classify
    # @return [Hash{String => Float}] scores keyed by 'pos', 'neg' and 'neu'
    def score(sentence)
      text = sentence
      @prefix_words.each do |pw|
        next unless text.include?(pw)

        # Glue every occurrence of the prefix to the word that follows it so
        # the pair is looked up as a single token. The block form keeps
        # backslashes in the prefix from being read as back-references.
        text = text.gsub("#{pw} ") { pw }
      end

      # The filter does not depend on the category, so apply it only once.
      tokens = tokenize(text).select { |token| scorable?(token) }

      scores = {}
      @categories.each do |category|
        product = 1
        tokens.each do |token|
          counts = @dictionary[token]
          # A token that is not in the dictionary contributes nothing; keep
          # going so the tokens after it are still scored.
          next if counts.nil?

          product *= (counts[category] || 0) + 1
        end
        scores[category] = PRIOR_SCORE * product
      end

      total_score = 0
      @categories.each { |category| total_score += scores[category] }
      @categories.each { |category| scores[category] = scores[category] / total_score }

      scores
    end

    private

    # Whether +token+ takes part in scoring: its length must be strictly
    # between the two bounds and it must not be on the ignore list.
    def scorable?(token)
      token.length > MIN_TOKEN_LENGTH &&
        token.length < MAX_TOKEN_LENGTH &&
        !@ignore_words.include?(token)
    end

    # Reads and parses sentiments/data/<filename>.json, resolved relative to
    # the directory of this file.
    #
    # @param filename [String] base name of the data file, without extension
    # @return [Object] the parsed JSON document
    def load_json(filename)
      path = File.join(File.dirname(__FILE__), 'sentiments', 'data', "#{filename}.json")
      JSON.parse(File.read(path))
    end

    # Registers every word of the +category+ list in the dictionary and
    # updates the bookkeeping counters.
    #
    # A word that appears in several category lists is registered for each of
    # them, not just for the first list that mentions it.
    #
    # @param category [String] 'pos', 'neg' or 'neu'
    # @return [true]
    def set_dictionary(category)
      load_json(category).each do |word|
        @doc_count += 1
        @category_doc_count[category] += 1

        (@dictionary[word] ||= {})[category] ||= 1

        @category_tok_count[category] += 1
        @token_count += 1
      end
      true
    end

    # Lower-cases +sentence+ and splits it on whitespace.
    #
    # @param sentence [String]
    # @return [Array<String>]
    def tokenize(sentence)
      sentence.downcase.split
    end
  end
end
|
106
|
+
|
107
|
+
# Convenience factory available at the top level.
#
# @return [Sentiments::Classifier] a freshly constructed classifier
def sentiments
  Sentiments::Classifier.new
end
|
data/sentiments.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'sentiments/version'
|
5
|
+
|
6
|
+
# Gem specification for the sentiments gem.
Gem::Specification.new do |gem|
  # Identity
  gem.name     = 'sentiments'
  gem.version  = Sentiments::VERSION
  gem.license  = 'MIT'
  gem.homepage = 'http://www8355ue.sakura.ne.jp/?action_classifiers_sentiment=true'

  # Authorship
  gem.authors = ['Pulkit Kathuria']
  gem.email   = ['kevincobain2000@gmail.com']

  # Descriptions
  gem.description = 'Sentiment Analysis with negation'
  gem.summary     = 'Simple and Faster Sentiment Classifier categorizes sentence into positive & negative'

  # Packaged files are whatever git tracks; executables and test files are
  # picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Development-only dependencies
  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'json'
end
|
data/test/sentiments.rb
ADDED
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sentiments
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Pulkit Kathuria
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-12-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: json
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Sentiment Analysis with negation
|
56
|
+
email:
|
57
|
+
- kevincobain2000@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- .gitignore
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE.txt
|
65
|
+
- README.md
|
66
|
+
- Rakefile
|
67
|
+
- lib/sentiments.rb
|
68
|
+
- lib/sentiments/data/ignore.json
|
69
|
+
- lib/sentiments/data/neg.json
|
70
|
+
- lib/sentiments/data/neu.json
|
71
|
+
- lib/sentiments/data/pos.json
|
72
|
+
- lib/sentiments/data/prefix.json
|
73
|
+
- lib/sentiments/version.rb
|
74
|
+
- sentiments.gemspec
|
75
|
+
- test/sentiments.rb
|
76
|
+
homepage: http://www8355ue.sakura.ne.jp/?action_classifiers_sentiment=true
|
77
|
+
licenses:
|
78
|
+
- MIT
|
79
|
+
metadata: {}
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - '>='
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 2.0.3
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: Simple and Faster Sentiment Classifier categorizes sentence into positive
|
100
|
+
& negative
|
101
|
+
test_files:
|
102
|
+
- test/sentiments.rb
|