sentiments 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +46 -0
- data/Rakefile +1 -0
- data/lib/sentiments/data/ignore.json +580 -0
- data/lib/sentiments/data/neg.json +5139 -0
- data/lib/sentiments/data/neu.json +375 -0
- data/lib/sentiments/data/pos.json +3104 -0
- data/lib/sentiments/data/prefix.json +6 -0
- data/lib/sentiments/version.rb +3 -0
- data/lib/sentiments.rb +109 -0
- data/sentiments.gemspec +24 -0
- data/test/sentiments.rb +4 -0
- metadata +102 -0
data/lib/sentiments.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
require "sentiments/version"
|
2
|
+
require 'json'
|
3
|
+
module Sentiments
  # Scores a sentence against three word lists ('pos', 'neg', 'neu') that are
  # loaded from JSON files in the sentiments/data directory next to this file.
  #
  # Example:
  #   Sentiments::Classifier.new.score("not good")
  #   # => {'pos' => Float, 'neg' => Float, 'neu' => Float}
  class Classifier
    # A token is scored only when its length is strictly greater than this.
    MIN_TOKEN_LENGTH = 1
    # A token is scored only when its length is strictly less than this.
    MAX_TOKEN_LENGTH = 15
    # Constant factor applied to every category's raw score before normalizing.
    PRIOR_SCORE = 0.33
    # Directory that holds the JSON word lists read by #load_json.
    DATA_DIR = File.join(File.dirname(__FILE__), 'sentiments', 'data')

    # Loads the prefix and ignore lists, then builds the word dictionary from
    # the 'pos', 'neg' and 'neu' lists.
    #
    # Raises RuntimeError ("Unable to Set Dictionaries") if a category fails to
    # load; errors from reading or parsing a JSON file propagate unchanged.
    def initialize
      @prefix_words = load_json('prefix')
      @ignore_words = load_json('ignore')

      @categories = ['pos', 'neg', 'neu']

      # word => { category => count }
      @dictionary = {}
      # Bookkeeping counters filled in by set_dictionary; score does not read them.
      @doc_count = 0
      @token_count = 0
      @category_tok_count = {'pos' => 0, 'neg' => 0, 'neu' => 0}
      @category_doc_count = {'pos' => 0, 'neg' => 0, 'neu' => 0}

      @categories.each do |category|
        raise "Unable to Set Dictionaries" unless set_dictionary(category)
      end
    end #initialize

    # Computes a normalized score per category for +sentence+.
    #
    # Every occurrence of a prefix word followed by a space is first glued to
    # the word after it (e.g. "not good" becomes "notgood"). The sentence is
    # then lower-cased and split on whitespace. For each category the raw score
    # is PRIOR_SCORE times the product of (count + 1) over the scorable tokens,
    # and the three raw scores are divided by their sum.
    #
    # sentence - String to classify. It is never modified, so frozen strings
    #            are accepted.
    #
    # Returns a Hash with the keys 'pos', 'neg' and 'neu' mapped to Floats.
    def score(sentence)
      # Work on a copy: the caller's string must stay untouched.
      text = sentence.dup
      @prefix_words.each do |prefix|
        text = text.gsub(prefix + " ", prefix)
      end

      # Filter once up front; the result is the same for every category.
      tokens = tokenize(text).select { |token| scorable?(token) }

      scores = {}
      @categories.each do |category|
        product = tokens.inject(1) do |acc, token|
          acc * ((@dictionary[token][category] || 0) + 1)
        end
        scores[category] = PRIOR_SCORE * product
      end

      # Every raw score is at least PRIOR_SCORE, so the total is never zero.
      total_score = scores.values.inject(:+)
      @categories.each do |category|
        scores[category] = scores[category] / total_score
      end

      scores
    end #score

    private

    # True when +token+ should contribute to the score: its length lies
    # strictly between the two limits, it is not an ignore word, and it is a
    # known dictionary word. Tokens that fail are skipped; they do not stop
    # the remaining tokens from being scored.
    def scorable?(token)
      token.length > MIN_TOKEN_LENGTH &&
        token.length < MAX_TOKEN_LENGTH &&
        !@ignore_words.include?(token) &&
        @dictionary.key?(token)
    end

    # Reads and parses DATA_DIR/<filename>.json and returns the parsed value.
    def load_json(filename)
      JSON.parse(File.read(File.join(DATA_DIR, filename + '.json')))
    end

    # Registers every word of the +category+ list in the dictionary and
    # updates the bookkeeping counters. Returns true.
    def set_dictionary(category)
      load_json(category).each do |word|
        @doc_count += 1
        @category_doc_count[category] += 1

        # A word may appear in more than one list, so record the category
        # even when another category already created the entry.
        entry = (@dictionary[word] ||= {})
        entry[category] ||= 1

        @category_tok_count[category] += 1
        @token_count += 1
      end
      true
    end #set_dictionary

    # Lower-cases +sentence+ and splits it on whitespace.
    def tokenize(sentence)
      sentence.downcase.split
    end #tokenize

  end #class
end #module
|
106
|
+
|
107
|
+
# Top-level convenience factory: builds and returns a fresh
# Sentiments::Classifier on every call.
def sentiments
  Sentiments::Classifier.new
end
|
data/sentiments.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'sentiments/version'
|
5
|
+
|
6
|
+
# Gem specification for "sentiments". The packaged file list comes from
# `git ls-files`, so the gem must be built from inside a git checkout.
Gem::Specification.new do |gem|
  # Identity and authorship
  gem.name     = 'sentiments'
  gem.version  = Sentiments::VERSION
  gem.license  = 'MIT'
  gem.authors  = ['Pulkit Kathuria']
  gem.email    = ['kevincobain2000@gmail.com']
  gem.homepage = 'http://www8355ue.sakura.ne.jp/?action_classifiers_sentiment=true'

  # Descriptive text
  gem.summary     = 'Simple and Faster Sentiment Classifier categorizes sentence into positive & negative'
  gem.description = 'Sentiment Analysis with negation'

  # Packaged files; executables and test files are derived from the file list.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Development-only dependencies
  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'json'
end
|
data/test/sentiments.rb
ADDED
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sentiments
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Pulkit Kathuria
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-12-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: json
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Sentiment Analysis with negation
|
56
|
+
email:
|
57
|
+
- kevincobain2000@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- .gitignore
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE.txt
|
65
|
+
- README.md
|
66
|
+
- Rakefile
|
67
|
+
- lib/sentiments.rb
|
68
|
+
- lib/sentiments/data/ignore.json
|
69
|
+
- lib/sentiments/data/neg.json
|
70
|
+
- lib/sentiments/data/neu.json
|
71
|
+
- lib/sentiments/data/pos.json
|
72
|
+
- lib/sentiments/data/prefix.json
|
73
|
+
- lib/sentiments/version.rb
|
74
|
+
- sentiments.gemspec
|
75
|
+
- test/sentiments.rb
|
76
|
+
homepage: http://www8355ue.sakura.ne.jp/?action_classifiers_sentiment=true
|
77
|
+
licenses:
|
78
|
+
- MIT
|
79
|
+
metadata: {}
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - '>='
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 2.0.3
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: Simple and Faster Sentiment Classifier categorizes sentence into positive
|
100
|
+
& negative
|
101
|
+
test_files:
|
102
|
+
- test/sentiments.rb
|