tonality_analyser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +1 -0
- data/README.md +29 -0
- data/Rakefile +8 -0
- data/lib/tonality_analyser.rb +3 -0
- data/lib/tonality_analyser/engine.rb +77 -0
- data/lib/tonality_analyser/helpers.rb +17 -0
- data/lib/tonality_analyser/version.rb +3 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/tonality_analyser_spec.rb +15 -0
- data/tonality_analyser.gemspec +19 -0
- data/training/neg.txt +58 -0
- data/training/pos.txt +59 -0
- metadata +61 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Copyright (c) 2013 Samuel Sanchez
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# TonalityAnalyser
|
2
|
+
|
3
|
+
Analyses a piece of text and proposes its tonality (positive or negative) using a Bayes computation.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'tonality_analyser'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install tonality_analyser
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
See the spec files for usage examples.
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
module TonalityAnalyser

  # Bayes-style tonality classifier.
  #
  # Typical use: feed it labelled text with #train (or #load_traning_corpus!),
  # call #compute_probabilities! once, then ask #tonality for a verdict.
  #
  # TODO: move the word counts and probabilities into Redis.
  class Engine
    # Tonalities the engine can be trained on and can answer with.
    TONALITIES = [:pos, :neg].freeze

    # Probability assumed for a word that was never seen for a tonality.
    UNKNOWN_WORD_PROBABILITY = 0.01

    # Corpus directory shipped with the gem (this file lives in
    # lib/tonality_analyser/, the corpus in training/ at the gem root).
    BUNDLED_TRAINING_DIR = File.expand_path('../../../training', __FILE__)

    # counted_words: { tonality => { word => occurrences } }
    # probabilites:  { tonality => { word => probability } } (historical spelling)
    attr_reader :counted_words, :probabilites
    # Correctly spelled alias; the original reader is kept for compatibility.
    alias_method :probabilities, :probabilites

    def initialize
      @total_words       = { all: 0, pos: 0, neg: 0 }
      @counted_words     = { pos: {}, neg: {} }
      @probabilites      = { pos: {}, neg: {} }
      @spec_probabilites = { pos: {}, neg: {} }
    end

    # Counts every whitespace-separated word of +words+ for +tonality+.
    #
    # words    - String of training text.
    # tonality - one of TONALITIES.
    #
    # Raises RuntimeError when +tonality+ is unknown.
    def train(words, tonality)
      validate_tonality!(tonality)
      counts = @counted_words[tonality]
      words.split.each do |w|
        word = Helpers::Text.normalize(w)
        # Tokens made only of punctuation normalise to "" and carry no signal.
        next if word.empty?
        @total_words[:all] += 1
        @total_words[tonality] += 1
        counts[word] = counts.fetch(word, 0) + 1
      end
    end

    # Recomputes, for every trained word, the share of its occurrences that
    # belong to each tonality: count(tonality) / (count(pos) + count(neg)).
    #
    # Returns the probability table.
    def compute_probabilities!
      TONALITIES.each do |tonality|
        @counted_words[tonality].each do |word, count|
          occurrences = TONALITIES.inject(0) { |sum, t| sum + @counted_words[t].fetch(word, 0) }
          probability = count.to_f / occurrences
          @probabilites[tonality][word]      = probability
          @spec_probabilites[tonality][word] = probability
        end
      end
      @probabilites
    end

    # Scores +text+ against +tonality+.
    #
    # Unknown words use UNKNOWN_WORD_PROBABILITY; the trained tables are only
    # read, never modified, so querying does not alter the model.
    #
    # Returns a Float (0.0 when the products underflow to 0/0).
    # Raises RuntimeError when +tonality+ is unknown.
    def analysis(text, tonality)
      validate_tonality!(tonality)
      spec_table = @spec_probabilites[tonality]
      table      = @probabilites[tonality]
      num, den1, den2 = 1.0, 1.0, 1.0

      Helpers::Text.clean_words_from(text).each do |word|
        num *= spec_table.fetch(word, UNKNOWN_WORD_PROBABILITY)
        probability = table.fetch(word, UNKNOWN_WORD_PROBABILITY)
        den1 *= probability
        den2 *= (1 - probability)
      end
      num *= 0.5

      proba_pol = num / (den1 + den2)
      proba_pol.nan? ? 0.0 : proba_pol
    end

    # Returns :pos or :neg for +text+; ties go to :pos.
    def tonality(text)
      pos_proba = analysis(text, :pos)
      neg_proba = analysis(text, :neg)
      pos_proba >= neg_proba ? :pos : :neg
    end

    # Trains the engine on the corpus file of every tonality.
    def load_traning_corpus!
      TONALITIES.each { |tonality| load_traning_corpus_for(tonality) }
    end

    # Trains the engine on each line of the "<tonality>.txt" corpus file.
    def load_traning_corpus_for(tonality)
      File.foreach(training_file_for(tonality)) { |line| train(line, tonality) }
    end

    private

    # Prefers ./training/<tonality>.txt (the historical location, relative to
    # the working directory) and falls back to the corpus bundled with the gem.
    def training_file_for(tonality)
      local = "./training/#{tonality}.txt"
      File.exist?(local) ? local : File.join(BUNDLED_TRAINING_DIR, "#{tonality}.txt")
    end

    def validate_tonality!(tonality)
      raise "Invalid tonality '#{tonality}'" unless TONALITIES.include?(tonality)
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module TonalityAnalyser
  module Helpers

    # Text clean-up helpers used by the engine.
    class Text
      # Lower-cases +word+ and strips every character that is not an ASCII
      # letter or digit.
      #
      # word - String (nil is treated as an empty string).
      #
      # Returns a String, possibly empty.
      def self.normalize(word)
        # After downcase only 0-9 and a-z need to be kept.
        word.to_s.downcase.delete('^0-9a-z')
      end

      # Splits +text+ into lower-cased runs of ASCII letters/digits and keeps
      # only the runs longer than two characters.
      #
      # text - String (nil is treated as an empty string).
      #
      # Returns an Array of Strings.
      def self.clean_words_from(text)
        # Any other character acts as a separator, so "can't" yields "can"
        # (and a dropped "t").
        text.to_s.downcase.scan(/[0-9a-z]{3,}/)
      end
    end

  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'

describe TonalityAnalyser::Engine do

  # End-to-end check against the bundled training corpus.
  # Every line must go through `.should`; a bare `==` comparison is evaluated
  # and discarded, so it can never fail the example.
  it 'propose tonality' do
    e = TonalityAnalyser::Engine.new
    e.load_traning_corpus!
    e.compute_probabilities!
    e.tonality('This').should == :neg
    e.tonality('Unacceptable').should == :neg
    e.tonality('want').should == :pos
    e.tonality('Thanks').should == :pos
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for tonality_analyser.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'tonality_analyser/version'

Gem::Specification.new do |gem|
  gem.name          = "tonality_analyser"
  gem.version       = TonalityAnalyser::VERSION
  gem.authors       = ["Samuel Sanchez"]
  gem.email         = ["samuel@pagedegeek.com"]
  gem.description   = %q{Process text and propose tonality.}
  gem.summary       = %q{Process text and propose tonality with bayes computation}
  gem.homepage      = "http://github.com/PagedeGeek/tonality_analyser"

  # NUL-separated output keeps paths intact whatever characters they contain
  # and does not depend on the mutable `$/` record separator.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
|
data/training/neg.txt
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
I hate this.
|
2
|
+
This sucks. I can't even figure out how to open it.
|
3
|
+
Wow, this was horrible. Nothing is worse.
|
4
|
+
Very bad. Very ugly. Very slow.
|
5
|
+
Whoever wrote this should die in a fire.
|
6
|
+
This is the crappiest crap that ever crapped.
|
7
|
+
This was ugly, despicable, and horrid. Imprecise yet slow.
|
8
|
+
I loathed it. I wanted to set it on fire.
|
9
|
+
The company should be ashamed of releasing this. Shame!
|
10
|
+
I hated it when I saw it, I hate it now, I will hate it tomorrow.
|
11
|
+
Loathsome and disgusting. Why is it slimy?
|
12
|
+
This is just a scam. Inaccurate and incorrect.
|
13
|
+
Ick - repulsive and unworthy of my money.
|
14
|
+
Expensive yet declasse. Just screams nouveau riche.
|
15
|
+
I abhorred it. It's horrible. Just horrible.
|
16
|
+
I found it appalling. Why would anyone buy such a thing?
|
17
|
+
My entire family hated it.
|
18
|
+
The entire family found it beastly and cruel to animals.
|
19
|
+
My entire family hated its disgusting noises.
|
20
|
+
Noisy. They don't tell you how loud it is.
|
21
|
+
Detestable. It makes a disagreeable whir, clank, grind noise.
|
22
|
+
I bought one for my sister, and it was detestable.
|
23
|
+
Eerie noises came from its dreadful orifices.
|
24
|
+
Throughout the night, it made obnoxious noises.
|
25
|
+
Atrocious quality - never buying again.
|
26
|
+
This company should be ashamed of the bad news coming out.
|
27
|
+
Inferior quality and awful craftsmanship.
|
28
|
+
Cruddy and cheap pieces. Looks pretty blah, too.
|
29
|
+
Looks like a careless monkey put it together.
|
30
|
+
Where was QA? This crummy thing didn't even last two days.
|
31
|
+
Defective in oh so many ways. Godawful piece of crap.
|
32
|
+
I was unsatisfied with my purchase.
|
33
|
+
Just seeing it is a downer.
|
34
|
+
Had seven faulty sensors and nine faulty gauges.
|
35
|
+
Inadequate lighting and slimy, junky interior.
|
36
|
+
Gross! It spit at me! I want to spit at it!
|
37
|
+
Slipshod, shoddy craftsmanship.
|
38
|
+
Dishonest purveyors of snake oil.
|
39
|
+
Just pure garbage. It belongs in a dumpster or a landfill.
|
40
|
+
Poor quality and cheapness is evident.
|
41
|
+
Substandard for something of this level. I paid too much.
|
42
|
+
I paid way too much for this piece of crap.
|
43
|
+
It's the pits. Unacceptable and unsatisfactory.
|
44
|
+
This had a lousy exterior and an inferior interior.
|
45
|
+
The worst product I've ever used in my life. Never again!
|
46
|
+
I would compare it unfavorably to feces.
|
47
|
+
It is teh suck. It sucks more than a sucky suck.
|
48
|
+
Worse than anything I can comprehend.
|
49
|
+
Hellish, boorish, and vain.
|
50
|
+
Shoddy worksmanship. Fell apart in the rain.
|
51
|
+
Whoever made this will have to answer to his maker.
|
52
|
+
Broke down almost immediately. Broken since then.
|
53
|
+
Hate hate hate hate hate hate hate hate hate hate hate hate hate hate!
|
54
|
+
Crap crap crap crap crap crap crap crap crap crap crap crap crap crap!
|
55
|
+
Sucky suck suck sucked sucking sucked really bad.
|
56
|
+
Bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad!
|
57
|
+
Dislike dislike dislike dislike dislike dislike dislike dislike dislike!
|
58
|
+
Lousy lousy lousy lousy lousy lousy lousy lousy lousy lousy lousy lousy!
|
data/training/pos.txt
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
This is awesome!
|
2
|
+
I love this. It has the right mixture of color and texture.
|
3
|
+
Love love love. I can't go to sleep without it.
|
4
|
+
My child loves it. She says it's her favorite toy.
|
5
|
+
I can't even begin to describe how great this is. Thank you!
|
6
|
+
Thanks to the developers who made this. Pure awesome sauce.
|
7
|
+
I just want to snuggle in this program and fall asleep.
|
8
|
+
It's cuter than a bug in a rug.
|
9
|
+
Technical excellence, achieved.
|
10
|
+
The company releases pure, 100%, grade-A greatness, every year.
|
11
|
+
I've said it before, and I'll say it again - this is the best.
|
12
|
+
The absolute best product I've ever used.
|
13
|
+
Grade A.. no, A+! Would buy again.
|
14
|
+
First-class seating at a reasonable price.
|
15
|
+
Quite admirable. Adroitly handles every exception.
|
16
|
+
Marvelous. As soft as a fluffy bunny but still tough.
|
17
|
+
The last item was transcendent. I could see time.
|
18
|
+
A consummate perfectionist, he has created a better mousetrap.
|
19
|
+
Absolutely perfect in every conceivable way.
|
20
|
+
It surpassed all of my expectations, including some I didn't know about.
|
21
|
+
100% brilliance. More brilliant than a diamond.
|
22
|
+
Not just good - downright exceptional.
|
23
|
+
Fine taste put together with expert hands.
|
24
|
+
You can tell an expert swordsmith made this.
|
25
|
+
Entirely complete - an utter enchantment to work with.
|
26
|
+
I found it good - utterly good - a champion among wines.
|
27
|
+
Super-duper, out of sight, groovy! An out and out perfect game.
|
28
|
+
Not just fantastic - super-duper fantastic.
|
29
|
+
Hellacious - a heavy mixture of nitroglycerin and excitement.
|
30
|
+
The terrific end of summer experience was wonderful.
|
31
|
+
Wonderful, wonderful, wonderful. So wonderful.
|
32
|
+
A superb, virtuoso performance. Distinguished for excellence.
|
33
|
+
Outstanding in its field. Makes me want to jump for joy.
|
34
|
+
Oh yes, I like it, I love it, I want some more of it.
|
35
|
+
This is a great way to store your cherries in the winter.
|
36
|
+
It's the cat's pajamas, the way it accurately knows things.
|
37
|
+
Quite hunky-dory - even peachy keen.
|
38
|
+
Peachy-keen. It's a choice celebration.
|
39
|
+
Exquisite craftsmanship and meritorious conduct.
|
40
|
+
Quite simply, it's the greatest thing ever.
|
41
|
+
A-1. A1. A-OK. Just A+!
|
42
|
+
Crackerjack skills under it all, but a smooth finish on top. Neat!
|
43
|
+
Wow, this was cool. The coolest thing ever. Cooler than ice.
|
44
|
+
Beautiful. Attractive. It fit her like a glove.
|
45
|
+
When I dream about it, it's wonderful. Works like a dream.
|
46
|
+
Commendable in their attention to detail.
|
47
|
+
I found it deserving of the highest awards in its field, and more.
|
48
|
+
Worthy of the Nobel prize, the Turing Award, and probably an Emmy.
|
49
|
+
No ifs, ands, or buts - solid build with an unreal body.
|
50
|
+
This is just awesome. Awesome to the max! Rad!
|
51
|
+
I found everything all right (alright). Superior to doing it by hand.
|
52
|
+
Definitely praiseworthy - state of the art.
|
53
|
+
Wow, that was slick. Wow!
|
54
|
+
Very nice, very pleasant. As smooth as butter.
|
55
|
+
Beautiful. Awe-inspiring. Awesome.
|
56
|
+
Like like like like like like like like like like like like!
|
57
|
+
Love love love love love love love love love love love love !
|
58
|
+
Awesome awesome awesome awesome awesome awesome awesome awesome !
|
59
|
+
Cool cool cool cool cool cool cool cool cool cool cool cool cool !
|
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tonality_analyser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Samuel Sanchez
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-02-10 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Process text and propose tonality.
|
15
|
+
email:
|
16
|
+
- samuel@pagedegeek.com
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- .gitignore
|
22
|
+
- Gemfile
|
23
|
+
- LICENSE.txt
|
24
|
+
- README.md
|
25
|
+
- Rakefile
|
26
|
+
- lib/tonality_analyser.rb
|
27
|
+
- lib/tonality_analyser/engine.rb
|
28
|
+
- lib/tonality_analyser/helpers.rb
|
29
|
+
- lib/tonality_analyser/version.rb
|
30
|
+
- spec/spec_helper.rb
|
31
|
+
- spec/tonality_analyser_spec.rb
|
32
|
+
- tonality_analyser.gemspec
|
33
|
+
- training/neg.txt
|
34
|
+
- training/pos.txt
|
35
|
+
homepage: http://github.com/PagedeGeek/tonality_analyser
|
36
|
+
licenses: []
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
none: false
|
49
|
+
requirements:
|
50
|
+
- - ! '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
requirements: []
|
54
|
+
rubyforge_project:
|
55
|
+
rubygems_version: 1.8.25
|
56
|
+
signing_key:
|
57
|
+
specification_version: 3
|
58
|
+
summary: Process text and propose tonality with bayes computation
|
59
|
+
test_files:
|
60
|
+
- spec/spec_helper.rb
|
61
|
+
- spec/tonality_analyser_spec.rb
|