tonality_analyser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +1 -0
- data/README.md +29 -0
- data/Rakefile +8 -0
- data/lib/tonality_analyser.rb +3 -0
- data/lib/tonality_analyser/engine.rb +77 -0
- data/lib/tonality_analyser/helpers.rb +17 -0
- data/lib/tonality_analyser/version.rb +3 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/tonality_analyser_spec.rb +15 -0
- data/tonality_analyser.gemspec +19 -0
- data/training/neg.txt +58 -0
- data/training/pos.txt +59 -0
- metadata +61 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Copyright (c) 2013 Samuel Sanchez
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# TonalityAnalyser
|
2
|
+
|
3
|
+
Process text and propose tonality.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'tonality_analyser'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install tonality_analyser
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
See the spec files (`spec/tonality_analyser_spec.rb`) for a usage example.
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
module TonalityAnalyser

  # Word-frequency based tonality classifier (positive vs. negative).
  #
  # Typical usage:
  #
  #   engine = TonalityAnalyser::Engine.new
  #   engine.load_traning_corpus!      # or call #train yourself
  #   engine.compute_probabilities!
  #   engine.tonality('Thanks')        # => :pos
  #
  # TODO (original author's note): move the counters to Redis instead of
  # keeping them in in-memory hashes.
  class Engine
    # Tonalities the engine knows how to train and score.
    TONALITIES = [:pos, :neg].freeze

    # Probability assumed for a word that was never seen for a tonality.
    UNSEEN_WORD_PROBABILITY = 0.01

    # Weight applied to the numerator of the score in #analysis.
    PRIOR = 0.5

    # Corpus directory shipped with the gem; assumes this file lives at
    # lib/tonality_analyser/engine.rb inside the gem.
    BUNDLED_TRAINING_DIR = File.expand_path('../../../training', __FILE__)

    # counted_words: { tonality => { word => occurrences } }
    # probabilites:  { tonality => { word => probability } } (filled by
    #                #compute_probabilities!; the misspelt name is part of the
    #                public interface and is kept as is)
    attr_reader :counted_words, :probabilites

    def initialize
      @total_words       = { :all => 0, :pos => 0, :neg => 0 }
      @counted_words     = { :pos => {}, :neg => {} }
      @probabilites      = { :pos => {}, :neg => {} }
      @spec_probabilites = { :pos => {}, :neg => {} }
    end

    # Adds every whitespace-separated token of +words+ to the counters of
    # +tonality+. Tokens are normalised with Helpers::Text.normalize; tokens
    # that normalise to the empty string (pure punctuation such as "-") are
    # ignored instead of being counted as a word.
    #
    # @param words [String] training sentence
    # @param tonality [Symbol] one of TONALITIES
    # @raise [RuntimeError] when +tonality+ is not one of TONALITIES
    def train(words, tonality)
      raise "Invalid tonality '#{tonality}'" unless TONALITIES.include?(tonality)

      counts = @counted_words[tonality]
      words.split.each do |token|
        word = Helpers::Text.normalize(token)
        next if word.empty?

        @total_words[:all] += 1
        @total_words[tonality] += 1
        counts[word] = counts.fetch(word, 0) + 1
      end
    end

    # Derives, for every trained word, the share of its occurrences that
    # belong to each tonality: count(tonality) / (count(pos) + count(neg)).
    # Must be called after training and before #analysis / #tonality.
    def compute_probabilities!
      TONALITIES.each { |tonality| compute_probabilities_for(tonality) }
      # The original implementation incidentally returned this hash; keep it.
      @counted_words[:neg]
    end

    # Scores +text+ for +tonality+. Words come from
    # Helpers::Text.clean_words_from; a word unknown for the tonality
    # contributes UNSEEN_WORD_PROBABILITY. The trained tables are only read,
    # never modified, so scoring does not alter the model.
    #
    # @param text [String] text to score
    # @param tonality [Symbol] :pos or :neg
    # @return [Float] the score, or 0.0 when the computation yields NaN
    def analysis(text, tonality)
      words = Helpers::Text.clean_words_from(text)
      spec_table = @spec_probabilites[tonality]
      table = @probabilites[tonality]

      num = product(words) { |word| spec_table.fetch(word, UNSEEN_WORD_PROBABILITY) } * PRIOR
      den1 = product(words) { |word| table.fetch(word, UNSEEN_WORD_PROBABILITY) }
      den2 = product(words) { |word| 1 - table.fetch(word, UNSEEN_WORD_PROBABILITY) }

      proba_pol = num / (den1 + den2)
      proba_pol.nan? ? 0.0 : proba_pol
    end

    # @param text [String] text to classify
    # @return [Symbol] :pos or :neg; ties resolve to :pos
    def tonality(text)
      analysis(text, :pos) >= analysis(text, :neg) ? :pos : :neg
    end

    # Trains the engine with the corpus file of every tonality.
    def load_traning_corpus!
      TONALITIES.each { |tonality| load_traning_corpus_for(tonality) }
    end

    # Trains +tonality+ with each line of its corpus file.
    # See #training_corpus_path for where the file is looked up.
    #
    # @raise [Errno::ENOENT] when no corpus file can be found
    def load_traning_corpus_for(tonality)
      File.foreach(training_corpus_path(tonality)) { |line| train(line, tonality) }
    end

    private

    # Fills both probability tables for one tonality.
    def compute_probabilities_for(tonality)
      @counted_words[tonality].each do |word, count|
        # nil.to_f is 0.0, so a word missing from the other tonality counts as 0.
        total = @counted_words[:pos][word].to_f + @counted_words[:neg][word].to_f
        probability = count.to_f / total
        @probabilites[tonality][word] = probability
        @spec_probabilites[tonality][word] = probability
      end
    end

    # Multiplies the block's value for every word, starting from 1.0.
    def product(words)
      words.inject(1.0) { |acc, word| acc * yield(word) }
    end

    # A ./training/<tonality>.txt in the current working directory wins (the
    # historical behaviour); otherwise the corpus bundled with the gem is used
    # so the engine also works when the cwd is not the gem root.
    def training_corpus_path(tonality)
      local = "./training/#{tonality}.txt"
      File.exist?(local) ? local : File.join(BUNDLED_TRAINING_DIR, "#{tonality}.txt")
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module TonalityAnalyser
  module Helpers

    # Stateless text clean-up helpers used for tokenising input.
    class Text
      # Matches every character that is not an ASCII digit or letter.
      NON_ALPHANUMERIC = /[^0-9a-z]/i

      # Words of this length or shorter are dropped by .clean_words_from.
      MAX_IGNORED_LENGTH = 2

      # Downcases +word+ and removes every non-alphanumeric character.
      #
      # @param word [#to_s] a single token; nil is treated as ""
      # @return [String] e.g. "Can't!" => "cant"
      def self.normalize(word)
        word.to_s.downcase.gsub(NON_ALPHANUMERIC, '')
      end

      # Splits +text+ into lower-case alphanumeric words, treating every other
      # character as a separator, and keeps only words longer than two
      # characters.
      #
      # @param text [#to_s] free text; nil is treated as ""
      # @return [Array<String>] e.g. "I can't sleep" => ["can", "sleep"]
      def self.clean_words_from(text)
        text.to_s.downcase.gsub(NON_ALPHANUMERIC, ' ').split.select do |word|
          word.length > MAX_IGNORED_LENGTH
        end
      end
    end

  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'

describe TonalityAnalyser::Engine do

  # Woohoo, the killer tests :)
  # Trains the engine from the training corpus, then checks the tonality
  # proposed for a few single words.
  it 'propose tonality' do
    e = TonalityAnalyser::Engine.new
    e.load_traning_corpus!
    e.compute_probabilities!
    # Every expectation needs `.should`; a bare `==` is evaluated and then
    # discarded, so it could never fail the example.
    e.tonality('This').should == :neg
    e.tonality('Unacceptable').should == :neg
    e.tonality('want').should == :pos
    e.tonality('Thanks').should == :pos
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for tonality_analyser.

# Put the gem's own lib/ directory on the load path (once) so the version
# constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'tonality_analyser/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "tonality_analyser"
  spec.version  = TonalityAnalyser::VERSION
  spec.authors  = ["Samuel Sanchez"]
  spec.email    = ["samuel@pagedegeek.com"]
  spec.homepage = "http://github.com/PagedeGeek/tonality_analyser"

  # Descriptions
  spec.description = "Process text and propose tonality."
  spec.summary     = "Process text and propose tonality with bayes computation"

  # Packaged files: everything tracked by git, one path per line of output.
  spec.files         = `git ls-files`.split($/)
  # Executables are the base names of the packaged files under bin/.
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  # Test files are the packaged files under test/, spec/ or features/.
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]
end
|
data/training/neg.txt
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
I hate this.
|
2
|
+
This sucks. I can't even figure out how to open it.
|
3
|
+
Wow, this was horrible. Nothing is worse.
|
4
|
+
Very bad. Very ugly. Very slow.
|
5
|
+
Whoever wrote this should die in a fire.
|
6
|
+
This is the crappiest crap that ever crapped.
|
7
|
+
This was ugly, despicable, and horrid. Imprecise yet slow.
|
8
|
+
I loathed it. I wanted to set it on fire.
|
9
|
+
The company should be ashamed of releasing this. Shame!
|
10
|
+
I hated it when I saw it, I hate it now, I will hate it tomorrow.
|
11
|
+
Loathsome and disgusting. Why is it slimy?
|
12
|
+
This is just a scam. Inaccurate and incorrect.
|
13
|
+
Ick - repulsive and unworthy of my money.
|
14
|
+
Expensive yet declasse. Just screams nouveau riche.
|
15
|
+
I abhorred it. It's horrible. Just horrible.
|
16
|
+
I found it appalling. Why would anyone buy such a thing?
|
17
|
+
My entire family hated it.
|
18
|
+
The entire family found it beastly and cruel to animals.
|
19
|
+
My entire family hated its disgusting noises.
|
20
|
+
Noisy. They don't tell you how loud it is.
|
21
|
+
Detestable. It makes a disagreeable whir, clank, grind noise.
|
22
|
+
I bought one for my sister, and it was detestable.
|
23
|
+
Eerie noises came from its dreadful orifices.
|
24
|
+
Throughout the night, it made obnoxious noises.
|
25
|
+
Atrocious quality - never buying again.
|
26
|
+
This company should be ashamed of the bad news coming out.
|
27
|
+
Inferior quality and awful craftsmanship.
|
28
|
+
Cruddy and cheap pieces. Looks pretty blah, too.
|
29
|
+
Looks like a careless monkey put it together.
|
30
|
+
Where was QA? This crummy thing didn't even last two days.
|
31
|
+
Defective in oh so many ways. Godawful piece of crap.
|
32
|
+
I was unsatisfied with my purchase.
|
33
|
+
Just seeing it is a downer.
|
34
|
+
Had seven faulty sensors and nine faulty gauges.
|
35
|
+
Inadequate lighting and slimy, junky interior.
|
36
|
+
Gross! It spit at me! I want to spit at it!
|
37
|
+
Slipshod, shoddy craftsmanship.
|
38
|
+
Dishonest purveyors of snake oil.
|
39
|
+
Just pure garbage. It belongs in a dumpster or a landfill.
|
40
|
+
Poor quality and cheapness is evident.
|
41
|
+
Substandard for something of this level. I paid too much.
|
42
|
+
I paid way too much for this piece of crap.
|
43
|
+
It's the pits. Unacceptable and unsatisfactory.
|
44
|
+
This had a lousy exterior and an inferior interior.
|
45
|
+
The worst product I've ever used in my life. Never again!
|
46
|
+
I would compare it unfavorably to feces.
|
47
|
+
It is teh suck. It sucks more than a sucky suck.
|
48
|
+
Worse than anything I can comprehend.
|
49
|
+
Hellish, boorish, and vain.
|
50
|
+
Shoddy worksmanship. Fell apart in the rain.
|
51
|
+
Whoever made this will have to answer to his maker.
|
52
|
+
Broke down almost immediately. Broken since then.
|
53
|
+
Hate hate hate hate hate hate hate hate hate hate hate hate hate hate!
|
54
|
+
Crap crap crap crap crap crap crap crap crap crap crap crap crap crap!
|
55
|
+
Sucky suck suck sucked sucking sucked really bad.
|
56
|
+
Bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad!
|
57
|
+
Dislike dislike dislike dislike dislike dislike dislike dislike dislike!
|
58
|
+
Lousy lousy lousy lousy lousy lousy lousy lousy lousy lousy lousy lousy!
|
data/training/pos.txt
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
This is awesome!
|
2
|
+
I love this. It has the right mixture of color and texture.
|
3
|
+
Love love love. I can't go to sleep without it.
|
4
|
+
My child loves it. She says it's her favorite toy.
|
5
|
+
I can't even begin to describe how great this is. Thank you!
|
6
|
+
Thanks to the developers who made this. Pure awesome sauce.
|
7
|
+
I just want to snuggle in this program and fall asleep.
|
8
|
+
It's cuter than a bug in a rug.
|
9
|
+
Technical excellence, achieved.
|
10
|
+
The company releases pure, 100%, grade-A greatness, every year.
|
11
|
+
I've said it before, and I'll say it again - this is the best.
|
12
|
+
The absolute best product I've ever used.
|
13
|
+
Grade A.. no, A+! Would buy again.
|
14
|
+
First-class seating at a reasonable price.
|
15
|
+
Quite admirable. Adroitly handles every exception.
|
16
|
+
Marvelous. As soft as a fluffy bunny but still tough.
|
17
|
+
The last item was transcendent. I could see time.
|
18
|
+
A consummate perfectionist, he has created a better mousetrap.
|
19
|
+
Absolutely perfect in every conceivable way.
|
20
|
+
It surpassed all of my expectations, including some I didn't know about.
|
21
|
+
100% brilliance. More brilliant than a diamond.
|
22
|
+
Not just good - downright exceptional.
|
23
|
+
Fine taste put together with expert hands.
|
24
|
+
You can tell an expert swordsmith made this.
|
25
|
+
Entirely complete - an utter enchantment to work with.
|
26
|
+
I found it good - utterly good - a champion among wines.
|
27
|
+
Super-duper, out of sight, groovy! An out and out perfect game.
|
28
|
+
Not just fantastic - super-duper fantastic.
|
29
|
+
Hellacious - a heavy mixture of nitroglycerin and excitement.
|
30
|
+
The terrific end of summer experience was wonderful.
|
31
|
+
Wonderful, wonderful, wonderful. So wonderful.
|
32
|
+
A superb, virtuoso performance. Distinguished for excellence.
|
33
|
+
Outstanding in its field. Makes me want to jump for joy.
|
34
|
+
Oh yes, I like it, I love it, I want some more of it.
|
35
|
+
This is a great way to store your cherries in the winter.
|
36
|
+
It's the cat's pajamas, the way it accurately knows things.
|
37
|
+
Quite hunky-dory - even peachy keen.
|
38
|
+
Peachy-keen. It's a choice celebration.
|
39
|
+
Exquisite craftsmanship and meritorious conduct.
|
40
|
+
Quite simply, it's the greatest thing ever.
|
41
|
+
A-1. A1. A-OK. Just A+!
|
42
|
+
Crackerjack skills under it all, but a smooth finish on top. Neat!
|
43
|
+
Wow, this was cool. The coolest thing ever. Cooler than ice.
|
44
|
+
Beautiful. Attractive. It fit her like a glove.
|
45
|
+
When I dream about it, it's wonderful. Works like a dream.
|
46
|
+
Commendable in their attention to detail.
|
47
|
+
I found it deserving of the highest awards in its field, and more.
|
48
|
+
Worthy of the Nobel prize, the Turing Award, and probably an Emmy.
|
49
|
+
No ifs, ands, or buts - solid build with an unreal body.
|
50
|
+
This is just awesome. Awesome to the max! Rad!
|
51
|
+
I found everything all right (alright). Superior to doing it by hand.
|
52
|
+
Definitely praiseworthy - state of the art.
|
53
|
+
Wow, that was slick. Wow!
|
54
|
+
Very nice, very pleasant. As smooth as butter.
|
55
|
+
Beautiful. Awe-inspiring. Awesome.
|
56
|
+
Like like like like like like like like like like like like!
|
57
|
+
Love love love love love love love love love love love love !
|
58
|
+
Awesome awesome awesome awesome awesome awesome awesome awesome !
|
59
|
+
Cool cool cool cool cool cool cool cool cool cool cool cool cool !
|
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tonality_analyser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Samuel Sanchez
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-02-10 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Process text and propose tonality.
|
15
|
+
email:
|
16
|
+
- samuel@pagedegeek.com
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- .gitignore
|
22
|
+
- Gemfile
|
23
|
+
- LICENSE.txt
|
24
|
+
- README.md
|
25
|
+
- Rakefile
|
26
|
+
- lib/tonality_analyser.rb
|
27
|
+
- lib/tonality_analyser/engine.rb
|
28
|
+
- lib/tonality_analyser/helpers.rb
|
29
|
+
- lib/tonality_analyser/version.rb
|
30
|
+
- spec/spec_helper.rb
|
31
|
+
- spec/tonality_analyser_spec.rb
|
32
|
+
- tonality_analyser.gemspec
|
33
|
+
- training/neg.txt
|
34
|
+
- training/pos.txt
|
35
|
+
homepage: http://github.com/PagedeGeek/tonality_analyser
|
36
|
+
licenses: []
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
none: false
|
49
|
+
requirements:
|
50
|
+
- - ! '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
requirements: []
|
54
|
+
rubyforge_project:
|
55
|
+
rubygems_version: 1.8.25
|
56
|
+
signing_key:
|
57
|
+
specification_version: 3
|
58
|
+
summary: Process text and propose tonality with bayes computation
|
59
|
+
test_files:
|
60
|
+
- spec/spec_helper.rb
|
61
|
+
- spec/tonality_analyser_spec.rb
|