dookie-keywords_maker 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +12 -0
- data/README.rdoc +59 -0
- data/lib/bubble/keywords_maker/blacklist.rb +47 -0
- data/lib/bubble/keywords_maker/counter.rb +43 -0
- data/lib/bubble/keywords_maker/keywords.rb +15 -0
- data/lib/keywords_maker.rb +18 -0
- data/test/test_blacklist.rb +37 -0
- data/test/test_counter.rb +23 -0
- data/test/test_helper.rb +5 -0
- data/test/test_keywords.rb +51 -0
- data/test/test_keywords_maker.rb +21 -0
- metadata +105 -0
data/History.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
= Keywords Maker
|
2
|
+
|
3
|
+
== Description
|
4
|
+
|
5
|
+
A gem that extracts the keywords most related to your text. It is especially useful for web pages with articles, news, posts, and so on.
|
6
|
+
|
7
|
+
== Features
|
8
|
+
|
9
|
+
* Extends the String class with a keywords method that returns 10 keywords by default.
|
10
|
+
|
11
|
+
== Synopsis
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'keywords_maker'
|
15
|
+
|
16
|
+
text = <<-TEXT
|
17
|
+
O bubble sort, ou ordenação por flutuação (literalmente "por bolha"), é um algoritmo de ordenação dos mais simples. A ideia é percorrer o vector diversas vezes, a cada passagem fazendo flutuar para o topo o menor elemento da sequência. Essa movimentação lembra a forma como as bolhas em um tanque de água procuram seu próprio nível, e disso vem o nome do algoritmo.
|
18
|
+
|
19
|
+
No melhor caso, o algoritmo executa n2 / 2 operações relevantes, onde n representa o número de elementos do vector. No pior caso, são feitas 2n2 operações. No caso médio, são feitas 5n2 / 2 operações. A complexidade desse algoritmo é de Ordem quadrática. Por isso, ele não é recomendado para programas que precisem de velocidade e operem com quantidade elevada de dados.
|
20
|
+
|
21
|
+
O algoritmo pode ser descrito em pseudo-código como segue abaixo. V é um VECTOR de elementos que podem ser comparados e n é o tamanho desse vector.
|
22
|
+
TEXT
|
23
|
+
|
24
|
+
text.keywords # => ["algoritmo", "caso", "elementos", "feitas", "ideia", "nivel", "operacoes", "ordenacao", "sort", "vector"]
|
25
|
+
|
26
|
+
text.keywords(5) # => ["algoritmo", "caso", "feitas", "operacoes", "vector"]
|
27
|
+
|
28
|
+
Bubble::KeywordsMaker::Blacklist.words = %w(caso feitas ideia nao nivel topo abaixo diversas n2)
|
29
|
+
text.keywords # => ["algoritmo", "bolha", "elementos", "operacoes", "ordenacao", "passagem", "simples", "sort", "vector", "velocidade"]
|
30
|
+
|
31
|
+
== Install
|
32
|
+
|
33
|
+
sudo gem sources -a http://gems.github.com
|
34
|
+
sudo gem install dookie-keywords_maker
|
35
|
+
|
36
|
+
== License
|
37
|
+
|
38
|
+
(The MIT License)
|
39
|
+
|
40
|
+
Copyright (c) 2009 Bruno Azisaka Maciel
|
41
|
+
|
42
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
43
|
+
a copy of this software and associated documentation files (the
|
44
|
+
'Software'), to deal in the Software without restriction, including
|
45
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
46
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
47
|
+
permit persons to whom the Software is furnished to do so, subject to
|
48
|
+
the following conditions:
|
49
|
+
|
50
|
+
The above copyright notice and this permission notice shall be
|
51
|
+
included in all copies or substantial portions of the Software.
|
52
|
+
|
53
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
54
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
55
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
56
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
57
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
58
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
59
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module Bubble
  module KeywordsMaker
    # Built-in word lists, one module per language. Each module exposes a
    # +Blacklist+ constant holding the words that are ignored for that language.
    module Default
      module PtBr
        Blacklist = %w(ou de da do desse na no por para que era um uma uns umas como se ser sao em essa com pode).freeze
      end

      module EnUs
        Blacklist = %w(is are were was did do where what of the it in to and).freeze
      end
    end

    # Singleton registry of the entries that must never be reported as keywords.
    #
    # The effective list combines three sources:
    # * the built-in patterns (any single lowercase letter, any run of digits),
    # * the +Blacklist+ constant of the module stored in +locate+,
    # * the custom entries stored in +words+.
    #
    # Every entry is interpolated into a regular expression as-is, so entries
    # are regexp fragments rather than plain strings.
    class Blacklist
      include Singleton

      # Patterns rejected regardless of language or custom configuration.
      BUILT_IN = %w([a-z] [0-9]+).freeze

      # words::  custom entries supplied by the application (Array).
      # locate:: module whose +Blacklist+ constant supplies the language list
      #          (name kept as-is for compatibility).
      attr_accessor :words, :locate

      # Replaces the custom entries on the shared instance.
      def self.words=(array)
        instance.words = array
      end

      # Returns a new flat Array with the built-in patterns, the language list
      # and the custom entries, in that order. A +nil+ custom list is ignored
      # so that it cannot add an empty alternative to the regexp.
      def self.words
        [BUILT_IN, instance.locate::Blacklist, instance.words].flatten.compact
      end

      # Returns a Regexp matching a string that is, as a whole, one of the
      # blacklisted entries. \A and \z anchor the whole string; ^ and $ would
      # also accept an entry sitting on its own line of a multi-line string.
      def self.regexp
        /\A(#{words.join('|')})\z/
      end

      # Selects the language module used by the shared instance.
      def self.locate=(locate)
        instance.locate = locate
      end

      # Starts with no custom entries and the Brazilian Portuguese list.
      def initialize
        self.words = []
        self.locate = Default::PtBr
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Bubble
  module KeywordsMaker
    # Splits a text into accent-free, lowercase words and ranks them by how
    # often they occur.
    class Counter
      # Single characters that separate words. The `+` inside the brackets is
      # a literal plus sign, not a quantifier, so every separator splits on
      # its own and runs of separators produce empty tokens (dropped by #words).
      SEPARATORS = /[\s+\.;\,\?!\\\/\(\)\"\']/

      # text:: the String to analyse; it is stripped of combining diacritical
      #        marks and downcased once, up front.
      def initialize(text)
        @text = strip(text).downcase
      end

      # Returns the Array of non-empty tokens of the text, in reading order.
      def words
        @words = @text.split(SEPARATORS).reject { |word| word.empty? }
      end

      # Returns a plain Hash mapping each word to its number of occurrences.
      def counted
        results = {}
        words.each { |word| results[word] = results.fetch(word, 0) + 1 }
        results
      end

      # Returns every non-blacklisted word, most frequent first.
      def keywords
        sanitized
      end

      protected

      # Ranked words minus those matching the blacklist. The regexp is built
      # once here instead of once per word.
      def sanitized
        pattern = Blacklist.regexp
        sorted.reject { |word| word =~ pattern }
      end

      # Words ordered by descending count.
      def sorted
        counted.sort_by { |_word, count| count }.reverse.map { |word, _count| word }
      end

      # Removes accents: decomposes the text (NFKD) and drops the code points
      # of the combining diacritical marks block (U+0300..U+036F).
      def strip(text)
        decompose(text).unpack('U*').select { |cp|
          cp < 0x300 || cp > 0x036F
        }.pack('U*')
      end

      # Returns the NFKD form of +text+. The `unicode` gem is used when it is
      # installed; otherwise the standard library's String#unicode_normalize
      # takes over. The LoadError is re-raised when neither is available.
      def decompose(text)
        require 'unicode'
        Unicode.normalize_KD(text)
      rescue LoadError
        raise unless text.respond_to?(:unicode_normalize)
        text.unicode_normalize(:nfkd)
      end
    end
  end
end
|
43
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Bubble
  module KeywordsMaker
    # Holds a text and answers with its top keywords.
    class Keywords
      # The text exactly as it was handed to the constructor.
      attr_accessor :original_text

      # text:: the String whose keywords will be extracted.
      def initialize(text)
        @original_text = text
      end

      # Returns the first +limit+ keywords produced by Counter for the
      # original text, sorted alphabetically.
      #
      # limit:: maximum number of keywords to return (10 when omitted).
      def keywords(limit = 10)
        ranked = Counter.new(original_text).keywords
        top = ranked[0...limit]
        top.sort
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Make sure this file's directory is on the load path (checked both as given
# and as an absolute path) so the requires that follow can be resolved.
lib_dir = File.dirname(__FILE__)
unless $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
  $:.unshift(lib_dir)
end

module Bubble
  module KeywordsMaker
    # Version string of the library.
    VERSION = '0.2'
  end
end
|
9
|
+
|
10
|
+
require 'bubble/keywords_maker/blacklist'
|
11
|
+
require 'bubble/keywords_maker/counter'
|
12
|
+
require 'bubble/keywords_maker/keywords'
|
13
|
+
|
14
|
+
class String
  # Returns the keywords of this string, as computed by
  # Bubble::KeywordsMaker::Keywords.
  #
  # limit:: maximum number of keywords to return (10 when omitted).
  #
  # The result is computed on every call rather than cached on the string:
  # a cache cannot be stored on a frozen string, and it would go stale when
  # the blacklist, the limit or the string itself changes.
  def keywords(limit = 10)
    Bubble::KeywordsMaker::Keywords.new(self).keywords(limit)
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Exercises the class-level API of Bubble::KeywordsMaker::Blacklist.
class TestBlacklist < Test::Unit::TestCase
  include Bubble::KeywordsMaker
  include Bubble::KeywordsMaker::Default

  context "A Blacklist instance" do
    # Blacklist is a singleton, so every test installs the words it needs
    # instead of relying on another test having run first.
    setup do
      Blacklist.words = %w(testing now)
    end

    # Put the shared instance back to its initial state (no custom words,
    # PtBr list) so nothing leaks into other test cases.
    teardown do
      Blacklist.words = []
      Blacklist.locate = PtBr
    end

    should "receive a array as argument" do
      assert_equal(%w(testing now), Blacklist.instance.words)
    end

    should "return the array with the words" do
      assert(Blacklist.words.include?('testing'))
      assert(Blacklist.words.include?('now'))
    end

    should "return the words as a regexp" do
      assert_kind_of(Regexp, Blacklist.regexp)

      assert(Blacklist.regexp =~ 'testing')
      assert(Blacklist.regexp =~ 'now')
    end

    should "be PtBr" do
      Blacklist.locate = PtBr

      assert_equal(PtBr, Blacklist.instance.locate)
    end

    should "be EnUs" do
      Blacklist.locate = EnUs

      assert_equal(EnUs, Blacklist.instance.locate)
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Exercises Bubble::KeywordsMaker::Counter against the Portuguese fixture.
class TestCounter < Test::Unit::TestCase
  include Bubble::KeywordsMaker

  context "A KeywordsMaker object from a text" do
    setup do
      fixture = File.join(File.dirname(__FILE__), '/resources', '/bubble-ptbr.txt')
      # File.readlines closes the file itself; File.open without a block
      # left the handle open.
      # NOTE(review): '\n' in single quotes is a literal backslash followed by
      # "n", not a newline. It is kept because the expected counts below may
      # have been derived from this exact text -- confirm before changing it.
      @text = File.readlines(fixture).join('\n')
      @counter = Counter.new(@text)
    end

    should "count the words frequencies" do
      assert_kind_of(Hash, @counter.counted)
      assert_equal(5, @counter.counted['algoritmo'])
      assert_equal(2, @counter.counted['ordenacao'])
    end

    # NOTE(review): the size depends on the current state of the Blacklist
    # singleton, which this test does not set -- confirm the intended state.
    should "return all keywords" do
      assert_kind_of(Array, @counter.keywords)
      assert_equal(74, @counter.keywords.size)
    end
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Exercises Bubble::KeywordsMaker::Keywords. Named TestKeywords so that it no
# longer reopens the TestCounter class.
class TestKeywords < Test::Unit::TestCase
  include Bubble::KeywordsMaker

  context "A KeywordsMaker object from a text" do
    setup do
      fixture = File.join(File.dirname(__FILE__), '/resources', '/bubble-ptbr.txt')
      # File.readlines closes the file itself; the '\n' separator (a literal
      # backslash + "n") is kept so the analysed text stays the same.
      @text = File.readlines(fixture).join('\n')
      @km = Keywords.new(@text)
      # The expectations below are Portuguese keywords, so pin the PtBr list
      # and start without custom words regardless of what ran before.
      Blacklist.locate = Default::PtBr
      Blacklist.words = %w()
    end

    should "return the original text" do
      assert_equal(@text, @km.original_text)
    end

    should "return the keywords" do
      assert_kind_of(Array, @km.keywords)
      assert_equal(%w(algoritmo caso elementos feitas ideia nivel operacoes ordenacao sort vector), @km.keywords)
    end

    should "return the keywords with blacklist" do
      Blacklist.words = %w(caso feitas ideia nao nivel topo abaixo diversas n2)
      assert_equal(%w(algoritmo bolha elementos operacoes ordenacao passagem simples sort vector velocidade), @km.keywords)
    end

    should "return 5 keywords" do
      assert_equal(5, @km.keywords(5).size)
      assert_equal(%w(algoritmo caso feitas operacoes vector), @km.keywords(5))
    end

    should "return 5 keywords with blacklist" do
      Blacklist.words = %w(caso feitas nivel topo abaixo diversas n2)
      assert_equal(5, @km.keywords(5).size)
      assert_equal(%w(algoritmo elementos operacoes ordenacao vector), @km.keywords(5))
    end
  end

  context "When the locate is EnUs" do
    setup do
      Blacklist.locate = Default::EnUs
      fixture = File.join(File.dirname(__FILE__), '/resources', '/bubble-enus.txt')
      @text = File.readlines(fixture).join('\n')
      @km = Keywords.new(@text)
    end

    # Restore the list the singleton starts with so the EnUs selection does
    # not leak into other test cases.
    teardown do
      Blacklist.locate = Default::PtBr
    end

    should "return the keywords" do
      assert_equal(%w(algorithm average bubble complexity list sort sorted sorting through worst-case), @km.keywords)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Exercises the String#keywords entry point.
class TestKeywordsMaker < Test::Unit::TestCase
  context "A big text" do
    setup do
      fixture = File.join(File.dirname(__FILE__), '/resources', '/bubble-ptbr.txt')
      # File.readlines closes the file itself; the '\n' separator (a literal
      # backslash + "n") is kept so the analysed text stays the same.
      @text = File.readlines(fixture).join('\n')
      Bubble::KeywordsMaker::Blacklist.locate = Bubble::KeywordsMaker::Default::PtBr
      # The first test expects words such as "caso" and "feitas", which other
      # tests blacklist; clear custom words left on the shared singleton.
      Bubble::KeywordsMaker::Blacklist.words = []
    end

    should "have these keywords" do
      assert_equal(%w(algoritmo caso elementos feitas ideia nivel operacoes ordenacao sort vector), @text.keywords)
    end

    should "return the keywords with blacklist" do
      Bubble::KeywordsMaker::Blacklist.words = %w(caso feitas ideia nao nivel topo abaixo diversas n2)

      assert_equal(%w(algoritmo bolha elementos operacoes ordenacao passagem simples sort vector velocidade), @text.keywords)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dookie-keywords_maker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.2"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Bruno Azisaka Maciel
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-01-05 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: shoulda
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.6
|
23
|
+
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: unicode
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: "0.1"
|
32
|
+
version:
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: newgem
|
35
|
+
version_requirement:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.1.0
|
41
|
+
version:
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: hoe
|
44
|
+
version_requirement:
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.8.0
|
50
|
+
version:
|
51
|
+
description: A gem which returns the keywords related with your text. It's very relevant for webpages with articles, news, posts, and so on.
|
52
|
+
email:
|
53
|
+
- bruno@dookie.com.br
|
54
|
+
executables: []
|
55
|
+
|
56
|
+
extensions: []
|
57
|
+
|
58
|
+
extra_rdoc_files:
|
59
|
+
- History.txt
|
60
|
+
- README.rdoc
|
61
|
+
files:
|
62
|
+
- History.txt
|
63
|
+
- README.rdoc
|
64
|
+
- lib/keywords_maker.rb
|
65
|
+
- lib/bubble/keywords_maker/keywords.rb
|
66
|
+
- lib/bubble/keywords_maker/counter.rb
|
67
|
+
- lib/bubble/keywords_maker/blacklist.rb
|
68
|
+
- test/test_blacklist.rb
|
69
|
+
- test/test_counter.rb
|
70
|
+
- test/test_helper.rb
|
71
|
+
- test/test_keywords.rb
|
72
|
+
- test/test_keywords_maker.rb
|
73
|
+
has_rdoc: true
|
74
|
+
homepage:
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options:
|
77
|
+
- --main
|
78
|
+
- README.rdoc
|
79
|
+
require_paths:
|
80
|
+
- lib
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: "0"
|
86
|
+
version:
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: "0"
|
92
|
+
version:
|
93
|
+
requirements: []
|
94
|
+
|
95
|
+
rubyforge_project: keywords_maker
|
96
|
+
rubygems_version: 1.2.0
|
97
|
+
signing_key:
|
98
|
+
specification_version: 2
|
99
|
+
summary: A gem which returns the keywords related with your text
|
100
|
+
test_files:
|
101
|
+
- test/test_blacklist.rb
|
102
|
+
- test/test_counter.rb
|
103
|
+
- test/test_helper.rb
|
104
|
+
- test/test_keywords.rb
|
105
|
+
- test/test_keywords_maker.rb
|