dookie-keywords_maker 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +12 -0
- data/README.rdoc +59 -0
- data/lib/bubble/keywords_maker/blacklist.rb +47 -0
- data/lib/bubble/keywords_maker/counter.rb +43 -0
- data/lib/bubble/keywords_maker/keywords.rb +15 -0
- data/lib/keywords_maker.rb +18 -0
- data/test/test_blacklist.rb +37 -0
- data/test/test_counter.rb +23 -0
- data/test/test_helper.rb +5 -0
- data/test/test_keywords.rb +51 -0
- data/test/test_keywords_maker.rb +21 -0
- metadata +105 -0
data/History.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
= Keywords Maker
|
2
|
+
|
3
|
+
== Description
|
4
|
+
|
5
|
+
A gem that returns the keywords related to your text. It is especially useful for web pages with articles, news, posts, and so on.
|
6
|
+
|
7
|
+
== Features
|
8
|
+
|
9
|
+
* Extends the String class with a keywords method that returns 10 keywords by default.
|
10
|
+
|
11
|
+
== Synopsis
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'keywords_maker'
|
15
|
+
|
16
|
+
text = <<-TEXT
|
17
|
+
O bubble sort, ou ordenação por flutuação (literalmente "por bolha"), é um algoritmo de ordenação dos mais simples. A ideia é percorrer o vector diversas vezes, a cada passagem fazendo flutuar para o topo o menor elemento da sequência. Essa movimentação lembra a forma como as bolhas em um tanque de água procuram seu próprio nível, e disso vem o nome do algoritmo.
|
18
|
+
|
19
|
+
No melhor caso, o algoritmo executa n2 / 2 operações relevantes, onde n representa o número de elementos do vector. No pior caso, são feitas 2n2 operações. No caso médio, são feitas 5n2 / 2 operações. A complexidade desse algoritmo é de Ordem quadrática. Por isso, ele não é recomendado para programas que precisem de velocidade e operem com quantidade elevada de dados.
|
20
|
+
|
21
|
+
O algoritmo pode ser descrito em pseudo-código como segue abaixo. V é um VECTOR de elementos que podem ser comparados e n é o tamanho desse vector.
|
22
|
+
TEXT
|
23
|
+
|
24
|
+
text.keywords # => ["algoritmo", "caso", "elementos", "feitas", "ideia", "nivel", "operacoes", "ordenacao", "sort", "vector"]
|
25
|
+
|
26
|
+
text.keywords(5) # => ["algoritmo", "caso", "feitas", "operacoes", "vector"]
|
27
|
+
|
28
|
+
Bubble::KeywordsMaker::Blacklist.words = %w(caso feitas ideia nao nivel topo abaixo diversas n2)
|
29
|
+
text.keywords # => ["algoritmo", "bolha", "elementos", "operacoes", "ordenacao", "passagem", "simples", "sort", "vector", "velocidade"]
|
30
|
+
|
31
|
+
== Install
|
32
|
+
|
33
|
+
sudo gem sources -a http://gems.github.com
|
34
|
+
sudo gem install dookie-keywords_maker
|
35
|
+
|
36
|
+
== License
|
37
|
+
|
38
|
+
(The MIT License)
|
39
|
+
|
40
|
+
Copyright (c) 2009 Bruno Azisaka Maciel
|
41
|
+
|
42
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
43
|
+
a copy of this software and associated documentation files (the
|
44
|
+
'Software'), to deal in the Software without restriction, including
|
45
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
46
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
47
|
+
permit persons to whom the Software is furnished to do so, subject to
|
48
|
+
the following conditions:
|
49
|
+
|
50
|
+
The above copyright notice and this permission notice shall be
|
51
|
+
included in all copies or substantial portions of the Software.
|
52
|
+
|
53
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
54
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
55
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
56
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
57
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
58
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
59
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module Bubble
  module KeywordsMaker
    # Process-wide registry of the words that must never be reported as
    # keywords.
    #
    # The effective blacklist is built from three sources, in this order:
    # * two built-in patterns (a single lowercase letter, a run of digits),
    # * the stop-word list of the active language module (+locate+),
    # * the custom words registered through Blacklist.words=.
    #
    # Every entry is interpolated verbatim into a regular expression, so an
    # entry may be a regexp fragment as well as a plain word.
    class Blacklist
      include Singleton

      # Patterns that are blacklisted regardless of the active language.
      BUILTIN_PATTERNS = %w([a-z] [0-9]+).freeze

      # words  - custom blacklist entries (Array of Strings).
      # locate - language module exposing a +Blacklist+ constant
      #          (for example Default::PtBr).
      attr_accessor :words, :locate

      # Replaces the custom blacklist entries of the shared instance.
      def self.words=(array)
        instance.words = array
      end

      # Returns a new flat Array with the built-in patterns, the stop words
      # of the active language and the custom entries. A +nil+ custom list
      # (or +nil+ entries) is ignored instead of leaking an empty alternative
      # into the regexp.
      def self.words
        [BUILTIN_PATTERNS, instance.locate::Blacklist, instance.words].flatten.compact
      end

      # Returns a Regexp that matches a String only when the *whole* string
      # is one of the blacklisted entries. \A and \z are used instead of
      # ^ and $ so that a multi-line string cannot match on a single line.
      def self.regexp
        %r{\A(#{words.join('|')})\z}
      end

      # Selects the language module whose stop words are blacklisted.
      def self.locate=(locate)
        instance.locate = locate
      end

      # Starts with no custom words and the Brazilian Portuguese stop words.
      def initialize
        self.words = []
        self.locate = Bubble::KeywordsMaker::Default::PtBr
      end
    end

    # Stop-word lists shipped with the gem, one module per language.
    module Default
      # Brazilian Portuguese stop words (accents already stripped).
      module PtBr
        Blacklist = %w(ou de da do desse na no por para que era um uma uns umas como se ser sao em essa com pode).freeze
      end

      # US English stop words.
      module EnUs
        Blacklist = %w(is are were was did do where what of the it in to and).freeze
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Bubble
  module KeywordsMaker
    # Tokenizes a text and ranks its words by how often they occur.
    class Counter
      # text - the String to analyse. Diacritics are removed and the text is
      #        lower-cased before any counting happens.
      def initialize(text)
        @text = strip(text).downcase
      end

      # Returns the Array of non-empty tokens of the text, in reading order.
      # Note that "+" inside the character class is a literal plus sign, so
      # "+" is a separator just like whitespace and the listed punctuation.
      def words
        @text.split(/[\s+\.;\,\?!\\\/\(\)\"\']/).reject { |token| token.empty? }
      end

      # Returns a Hash mapping each word to its number of occurrences.
      # Words that never occur are absent (looking them up yields nil).
      def counted
        results = {}
        words.each { |word| results[word] = (results[word] || 0) + 1 }
        results
      end

      # Returns every non-blacklisted word, most frequent first.
      def keywords
        sanitized
      end

      protected

      # Drops the blacklisted words from the ranked list. The regexp is built
      # once per call rather than once per word.
      def sanitized
        blacklisted = Blacklist.regexp
        sorted.reject { |word| word =~ blacklisted }
      end

      # Words ordered by descending frequency.
      def sorted
        counted.sort_by { |_word, count| count }.reverse.map { |word, _count| word }
      end

      # Removes diacritics: the text is decomposed (NFKD) and every code
      # point in the combining diacritical marks range U+0300..U+036F is
      # discarded.
      def strip(text)
        normalize_kd(text).unpack('U*').reject { |cp| cp.between?(0x300, 0x036F) }.pack('U*')
      end

      # NFKD-normalizes +text+ with the `unicode` gem when it can be loaded,
      # falling back to the standard library's String#unicode_normalize so
      # the class keeps working where the gem is not installed.
      def normalize_kd(text)
        require 'unicode'
        Unicode.normalize_KD(text)
      rescue LoadError
        text.unicode_normalize(:nfkd)
      end
    end
  end
end
|
43
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Bubble
  module KeywordsMaker
    # Holds a text and extracts its keywords on demand.
    class Keywords
      # The text exactly as it was handed to the constructor.
      attr_accessor :original_text

      # text - the String whose keywords will be extracted.
      def initialize(text)
        self.original_text = text
      end

      # Returns up to +limit+ keywords of the original text.
      #
      # The ranking comes from Counter#keywords; only the first +limit+
      # entries of that ranking are kept, and the kept entries are then
      # returned in ascending sort order.
      def keywords(limit = 10)
        ranked = Counter.new(original_text).keywords
        selected = ranked[0...limit]
        selected.sort
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
2
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
|
4
|
+
module Bubble
  module KeywordsMaker
    # Version string of the gem. Frozen so it cannot be altered at runtime.
    VERSION = '0.2'.freeze
  end
end
|
9
|
+
|
10
|
+
require 'bubble/keywords_maker/blacklist'
|
11
|
+
require 'bubble/keywords_maker/counter'
|
12
|
+
require 'bubble/keywords_maker/keywords'
|
13
|
+
|
14
|
+
class String
  # Returns the keywords of this string as computed by
  # Bubble::KeywordsMaker::Keywords.
  #
  # limit - maximum number of keywords to return (defaults to 10).
  #
  # The result is computed on every call instead of being cached in an
  # instance variable: a cache would raise on frozen strings and would go
  # stale whenever the string or the blacklist changes.
  def keywords(limit = 10)
    Bubble::KeywordsMaker::Keywords.new(self).keywords(limit)
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Tests for the Blacklist singleton: custom words, the generated regexp and
# the language selection.
class TestBlacklist < Test::Unit::TestCase
  include Bubble::KeywordsMaker
  include Bubble::KeywordsMaker::Default

  context "A Blacklist instance" do
    # Blacklist is a process-wide singleton, so each test registers the
    # words it needs instead of relying on another test having run first.
    setup do
      Blacklist.words = %w(testing now)
    end

    should "receive a array as argument" do
      assert_nothing_raised { Blacklist.words = %w(testing now) }
    end

    should "return the array with the words" do
      assert(Blacklist.words.include?('testing'))
      assert(Blacklist.words.include?('now'))
    end

    should "return the words as a regexp" do
      assert_kind_of(Regexp, Blacklist.regexp)

      assert(Blacklist.regexp =~ 'testing')
      assert(Blacklist.regexp =~ 'now')
    end

    should "be PtBr" do
      Blacklist.locate = PtBr

      assert_equal(PtBr, Blacklist.instance.locate)
    end

    should "be EnUs" do
      Blacklist.locate = EnUs

      assert_equal(EnUs, Blacklist.instance.locate)
    end
  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Tests for Counter: word frequencies and the full keyword list of the
# Portuguese fixture.
class TestCounter < Test::Unit::TestCase
  include Bubble::KeywordsMaker

  context "A KeywordsMaker object from a text" do
    setup do
      # Blacklist is a process-wide singleton; pin it to the state of a
      # freshly loaded library (no custom words, Portuguese stop words) so
      # the result does not depend on which tests ran before.
      Blacklist.words = []
      Blacklist.locate = Default::PtBr

      # File.readlines closes the file, unlike File.open without a block.
      # NOTE(review): '\n' is single-quoted, so the lines are joined with a
      # literal backslash + n. Kept as-is because the expected values below
      # were presumably computed with it -- confirm before changing.
      @text = File.readlines(File.join(File.dirname(__FILE__), '/resources', '/bubble-ptbr.txt')).join('\n')
      @counter = Counter.new(@text)
    end

    should "count the words frequencies" do
      assert_kind_of(Hash, @counter.counted)
      assert_equal(5, @counter.counted['algoritmo'])
      assert_equal(2, @counter.counted['ordenacao'])
    end

    should "return all keywords" do
      assert_kind_of(Array, @counter.keywords)
      assert_equal(74, @counter.keywords.size)
    end
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Tests for Keywords: keyword extraction with and without custom blacklist
# entries, for the Portuguese and the English fixtures.
#
# Named TestKeywords so it no longer reopens the TestCounter class defined
# for the Counter tests.
class TestKeywords < Test::Unit::TestCase
  include Bubble::KeywordsMaker

  context "A KeywordsMaker object from a text" do
    setup do
      # Blacklist is a process-wide singleton; pin the language so these
      # tests do not depend on what ran before them.
      Blacklist.locate = Default::PtBr
      Blacklist.words = %w()

      # File.readlines closes the file, unlike File.open without a block.
      # NOTE(review): '\n' is single-quoted, so the lines are joined with a
      # literal backslash + n. Kept as-is because the expected values below
      # were presumably computed with it -- confirm before changing.
      @text = File.readlines(File.join(File.dirname(__FILE__), '/resources', '/bubble-ptbr.txt')).join('\n')
      @km = Keywords.new(@text)
    end

    should "return the original text" do
      assert_equal(@text, @km.original_text)
    end

    should "return the keywords" do
      assert_kind_of(Array, @km.keywords)
      assert_equal(%w(algoritmo caso elementos feitas ideia nivel operacoes ordenacao sort vector), @km.keywords)
    end

    should "return the keywords with blacklist" do
      Blacklist.words = %w(caso feitas ideia nao nivel topo abaixo diversas n2)
      assert_equal(%w(algoritmo bolha elementos operacoes ordenacao passagem simples sort vector velocidade), @km.keywords)
    end

    should "return 5 keywords" do
      assert_equal(5, @km.keywords(5).size)
      assert_equal(%w(algoritmo caso feitas operacoes vector), @km.keywords(5))
    end

    should "return 5 keywords with blacklist" do
      Blacklist.words = %w(caso feitas nivel topo abaixo diversas n2)
      assert_equal(5, @km.keywords(5).size)
      assert_equal(%w(algoritmo elementos operacoes ordenacao vector), @km.keywords(5))
    end
  end

  context "When the locate is EnUs" do
    setup do
      Blacklist.locate = Default::EnUs
      # Start without custom words left over from the previous context.
      Blacklist.words = %w()
      @text = File.readlines(File.join(File.dirname(__FILE__), '/resources', '/bubble-enus.txt')).join('\n')
      @km = Keywords.new(@text)
    end

    should "return the keywords" do
      assert_equal(%w(algorithm average bubble complexity list sort sorted sorting through worst-case), @km.keywords)
    end
  end

end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Tests for the String#keywords extension.
class TestKeywordsMaker < Test::Unit::TestCase
  context "A big text" do
    setup do
      # File.readlines closes the file, unlike File.open without a block.
      # NOTE(review): '\n' is single-quoted, so the lines are joined with a
      # literal backslash + n. Kept as-is because the expected values below
      # were presumably computed with it -- confirm before changing.
      @text = File.readlines(File.join(File.dirname(__FILE__), '/resources', '/bubble-ptbr.txt')).join('\n')

      # Blacklist is a process-wide singleton; reset both the language and
      # the custom words so the tests do not depend on what ran before.
      Bubble::KeywordsMaker::Blacklist.locate = Bubble::KeywordsMaker::Default::PtBr
      Bubble::KeywordsMaker::Blacklist.words = []
    end

    should "have these keywords" do
      assert_equal(%w(algoritmo caso elementos feitas ideia nivel operacoes ordenacao sort vector), @text.keywords)
    end

    should "return the keywords with blacklist" do
      Bubble::KeywordsMaker::Blacklist.words = %w(caso feitas ideia nao nivel topo abaixo diversas n2)

      assert_equal(%w(algoritmo bolha elementos operacoes ordenacao passagem simples sort vector velocidade), @text.keywords)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dookie-keywords_maker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.2"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Bruno Azisaka Maciel
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-01-05 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: shoulda
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.6
|
23
|
+
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: unicode
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: "0.1"
|
32
|
+
version:
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: newgem
|
35
|
+
version_requirement:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.1.0
|
41
|
+
version:
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: hoe
|
44
|
+
version_requirement:
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.8.0
|
50
|
+
version:
|
51
|
+
description: A gem which returns the keywords related with your text. It's very relevant for webpages with articles, news, posts, and so on.
|
52
|
+
email:
|
53
|
+
- bruno@dookie.com.br
|
54
|
+
executables: []
|
55
|
+
|
56
|
+
extensions: []
|
57
|
+
|
58
|
+
extra_rdoc_files:
|
59
|
+
- History.txt
|
60
|
+
- README.rdoc
|
61
|
+
files:
|
62
|
+
- History.txt
|
63
|
+
- README.rdoc
|
64
|
+
- lib/keywords_maker.rb
|
65
|
+
- lib/bubble/keywords_maker/keywords.rb
|
66
|
+
- lib/bubble/keywords_maker/counter.rb
|
67
|
+
- lib/bubble/keywords_maker/blacklist.rb
|
68
|
+
- test/test_blacklist.rb
|
69
|
+
- test/test_counter.rb
|
70
|
+
- test/test_helper.rb
|
71
|
+
- test/test_keywords.rb
|
72
|
+
- test/test_keywords_maker.rb
|
73
|
+
has_rdoc: true
|
74
|
+
homepage:
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options:
|
77
|
+
- --main
|
78
|
+
- README.rdoc
|
79
|
+
require_paths:
|
80
|
+
- lib
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: "0"
|
86
|
+
version:
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: "0"
|
92
|
+
version:
|
93
|
+
requirements: []
|
94
|
+
|
95
|
+
rubyforge_project: keywords_maker
|
96
|
+
rubygems_version: 1.2.0
|
97
|
+
signing_key:
|
98
|
+
specification_version: 2
|
99
|
+
summary: A gem which returns the keywords related with your text
|
100
|
+
test_files:
|
101
|
+
- test/test_blacklist.rb
|
102
|
+
- test/test_counter.rb
|
103
|
+
- test/test_helper.rb
|
104
|
+
- test/test_keywords.rb
|
105
|
+
- test/test_keywords_maker.rb
|