highscore 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +11 -0
- data/History.txt +12 -0
- data/README.md +64 -0
- data/Rakefile +18 -0
- data/bin/highscore +7 -0
- data/lib/highscore.rb +60 -0
- data/lib/highscore/content.rb +69 -0
- data/lib/highscore/keywords.rb +59 -0
- data/test/highscore/test_content.rb +18 -0
- data/test/highscore/test_keywords.rb +48 -0
- data/test/test_highscore.rb +0 -0
- data/version.txt +1 -0
- metadata +75 -0
data/.gitignore
ADDED
data/History.txt
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
== 0.2.1 / 2012-01-15
|
2
|
+
|
3
|
+
* added parameter multiplier to set a general multiplier for the ranking
|
4
|
+
|
5
|
+
== 0.2.0 / 2012-01-14
|
6
|
+
|
7
|
+
* added configure() and parameters to influence emphasizing
|
8
|
+
|
9
|
+
== 0.1.0 / 2012-01-14
|
10
|
+
|
11
|
+
* 1 major enhancement
|
12
|
+
* Birthday!
|
data/README.md
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
highscore
|
2
|
+
===========
|
3
|
+
|
4
|
+
Rank keywords in long texts.
|
5
|
+
|
6
|
+
Features
|
7
|
+
--------
|
8
|
+
|
9
|
+
* configurable to rank different types of words differently (uppercase, long words, etc.)
|
10
|
+
|
11
|
+
Examples
|
12
|
+
--------
|
13
|
+
|
14
|
+
text = Highscore::Content.new "foo bar"
|
15
|
+
text.configure do
|
16
|
+
set :multiplier, 2
|
17
|
+
set :upper_case, 3
|
18
|
+
set :long_words, 2
|
19
|
+
set :long_words_threshold, 15
|
20
|
+
end
|
21
|
+
|
22
|
+
text.keywords # => Hash
|
23
|
+
text.keywords.top(50) # => Array
|
24
|
+
|
25
|
+
Requirements
|
26
|
+
------------
|
27
|
+
|
28
|
+
(none)
|
29
|
+
|
30
|
+
Install
|
31
|
+
-------
|
32
|
+
|
33
|
+
* sudo gem install highscore
|
34
|
+
|
35
|
+
Author
|
36
|
+
------
|
37
|
+
|
38
|
+
Original author: Dominik Liebler <liebler.dominik@googlemail.com>
|
39
|
+
|
40
|
+
License
|
41
|
+
-------
|
42
|
+
|
43
|
+
(The MIT License)
|
44
|
+
|
45
|
+
Copyright (c) 2012 Dominik Liebler
|
46
|
+
|
47
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
48
|
+
a copy of this software and associated documentation files (the
|
49
|
+
'Software'), to deal in the Software without restriction, including
|
50
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
51
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
52
|
+
permit persons to whom the Software is furnished to do so, subject to
|
53
|
+
the following conditions:
|
54
|
+
|
55
|
+
The above copyright notice and this permission notice shall be
|
56
|
+
included in all copies or substantial portions of the Software.
|
57
|
+
|
58
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
59
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
60
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
61
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
62
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
63
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
64
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
# Build script for the highscore gem. All packaging tasks come from the
# "bones" gem, so bail out with a readable message when it is missing.
begin
  require 'bones'
rescue LoadError
  abort '### Please install the "bones" gem ###'
end

# The default task and a gem release both depend on the test task.
[:default, 'gem:release'].each do |task_name|
  task task_name => 'test:run'
end

# Gem metadata handed to Bones.
Bones {
  name('highscore')
  authors('Dominik Liebler')
  email('liebler.dominik@googlemail.com')
  url('http://thewebdev.de')
  ignore_file('.gitignore')
}
|
18
|
+
|
data/bin/highscore
ADDED
data/lib/highscore.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
|
2
|
+
module Highscore

  # :stopdoc:
  # Directory containing this file, with a trailing separator.
  LIBPATH = ::File.expand_path('..', __FILE__) + ::File::SEPARATOR
  # Parent directory of LIBPATH, with a trailing separator.
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
  # Version string, read from version.txt below PATH when this file is loaded.
  VERSION = ::File.read(PATH + 'version.txt').strip
  # :startdoc:

  # Returns the library path for the module. If any arguments are given,
  # they will be joined to the end of the library path using
  # <tt>File.join</tt>.
  #
  # When a block is given, LIBPATH is prepended to <tt>$LOAD_PATH</tt> for
  # the duration of the block and the block's result is returned instead.
  #
  def self.libpath( *args, &block )
    with_load_path(LIBPATH, args, &block)
  end

  # Returns the path for the module. If any arguments are given,
  # they will be joined to the end of the path using
  # <tt>File.join</tt>.
  #
  # When a block is given, PATH is prepended to <tt>$LOAD_PATH</tt> for
  # the duration of the block and the block's result is returned instead.
  #
  def self.path( *args, &block )
    with_load_path(PATH, args, &block)
  end

  # Utility method used to require all files ending in .rb that lie in the
  # directory below this file that has the same name as the filename passed
  # in. Optionally, a specific _directory_ name can be passed in such that
  # the _filename_ does not have to be equivalent to the directory.
  #
  def self.require_all_libs_relative_to( fname, dir = nil )
    dir ||= ::File.basename(fname, '.*')
    search_me = ::File.expand_path(
        ::File.join(::File.dirname(fname), dir, '**', '*.rb'))

    # sorted so the load order does not depend on the file system
    Dir.glob(search_me).sort.each {|rb| require rb}
  end

  # Shared implementation of libpath and path.
  #
  # Without a block, returns _base_ (or _base_ joined with _args_). With a
  # block, yields with _base_ at the front of <tt>$LOAD_PATH</tt> and returns
  # the block's result.
  #
  def self.with_load_path( base, args )
    joined = args.empty? ? base : ::File.join(base, args.flatten)
    return joined unless block_given?

    $LOAD_PATH.unshift base
    begin
      yield
    ensure
      # Remove the entry that was added above rather than blindly shifting:
      # the block may have prepended entries of its own in the meantime.
      index = $LOAD_PATH.index(base)
      $LOAD_PATH.delete_at(index) if index
    end
  end
  private_class_method :with_load_path

end  # module Highscore
|
58
|
+
|
59
|
+
Highscore.require_all_libs_relative_to(__FILE__)
|
60
|
+
|
@@ -0,0 +1,69 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__)))
|
2
|
+
require 'keywords'
|
3
|
+
|
4
|
+
module Highscore
  # Wraps a text and ranks the words found in it.
  class Content
    # the text that was passed to the constructor
    attr_reader :content

    # content:: the text to scan for keywords
    #
    def initialize(content)
      @content = content

      # default weighting factors; change them with #configure / #set
      @emphasis = {
        :multiplier => 1.0,
        :upper_case => 3.0,
        :long_words => 2.0,
        :long_words_threshold => 15
      }
    end

    # configure ranking
    #
    # The block is evaluated in the context of this object, so #set can be
    # called without a receiver:
    #
    #   content.configure do
    #     set :multiplier, 2
    #   end
    #
    def configure(&block)
      instance_eval(&block)
    end

    # set emphasis options to rank the content
    #
    # key::   option name, converted with +to_sym+ (the options read by
    #         #keywords are :multiplier, :upper_case, :long_words and
    #         :long_words_threshold)
    # value:: converted with +to_f+ before it is stored
    #
    def set(key, value)
      @emphasis[key.to_sym] = value.to_f
    end

    # get the ranked keywords
    #
    # Every occurrence of a word adds that word's weight to its score.
    #
    # :call-seq:
    #   keywords -> Keywords
    #
    def keywords
      # default value 0 so that += works for words seen for the first time
      ranked = Keywords.new(0)

      find_keywords.each do |word|
        ranked[word] += weight_for(word)
      end

      ranked
    end

    private

    # weight of a single occurrence of +word+: the general multiplier, times
    # the long-word factor for words at or above the length threshold, times
    # the upper-case factor for words starting with an upper-case letter
    #
    def weight_for(word)
      weight = @emphasis[:multiplier]
      weight *= @emphasis[:long_words] if word.length >= @emphasis[:long_words_threshold]
      # Match an actual upper-case letter. Comparing the first character with
      # its upcased self is also true for digits and underscores.
      weight *= @emphasis[:upper_case] if word =~ /\A[[:upper:]]/
      weight
    end

    # find keywords in the content
    #
    # Returns the sorted list of word tokens, without purely numeric tokens
    # and without tokens of one or two characters. nil content is treated
    # like an empty text.
    #
    def find_keywords
      words = @content.to_s.scan(/\w+/)
      words.reject! { |word| word.length <= 2 || word =~ /\A\d+\z/ }
      words.sort
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Highscore
  # keywords that were found in content
  #
  # A Hash that maps a keyword to its weight.
  #
  class Keywords < Hash

    # words that are never reported as keywords (compared in lower case)
    # FIXME: add more keywords!
    BLACKLIST = %w{the and that post add not see about using some something under our comments comment run you want for will file are with end new this use all but can your just get very data blog format out first they posts second}.freeze

    # ranks the keywords and removes keywords that have a low ranking
    # or are blacklisted
    #
    # Note that the unwanted keywords are deleted from the receiver itself.
    #
    # :call-seq:
    #   rank -> array
    #
    def rank
      filter
      sort_it
    end

    # get the top n keywords
    #
    # Returns at most n [keyword, weight] pairs, highest weight first, and an
    # empty array when n is zero or negative. Like #rank this removes the
    # unwanted keywords from the receiver.
    #
    def top n = 10
      ranked = rank
      # A range such as 0..(n - 1) would wrap around for n = 0 and return
      # every keyword, so take an explicit count instead.
      n > 0 ? ranked.first(n) : []
    end

    # sorts the keywords by descending weight and returns an array of arrays
    #
    # :call-seq:
    #   sort_it -> array
    #
    def sort_it
      sort { |left, right| right.last <=> left.last }
    end

    private

    # filter out unwanted results
    #
    def filter
      run_blacklist
      filter_low
    end

    # filter low ranked keywords (weight of zero or less)
    #
    def filter_low
      delete_if do |key, value|
        value <= 0
      end
    end

    # remove blacklisted keywords
    #
    def run_blacklist
      delete_if do |key, value|
        BLACKLIST.include? key.downcase
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
|
2
|
+
require "content"
|
3
|
+
require "test/unit"
|
4
|
+
|
5
|
+
# Unit tests for Highscore::Content.
class TestContent < Test::Unit::TestCase
  def setup
    @text = "This is some text"
    @content = Highscore::Content.new(@text)
  end

  # the original text is exposed unchanged
  def test_content
    assert_equal @text, @content.content
  end

  def test_keywords
    assert_instance_of(Highscore::Keywords, @content.keywords)
  end

  # words of one or two characters are not counted as keywords
  def test_keywords_skip_short_words
    assert !@content.keywords.key?('is')
  end

  # every occurrence of a word adds to its weight
  def test_keywords_count_occurrences
    content = Highscore::Content.new("ruby ruby rails")

    assert_equal 2.0, content.keywords['ruby']
    assert_equal 1.0, content.keywords['rails']
  end

  # options changed through configure/set influence the weights
  def test_configure
    content = Highscore::Content.new("ruby ruby")
    content.configure do
      set :multiplier, 2
    end

    assert_equal 4.0, content.keywords['ruby']
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), %w{.. .. lib highscore}))
|
2
|
+
require "keywords"
|
3
|
+
require "test/unit"
|
4
|
+
|
5
|
+
# Unit tests for Highscore::Keywords.
class TestKeywords < Test::Unit::TestCase
  def setup
    @keywords = Highscore::Keywords.new
    @keywords['Ruby'] = 2
    @keywords['Sinatra'] = 3
    @keywords['Highscore'] = 1
    @keywords['the'] = 10  # blacklisted, must never show up in a ranking
  end

  def test_init
    assert_equal 0, Highscore::Keywords.new.length
  end

  def test_rank
    assert_equal 4, @keywords.length

    ranked = @keywords.rank

    assert_instance_of(Array, ranked)

    should_rank = [['Sinatra', 3], ['Ruby', 2], ['Highscore', 1]]
    assert_equal should_rank, ranked
  end

  def test_rank_empty
    assert_equal [], Highscore::Keywords.new.rank
  end

  # keywords with a weight of zero or less are removed by rank
  def test_rank_removes_low_weights
    @keywords['Zero'] = 0
    @keywords['Negative'] = -1

    should_rank = [['Sinatra', 3], ['Ruby', 2], ['Highscore', 1]]
    assert_equal should_rank, @keywords.rank
  end

  def test_top
    assert_equal [['Sinatra', 3]], @keywords.top(1)
  end

  # without an argument top returns up to ten keywords
  def test_top_default
    should_rank = [['Sinatra', 3], ['Ruby', 2], ['Highscore', 1]]
    assert_equal should_rank, @keywords.top
  end

  # asking for more keywords than exist returns all of them
  def test_top_more_than_available
    should_rank = [['Sinatra', 3], ['Ruby', 2], ['Highscore', 1]]
    assert_equal should_rank, @keywords.top(50)
  end

  def test_top_empty
    assert_equal [], Highscore::Keywords.new.top(0)
  end

  def test_sort
    keywords = Highscore::Keywords.new
    keywords['Test'] = 1
    keywords['Foobar'] = 2

    assert_equal [['Foobar', 2], ['Test', 1]], keywords.sort_it
  end
end
|
File without changes
|
data/version.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.1
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: highscore
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Dominik Liebler
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-01-18 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bones
|
16
|
+
requirement: &70306539324720 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 3.7.3
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70306539324720
|
25
|
+
description: Rank keywords in long texts.
|
26
|
+
email: liebler.dominik@googlemail.com
|
27
|
+
executables:
|
28
|
+
- highscore
|
29
|
+
extensions: []
|
30
|
+
extra_rdoc_files:
|
31
|
+
- History.txt
|
32
|
+
- bin/highscore
|
33
|
+
files:
|
34
|
+
- .gitignore
|
35
|
+
- History.txt
|
36
|
+
- README.md
|
37
|
+
- Rakefile
|
38
|
+
- bin/highscore
|
39
|
+
- lib/highscore.rb
|
40
|
+
- lib/highscore/content.rb
|
41
|
+
- lib/highscore/keywords.rb
|
42
|
+
- test/highscore/test_content.rb
|
43
|
+
- test/highscore/test_keywords.rb
|
44
|
+
- test/test_highscore.rb
|
45
|
+
- version.txt
|
46
|
+
homepage: http://thewebdev.de
|
47
|
+
licenses: []
|
48
|
+
post_install_message:
|
49
|
+
rdoc_options:
|
50
|
+
- --main
|
51
|
+
- README.md
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
requirements: []
|
67
|
+
rubyforge_project: highscore
|
68
|
+
rubygems_version: 1.8.11
|
69
|
+
signing_key:
|
70
|
+
specification_version: 3
|
71
|
+
summary: Rank keywords in long texts.
|
72
|
+
test_files:
|
73
|
+
- test/highscore/test_content.rb
|
74
|
+
- test/highscore/test_keywords.rb
|
75
|
+
- test/test_highscore.rb
|