highscore 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +10 -0
- data/History.txt +5 -0
- data/README.md +7 -4
- data/bin/highscore +77 -1
- data/lib/highscore/content.rb +32 -10
- data/lib/highscore/keyword.rb +5 -0
- data/lib/highscore/string.rb +10 -0
- data/test/highscore/test_blacklist.rb +1 -1
- data/test/highscore/test_content.rb +12 -0
- data/test/highscore/test_keyword.rb +4 -0
- data/test/highscore/test_string.rb +8 -0
- data/version.txt +1 -1
- metadata +5 -4
data/.travis.yml
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.8.7
|
4
|
+
- 1.9.2
|
5
|
+
- 1.9.3
|
6
|
+
- jruby-18mode # JRuby in 1.8 mode
|
7
|
+
- jruby-19mode # JRuby in 1.9 mode
|
8
|
+
- rbx-18mode
|
9
|
+
# - rbx-19mode # currently in active development, may or may not work for your project
|
10
|
+
before_install: gem install bones
|
data/History.txt
CHANGED
data/README.md
CHANGED
@@ -7,9 +7,11 @@ Features
|
|
7
7
|
--------
|
8
8
|
|
9
9
|
* configurable to rank different types of words differently (uppercase, long words, etc.)
|
10
|
+
* rate based on amount (%) of vowels and consonants in a string
|
10
11
|
* directly get keywords from String objects
|
11
12
|
* blacklist words via a plain text file, String or an Array of words
|
12
13
|
* merge together Keywords from multiple sources
|
14
|
+
* contains a CLI tool that operates on STDIN/OUT and is configurable via parameters
|
13
15
|
|
14
16
|
Examples
|
15
17
|
--------
|
@@ -21,11 +23,10 @@ text.configure do
|
|
21
23
|
set :upper_case, 3
|
22
24
|
set :long_words, 2
|
23
25
|
set :long_words_threshold, 15
|
26
|
+
set :vowels, 1 # => default = 0 = not considered
|
27
|
+
set :consonants, 5 # => default = 0 = not considered
|
24
28
|
end
|
25
29
|
|
26
|
-
# get all keywords
|
27
|
-
text.keywords.rank => Array
|
28
|
-
|
29
30
|
# get only the top 50 keywords
|
30
31
|
text.keywords.top(50).each do |keyword|
|
31
32
|
keyword.text # => keyword text
|
@@ -44,7 +45,7 @@ can iterate over. Each object in there is a Highscore::Keyword that has methods
|
|
44
45
|
```ruby
|
45
46
|
keywords = "Foo bar is not bar baz".keywords(Highscore::Blacklist.load(['baz']))
|
46
47
|
|
47
|
-
keywords.each do |k|
|
48
|
+
keywords.rank.each do |k|
|
48
49
|
puts "#{k.text} #{k.weight}"
|
49
50
|
end
|
50
51
|
|
@@ -54,6 +55,8 @@ end
|
|
54
55
|
# not 1.0
|
55
56
|
```
|
56
57
|
|
58
|
+
Have a look at bin/highscore: you can run highscore from the command line and feed it text on STDIN.
|
59
|
+
|
57
60
|
Using a custom blacklist to ignore keywords
|
58
61
|
-------------------------------------------
|
59
62
|
|
data/bin/highscore
CHANGED
@@ -1,7 +1,83 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'optparse'
|
4
|
+
|
3
5
|
root = File.expand_path('../..', __FILE__)
|
4
6
|
require File.join(root, %w[lib highscore])
|
5
7
|
|
6
|
-
|
8
|
+
options = {
|
9
|
+
:emphasis => {},
|
10
|
+
:short => false
|
11
|
+
}
|
12
|
+
|
13
|
+
optparse = OptionParser.new do |opts|
|
14
|
+
opts.on('-h', '--help', 'display this help message') do
|
15
|
+
puts opts
|
16
|
+
exit
|
17
|
+
end
|
18
|
+
|
19
|
+
# blacklist file
|
20
|
+
opts.on('-b', '--blacklist FILEPATH', 'specify a blacklist file that should be used instead of the default') do |filepath|
|
21
|
+
options[:blacklist] = Highscore::Blacklist.load_file(filepath)
|
22
|
+
end
|
23
|
+
|
24
|
+
# general multiplier
|
25
|
+
opts.on('-m', '--multiplier MULTIPLIER', 'set the global rank multiplier') do |multiplier|
|
26
|
+
options[:emphasis][:multiplier] = multiplier.to_f
|
27
|
+
end
|
28
|
+
|
29
|
+
# don't print rank weight
|
30
|
+
opts.on('-s', '--short', 'don\'t print rank weight') do
|
31
|
+
options[:short] = true
|
32
|
+
end
|
33
|
+
|
34
|
+
# upper case
|
35
|
+
opts.on('-u', '--uppercase MULTIPLIER', 'rank multiplier for uppercase words (default is 3.0)') do |u|
|
36
|
+
options[:emphasis][:upper_case] = u.to_f
|
37
|
+
end
|
38
|
+
|
39
|
+
# vowels
|
40
|
+
opts.on('--vowels MULTIPLIER', 'multiplier for vowels') do |u|
|
41
|
+
options[:emphasis][:vowels] = u.to_i
|
42
|
+
end
|
43
|
+
|
44
|
+
# consonants
|
45
|
+
opts.on('--consonants MULTIPLIER', 'multiplier for consonants') do |u|
|
46
|
+
options[:emphasis][:consonants] = u.to_i
|
47
|
+
end
|
48
|
+
|
49
|
+
# long words
|
50
|
+
opts.on('-l', '--longwords MULTIPLIER', 'rank multiplier for long words (default is 2.0)') do |u|
|
51
|
+
options[:emphasis][:long_words] = u.to_f
|
52
|
+
end
|
53
|
+
|
54
|
+
# long words threshold
|
55
|
+
opts.on('-t', '--longwordsthreshold THRESHOLD', 'threshold for long words (default is 15 chars)') do |u|
|
56
|
+
options[:emphasis][:long_words_threshold] = u.to_i
|
57
|
+
end
|
58
|
+
|
59
|
+
opts.on('-n', '--top N', 'show only the top N keywords') do |u|
|
60
|
+
options[:top] = u.to_i
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
optparse.parse!
|
65
|
+
|
66
|
+
text = STDIN.read.to_s
|
67
|
+
keywords = text.keywords(options[:blacklist]) do |content|
|
68
|
+
options[:emphasis].each do |key,value|
|
69
|
+
content.set key, value
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
if options[:top]
|
74
|
+
keywords = keywords.top(options[:top])
|
75
|
+
else
|
76
|
+
keywords = keywords.rank
|
77
|
+
end
|
7
78
|
|
79
|
+
keywords.each do |keyword|
|
80
|
+
print keyword.text
|
81
|
+
print " #{keyword.weight}" unless options[:short]
|
82
|
+
puts
|
83
|
+
end
|
data/lib/highscore/content.rb
CHANGED
@@ -18,7 +18,9 @@ module Highscore
|
|
18
18
|
:multiplier => 1.0,
|
19
19
|
:upper_case => 3.0,
|
20
20
|
:long_words => 2.0,
|
21
|
-
:long_words_threshold => 15
|
21
|
+
:long_words_threshold => 15,
|
22
|
+
:vowels => 0,
|
23
|
+
:consonants => 0
|
22
24
|
}
|
23
25
|
end
|
24
26
|
|
@@ -43,21 +45,41 @@ module Highscore
|
|
43
45
|
keywords = Keywords.new
|
44
46
|
|
45
47
|
Keywords.find_keywords(@content, @blacklist).each do |text|
|
46
|
-
weight = @emphasis[:multiplier]
|
47
48
|
text = text.to_s
|
49
|
+
keywords << Highscore::Keyword.new(text, weight(text))
|
50
|
+
end
|
51
|
+
|
52
|
+
keywords
|
53
|
+
end
|
48
54
|
|
49
|
-
|
50
|
-
weight *= @emphasis[:long_words]
|
51
|
-
end
|
55
|
+
private
|
52
56
|
|
53
|
-
|
54
|
-
|
55
|
-
|
57
|
+
# weight a single text keyword
|
58
|
+
#
|
59
|
+
def weight(text)
|
60
|
+
weight = @emphasis[:multiplier]
|
56
61
|
|
57
|
-
|
62
|
+
if text.length >= @emphasis[:long_words_threshold]
|
63
|
+
weight *= @emphasis[:long_words]
|
58
64
|
end
|
59
65
|
|
60
|
-
|
66
|
+
if text[0,1] == text[0,1].upcase
|
67
|
+
weight *= @emphasis[:upper_case]
|
68
|
+
end
|
69
|
+
|
70
|
+
weight += vowels(text)
|
71
|
+
weight += consonants(text)
|
72
|
+
weight
|
73
|
+
end
|
74
|
+
|
75
|
+
def vowels(text)
|
76
|
+
percent = text.vowels.length / text.length.to_f
|
77
|
+
percent * @emphasis[:vowels]
|
78
|
+
end
|
79
|
+
|
80
|
+
def consonants(text)
|
81
|
+
percent = text.consonants.length / text.length.to_f
|
82
|
+
percent * @emphasis[:consonants]
|
61
83
|
end
|
62
84
|
end
|
63
85
|
end
|
data/lib/highscore/keyword.rb
CHANGED
data/lib/highscore/string.rb
CHANGED
@@ -30,4 +30,16 @@ class TestContent < Test::Unit::TestCase
|
|
30
30
|
content = Highscore::Content.new content
|
31
31
|
assert_equal 1, content.keywords.length
|
32
32
|
end
|
33
|
+
|
34
|
+
def test_vowels_and_consonants
|
35
|
+
keywords = 'foobar RubyGems'.keywords do
|
36
|
+
set :vowels, 2
|
37
|
+
set :consonants, 3
|
38
|
+
set :upper_case, 1
|
39
|
+
set :long_words, 1
|
40
|
+
end
|
41
|
+
|
42
|
+
assert_equal 3.75, keywords.first.weight
|
43
|
+
assert_equal 3.5, keywords.last.weight
|
44
|
+
end
|
33
45
|
end
|
@@ -13,6 +13,14 @@ class TestString < Test::Unit::TestCase
|
|
13
13
|
assert_equal 0, keywords.length
|
14
14
|
end
|
15
15
|
|
16
|
+
def test_vowels
|
17
|
+
assert_equal("eoaiu", "feobariu".vowels)
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_consonants
|
21
|
+
assert_equal("fbr", "feobariu".consonants)
|
22
|
+
end
|
23
|
+
|
16
24
|
def test_default_blacklist
|
17
25
|
keywords = "the Ruby Ruby Ruby Hacker".keywords
|
18
26
|
assert_equal 2, keywords.length
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.2
|
1
|
+
0.4.3
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: highscore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.4.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bones
|
16
|
-
requirement: &
|
16
|
+
requirement: &70111402827500 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: 3.7.3
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70111402827500
|
25
25
|
description: Find and rank keywords in long texts.
|
26
26
|
email: liebler.dominik@googlemail.com
|
27
27
|
executables:
|
@@ -34,6 +34,7 @@ extra_rdoc_files:
|
|
34
34
|
- test/fixtures/blacklist.txt
|
35
35
|
files:
|
36
36
|
- .gitignore
|
37
|
+
- .travis.yml
|
37
38
|
- History.txt
|
38
39
|
- README.md
|
39
40
|
- Rakefile
|