highscore 1.1.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/History.txt +6 -0
- data/README.md +11 -0
- data/bin/highscore +16 -0
- data/lib/highscore.rb +1 -0
- data/lib/highscore/bonuslist.rb +12 -0
- data/lib/highscore/content.rb +17 -2
- data/lib/highscore/string.rb +2 -2
- data/test/highscore/test_bonuslist.rb +61 -0
- data/test/highscore/test_content.rb +8 -0
- data/version.txt +1 -1
- metadata +51 -30
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ODQ1NGUyMjdjNGUzNjdlYTA4MjFlYjViMmY0NzM1MjBhMTU2ODlhOQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MTVmNzMyOTVhZjFkNjBhMTFjODc5NWM2MzdlN2E1NTljYzNjY2ExMw==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZGVlOWUwNTgxM2MwMTlkMGMzN2MzYjViYjdhZDkxMjkxYmZiZTNkYTEzOThi
|
10
|
+
Yzg2Y2E4MjdmOGE1NWJhNDMzYjBkZGM4MDMyNzA4ODEwMmFiMTVmYWQyMzk2
|
11
|
+
ODIwOTIwMjRkMzgyN2E1Y2QzNTMxYjFmNzllM2NlOTE2ZjA4ZjM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NDZlMzFiYjVjYmNiMDcyOGVlZGI1YWZkYmY1YWZhNWI5ZWRmNzgyNWU1NjZi
|
14
|
+
NzJlZDEzZjNmYWJhOGJhMWM3NTE1NDJiNDhlYWI1MTVkNmI1MjNhNzY0OTg0
|
15
|
+
MDhjZGY5MDkxY2FjZWU4NTU1NzcwYzNiNzA4MGVjZDlhMDVmMGI=
|
data/History.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
== 1.2.0 / 2013-12-06
|
2
|
+
|
3
|
+
* configurable minimum word length
|
4
|
+
* bonus words get rated higher than normal words (configurable just like blacklists)
|
5
|
+
* (Thanks to Tim-B for the new features)
|
6
|
+
|
1
7
|
== 1.1.0 / 2013-04-
|
2
8
|
|
3
9
|
* added support for custom word ignore handlers using lambda functions
|
data/README.md
CHANGED
@@ -15,6 +15,7 @@ Easily find and rank keywords in long texts.
|
|
15
15
|
* merge together Keywords from multiple sources
|
16
16
|
* contains a CLI tool that operates on STDIN/OUT and is configurable via parameters
|
17
17
|
* can use `bloomfilter-rb` gem for better performance (optional)
|
18
|
+
* words on the bonus list will receive a higher score
|
18
19
|
|
19
20
|
## Installation
|
20
21
|
|
@@ -38,6 +39,8 @@ text.configure do
|
|
38
39
|
set :upper_case, 3
|
39
40
|
set :long_words, 2
|
40
41
|
set :long_words_threshold, 15
|
42
|
+
set :short_words_threshold, 3 # => default: 2
|
43
|
+
set :bonus_multiplier, 2 # => default: 3
|
41
44
|
set :vowels, 1 # => default: 0 = not considered
|
42
45
|
set :consonants, 5 # => default: 0 = not considered
|
43
46
|
set :ignore_case, true # => default: false
|
@@ -106,6 +109,14 @@ whitelist = Highscore::Whitelist.load %w{these are valid keywords}
|
|
106
109
|
content = Highscore::Content.new "invalid words", whitelist
|
107
110
|
```
|
108
111
|
|
112
|
+
### Using bonus words
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
# construct and inject it just like a blacklist
|
116
|
+
bonuslist = Highscore::Bonuslist.load %w{bonus words}
|
117
|
+
content = Highscore::Content.new "A string with bonus words in it", bonuslist
|
118
|
+
```
|
119
|
+
|
109
120
|
## I18n
|
110
121
|
|
111
122
|
```ruby
|
data/bin/highscore
CHANGED
@@ -26,11 +26,21 @@ optparse = OptionParser.new do |opts|
|
|
26
26
|
options[:wordlist] = Highscore::Whitelist.load_file(filepath)
|
27
27
|
end
|
28
28
|
|
29
|
+
# bonus word file
|
30
|
+
opts.on('--bonuslist FILEPATH', 'specify a bonus word file') do |filepath|
|
31
|
+
options[:bonus_list] = Highscore::Bonuslist.load_file(filepath)
|
32
|
+
end
|
33
|
+
|
29
34
|
# general multiplier
|
30
35
|
opts.on('-m', '--multiplier MULTIPLIER', 'set the global rank multiplier') do |multiplier|
|
31
36
|
options[:emphasis][:multiplier] = multiplier.to_f
|
32
37
|
end
|
33
38
|
|
39
|
+
# bonus multiplier
|
40
|
+
opts.on('--bonusmultiplier MULTIPLIER', 'set the bonus word multiplier') do |multiplier|
|
41
|
+
options[:emphasis][:bonus_multiplier] = multiplier.to_f
|
42
|
+
end
|
43
|
+
|
34
44
|
opts.on('--no-ignore-short', 'don\'t ignore short words (<= 2 chars)') do
|
35
45
|
options[:emphasis][:ignore_short_words] = false
|
36
46
|
end
|
@@ -68,6 +78,12 @@ optparse = OptionParser.new do |opts|
|
|
68
78
|
options[:emphasis][:long_words_threshold] = u.to_i
|
69
79
|
end
|
70
80
|
|
81
|
+
# short words threshold
|
82
|
+
opts.on('--shortwordsthreshold THRESHOLD', 'threshold for short words (default is 2 chars)') do |u|
|
83
|
+
options[:emphasis][:short_words_threshold] = u.to_i
|
84
|
+
end
|
85
|
+
|
86
|
+
|
71
87
|
opts.on('-n', '--top N', 'show only the top N keywords') do |u|
|
72
88
|
options[:top] = u.to_i
|
73
89
|
end
|
data/lib/highscore.rb
CHANGED
data/lib/highscore/content.rb
CHANGED
@@ -10,13 +10,15 @@ module Highscore
|
|
10
10
|
# @param wordlist Highscore::Wordlist
|
11
11
|
def initialize(content, wordlist = nil)
|
12
12
|
@content = content
|
13
|
-
@whitelist = @blacklist = nil
|
13
|
+
@whitelist = @blacklist = bonuslist = nil
|
14
14
|
@language_wordlists = {}
|
15
15
|
|
16
16
|
if wordlist.nil?
|
17
17
|
@blacklist = Highscore::Blacklist.load_default_file
|
18
18
|
elsif wordlist.kind_of? Highscore::Blacklist
|
19
19
|
@blacklist = wordlist
|
20
|
+
elsif wordlist.kind_of? Highscore::Bonuslist
|
21
|
+
bonuslist = wordlist
|
20
22
|
else
|
21
23
|
@whitelist = wordlist
|
22
24
|
end
|
@@ -25,6 +27,9 @@ module Highscore
|
|
25
27
|
:multiplier => 1.0,
|
26
28
|
:upper_case => 3.0,
|
27
29
|
:long_words => 2.0,
|
30
|
+
:short_words_threshold => 2,
|
31
|
+
:bonus_multiplier => 3.0,
|
32
|
+
:bonus_list => bonuslist,
|
28
33
|
:long_words_threshold => 15,
|
29
34
|
:vowels => 0,
|
30
35
|
:consonants => 0,
|
@@ -34,6 +39,7 @@ module Highscore
|
|
34
39
|
:word_pattern => /\p{Word}+/u,
|
35
40
|
:stemming => false
|
36
41
|
}
|
42
|
+
|
37
43
|
end
|
38
44
|
|
39
45
|
# configure ranking
|
@@ -120,7 +126,7 @@ module Highscore
|
|
120
126
|
#
|
121
127
|
# @return TrueClass FalseClass
|
122
128
|
def ignore? word
|
123
|
-
ignore = word.short?
|
129
|
+
ignore = word.short?(@emphasis[:short_words_threshold])
|
124
130
|
|
125
131
|
# exception: allow short words
|
126
132
|
ignore = (not allow_short_words?) if ignore
|
@@ -172,10 +178,19 @@ module Highscore
|
|
172
178
|
weight *= @emphasis[:upper_case]
|
173
179
|
end
|
174
180
|
|
181
|
+
weight += bonus(text)
|
175
182
|
weight += vowels(text)
|
176
183
|
weight + consonants(text)
|
177
184
|
end
|
178
185
|
|
186
|
+
def bonus(text)
|
187
|
+
return 0 if not @emphasis[:bonus_list].kind_of? Highscore::Bonuslist
|
188
|
+
if @emphasis[:bonus_list].include?(text)
|
189
|
+
return @emphasis[:multiplier] * @emphasis[:bonus_multiplier]
|
190
|
+
end
|
191
|
+
return 0
|
192
|
+
end
|
193
|
+
|
179
194
|
# weight the vowels on a text
|
180
195
|
#
|
181
196
|
# @param text String
|
data/lib/highscore/string.rb
CHANGED
@@ -0,0 +1,61 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../test_highscore'
|
2
|
+
|
3
|
+
class TestBonuslist < Highscore::TestCase
|
4
|
+
def test_is_wordlist
|
5
|
+
bonuslist = Highscore::Bonuslist.new
|
6
|
+
assert bonuslist.kind_of? Highscore::Bonuslist
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_bonus_content
|
10
|
+
bonuslist = Highscore::Bonuslist.load %w{Hacker}
|
11
|
+
|
12
|
+
text = Highscore::Content.new "Cats Cats Cats Cats Ruby Hacker", bonuslist
|
13
|
+
|
14
|
+
results = text.keywords.rank
|
15
|
+
|
16
|
+
assert_equal results[0].text, "Cats"
|
17
|
+
assert_equal results[1].text, "Hacker"
|
18
|
+
assert_equal results[2].text, "Ruby"
|
19
|
+
|
20
|
+
assert_equal results[0].weight, 12.0
|
21
|
+
assert_equal results[1].weight, 6.0
|
22
|
+
assert_equal results[2].weight, 3.0
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_repeated_word
|
26
|
+
bonuslist = Highscore::Bonuslist.load %w{Hacker}
|
27
|
+
|
28
|
+
text = Highscore::Content.new "Cats Hacker Cats Cats Ruby Hacker", bonuslist
|
29
|
+
|
30
|
+
results = text.keywords.rank
|
31
|
+
|
32
|
+
assert_equal results[0].text, "Hacker"
|
33
|
+
assert_equal results[1].text, "Cats"
|
34
|
+
assert_equal results[2].text, "Ruby"
|
35
|
+
|
36
|
+
assert_equal results[0].weight, 12.0
|
37
|
+
assert_equal results[1].weight, 9.0
|
38
|
+
assert_equal results[2].weight, 3.0
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_bonus_option
|
42
|
+
|
43
|
+
bonuslist = Highscore::Bonuslist.load %w{Hacker}
|
44
|
+
|
45
|
+
text = Highscore::Content.new "Cats Hacker Cats Cats Ruby Hacker", bonuslist
|
46
|
+
|
47
|
+
text.configure do
|
48
|
+
set :bonus_multiplier, 4
|
49
|
+
end
|
50
|
+
|
51
|
+
results = text.keywords.rank
|
52
|
+
|
53
|
+
assert_equal results[0].text, "Hacker"
|
54
|
+
assert_equal results[1].text, "Cats"
|
55
|
+
assert_equal results[2].text, "Ruby"
|
56
|
+
|
57
|
+
assert_equal results[0].weight, 14.0
|
58
|
+
assert_equal results[1].weight, 9.0
|
59
|
+
assert_equal results[2].weight, 3.0
|
60
|
+
end
|
61
|
+
end
|
@@ -60,6 +60,14 @@ class TestContent < Highscore::TestCase
|
|
60
60
|
assert_equal 4, keywords.length
|
61
61
|
end
|
62
62
|
|
63
|
+
def test_rank_short_words_limit
|
64
|
+
keywords = '56789 as 444 cat is foobar'.keywords do
|
65
|
+
set :short_words_threshold, 3
|
66
|
+
end
|
67
|
+
|
68
|
+
assert_equal 1, keywords.length
|
69
|
+
end
|
70
|
+
|
63
71
|
def test_ignore_custom
|
64
72
|
keywords = 'foobar a3832'.keywords do
|
65
73
|
set :ignore, lambda { |w| w.gsub(/[^0-9]/, '').length > 2 }
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1.0
|
1
|
+
1.2.0
|
metadata
CHANGED
@@ -1,38 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: highscore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Dominik Liebler
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-12-06 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: simplecov
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 0.6.4
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.6.4
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: whatlanguage
|
27
|
-
requirement:
|
28
|
-
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.0.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
37
|
requirements:
|
30
38
|
- - ! '>='
|
31
39
|
- !ruby/object:Gem::Version
|
32
40
|
version: 1.0.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bloomfilter-rb
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.1.1
|
33
48
|
type: :runtime
|
34
49
|
prerelease: false
|
35
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.1.1
|
36
55
|
description: Find and rank keywords in text.
|
37
56
|
email: liebler.dominik@googlemail.com
|
38
57
|
executables:
|
@@ -44,23 +63,25 @@ extra_rdoc_files:
|
|
44
63
|
files:
|
45
64
|
- bin/highscore
|
46
65
|
- lib/blacklist.txt
|
47
|
-
- lib/highscore
|
48
|
-
- lib/highscore/
|
49
|
-
- lib/highscore/keyword.rb
|
66
|
+
- lib/highscore.rb
|
67
|
+
- lib/highscore/wordlist.rb
|
50
68
|
- lib/highscore/keywords.rb
|
51
|
-
- lib/highscore/
|
69
|
+
- lib/highscore/bonuslist.rb
|
52
70
|
- lib/highscore/whitelist.rb
|
53
|
-
- lib/highscore/
|
54
|
-
- lib/highscore.rb
|
55
|
-
-
|
56
|
-
-
|
57
|
-
- test/highscore/
|
58
|
-
- test/highscore/test_keyword.rb
|
59
|
-
- test/highscore/test_keywords.rb
|
71
|
+
- lib/highscore/keyword.rb
|
72
|
+
- lib/highscore/string.rb
|
73
|
+
- lib/highscore/content.rb
|
74
|
+
- lib/highscore/blacklist.rb
|
75
|
+
- test/highscore/test_bonuslist.rb
|
60
76
|
- test/highscore/test_multiple_blacklists.rb
|
61
|
-
- test/highscore/
|
77
|
+
- test/highscore/test_keyword.rb
|
62
78
|
- test/highscore/test_whitelist.rb
|
63
79
|
- test/highscore/test_wordlist.rb
|
80
|
+
- test/highscore/test_string.rb
|
81
|
+
- test/highscore/test_content.rb
|
82
|
+
- test/highscore/test_blacklist.rb
|
83
|
+
- test/highscore/test_keywords.rb
|
84
|
+
- test/fixtures/blacklist.txt
|
64
85
|
- test/test_highscore.rb
|
65
86
|
- README.md
|
66
87
|
- History.txt
|
@@ -68,6 +89,7 @@ files:
|
|
68
89
|
- version.txt
|
69
90
|
homepage: http://domnikl.github.com/highscore
|
70
91
|
licenses: []
|
92
|
+
metadata: {}
|
71
93
|
post_install_message:
|
72
94
|
rdoc_options:
|
73
95
|
- --main
|
@@ -75,31 +97,30 @@ rdoc_options:
|
|
75
97
|
require_paths:
|
76
98
|
- lib
|
77
99
|
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
-
none: false
|
79
100
|
requirements:
|
80
101
|
- - ! '>='
|
81
102
|
- !ruby/object:Gem::Version
|
82
103
|
version: '0'
|
83
104
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
-
none: false
|
85
105
|
requirements:
|
86
106
|
- - ! '>='
|
87
107
|
- !ruby/object:Gem::Version
|
88
108
|
version: '0'
|
89
109
|
requirements: []
|
90
110
|
rubyforge_project: highscore
|
91
|
-
rubygems_version:
|
111
|
+
rubygems_version: 2.0.3
|
92
112
|
signing_key:
|
93
113
|
specification_version: 3
|
94
114
|
summary: Easily find and rank keywords in long texts.
|
95
115
|
test_files:
|
96
|
-
- test/
|
97
|
-
- test/highscore/test_blacklist.rb
|
98
|
-
- test/highscore/test_content.rb
|
99
|
-
- test/highscore/test_keyword.rb
|
100
|
-
- test/highscore/test_keywords.rb
|
116
|
+
- test/highscore/test_bonuslist.rb
|
101
117
|
- test/highscore/test_multiple_blacklists.rb
|
102
|
-
- test/highscore/
|
118
|
+
- test/highscore/test_keyword.rb
|
103
119
|
- test/highscore/test_whitelist.rb
|
104
120
|
- test/highscore/test_wordlist.rb
|
121
|
+
- test/highscore/test_string.rb
|
122
|
+
- test/highscore/test_content.rb
|
123
|
+
- test/highscore/test_blacklist.rb
|
124
|
+
- test/highscore/test_keywords.rb
|
125
|
+
- test/fixtures/blacklist.txt
|
105
126
|
- test/test_highscore.rb
|